lucene+MongoDB+IKAnalyzer 做全文搜索

kong_resty 发布于 2012/07/24 11:05
阅读 6K+
收藏 7
package sample3;

import java.io.File;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.FSDirectory;
import org.wltea.analyzer.lucene.IKAnalyzer;

import com.mongodb.DB;
import com.mongodb.DBCollection;
import com.mongodb.DBCursor;
import com.mongodb.Mongo;

/**
 * 创建索引
 * @author  zhanghaijun
 *
 */
public class Demo1 {
	public static void main(String[] args) throws Exception {
		//先在数据库中拿到要创建索引的数据 
		Mongo mongo = new Mongo();
		DB db = mongo.getDB("zhang");
		DBCollection msg = db.getCollection("test3");
		DBCursor cursor = msg.find();
		
		//是否重新创建索引文件,false:在原有的基础上追加
		boolean create = true;
		//IK中文分词器
		Analyzer analyzer = new IKAnalyzer(); 
		
		//创建索引
		IndexWriter indexWriter = new IndexWriter(FSDirectory.open(new File("E:\\lucene\\index")),
				                            analyzer, create, IndexWriter.MaxFieldLength.UNLIMITED);
		boolean exist = cursor.hasNext();
		while(exist){
			//System.out.println(cursor.next().get("text").toString());
			Document doc = new Document();
			Field fieldText = new Field("text",cursor.next().get("text").toString(),Field.Store.YES, 
					  Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
			doc.add(fieldText);
			exist = cursor.hasNext();
		}
		cursor = null;
		//optimize()方法是对索引进行优化
	    indexWriter.optimize();     
	    //最后关闭索引
	    indexWriter.close();
	}
}
package sample3;

import java.io.File;
import java.io.IOException;

import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.FSDirectory;
import org.wltea.analyzer.lucene.IKAnalyzer;
import org.wltea.analyzer.lucene.IKQueryParser;
import org.wltea.analyzer.lucene.IKSimilarity;

/**
 * 查找索引
 */
public class Demo2 {
	public static void main(String[] args) throws Exception {
		// onlysearching, so read-only=true
		IndexReader reader =IndexReader.open(FSDirectory.open(new File("E:\\lucene\\index")), true); 
		IndexSearcher searcher = new IndexSearcher(reader);
		searcher.setSimilarity(new IKSimilarity());   //在索引器中使用IKSimilarity相似度评估器 
		//String[] keys = {"4","testtest"};      //关键字数组
		//String[] fields = {"id","title"};  //搜索的字段
		//BooleanClause.Occur[] flags = {BooleanClause.Occur.MUST,BooleanClause.Occur.MUST};    //BooleanClause.Occur[]数组,它表示多个条件之间的关系 
		//使用 IKQueryParser类提供的parseMultiField方法构建多字段多条件查询
		//Query query = IKQueryParser.parseMultiField(fields,keys, flags);     //IKQueryParser多个字段搜索  
		Query query =IKQueryParser.parse("text","上海人");  //IK搜索单个字段       
		IKAnalyzer analyzer = new IKAnalyzer();
//		     Query query =MultiFieldQueryParser.parse(Version.LUCENE_CURRENT, keys, fields, flags,analyzer);   //用MultiFieldQueryParser得到query对象   
		 
//		     System.out.println("query"+query.toString()); //查询条件    
		TopScoreDocCollector topCollector = TopScoreDocCollector.create(searcher.maxDoc(), false);
		searcher.search(query,topCollector);
		
		ScoreDoc[] docs = topCollector.topDocs(3).scoreDocs;
		System.out.println(docs.length);
		reader.close(); //关闭索引   
	}
}

结果:

Exception in thread "Main Thread" java.lang.IllegalArgumentException: numHits must be > 0; please use TotalHitCountCollector if you just need the total hit count
	at org.apache.lucene.search.TopScoreDocCollector.create(TopScoreDocCollector.java:254)
	at org.apache.lucene.search.TopScoreDocCollector.create(TopScoreDocCollector.java:238)
	at sample3.Demo2.main(Demo2.java:35)

 哪位朋友分析下吧  我是按照网上的模仿下来的不知道哪里出问题了

 

mongo中的一段数据
/* 0 */
{
  "_id" : ObjectId("500d1a96df1d4d3b58245f95"),
  "username" : "zhangsan7046",
  "text" : "我是上海人,我的工作是承续员"
}

加载中
0
无夏之年
无夏之年

起码这里有问题吧

创建索引的时候,得:indexWriter.addDocument(doc);

zheng_pat
zheng_pat
也很奇怪这个地方
0
h
hotterwei

searcher.search(query,topCollector);

这句代码中的topCollector不能小于等于0

返回顶部
顶部