总结一下如何使用 Lucene 实现全文搜索。
首先导入依赖
<!-- Lucene core library --> <dependency> <groupId>org.apache.lucene</groupId> <artifactId>lucene-core</artifactId> <version>4.10.4</version> </dependency> <!-- Query parsing against analyzed (tokenized) indexes --> <dependency> <groupId>org.apache.lucene</groupId> <artifactId>lucene-queryparser</artifactId> <version>4.10.4</version> </dependency> <!-- General-purpose analyzers, suitable for English tokenization --> <dependency> <groupId>org.apache.lucene</groupId> <artifactId>lucene-analyzers-common</artifactId> <version>4.10.4</version> </dependency> <!-- Highlighting of matched search keywords --> <dependency> <groupId>org.apache.lucene</groupId> <artifactId>lucene-highlighter</artifactId> <version>4.10.4</version> </dependency> <!-- IK Analyzer: provides the IKAnalyzer class used as the index analyzer --> <dependency> <groupId>com.janeluo</groupId> <artifactId>ikanalyzer</artifactId> <version>2012_u6</version> </dependency>
创建工具类 LuceneUtil
public class LuceneUtil { private final static Logger logger = LoggerFactory.getLogger(LuceneUtil.class); public static IndexWriter getIndexWriter(String mainLogsIndexDir) throws IOException { Path path = Paths.get(mainLogsIndexDir); File indexFile = path.toFile(); if(!indexFile.exists()) { //如果文件夹不存在,则创建 indexFile.mkdirs(); } FSDirectory fsDirectory = FSDirectory.open(indexFile); IKAnalyzer ikAnalyzer = new IKAnalyzer(); IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_4_10_4,ikAnalyzer); IndexWriter indexWriter = new IndexWriter(fsDirectory, indexWriterConfig); if(IndexWriter.isLocked(fsDirectory )){ indexWriter.close(); IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_10_4 , ikAnalyzer ); return new IndexWriter(fsDirectory, config); } return indexWriter; } public static IndexSearcher getIndexSearcher(String indexDirectory){ Path path = Paths.get(indexDirectory); File file = path.toFile(); if (!file.exists()) { //如果文件夹不存在,则创建 file.mkdirs(); } try { FSDirectory directory = FSDirectory.open(file); IndexReader reader = DirectoryReader.open(directory); return new IndexSearcher(reader); } catch (IOException e) { logger.error(e.getMessage(),e); return null; } } }
添加文档到索引库
// Index every line of the log file as one Lucene document.
// The file name up to the first '.' is stored with each line.
String name = file.getName().split("\\.")[0];
List<Document> docs = new ArrayList<>();

// try-with-resources closes the reader even when reading fails part-way.
// NOTE(review): FileReader decodes with the platform default charset — confirm
// that this matches the encoding of the log files.
try (LineNumberReader lineNumberReader = new LineNumberReader(new FileReader(file))) {
    String line;
    while ((line = lineNumberReader.readLine()) != null) {
        // getLineNumber() is the 1-based number of the line that was just read.
        int lineNumber = lineNumberReader.getLineNumber();
        Document document = new Document();
        // StringField: indexed as a single exact-match term (not tokenized).
        document.add(new StringField("recordId", recordId, Field.Store.YES));
        document.add(new StringField("subRecordId", subRecordId, Field.Store.YES));
        document.add(new StringField("fileName", name, Field.Store.YES));
        document.add(new LongField("lineNumber", lineNumber, Field.Store.YES));
        // TextField: run through the analyzer so the content is full-text searchable.
        document.add(new TextField("content", line, Field.Store.YES));
        docs.add(document);
    }
}

// Closing the writer releases the index write lock; try-with-resources makes
// sure that happens even when addDocuments throws.
try (IndexWriter indexWriter = LuceneUtil.getIndexWriter(moduleLogsIndexDir)) {
    indexWriter.addDocuments(docs);
}
从索引库查询结果
IndexSearcher logsIndexSearcher = LuceneUtil.getIndexSearcher(logIndexDir); // 总查询条件 BooleanQuery booleanQuery = new BooleanQuery(); // subrecordId String subRecordId = logPageDTO.getSubRecordId(); booleanQuery.add(new TermQuery(new Term("subRecordId", subRecordId)), BooleanClause.Occur.MUST); TopDocs topDocs = logsIndexSearcher.search(booleanQuery, querySize, sort); ScoreDoc[] scoreDocs = topDocs.scoreDocs; ScoreDoc lastScoreDoc; int last = (pageNum - 1) * pageSize - 1; // 查询第一页的时候scoreDoc传null if (last < 0) { lastScoreDoc = null; } else { lastScoreDoc = scoreDocs[last]; } // 注意searchAfter与search的区别 topDocs = logsIndexSearcher.searchAfter(lastScoreDoc, booleanQuery, pageSize, sort); List<LogItemVO> logItemList = new ArrayList<>(); scoreDocs = topDocs.scoreDocs; for (ScoreDoc scoreDoc : scoreDocs) { Document doc = logsIndexSearcher.doc(scoreDoc.doc); LogItemVO logItemVO = new LogItemVO(); logItemVO.setFileName(doc.get("fileName")); logItemVO.setLineNum(Long.parseLong(doc.get("lineNumber"))); logItemVO.setContent(doc.get("content")); logItemList.add(logItemVO); }