步骤 2 : 模仿和排错 步骤 3 : 两种方式 步骤 4 : 第一种 步骤 5 : 第二种
老规矩,先下载右上角的可运行项目,配置运行起来,确认可用之后,再学习做了哪些步骤以达到这样的效果。
在确保可运行项目能够正确无误地运行之后,再严格照着教程的步骤,对代码模仿一遍。
模仿过程难免代码有出入,导致无法得到期望的运行结果,此时此刻通过比较正确答案 ( 可运行项目 ) 和自己的代码,来定位问题所在。 采用这种方式,学习有效果,排错有效率,可以较为明显地提升学习速度,跨过学习路上的各个槛。 推荐使用diffmerge软件,进行文件夹比较。把你自己做的项目文件夹,和我的可运行项目文件夹进行比较。 这个软件非常实用,可以找出文件夹里哪些文件存在差异,并且清晰地标记出来 这里提供了绿色安装和使用教程:diffmerge 下载和使用教程
分页查询是很常见的需求,比如要查询第10页,每页10条数据。
Lucene 分页通常来讲有两种方式: 第一种是把100条数据查出来,然后取最后10条。 优点是快,缺点是对内存消耗大。 第二种是把第90条查询出来,然后基于这一条,通过searchAfter方法查询10条数据。 优点是内存消耗小,缺点是比第一种更慢 private static ScoreDoc[] pageSearch1(Query query, IndexSearcher searcher, int pageNow, int pageSize) throws IOException { TopDocs topDocs = searcher.search(query, pageNow*pageSize); System.out.println("查询到的总条数\t"+topDocs.totalHits); ScoreDoc [] alllScores = topDocs.scoreDocs; List<ScoreDoc> hitScores = new ArrayList<>(); int start = (pageNow -1)*pageSize ; int end = pageSize*pageNow; for(int i=start;i<end;i++) hitScores.add(alllScores[i]); ScoreDoc[] hits = hitScores.toArray(new ScoreDoc[]{}); return hits; } 一共查出 pageNow*pageSize条,然后取最后pageSize条 package com.how2java;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.wltea.analyzer.lucene.IKAnalyzer;
public class TestLucene {

    /**
     * Demo entry point: builds an in-memory index over product data, then runs a
     * paged search (page 1, 10 hits per page) and prints the highlighted results.
     */
    public static void main(String[] args) throws Exception {
        // 1. Chinese word segmenter (IK analyzer)
        IKAnalyzer analyzer = new IKAnalyzer();
        // 2. Build the index
        Directory index = createIndex(analyzer);
        // 3. Build the query against the "name" field
        String keyword = "手机";
        System.out.println("当前关键字是:"+keyword);
        Query query = new QueryParser("name", analyzer).parse(keyword);
        // 4. Search
        IndexReader reader = DirectoryReader.open(index);
        IndexSearcher searcher = new IndexSearcher(reader);
        int pageNow = 1;
        int pageSize = 10;
        ScoreDoc[] hits = pageSearch1(query, searcher, pageNow, pageSize);
        // 5. Display results
        showSearchResults(searcher, hits, query, analyzer);
        // 6. Release the reader
        reader.close();
    }

    /**
     * Pagination strategy 1: ask Lucene for the top pageNow*pageSize hits and
     * keep only the last pageSize of them. Simple and fast, but memory cost
     * grows with the page number.
     *
     * @param query    parsed Lucene query
     * @param searcher searcher over the index
     * @param pageNow  1-based page number
     * @param pageSize hits per page
     * @return the hits on the requested page; empty if the page is past the end
     * @throws IOException on index read errors
     */
    private static ScoreDoc[] pageSearch1(Query query, IndexSearcher searcher, int pageNow, int pageSize)
            throws IOException {
        TopDocs topDocs = searcher.search(query, pageNow * pageSize);
        System.out.println("查询到的总条数\t"+topDocs.totalHits);
        ScoreDoc[] allScores = topDocs.scoreDocs;
        int start = (pageNow - 1) * pageSize;
        // BUGFIX: clamp the upper bound to the hits actually returned; the
        // original read past the end of the array (AIOOBE) whenever the query
        // matched fewer than pageNow*pageSize documents.
        int end = Math.min(pageNow * pageSize, allScores.length);
        if (start >= end) {
            return new ScoreDoc[0]; // requested page is beyond the last hit
        }
        List<ScoreDoc> hitScores = new ArrayList<>(end - start);
        for (int i = start; i < end; i++) {
            hitScores.add(allScores[i]);
        }
        return hitScores.toArray(new ScoreDoc[0]);
    }

    /**
     * Prints one line per hit: rank, score, and each stored field. The "name"
     * field is run through the highlighter so matched terms are wrapped in a
     * red HTML span.
     */
    private static void showSearchResults(IndexSearcher searcher, ScoreDoc[] hits, Query query, IKAnalyzer analyzer)
            throws Exception {
        // BUGFIX: the original printed the hit-count line twice; print it once.
        System.out.println("找到 " + hits.length + " 个命中.");
        SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<span style='color:red'>", "</span>");
        Highlighter highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(query));
        System.out.println("序号\t匹配度得分\t结果");
        for (int i = 0; i < hits.length; ++i) {
            ScoreDoc scoreDoc = hits[i];
            int docId = scoreDoc.doc;
            Document d = searcher.doc(docId);
            List<IndexableField> fields = d.getFields();
            System.out.print((i + 1));
            System.out.print("\t" + scoreDoc.score);
            for (IndexableField f : fields) {
                if ("name".equals(f.name())) {
                    // Highlight the matched keywords inside the name field
                    TokenStream tokenStream = analyzer.tokenStream(f.name(), new StringReader(d.get(f.name())));
                    String fieldContent = highlighter.getBestFragment(tokenStream, d.get(f.name()));
                    System.out.print("\t" + fieldContent);
                } else {
                    System.out.print("\t" + d.get(f.name()));
                }
            }
            System.out.println("<br>");
        }
    }

    /**
     * Builds a RAM-backed index from the product file. Only roughly the first
     * 10% of the records are indexed so the demo starts quickly; progress is
     * printed once per whole percent.
     */
    private static Directory createIndex(IKAnalyzer analyzer) throws IOException {
        Directory index = new RAMDirectory();
        IndexWriterConfig config = new IndexWriterConfig(analyzer);
        String fileName = "140k_products.txt";
        List<Product> products = ProductUtil.file2list(fileName);
        int total = products.size();
        // BUGFIX: try-with-resources guarantees the writer is closed (and the
        // index committed) even if addDoc throws; the original leaked it on error.
        try (IndexWriter writer = new IndexWriter(index, config)) {
            int count = 0;
            int per = 0;
            int oldPer = 0;
            for (Product p : products) {
                addDoc(writer, p);
                count++;
                per = count * 100 / total;
                if (per != oldPer) { // report each percentage step only once
                    oldPer = per;
                    System.out.printf("索引中,总共要添加 %d 条记录,当前添加进度是: %d%% %n", total, per);
                }
                if (per > 10) { // demo shortcut: stop after ~10% of the data
                    break;
                }
            }
        }
        return index;
    }

    /** Converts one Product into a Lucene Document; every field is stored. */
    private static void addDoc(IndexWriter w, Product p) throws IOException {
        Document doc = new Document();
        doc.add(new TextField("id", String.valueOf(p.getId()), Field.Store.YES));
        doc.add(new TextField("name", p.getName(), Field.Store.YES));
        doc.add(new TextField("category", p.getCategory(), Field.Store.YES));
        doc.add(new TextField("price", String.valueOf(p.getPrice()), Field.Store.YES));
        doc.add(new TextField("place", p.getPlace(), Field.Store.YES));
        doc.add(new TextField("code", p.getCode(), Field.Store.YES));
        w.addDocument(doc);
    }
}
private static ScoreDoc[] pageSearch2(Query query, IndexSearcher searcher, int pageNow, int pageSize) throws IOException { int start = (pageNow - 1) * pageSize; if(0==start){ TopDocs topDocs = searcher.search(query, pageNow*pageSize); return topDocs.scoreDocs; } // 查询数据, 结束页面之前的数据都会查询到,但是只取本页的数据 TopDocs topDocs = searcher.search(query, start); //获取到上一页最后一条 ScoreDoc preScore= topDocs.scoreDocs[start-1]; //查询最后一条后的数据的一页数据 topDocs = searcher.searchAfter(preScore, query, pageSize); return topDocs.scoreDocs; } 首先是边界条件,如果是第一页,就直接查询了。 如果不是第一页,那么就取start-1那一条,然后再根据它通过searchAfter 来查询 package com.how2java;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.wltea.analyzer.lucene.IKAnalyzer;
public class TestLucene {

    /**
     * Demo entry point: builds an in-memory index over product data, then runs a
     * searchAfter-based paged query (page 1, 10 hits per page) and prints the
     * highlighted results.
     */
    public static void main(String[] args) throws Exception {
        // 1. Chinese word segmenter (IK analyzer)
        IKAnalyzer analyzer = new IKAnalyzer();
        // 2. Build the index
        Directory index = createIndex(analyzer);
        // 3. Build the query against the "name" field
        String keyword = "手机";
        System.out.println("当前关键字是:"+keyword);
        Query query = new QueryParser("name", analyzer).parse(keyword);
        // 4. Search
        IndexReader reader = DirectoryReader.open(index);
        IndexSearcher searcher = new IndexSearcher(reader);
        int pageNow = 1;
        int pageSize = 10;
        ScoreDoc[] hits = pageSearch2(query, searcher, pageNow, pageSize);
        // 5. Display results
        showSearchResults(searcher, hits, query, analyzer);
        // 6. Release the reader
        reader.close();
    }

    /**
     * Pagination strategy 1 (kept for comparison): fetch the top
     * pageNow*pageSize hits and keep only the last pageSize of them.
     *
     * @return the hits on the requested page; empty if the page is past the end
     */
    private static ScoreDoc[] pageSearch1(Query query, IndexSearcher searcher, int pageNow, int pageSize)
            throws IOException {
        TopDocs topDocs = searcher.search(query, pageNow * pageSize);
        System.out.println("查询到的总条数\t"+topDocs.totalHits);
        ScoreDoc[] allScores = topDocs.scoreDocs;
        int start = (pageNow - 1) * pageSize;
        // BUGFIX: clamp the upper bound to the hits actually returned; the
        // original read past the end of the array (AIOOBE) whenever the query
        // matched fewer than pageNow*pageSize documents.
        int end = Math.min(pageNow * pageSize, allScores.length);
        if (start >= end) {
            return new ScoreDoc[0]; // requested page is beyond the last hit
        }
        List<ScoreDoc> hitScores = new ArrayList<>(end - start);
        for (int i = start; i < end; i++) {
            hitScores.add(allScores[i]);
        }
        return hitScores.toArray(new ScoreDoc[0]);
    }

    /**
     * Pagination strategy 2: locate the last hit of the previous page, then use
     * IndexSearcher.searchAfter to fetch exactly one page following it. Lower
     * memory footprint than strategy 1, at the cost of an extra search.
     *
     * @param query    parsed Lucene query
     * @param searcher searcher over the index
     * @param pageNow  1-based page number
     * @param pageSize hits per page
     * @return the hits on the requested page; empty if the page is past the end
     * @throws IOException on index read errors
     */
    private static ScoreDoc[] pageSearch2(Query query, IndexSearcher searcher, int pageNow, int pageSize)
            throws IOException {
        int start = (pageNow - 1) * pageSize;
        if (0 == start) {
            // First page: no anchor document needed, just take the top hits.
            TopDocs topDocs = searcher.search(query, pageNow * pageSize);
            return topDocs.scoreDocs;
        }
        // Fetch everything up to the end of the previous page; only its last
        // entry is needed as the searchAfter anchor.
        TopDocs topDocs = searcher.search(query, start);
        // BUGFIX: the original indexed scoreDocs[start-1] unconditionally and
        // threw AIOOBE when the query matched fewer than `start` documents.
        if (topDocs.scoreDocs.length < start) {
            return new ScoreDoc[0]; // requested page is beyond the last hit
        }
        ScoreDoc preScore = topDocs.scoreDocs[start - 1];
        // Resume the search right after the anchor to get exactly this page.
        topDocs = searcher.searchAfter(preScore, query, pageSize);
        return topDocs.scoreDocs;
    }

    /**
     * Prints one line per hit: rank, score, and each stored field. The "name"
     * field is run through the highlighter so matched terms are wrapped in a
     * red HTML span.
     */
    private static void showSearchResults(IndexSearcher searcher, ScoreDoc[] hits, Query query, IKAnalyzer analyzer)
            throws Exception {
        // BUGFIX: the original printed the hit-count line twice; print it once.
        System.out.println("找到 " + hits.length + " 个命中.");
        SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<span style='color:red'>", "</span>");
        Highlighter highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(query));
        System.out.println("序号\t匹配度得分\t结果");
        for (int i = 0; i < hits.length; ++i) {
            ScoreDoc scoreDoc = hits[i];
            int docId = scoreDoc.doc;
            Document d = searcher.doc(docId);
            List<IndexableField> fields = d.getFields();
            System.out.print((i + 1));
            System.out.print("\t" + scoreDoc.score);
            for (IndexableField f : fields) {
                if ("name".equals(f.name())) {
                    // Highlight the matched keywords inside the name field
                    TokenStream tokenStream = analyzer.tokenStream(f.name(), new StringReader(d.get(f.name())));
                    String fieldContent = highlighter.getBestFragment(tokenStream, d.get(f.name()));
                    System.out.print("\t" + fieldContent);
                } else {
                    System.out.print("\t" + d.get(f.name()));
                }
            }
            System.out.println("<br>");
        }
    }

    /**
     * Builds a RAM-backed index from the product file. Only roughly the first
     * 10% of the records are indexed so the demo starts quickly; progress is
     * printed once per whole percent.
     */
    private static Directory createIndex(IKAnalyzer analyzer) throws IOException {
        Directory index = new RAMDirectory();
        IndexWriterConfig config = new IndexWriterConfig(analyzer);
        String fileName = "140k_products.txt";
        List<Product> products = ProductUtil.file2list(fileName);
        int total = products.size();
        // BUGFIX: try-with-resources guarantees the writer is closed (and the
        // index committed) even if addDoc throws; the original leaked it on error.
        try (IndexWriter writer = new IndexWriter(index, config)) {
            int count = 0;
            int per = 0;
            int oldPer = 0;
            for (Product p : products) {
                addDoc(writer, p);
                count++;
                per = count * 100 / total;
                if (per != oldPer) { // report each percentage step only once
                    oldPer = per;
                    System.out.printf("索引中,总共要添加 %d 条记录,当前添加进度是: %d%% %n", total, per);
                }
                if (per > 10) { // demo shortcut: stop after ~10% of the data
                    break;
                }
            }
        }
        return index;
    }

    /** Converts one Product into a Lucene Document; every field is stored. */
    private static void addDoc(IndexWriter w, Product p) throws IOException {
        Document doc = new Document();
        doc.add(new TextField("id", String.valueOf(p.getId()), Field.Store.YES));
        doc.add(new TextField("name", p.getName(), Field.Store.YES));
        doc.add(new TextField("category", p.getCategory(), Field.Store.YES));
        doc.add(new TextField("price", String.valueOf(p.getPrice()), Field.Store.YES));
        doc.add(new TextField("place", p.getPlace(), Field.Store.YES));
        doc.add(new TextField("code", p.getCode(), Field.Store.YES));
        w.addDocument(doc);
    }
}
HOW2J公众号,关注后实时获知最新的教程和优惠活动,谢谢。
问答区域
2021-06-21
为什么我测试两个方法耗时 都差不多 而且第二种耗时还能少一些
回答已经提交成功,正在审核。 请于 我的回答 处查看回答记录,谢谢
2018-07-11
想问问...
回答已经提交成功,正在审核。 请于 我的回答 处查看回答记录,谢谢
提问之前请登陆
提问已经提交成功,正在审核。 请于 我的提问 处查看提问记录,谢谢
|