步骤 2 : 先说没有删除前的情形 步骤 3 : 删除索引 步骤 4 : 更多删除 步骤 5 : 更新索引
索引建立好了之后,还是需要维护的,比如新增、删除和更新。 新增就是建立索引的过程,这里就不表了,本教程主要讲索引的删除和更新。
索引里的数据,其实就是一个一个的 Document 对象,那么本文就是介绍如何删除和更新这些 Document 对象。
直接使用14万条数据 里的代码,不过使用不一样的查询语句。
如图所示,通过关键字“鞭”可以查询到一条 id 是 51173 的数据。 package com.how2java;
import java.io.IOException;
import java.io.StringReader;
import java.util.List;
import java.util.Scanner;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.wltea.analyzer.lucene.IKAnalyzer;
/**
 * Baseline Lucene demo: builds an in-memory index from the product list and then
 * answers keyword queries typed on standard input, highlighting matches in the
 * {@code name} field.
 */
public class TestLucene {

    /** Maximum number of hits fetched and displayed per query. */
    private static final int HITS_PER_PAGE = 10;

    /**
     * Builds the index and runs the interactive query loop until standard input ends.
     *
     * @param args unused
     * @throws Exception if indexing, query parsing or searching fails
     */
    public static void main(String[] args) throws Exception {
        // 1. Prepare the Chinese analyzer
        IKAnalyzer analyzer = new IKAnalyzer();
        // 2. Build the index; it is closed automatically once the query loop ends
        try (Directory index = createIndex(analyzer)) {
            // 3. Query interactively
            runSearchLoop(index, analyzer);
        }
    }

    /**
     * Reads keywords line by line from standard input and prints the hits for each one.
     * Returns when standard input is exhausted.
     *
     * @param index    the directory holding the index to search
     * @param analyzer the analyzer used to parse the keyword and to highlight results
     * @throws Exception if query parsing or searching fails
     */
    private static void runSearchLoop(Directory index, IKAnalyzer analyzer) throws Exception {
        // Deliberately not closed: closing the Scanner would also close System.in.
        Scanner s = new Scanner(System.in);
        while (true) {
            System.out.print("请输入查询关键字:");
            // End of input: leave the loop instead of failing with NoSuchElementException.
            if (!s.hasNextLine()) {
                break;
            }
            String keyword = s.nextLine();
            System.out.println("当前关键字是:" + keyword);
            // Skip blank input rather than handing an empty query string to the parser.
            if (keyword.trim().isEmpty()) {
                continue;
            }
            Query query = new QueryParser("name", analyzer).parse(keyword);
            // 4. Search; the reader is closed even when searching or printing throws
            try (IndexReader reader = DirectoryReader.open(index)) {
                IndexSearcher searcher = new IndexSearcher(reader);
                ScoreDoc[] hits = searcher.search(query, HITS_PER_PAGE).scoreDocs;
                // 5. Show the results
                showSearchResults(searcher, hits, query, analyzer);
            }
        }
    }

    /**
     * Prints one row per hit: rank, score, then every stored field, with the matched
     * terms of the {@code name} field wrapped in a red {@code <span>}.
     *
     * @param searcher the searcher used to load the stored documents
     * @param hits     the hits to display
     * @param query    the query whose terms are highlighted
     * @param analyzer the analyzer used to re-tokenize the {@code name} field for highlighting
     * @throws Exception if loading a document or highlighting fails
     */
    private static void showSearchResults(IndexSearcher searcher, ScoreDoc[] hits, Query query, IKAnalyzer analyzer) throws Exception {
        SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<span style='color:red'>", "</span>");
        Highlighter highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(query));
        System.out.println("找到 " + hits.length + " 个命中.");
        System.out.println("序号\t匹配度得分\t结果");
        for (int i = 0; i < hits.length; ++i) {
            ScoreDoc scoreDoc = hits[i];
            Document d = searcher.doc(scoreDoc.doc);
            System.out.print(i + 1);
            System.out.print("\t" + scoreDoc.score);
            for (IndexableField f : d.getFields()) {
                String fieldName = f.name();
                String value = d.get(fieldName);
                if ("name".equals(fieldName)) {
                    TokenStream tokenStream = analyzer.tokenStream(fieldName, new StringReader(value));
                    String fragment = highlighter.getBestFragment(tokenStream, value);
                    // The highlighter yields null when the name holds no query term
                    // (e.g. the hit came from another field); show the raw value then.
                    System.out.print("\t" + (fragment != null ? fragment : value));
                } else {
                    System.out.print("\t" + value);
                }
            }
            System.out.println("<br>");
        }
    }

    /**
     * Loads the products from {@code 140k_products.txt} and indexes them into a new
     * {@link RAMDirectory}, printing the progress each time the percentage changes.
     *
     * @param analyzer the analyzer used to tokenize the indexed fields
     * @return the directory containing the finished index
     * @throws IOException if writing the index fails
     */
    private static Directory createIndex(IKAnalyzer analyzer) throws IOException {
        String fileName = "140k_products.txt";
        List<Product> products = ProductUtil.file2list(fileName);
        int total = products.size();
        Directory index = new RAMDirectory();
        // try-with-resources closes the writer even if adding a document fails
        try (IndexWriter writer = new IndexWriter(index, new IndexWriterConfig(analyzer))) {
            int count = 0;
            int oldPer = 0;
            for (Product p : products) {
                addDoc(writer, p);
                count++;
                // long arithmetic so that count * 100 cannot overflow for very large lists
                int per = (int) (count * 100L / total);
                if (per != oldPer) {
                    oldPer = per;
                    System.out.printf("索引中,总共要添加 %d 条记录,当前添加进度是: %d%% %n", total, per);
                }
            }
        }
        return index;
    }

    /**
     * Adds one product to the index as a document whose fields are all stored.
     *
     * @param w the writer that receives the document
     * @param p the product to index
     * @throws IOException if the writer fails
     */
    private static void addDoc(IndexWriter w, Product p) throws IOException {
        Document doc = new Document();
        // NOTE(review): id is an analyzed TextField, so an exact Term("id", ...) lookup
        // only matches if the analyzer emits the id as one unchanged token - confirm,
        // or consider a non-analyzed field type for identifiers.
        doc.add(new TextField("id", String.valueOf(p.getId()), Field.Store.YES));
        doc.add(new TextField("name", p.getName(), Field.Store.YES));
        doc.add(new TextField("category", p.getCategory(), Field.Store.YES));
        doc.add(new TextField("price", String.valueOf(p.getPrice()), Field.Store.YES));
        doc.add(new TextField("place", p.getPlace(), Field.Store.YES));
        doc.add(new TextField("code", p.getCode(), Field.Store.YES));
        w.addDocument(doc);
    }
}
删除id=51173的Document之后,如图所示,再搜索鞭字,就查询不到结果了。
删除关键代码如下,通过 Term对象删除 //删除id=51173的数据 IndexWriterConfig config = new IndexWriterConfig(analyzer); IndexWriter indexWriter = new IndexWriter(index, config); indexWriter.deleteDocuments(new Term("id", "51173")); indexWriter.commit(); indexWriter.close();
// Delete the document whose id is 51173.
// try-with-resources closes the writer even if the delete or the commit throws.
try (IndexWriter indexWriter = new IndexWriter(index, new IndexWriterConfig(analyzer))) {
    indexWriter.deleteDocuments(new Term("id", "51173"));
    indexWriter.commit();
}
package com.how2java;
import java.io.IOException;
import java.io.StringReader;
import java.util.List;
import java.util.Scanner;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.wltea.analyzer.lucene.IKAnalyzer;
/**
 * Lucene delete demo: builds an in-memory index from the product list, deletes the
 * document whose id is 51173, and then answers keyword queries typed on standard
 * input, highlighting matches in the {@code name} field.
 */
public class TestLucene {

    /** Maximum number of hits fetched and displayed per query. */
    private static final int HITS_PER_PAGE = 10;

    /**
     * Builds the index, deletes one document and runs the interactive query loop
     * until standard input ends.
     *
     * @param args unused
     * @throws Exception if indexing, deleting, query parsing or searching fails
     */
    public static void main(String[] args) throws Exception {
        // 1. Prepare the Chinese analyzer
        IKAnalyzer analyzer = new IKAnalyzer();
        // 2. Build the index; it is closed automatically once the query loop ends
        try (Directory index = createIndex(analyzer)) {
            // Delete the document whose id is 51173.
            // try-with-resources closes the writer even if the delete or commit throws.
            try (IndexWriter indexWriter = new IndexWriter(index, new IndexWriterConfig(analyzer))) {
                indexWriter.deleteDocuments(new Term("id", "51173"));
                indexWriter.commit();
            }
            // 3. Query interactively
            runSearchLoop(index, analyzer);
        }
    }

    /**
     * Reads keywords line by line from standard input and prints the hits for each one.
     * Returns when standard input is exhausted.
     *
     * @param index    the directory holding the index to search
     * @param analyzer the analyzer used to parse the keyword and to highlight results
     * @throws Exception if query parsing or searching fails
     */
    private static void runSearchLoop(Directory index, IKAnalyzer analyzer) throws Exception {
        // Deliberately not closed: closing the Scanner would also close System.in.
        Scanner s = new Scanner(System.in);
        while (true) {
            System.out.print("请输入查询关键字:");
            // End of input: leave the loop instead of failing with NoSuchElementException.
            if (!s.hasNextLine()) {
                break;
            }
            String keyword = s.nextLine();
            System.out.println("当前关键字是:" + keyword);
            // Skip blank input rather than handing an empty query string to the parser.
            if (keyword.trim().isEmpty()) {
                continue;
            }
            Query query = new QueryParser("name", analyzer).parse(keyword);
            // 4. Search; the reader is closed even when searching or printing throws
            try (IndexReader reader = DirectoryReader.open(index)) {
                IndexSearcher searcher = new IndexSearcher(reader);
                ScoreDoc[] hits = searcher.search(query, HITS_PER_PAGE).scoreDocs;
                // 5. Show the results
                showSearchResults(searcher, hits, query, analyzer);
            }
        }
    }

    /**
     * Prints one row per hit: rank, score, then every stored field, with the matched
     * terms of the {@code name} field wrapped in a red {@code <span>}.
     *
     * @param searcher the searcher used to load the stored documents
     * @param hits     the hits to display
     * @param query    the query whose terms are highlighted
     * @param analyzer the analyzer used to re-tokenize the {@code name} field for highlighting
     * @throws Exception if loading a document or highlighting fails
     */
    private static void showSearchResults(IndexSearcher searcher, ScoreDoc[] hits, Query query, IKAnalyzer analyzer) throws Exception {
        SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<span style='color:red'>", "</span>");
        Highlighter highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(query));
        System.out.println("找到 " + hits.length + " 个命中.");
        System.out.println("序号\t匹配度得分\t结果");
        for (int i = 0; i < hits.length; ++i) {
            ScoreDoc scoreDoc = hits[i];
            Document d = searcher.doc(scoreDoc.doc);
            System.out.print(i + 1);
            System.out.print("\t" + scoreDoc.score);
            for (IndexableField f : d.getFields()) {
                String fieldName = f.name();
                String value = d.get(fieldName);
                if ("name".equals(fieldName)) {
                    TokenStream tokenStream = analyzer.tokenStream(fieldName, new StringReader(value));
                    String fragment = highlighter.getBestFragment(tokenStream, value);
                    // The highlighter yields null when the name holds no query term
                    // (e.g. the hit came from another field); show the raw value then.
                    System.out.print("\t" + (fragment != null ? fragment : value));
                } else {
                    System.out.print("\t" + value);
                }
            }
            System.out.println("<br>");
        }
    }

    /**
     * Loads the products from {@code 140k_products.txt} and indexes them into a new
     * {@link RAMDirectory}, printing the progress each time the percentage changes.
     *
     * @param analyzer the analyzer used to tokenize the indexed fields
     * @return the directory containing the finished index
     * @throws IOException if writing the index fails
     */
    private static Directory createIndex(IKAnalyzer analyzer) throws IOException {
        String fileName = "140k_products.txt";
        List<Product> products = ProductUtil.file2list(fileName);
        int total = products.size();
        Directory index = new RAMDirectory();
        // try-with-resources closes the writer even if adding a document fails
        try (IndexWriter writer = new IndexWriter(index, new IndexWriterConfig(analyzer))) {
            int count = 0;
            int oldPer = 0;
            for (Product p : products) {
                addDoc(writer, p);
                count++;
                // long arithmetic so that count * 100 cannot overflow for very large lists
                int per = (int) (count * 100L / total);
                if (per != oldPer) {
                    oldPer = per;
                    System.out.printf("索引中,总共要添加 %d 条记录,当前添加进度是: %d%% %n", total, per);
                }
            }
        }
        return index;
    }

    /**
     * Adds one product to the index as a document whose fields are all stored.
     *
     * @param w the writer that receives the document
     * @param p the product to index
     * @throws IOException if the writer fails
     */
    private static void addDoc(IndexWriter w, Product p) throws IOException {
        Document doc = new Document();
        // NOTE(review): id is an analyzed TextField, so the Term("id", "51173") delete
        // in main only matches if the analyzer emits the id as one unchanged token -
        // confirm, or consider a non-analyzed field type for identifiers.
        doc.add(new TextField("id", String.valueOf(p.getId()), Field.Store.YES));
        doc.add(new TextField("name", p.getName(), Field.Store.YES));
        doc.add(new TextField("category", p.getCategory(), Field.Store.YES));
        doc.add(new TextField("price", String.valueOf(p.getPrice()), Field.Store.YES));
        doc.add(new TextField("place", p.getPlace(), Field.Store.YES));
        doc.add(new TextField("code", p.getCode(), Field.Store.YES));
        w.addDocument(doc);
    }
}
还可以按照如下方法来删除索引,API 很明显,就不做代码示例了
deleteDocuments(Query query):根据Query条件来删除单个或多个Document deleteDocuments(Query[] queries):根据Query条件来删除单个或多个Document deleteDocuments(Term term):根据Term来删除单个或多个Document deleteDocuments(Term[] terms):根据Term来删除单个或多个Document deleteAll():删除所有的Document
如图所示,更新索引后,再用鞭查询,得到的结果是查出了更新之后的数据。 更新的关键代码:
// 更新索引 IndexWriterConfig config = new IndexWriterConfig(analyzer); IndexWriter indexWriter = new IndexWriter(index, config); Document doc = new Document(); doc.add(new TextField("id", "51173", Field.Store.YES)); doc.add(new TextField("name", "神鞭,鞭没了,神还在", Field.Store.YES)); doc.add(new TextField("category", "道具", Field.Store.YES)); doc.add(new TextField("price", "998", Field.Store.YES)); doc.add(new TextField("place", "南海群岛", Field.Store.YES)); doc.add(new TextField("code", "888888", Field.Store.YES)); indexWriter.updateDocument(new Term("id", "51173"), doc ); indexWriter.commit(); indexWriter.close(); package com.how2java;
import java.io.IOException;
import java.io.StringReader;
import java.util.List;
import java.util.Scanner;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.wltea.analyzer.lucene.IKAnalyzer;
/**
 * Lucene update demo: builds an in-memory index from the product list, replaces the
 * document whose id is 51173 with new content, and then answers keyword queries typed
 * on standard input, highlighting matches in the {@code name} field.
 */
public class TestLucene {

    /** Maximum number of hits fetched and displayed per query. */
    private static final int HITS_PER_PAGE = 10;

    /**
     * Builds the index, updates one document and runs the interactive query loop
     * until standard input ends.
     *
     * @param args unused
     * @throws Exception if indexing, updating, query parsing or searching fails
     */
    public static void main(String[] args) throws Exception {
        // 1. Prepare the Chinese analyzer
        IKAnalyzer analyzer = new IKAnalyzer();
        // 2. Build the index; it is closed automatically once the query loop ends
        try (Directory index = createIndex(analyzer)) {
            // Update the index: replace the document whose id is 51173.
            // try-with-resources closes the writer even if the update or commit throws.
            try (IndexWriter indexWriter = new IndexWriter(index, new IndexWriterConfig(analyzer))) {
                Document doc = new Document();
                doc.add(new TextField("id", "51173", Field.Store.YES));
                doc.add(new TextField("name", "神鞭,鞭没了,神还在", Field.Store.YES));
                doc.add(new TextField("category", "道具", Field.Store.YES));
                doc.add(new TextField("price", "998", Field.Store.YES));
                doc.add(new TextField("place", "南海群岛", Field.Store.YES));
                doc.add(new TextField("code", "888888", Field.Store.YES));
                indexWriter.updateDocument(new Term("id", "51173"), doc);
                indexWriter.commit();
            }
            // 3. Query interactively
            runSearchLoop(index, analyzer);
        }
    }

    /**
     * Reads keywords line by line from standard input and prints the hits for each one.
     * Returns when standard input is exhausted.
     *
     * @param index    the directory holding the index to search
     * @param analyzer the analyzer used to parse the keyword and to highlight results
     * @throws Exception if query parsing or searching fails
     */
    private static void runSearchLoop(Directory index, IKAnalyzer analyzer) throws Exception {
        // Deliberately not closed: closing the Scanner would also close System.in.
        Scanner s = new Scanner(System.in);
        while (true) {
            System.out.print("请输入查询关键字:");
            // End of input: leave the loop instead of failing with NoSuchElementException.
            if (!s.hasNextLine()) {
                break;
            }
            String keyword = s.nextLine();
            System.out.println("当前关键字是:" + keyword);
            // Skip blank input rather than handing an empty query string to the parser.
            if (keyword.trim().isEmpty()) {
                continue;
            }
            Query query = new QueryParser("name", analyzer).parse(keyword);
            // 4. Search; the reader is closed even when searching or printing throws
            try (IndexReader reader = DirectoryReader.open(index)) {
                IndexSearcher searcher = new IndexSearcher(reader);
                ScoreDoc[] hits = searcher.search(query, HITS_PER_PAGE).scoreDocs;
                // 5. Show the results
                showSearchResults(searcher, hits, query, analyzer);
            }
        }
    }

    /**
     * Prints one row per hit: rank, score, then every stored field, with the matched
     * terms of the {@code name} field wrapped in a red {@code <span>}.
     *
     * @param searcher the searcher used to load the stored documents
     * @param hits     the hits to display
     * @param query    the query whose terms are highlighted
     * @param analyzer the analyzer used to re-tokenize the {@code name} field for highlighting
     * @throws Exception if loading a document or highlighting fails
     */
    private static void showSearchResults(IndexSearcher searcher, ScoreDoc[] hits, Query query, IKAnalyzer analyzer) throws Exception {
        SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<span style='color:red'>", "</span>");
        Highlighter highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(query));
        System.out.println("找到 " + hits.length + " 个命中.");
        System.out.println("序号\t匹配度得分\t结果");
        for (int i = 0; i < hits.length; ++i) {
            ScoreDoc scoreDoc = hits[i];
            Document d = searcher.doc(scoreDoc.doc);
            System.out.print(i + 1);
            System.out.print("\t" + scoreDoc.score);
            for (IndexableField f : d.getFields()) {
                String fieldName = f.name();
                String value = d.get(fieldName);
                if ("name".equals(fieldName)) {
                    TokenStream tokenStream = analyzer.tokenStream(fieldName, new StringReader(value));
                    String fragment = highlighter.getBestFragment(tokenStream, value);
                    // The highlighter yields null when the name holds no query term
                    // (e.g. the hit came from another field); show the raw value then.
                    System.out.print("\t" + (fragment != null ? fragment : value));
                } else {
                    System.out.print("\t" + value);
                }
            }
            System.out.println("<br>");
        }
    }

    /**
     * Loads the products from {@code 140k_products.txt} and indexes them into a new
     * {@link RAMDirectory}, printing the progress each time the percentage changes.
     *
     * @param analyzer the analyzer used to tokenize the indexed fields
     * @return the directory containing the finished index
     * @throws IOException if writing the index fails
     */
    private static Directory createIndex(IKAnalyzer analyzer) throws IOException {
        String fileName = "140k_products.txt";
        List<Product> products = ProductUtil.file2list(fileName);
        int total = products.size();
        Directory index = new RAMDirectory();
        // try-with-resources closes the writer even if adding a document fails
        try (IndexWriter writer = new IndexWriter(index, new IndexWriterConfig(analyzer))) {
            int count = 0;
            int oldPer = 0;
            for (Product p : products) {
                addDoc(writer, p);
                count++;
                // long arithmetic so that count * 100 cannot overflow for very large lists
                int per = (int) (count * 100L / total);
                if (per != oldPer) {
                    oldPer = per;
                    System.out.printf("索引中,总共要添加 %d 条记录,当前添加进度是: %d%% %n", total, per);
                }
            }
        }
        return index;
    }

    /**
     * Adds one product to the index as a document whose fields are all stored.
     *
     * @param w the writer that receives the document
     * @param p the product to index
     * @throws IOException if the writer fails
     */
    private static void addDoc(IndexWriter w, Product p) throws IOException {
        Document doc = new Document();
        // NOTE(review): id is an analyzed TextField, so the Term("id", "51173") update
        // in main only matches if the analyzer emits the id as one unchanged token -
        // confirm, or consider a non-analyzed field type for identifiers.
        doc.add(new TextField("id", String.valueOf(p.getId()), Field.Store.YES));
        doc.add(new TextField("name", p.getName(), Field.Store.YES));
        doc.add(new TextField("category", p.getCategory(), Field.Store.YES));
        doc.add(new TextField("price", String.valueOf(p.getPrice()), Field.Store.YES));
        doc.add(new TextField("place", p.getPlace(), Field.Store.YES));
        doc.add(new TextField("code", p.getCode(), Field.Store.YES));
        w.addDocument(doc);
    }
}
HOW2J公众号,关注后实时获知最新的教程和优惠活动,谢谢。
问答区域
2021-05-12
更新问题
2 个答案
萌森 跳转到问题位置 答案时间:2021-11-25 new Term("id", "51173")
把知道的换上去,虽然这样更新不准确
AbCdEFf 跳转到问题位置 答案时间:2021-09-11 盖伦!
回答已经提交成功,正在审核。 请于 我的回答 处查看回答记录,谢谢
提问之前请登陆
提问已经提交成功,正在审核。 请于 我的提问 处查看提问记录,谢谢
|