2021-05-30 更新
整个流程
1、读取数据库数据
2、分词(我在这里用的是盘古分词)
3、将分词后的索引单独存储在硬盘上
4、查找时基于Lucene单独存储的索引文件进行,而不是在数据库中查找
实例源码下载地址: https://pan.baidu.com/s/1cl88QsYp_zhY-yclORZHlg 提取码: s39x
相关的视频教程 https://www.bilibili.com/video/BV1K44y1k7Vq?share_source=copy_web

1、读取数据库数据
2、分词(我在这里用的是盘古分词)
3、将分词后的索引单独存储在硬盘上
4、查找时基于Lucene单独存储的索引文件进行,而不是在数据库中查找
实例源码下载地址: https://pan.baidu.com/s/1cl88QsYp_zhY-yclORZHlg 提取码: s39x
相关的视频教程 https://www.bilibili.com/video/BV1K44y1k7Vq?share_source=copy_web
// 引用的第三方库有以下5个:
// PanGu.Lucene.Analyzer
// PanGu
// PanGu.HighLight
// Lucene.Net
// ICSharpCode.SharpZipLib

盘古分词类
/// <summary>
/// Splits free-form search text into index terms by running it through the
/// Lucene query parser configured with the PanGu analyzer on the "title" field.
/// </summary>
public class LuceneAnalyze
{
    #region AnalyzerKey
    /// <summary>
    /// Tokenizes a search keyword into the terms the analyzer produces for it.
    /// </summary>
    /// <param name="keyword">Raw search text.</param>
    /// <returns>
    /// The term texts of the parsed query: the single term of a TermQuery, the
    /// terms of a PhraseQuery, or the terms of the TermQuery/PhraseQuery clauses
    /// of a BooleanQuery (other clause types are skipped). An empty array is
    /// returned for a null or blank keyword. For any other query type the
    /// original keyword is returned as the only element.
    /// </returns>
    public string[] AnalyzerKey(string keyword)
    {
        // A null keyword would otherwise reach QueryParser.Escape(null) and a
        // blank one would be handed to the parser as an empty query.
        if (string.IsNullOrWhiteSpace(keyword))
        {
            return new string[0];
        }

        Analyzer analyzer = new PanGuAnalyzer();
        QueryParser parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "title", analyzer);
        Query query = parser.Parse(this.CleanKeyword(keyword));

        List<string> analyzerWords = new List<string>();
        if (AppendTerms(query, analyzerWords))
        {
            return analyzerWords.ToArray();
        }

        BooleanQuery booleanQuery = query as BooleanQuery;
        if (booleanQuery != null)
        {
            foreach (BooleanClause clause in booleanQuery.GetClauses())
            {
                // Only term and phrase clauses contribute words; anything else is ignored.
                AppendTerms(clause.Query, analyzerWords);
            }
            return analyzerWords.ToArray();
        }

        return new string[] { keyword };
    }

    /// <summary>
    /// Appends the term text(s) of a TermQuery or PhraseQuery to <paramref name="words"/>.
    /// </summary>
    /// <param name="query">Query to inspect.</param>
    /// <param name="words">List that receives the term texts.</param>
    /// <returns>True when the query was a TermQuery or PhraseQuery; otherwise false.</returns>
    private static bool AppendTerms(Query query, List<string> words)
    {
        TermQuery termQuery = query as TermQuery;
        if (termQuery != null)
        {
            words.Add(termQuery.Term.Text);
            return true;
        }

        PhraseQuery phraseQuery = query as PhraseQuery;
        if (phraseQuery != null)
        {
            words.AddRange(phraseQuery.GetTerms().Select(t => t.Text));
            return true;
        }

        return false;
    }

    /// <summary>
    /// Neutralises upper-case AND / OR operators (leading, trailing, embedded or
    /// standing alone) by lower-casing them, then escapes the query parser's
    /// special characters.
    /// </summary>
    /// <param name="keyword">Raw search text.</param>
    /// <returns>The escaped text; an empty string for a null or blank keyword.</returns>
    private string CleanKeyword(string keyword)
    {
        if (string.IsNullOrWhiteSpace(keyword))
        {
            return string.Empty;
        }

        string cleaned = keyword;
        bool isClean = false;
        while (!isClean)
        {
            cleaned = cleaned.Trim();
            // Ordinal comparisons keep the fixed-length Substring offsets below
            // in step with what was actually matched.
            if (cleaned == "AND" || cleaned == "OR")
            {
                // A lone operator has no operands and cannot be parsed as-is.
                cleaned = cleaned.ToLowerInvariant();
            }
            else if (cleaned.EndsWith(" AND", System.StringComparison.Ordinal))
            {
                cleaned = cleaned.Substring(0, cleaned.Length - 3) + "and";
            }
            else if (cleaned.EndsWith(" OR", System.StringComparison.Ordinal))
            {
                cleaned = cleaned.Substring(0, cleaned.Length - 2) + "or";
            }
            else if (cleaned.StartsWith("AND ", System.StringComparison.Ordinal))
            {
                cleaned = "and" + cleaned.Substring(3);
            }
            else if (cleaned.StartsWith("OR ", System.StringComparison.Ordinal))
            {
                cleaned = "or" + cleaned.Substring(2);
            }
            else if (cleaned.Contains(" OR "))
            {
                cleaned = cleaned.Replace(" OR ", " or ");
            }
            else if (cleaned.Contains(" AND "))
            {
                cleaned = cleaned.Replace(" AND ", " and ");
            }
            else
            {
                isClean = true;
            }
        }

        return QueryParser.Escape(cleaned);
    }
    #endregion AnalyzerKey
}
Lucene初始化和搜索的方法
/// <summary>
/// Segments the keyword with the PanGu analyzer and builds a Lucene query
/// string that targets the "title" field.
/// </summary>
/// <param name="keyword">Raw search text.</param>
/// <returns>
/// "title:word*" (a prefix query) when segmentation yields exactly one word;
/// otherwise one "title:word" clause per word, separated by spaces. The result
/// is an empty string when segmentation yields no words.
/// </returns>
private string AnalyzerKeyword(string keyword)
{
    StringBuilder queryStringBuilder = new StringBuilder();
    LuceneAnalyze analyzer = new LuceneAnalyze();
    string[] words = analyzer.AnalyzerKey(keyword);
    if (words.Length == 1)
    {
        // The words are raw term text and this string is parsed again by the
        // caller, so query-syntax characters must be escaped first.
        queryStringBuilder.AppendFormat("{0}:{1}* ", "title", QueryParser.Escape(words[0]));
    }
    else
    {
        foreach (string word in words)
        {
            queryStringBuilder.AppendFormat("{0}:{1} ", "title", QueryParser.Escape(word));
        }
    }
    // Drop the separator left behind by the last clause.
    string result = queryStringBuilder.ToString().TrimEnd();
    return result;
}
/// <summary>
/// Opens the on-disk Lucene directory "LuceneData" under the application's
/// base directory, creating the folder first when it does not exist.
/// </summary>
/// <returns>An FSDirectory opened on that folder.</returns>
private FSDirectory CreateFSDirectory()
{
    // Path.Combine picks the platform's separator and avoids doubling it when
    // BaseDirectory already ends with one.
    string dirPath = System.IO.Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "LuceneData");
    // CreateDirectory does nothing when the folder already exists.
    System.IO.Directory.CreateDirectory(dirPath);
    return FSDirectory.Open(dirPath);
}
/// <summary>
/// Rebuilds the on-disk index from the rows returned by GetList. Does nothing
/// when no rows are returned.
/// </summary>
public void InitIndex()
{
    DataTable dt = GetList();
    if (dt == null || dt.Rows.Count < 1)
    {
        return;
    }

    // Dispose the directory as well as the writer so the index folder's
    // handles are released once indexing finishes.
    using (FSDirectory directory = CreateFSDirectory())
    using (IndexWriter writer = new IndexWriter(directory, new PanGuAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED))
    {
        foreach (DataRow dr in dt.Rows)
        {
            // Only id and title are indexed here; more columns could be added the same way.
            Document doc = new Document();
            // id: stored, indexed as a single un-analyzed value.
            doc.Add(new Field("id", dr[0].ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
            // title: stored and run through the analyzer.
            doc.Add(new Field("title", dr[1].ToString(), Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc);
        }
        // Merge the index segments.
        writer.Optimize();
    }
}
/// <summary>
/// Loads the ID and Title columns of every Article row in a single query.
/// </summary>
/// <returns>The first table of the query result, or null when the result is null or has no tables.</returns>
private DataTable GetList()
{
    // Everything is read at once; a large table could be processed in batches instead.
    DataSet result = SQLiteHelper.ExecuteQuery("select ID,Title from Article");
    bool hasTable = result != null && result.Tables.Count > 0;
    return hasTable ? result.Tables[0] : null;
}
/// <summary>
/// Searches the "title" field for the segmented keyword and writes the id and
/// title of up to 1000 hits, followed by the total hit count, to the console.
/// </summary>
/// <param name="keyword">Raw search text; it is segmented by AnalyzerKeyword before parsing.</param>
public void SearchData(string keyword)
{
    // The searcher and directory hold the index files open, so dispose both
    // when the search is done.
    using (FSDirectory dir = CreateFSDirectory())
    using (IndexSearcher searcher = new IndexSearcher(dir))
    {
        QueryParser parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "title", new PanGuAnalyzer());
        Query query = parser.Parse(AnalyzerKeyword(keyword));
        TopDocs docs = searcher.Search(query, null, 10000);

        int printed = 0;
        foreach (ScoreDoc sd in docs.ScoreDocs)
        {
            // At most 1000 rows are printed; paging could be used instead.
            if (printed++ >= 1000)
            {
                break;
            }

            Document doc = searcher.Doc(sd.Doc);
            Console.WriteLine("***************************************");
            Console.WriteLine(string.Format("id={0}", doc.Get("id")));
            Console.WriteLine(string.Format("title={0}", doc.Get("title")));
        }
        Console.WriteLine($"一共命中{docs.TotalHits}");
    }
}
/// <summary>
/// Runs a single-term query for the fixed term "图书馆" on the "title" field
/// and writes every returned hit, then the total hit count, to the console.
/// </summary>
/// <remarks>
/// The time, price and content fields are printed as well; InitIndex as shown
/// in this file stores only id and title, so those values are presumably empty
/// unless the index was built elsewhere.
/// </remarks>
public void SingelSearchData()
{
    // The searcher and directory hold the index files open, so dispose both
    // when the search is done.
    using (FSDirectory dir = CreateFSDirectory())
    using (IndexSearcher searcher = new IndexSearcher(dir))
    {
        // Matches documents whose title contains exactly this indexed term.
        TermQuery query = new TermQuery(new Term("title", "图书馆"));
        TopDocs docs = searcher.Search(query, null, 10000);
        foreach (ScoreDoc sd in docs.ScoreDocs)
        {
            Document doc = searcher.Doc(sd.Doc);
            Console.WriteLine("***************************************");
            Console.WriteLine(string.Format("id={0}", doc.Get("id")));
            Console.WriteLine(string.Format("title={0}", doc.Get("title")));
            Console.WriteLine(string.Format("time={0}", doc.Get("time")));
            Console.WriteLine(string.Format("price={0}", doc.Get("price")));
            Console.WriteLine(string.Format("content={0}", doc.Get("content")));
        }
        Console.WriteLine("1一共命中了{0}个", docs.TotalHits);
    }
}
/// <summary>
/// Searches the "title" field with a numeric range filter on "time" and a
/// two-level sort, then writes up to 1000 hits and the total hit count to the
/// console.
/// </summary>
/// <param name="keyword">
/// Query text handed to the query parser as-is (it is not passed through
/// AnalyzerKeyword here).
/// </param>
/// <remarks>
/// NOTE(review): the filter and sort rely on numeric "time" and "price" fields,
/// which InitIndex as shown in this file does not write; confirm the index is
/// built with those fields.
/// </remarks>
public void OrderSearchData(string keyword)
{
    // The searcher and directory hold the index files open, so dispose both
    // when the search is done.
    using (FSDirectory dir = CreateFSDirectory())
    using (IndexSearcher searcher = new IndexSearcher(dir))
    {
        QueryParser parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "title", new PanGuAnalyzer());
        Query query = parser.Parse(keyword);
        // Keep only documents whose time lies in [20180000, 20181822], both ends inclusive.
        NumericRangeFilter<int> timeFilter = NumericRangeFilter.NewIntRange("time", 20180000, 20181822, true, true);
        // The third SortField argument is "reverse": false sorts ascending, true descending.
        SortField sortPrice = new SortField("price", SortField.DOUBLE, false);
        SortField sortTime = new SortField("time", SortField.INT, true);
        // time is the primary sort key, price the secondary one.
        Sort sort = new Sort(sortTime, sortPrice);
        TopDocs docs = searcher.Search(query, timeFilter, 10000, sort);

        int printed = 0;
        foreach (ScoreDoc sd in docs.ScoreDocs)
        {
            // At most 1000 rows are printed; paging could be used instead.
            if (printed++ >= 1000)
            {
                break;
            }

            Document doc = searcher.Doc(sd.Doc);
            Console.WriteLine("***************************************");
            Console.WriteLine(string.Format("id={0}", doc.Get("id")));
            Console.WriteLine(string.Format("title={0}", doc.Get("title")));
            Console.WriteLine(string.Format("time={0}", doc.Get("time")));
            Console.WriteLine(string.Format("price={0}", doc.Get("price")));
        }
        Console.WriteLine("3一共命中了{0}个", docs.TotalHits);
    }
}
