用Lucene建立索引及查询示例

首先去 apache 网站下载 lucene 的开发包,并配置好环境变量

http://jakarta.apache.org/lucene/docs/index.html

建立索引程序:

/*
 * Created on 2004-4-26
 */

import org.apache.lucene.index.*;

import org.apache.lucene.analysis.standard.*;

import org.apache.lucene.document.*;

import java.io.*;

/**

* @author bell.wang

public class IndexFiles {

public static void main(String[] args) {

try{

IndexWriter writer = new IndexWriter("myindex", new StandardAnalyzer(), true);

File files = new File("mydoc");

String[] Fnamelist = files.list();

for (int i = 0; i < Fnamelist.length; i++){

File file = new File(files,Fnamelist[i]);

Document doc = new Document();

Field fld = Field.Text("path", file.getPath());

doc.add(fld);

fld = Field.Keyword("modified", DateField.timeToString(file.lastModified()));

doc.add(fld);

FileInputStream in = new FileInputStream(file);

Reader reader = new BufferedReader(new InputStreamReader(in));

fld = Field.Text("contents", reader);

doc.add(fld);

writer.addDocument(doc);

System.out.println("Added : " + doc.get("path"));

}

writer.optimize();

writer.close();

System.out.println("Has Added Total: " + Fnamelist.length);

}catch(Exception e){

System.out.println(e);

}

程序对当前路径下 mydoc 目录中的所有文件建立索引，索引包含三个字段：文件路径、最后修改时间、文件内容。

建立的索引文件保存在当前路径下的 myindex 目录中。

检索程序:

/*
 * Created on 2004-4-26
 */

import org.apache.lucene.analysis.*;

import org.apache.lucene.analysis.standard.*;

import org.apache.lucene.search.*;

import org.apache.lucene.queryParser.*;

import org.apache.lucene.document.*;

//import com.augmentum.hrms.*;

import java.util.Date;

/**

* @author bell.wang

public class SearchFile {

public static void main(String[] args) {

//XMap a = new XMap("");

Analyzer anlzr = new StandardAnalyzer();

try{

Query q = QueryParser.parse("数据库", "contents", anlzr);

System.out.println("Searching for : " + q.toString("contents"));

Searcher serch = new IndexSearcher("myindex");

Hits hts = serch.search(q);

for(int i=0; i<hts.length(); i++){

Document doc = hts.doc(i);

String path = doc.get("path");

System.out.println("Find: " +i+": "+ path);

System.out.println("Find: " + doc.get("modified"));

System.out.println("Find: " + doc.get("path"));

}

System.out.println("Find Total: " + hts.length());

}catch(Exception e){

System.out.println(e);

}

程序对索引的 contents 字段用“数据库”关键字进行查询，返回的是所有包含该关键字的文档集合，并分别打印出每个文档的路径和最后修改时间字段。

上面的程序我用纯文本文件测试通过，.txt、.jsp、.html 都可以；

Word、PDF 等文件需要先经过转换才能对其进行索引。