import java.io.*;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Locale;
import java.util.Scanner;
import org.apache.lucene.analysis.core.SimpleAnalyzer;
import org.apache.lucene.document.*;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
public class SearchEngine {
public static void main(String[] args) {
indexDirectory();
String userInput = "";
Scanner getInputWord = new Scanner(System.in);
System.out.print("Please type a word to search for.\nInput: ");
userInput = getInputWord.next();
search(userInput);
System.out.print("Search again? (type Y or N) Y = Yes, N = No: ");
userInput = getInputWord.next();
while (!userInput.equals("n") && !userInput.equals("N")) {
System.out.print("Input: ");
userInput = getInputWord.next();
search(userInput);
System.out.print("Search again? (Y or N): ");
userInput = getInputWord.next();
}
}
private static void indexDirectory() {
//Apache Lucene Indexing Directory .txt files
try {
Path path;
path = Paths.get("C:\\Users\\USER\\Desktop\\index");
Directory directory = FSDirectory.open(path);
IndexWriterConfig config = new IndexWriterConfig(new SimpleAnalyzer());
IndexWriter indexWriter = new IndexWriter(directory, config);
indexWriter.deleteAll();
File f = new File("C:\\seminar\\test_data"); // current directory
for (File file : f.listFiles()) {
//System.out.println("indexed " + file.getCanonicalPath());
Document doc = new Document();
doc.add(new TextField("FileName", file.getName(), Store.YES));
FileInputStream is = new FileInputStream(file);
BufferedReader reader = new BufferedReader(new InputStreamReader(is));
StringBuffer stringBuffer = new StringBuffer();
String line = null;
while ((line = reader.readLine()) != null) {
stringBuffer.append(line).append("\n");
}
reader.close();
doc.add(new TextField("contents", stringBuffer.toString(), Store.YES));
indexWriter.addDocument(doc);
}
indexWriter.close();
directory.close();
System.out.println("indexing finished");
} catch (Exception e) {
// TODO: handle exception
e.printStackTrace();
}
}
private static void search(String text) {
//Apache Lucene searching text inside .txt files
try {
Path path = Paths.get("C:\\Users\\USER\\Desktop\\index");
Directory directory = FSDirectory.open(path);
IndexReader indexReader = DirectoryReader.open(directory);
IndexSearcher indexSearcher = new IndexSearcher(indexReader);
FuzzyQuery query = new FuzzyQuery(new Term("contents", text), 2);
TopDocs topDocs = indexSearcher.search(query, 10);
int i = 0;
if (topDocs.totalHits > 0) {
System.out.println("Found " + topDocs.totalHits + " result(s).");
for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
Document document = indexSearcher.doc(scoreDoc.doc);
i = i + 1;
System.out.println("Result #" + i + " " + document.get("FileName"));
}
} else {
System.out.println("No maches found!");
}
} catch (Exception e) {
// TODO: handle exception
e.printStackTrace();
}
}
} |