[Lucene]Rechercher un mot dans un fichier
Je veux effectuer une recherche dans un fichier .txt en me servant des bibliothèques Lucene, et je veux que le résultat de ma recherche soit comme suit :
une ligne du nom du fichier
une ligne de phrase comme suit : les 3 mots qui précèdent mon mot recherché + le mot recherché + les 3 mots qui suivent
ceci est le code que j'ai essayé
Code:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116
|
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Locale;
import java.util.Scanner;
import org.apache.lucene.analysis.core.SimpleAnalyzer;
import org.apache.lucene.document.*;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
public class SearchEngine {
public static void main(String[] args) {
indexDirectory();
String userInput = "";
Scanner getInputWord = new Scanner(System.in);
System.out.print("Please type a word to search for.\nInput: ");
userInput = getInputWord.next();
search(userInput);
System.out.print("Search again? (type Y or N) Y = Yes, N = No: ");
userInput = getInputWord.next();
while (!userInput.equals("n") && !userInput.equals("N")) {
System.out.print("Input: ");
userInput = getInputWord.next();
search(userInput);
System.out.print("Search again? (Y or N): ");
userInput = getInputWord.next();
}
}
private static void indexDirectory() {
//Apache Lucene Indexing Directory .txt files
try {
Path path;
path = Paths.get("C:\\Users\\USER\\Desktop\\index");
Directory directory = FSDirectory.open(path);
IndexWriterConfig config = new IndexWriterConfig(new SimpleAnalyzer());
IndexWriter indexWriter = new IndexWriter(directory, config);
indexWriter.deleteAll();
File f = new File("C:\\seminar\\test_data"); // current directory
for (File file : f.listFiles()) {
//System.out.println("indexed " + file.getCanonicalPath());
Document doc = new Document();
doc.add(new TextField("FileName", file.getName(), Store.YES));
FileInputStream is = new FileInputStream(file);
BufferedReader reader = new BufferedReader(new InputStreamReader(is));
StringBuffer stringBuffer = new StringBuffer();
String line = null;
while ((line = reader.readLine()) != null) {
stringBuffer.append(line).append("\n");
}
reader.close();
doc.add(new TextField("contents", stringBuffer.toString(), Store.YES));
indexWriter.addDocument(doc);
}
indexWriter.close();
directory.close();
System.out.println("indexing finished");
} catch (Exception e) {
// TODO: handle exception
e.printStackTrace();
}
}
private static void search(String text) {
//Apache Lucene searching text inside .txt files
try {
Path path = Paths.get("C:\\Users\\USER\\Desktop\\index");
Directory directory = FSDirectory.open(path);
IndexReader indexReader = DirectoryReader.open(directory);
IndexSearcher indexSearcher = new IndexSearcher(indexReader);
FuzzyQuery query = new FuzzyQuery(new Term("contents", text), 2);
TopDocs topDocs = indexSearcher.search(query, 10);
int i = 0;
if (topDocs.totalHits > 0) {
System.out.println("Found " + topDocs.totalHits + " result(s).");
for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
Document document = indexSearcher.doc(scoreDoc.doc);
i = i + 1;
System.out.println("Result #" + i + " " + document.get("FileName"));
}
} else {
System.out.println("No maches found!");
}
} catch (Exception e) {
// TODO: handle exception
e.printStackTrace();
}
}
} |