1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
|
/**
* Get the text content of a PDF file
*/
public String getText(File document) {
StringBuffer text = new StringBuffer();
FileInputStream in = null;
StringWriter output = null;
PDDocument doc = null;
try {
if (document.length() < tailleLimiteLong*1024*1024){
in = new FileInputStream(document);
PDFParser parser = new PDFParser(in);
parser.parse();
doc = parser.getPDDocument();
PDFTextStripper stripper = new PDFTextStripper();
stripper.setStartPage(0);
output = new StringWriter();
stripper.writeText(doc, output);
text = output.getBuffer();
} else {
logger.error("File is too big");
}
} catch (Exception e) {
logger.error("Error during indexation : " + document.getPath(),e);
}
try {
if (in!=null) in.close();
} catch (Exception e) {
logger.error("Problem when closing FileInputStream");
}
try {
if (output!=null) output.close();
} catch (Exception e) {
logger.error("Problem when closing StringWriter");
}
try {
if (doc!=null) doc.close();
} catch (Exception e) {
logger.error("Problem when closing PDDocument");
}
return text.toString();
} |
Partager