// Reads a Word .doc file with Apache POI and prints its paragraph count and paragraph lengths.
package com.slim;
import org.apache.poi.poifs.filesystem.*;
import org.apache.poi.hwpf.*;
import org.apache.poi.hwpf.extractor.*;
import java.io.*;
public class ReadDoc {
public static void main(String[] args) {
// TODO Auto-generated method stub
String filesname = "slim.doc";
POIFSFileSystem fs = null;
try{
fs = new POIFSFileSystem(new FileInputStream(filesname) );
HWPFDocument doc = new HWPFDocument(fs);
WordExtractor we = new WordExtractor(doc);
String[] paragraphs = we.getParagraphText();
System.out.println( "Word Document has " + paragraphs.length + " paragraphs" );
for( int i=0; i<paragraphs .length; i++ ) {
paragraphs[i] = paragraphs[i].replaceAll("\\cM?\r?\n","");
System.out.println( "Length:"+paragraphs[ i ].length());
}
}
catch(Exception e) {
e.printStackTrace();
}
}
} |