1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
| public static void insert(NGTN root1, String ngramFile, String tmpFolder, int n) {
//ngramFile is binary
//Algorithm:
//load as many groups of ngrams as possible from ngramFile -> root2
//insert the new ones (root1) in the old ones (root2)
//append that in a temporary file
//continue to browse the ngram file
//k: number of lines (77 bytes: size of one NGTN)
//X: buffer size
int k = (int) (50 * 1024 * 1024 / (77 * n * 3));
int lineSize = (n + 1) * 4;
int X = k * lineSize;
String tmpFile = generateFileName(tmpFolder);
try {
FileInputStream fis = new FileInputStream(ngramFile);
BufferedInputStream bis = new BufferedInputStream(fis, X);
//index: tells at which node in root1 we are
int index = 0;
boolean lastLoop = false;
while(!lastLoop){
System.gc();
lastLoop = (bis.available() < X);
bis.mark(X);
byte[] b = new byte[X];
int readBytes = bis.read(b);
bis.reset();
//root1: the new tree to be inserted
//root2: old subtrees
NGTN root2;
//if readBytes <= 0 ---> empty file
if (readBytes > 0){
//wrap bytes into a ByteBuffer
ByteBuffer bb = ByteBuffer.wrap(b, 0, readBytes);
//get the corresponding subtree
root2 = loadNGrams(bb, n, lastLoop);
}
else root2 = new NGTN();
int loaded = root2.getLeafCount();
bis.skip(loaded * lineSize);
//put "young" nodes into the "old" subtree
index = putFromIndex(root1, index, root2, lastLoop);
//append the subtree to the temporary file
append(root2, n, tmpFile);
}
bis.close();
fis.close();
//replace ngramFile with tmpFile
replace(ngramFile, tmpFile);
remove(tmpFile);
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
} |
Partager