package jp.ndca.similarity.distance;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
/**
* Accumulate Jaccard Similarity
*
* @author hattori_tsukasa
*
*/
public class Jaccard {

    public Jaccard(){
        super();
    }

    /**
     * Computes the Jaccard similarity |A ∩ B| / |A ∪ B| of two arrays, treating
     * each array as a set: repeated elements (per {@code equals}/{@code hashCode})
     * are counted once.
     *
     * @param a first collection of elements
     * @param b second collection of elements
     * @return a value in [0.0, 1.0]; 0.0 when the inputs share no element,
     *         including the case where both are empty
     */
    public double calc(Object[] a, Object[] b){
        return calcDistinct(Arrays.asList(a), Arrays.asList(b));
    }

    /**
     * Computes the Jaccard similarity |A ∩ B| / |A ∪ B| of two lists, treating
     * each list as a set: repeated elements (per {@code equals}/{@code hashCode})
     * are counted once.
     *
     * @param a first collection of elements
     * @param b second collection of elements
     * @return a value in [0.0, 1.0]; 0.0 when the inputs share no element,
     *         including the case where both are empty
     */
    public double calc(List<? extends Object> a, List<? extends Object> b){
        return calcDistinct(a, b);
    }

    /**
     * Computes the Jaccard similarity of two whole arrays with a single merge pass.
     * Equivalent to {@code calcByMerge(a, 0, b, 0)}; see that method for the
     * requirements on the inputs.
     *
     * @param a first array
     * @param b second array
     * @return overlap / (|a| + |b| - overlap), or 0.0 when both arrays are empty
     */
    public <K extends Comparable<K>> double calcByMerge( K[] a, K[] b ){
        return calcByMerge( a, 0, b, 0 );
    }

    /**
     * Computes the Jaccard similarity of {@code a[offsetA..]} and {@code b[offsetB..]}
     * by walking both ranges in parallel, without allocating a set.
     *
     * <p>The merge walk only yields the true intersection size when both ranges are
     * sorted in ascending natural order and contain no repeated elements; neither
     * condition is checked here.
     *
     * @param a       first array
     * @param offsetA index of the first element of {@code a} to consider
     * @param b       second array
     * @param offsetB index of the first element of {@code b} to consider
     * @return overlap / (lenA + lenB - overlap), where lenA and lenB are the lengths
     *         of the considered ranges; 0.0 when that denominator is not positive
     *         (e.g. both ranges are empty)
     */
    public <K extends Comparable<K>> double calcByMerge( K[] a, int offsetA, K[] b, int offsetB ){
        int aLen = a.length - offsetA;
        int bLen = b.length - offsetB;
        int overlap = 0;
        int i = offsetA;
        int j = offsetB;
        while( i < a.length && j < b.length ){
            if( a[i].equals( b[j] ) ){
                // Same element on both sides: count it and advance both cursors.
                overlap++;
                i++;
                j++;
            }
            else if( a[i].compareTo(b[j]) < 0 ){ // a < b: a[i] cannot match anything later in b
                i++;
            }
            else {
                j++;
            }
        }
        int union = aLen + bLen - overlap;
        // Avoid 0/0 (NaN) for empty ranges; report "no similarity" like calc() does.
        if( union <= 0 )
            return 0.0;
        return overlap / (double) union;
    }

    /**
     * Shared implementation of the set-based {@code calc} overloads.
     * Sizes are taken from de-duplicated sets so that
     * |A| + |B| - |A ∪ B| really is |A ∩ B| even when the inputs repeat elements.
     */
    private double calcDistinct(List<? extends Object> a, List<? extends Object> b){
        Set<Object> distinctA = new HashSet<Object>(a);
        Set<Object> distinctB = new HashSet<Object>(b);
        Set<Object> union = new HashSet<Object>(distinctA);
        union.addAll(distinctB);
        return innerCalc(distinctA.size(), distinctB.size(), union.size());
    }

    /**
     * Turns set sizes into a Jaccard coefficient using inclusion-exclusion.
     *
     * @param alen  number of distinct elements in the first set
     * @param blen  number of distinct elements in the second set
     * @param union number of distinct elements in the union of both sets
     * @return (alen + blen - union) / union, or 0.0 when the intersection is empty
     *         (which also covers {@code union == 0})
     */
    private double innerCalc(int alen, int blen, int union){
        double overlap = alen + blen - union;
        if( overlap <= 0 )
            return 0.0;
        return overlap / union;
    }
}
Partager