Monday, April 3, 2017

Jaccard Similarity



# similarity: count(intersection) / count(union)
# arr1 = [3,7,9,12,4...]
# arr2 = [5,9,10,1,2...]

#Case 1
# not sorted
# no duplicates

function computeSimilarity(arr1, arr2){
    
    var map = {};
    var i;
    for(i=0; i < arr1.length; i++){
      map[arr1[i]] = true;
    }
    
    var intersection = 0;
    for(i=0; i < arr2.length; i++){
      var val = arr2[i];
      if(map[val] != null){
          intersection++;
      }
    }
    
    var union = arr1.length + arr2.length - intersection;
    return (intersection/union);
}



# Case 2: sorted arr inputs
# O(n)
# arr1 = [3,7,9,12...]
# arr2 = [5,9,10,...]

function computeSortedSim(arr1, arr2){
    var intersection = 0;
    var i, j;
    for(i =0, j=0; i < arr1.length && j < arr2.length; ){
        if(arr1[i] < arr2[j]) i++;
        else if(arr1[i] > arr2[j]) j++;
        else { i++; j++; intersection++; }
    }
    
    var union = arr1.length + arr2.length - intersection;
    return (intersection/union);
}