"""Print the TF-IDF cosine similarity between each row's id and password.

Loads a local Excel workbook that must provide an ``id`` column and a
``password`` column, then prints one similarity matrix per row.
"""

import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


def _as_text(value):
    """Return a cell value as text, mapping missing cells to an empty string."""
    return "" if pd.isna(value) else str(value)


# read_excel loads the first sheet of the workbook by default.
# NOTE: an earlier variant built this frame from rockyou.txt (ISO-8859-1),
# splitting every line into an (id, password) pair.
df_result = pd.read_excel(r'C:\Users\User\Downloads\Memoireid3125439.xlsx')

# Excel cells can come back as numbers or NaN, but the vectorizer only
# accepts strings, so every cell is converted to text up front.
ids = df_result["id"].map(_as_text)
passwords = df_result["password"].map(_as_text)

# One vectorizer is enough: fit_transform() relearns the vocabulary on every
# call, so each (id, password) pair is still scored independently.
tfidf_vectorizer = TfidfVectorizer()

# TODO: also compare each pair with Levenshtein distance and Jaccard distance.
for i, user_id, password in zip(df_result.index, ids, passwords):
    try:
        tfidf_matrix = tfidf_vectorizer.fit_transform((user_id, password))
    except ValueError:
        # Raised when neither string yields a token; the default token
        # pattern ignores single-character tokens. Nothing to compare.
        print(f"row {i}: no usable tokens, skipped")
        continue
    # Compare the id (row 0) with both rows; the second entry of the printed
    # 1x2 matrix is the id/password similarity.
    result = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix)
    print(result)