import gensim.models.keyedvectors as word2vec
import numpy as np

# Load the word vectors once at import time from the binary word2vec-format
# file below; the scoring functions in this file read this module-level
# `model`.
model = word2vec.KeyedVectors.load_word2vec_format(
    './model/GoogleNews-vectors-negative300.bin', binary=True)
def get_score(tokens, ground_truth, *, similarity=None):
    """Return, for each token, its best similarity to any ground-truth token.

    Args:
        tokens: iterable of candidate token strings.
        ground_truth: sequence of reference token strings; every token is
            compared against each of them.
        similarity: optional callable ``similarity(a, b)`` returning a
            number. Defaults to ``model.similarity`` of the module-level
            word-vector model.

    Returns:
        A list with one entry per element of ``tokens`` (same order), each
        being the maximum similarity between that token and the entries of
        ``ground_truth``.

    Raises:
        ValueError: from ``np.max`` when ``ground_truth`` is empty and
            ``tokens`` is not (there is no maximum of an empty array).
    """
    if similarity is None:
        # NOTE(review): the default lookup presumably raises KeyError for
        # words missing from the model vocabulary -- confirm against gensim.
        similarity = model.similarity

    best_scores = []
    for tk in tokens:
        sims = np.array([similarity(tk, ref) for ref in ground_truth])
        best_scores.append(np.max(sims, axis=0))
    return best_scores
def get_softP(pred, tokens, tokens_scores):
    """Return the soft-precision style score of ``pred``.

    Walks ``tokens`` and ``tokens_scores`` in lockstep, adds up the score of
    every token that also appears in ``pred`` and divides by the number of
    such tokens plus one. Each matching token and its score is printed.

    Args:
        pred: container of predicted tokens (tested with ``in``).
        tokens: iterable of candidate tokens.
        tokens_scores: iterable of scores aligned with ``tokens``.

    Returns:
        ``sum(matching scores) / (number of matches + 1)``.
    """
    total = 0
    # NOTE(review): the denominator starts at 1, so the result is divided by
    # (matches + 1). This also avoids a division by zero when nothing
    # matches; confirm whether the extra 1 is intended.
    denominator = 1
    # Use the `tokens_scores` argument; the earlier code read a module-level
    # `token_scores` here and silently ignored the parameter.
    for tk, tk_score in zip(tokens, tokens_scores):
        if tk in pred:
            total = total + tk_score
            denominator += 1
            print(tk, tk_score)
    return total / denominator
def get_softR(pred, ground_truth, tokens, tokens_scores):
    """Return the soft-recall style score of ``pred`` against ``ground_truth``.

    Walks ``tokens`` and ``tokens_scores`` in lockstep, adds up the score of
    every token present in both ``pred`` and ``ground_truth`` and divides by
    ``len(ground_truth)``. Each matching token and its score is printed.

    Args:
        pred: container of predicted tokens (tested with ``in``).
        ground_truth: sized container of reference tokens.
        tokens: iterable of candidate tokens.
        tokens_scores: iterable of scores aligned with ``tokens``.

    Returns:
        ``sum(matching scores) / len(ground_truth)``.

    Raises:
        ZeroDivisionError: if ``ground_truth`` is empty.
    """
    total = 0
    # Use the `tokens_scores` argument; the earlier code read a module-level
    # `token_scores` here and silently ignored the parameter.
    for tk, tk_score in zip(tokens, tokens_scores):
        if tk in pred and tk in ground_truth:
            total = total + tk_score
            print(tk, tk_score)
    return total / len(ground_truth)
# Sample inputs for the scoring functions: reference tokens, the full
# candidate token list, and the predicted subset.
ground_truth = ['Finland', 'University', 'UEF', 'Joensuu']
tokens = [
    'Finland', 'Departmet', 'School', 'Computing', 'University',
    'UEF', 'Science', 'Park', 'Joensuu',
]
pred = ['Finland', 'School', 'Park', 'Joensuu', 'Computing']

# Score every candidate against the reference list, then run both metrics.
# The return values are not stored; only the per-token prints inside the
# functions are emitted.
token_scores = get_score(tokens, ground_truth)
get_softP(pred, tokens, token_scores)
get_softR(pred, ground_truth, tokens, token_scores)
import numpy as np
# Load the word vectors from the binary word2vec-format file below and bind
# them to the module-level `model` used by the scoring functions.
model = word2vec.KeyedVectors.load_word2vec_format(
    './model/GoogleNews-vectors-negative300.bin',
    binary=True,
)
def get_score(tokens, ground_truth, *, similarity=None):
    """Return, for each token, its best similarity to any ground-truth token.

    Args:
        tokens: iterable of candidate token strings.
        ground_truth: sequence of reference token strings; every token is
            compared against each of them.
        similarity: optional callable ``similarity(a, b)`` returning a
            number. Defaults to ``model.similarity`` of the module-level
            word-vector model.

    Returns:
        A list with one entry per element of ``tokens`` (same order), each
        being the maximum similarity between that token and the entries of
        ``ground_truth``.

    Raises:
        ValueError: from ``np.max`` when ``ground_truth`` is empty and
            ``tokens`` is not (there is no maximum of an empty array).
    """
    if similarity is None:
        # NOTE(review): the default lookup presumably raises KeyError for
        # words missing from the model vocabulary -- confirm against gensim.
        similarity = model.similarity

    best_scores = []
    for tk in tokens:
        sims = np.array([similarity(tk, ref) for ref in ground_truth])
        best_scores.append(np.max(sims, axis=0))
    return best_scores
def get_softP(pred, tokens, tokens_scores):
    """Return the soft-precision style score of ``pred``.

    Walks ``tokens`` and ``tokens_scores`` in lockstep, adds up the score of
    every token that also appears in ``pred`` and divides by the number of
    such tokens plus one. Each matching token and its score is printed.

    Args:
        pred: container of predicted tokens (tested with ``in``).
        tokens: iterable of candidate tokens.
        tokens_scores: iterable of scores aligned with ``tokens``.

    Returns:
        ``sum(matching scores) / (number of matches + 1)``.
    """
    total = 0
    # NOTE(review): the denominator starts at 1, so the result is divided by
    # (matches + 1). This also avoids a division by zero when nothing
    # matches; confirm whether the extra 1 is intended.
    denominator = 1
    # Use the `tokens_scores` argument; the earlier code read a module-level
    # `token_scores` here and silently ignored the parameter.
    for tk, tk_score in zip(tokens, tokens_scores):
        if tk in pred:
            total = total + tk_score
            denominator += 1
            print(tk, tk_score)
    return total / denominator
def get_softR(pred, ground_truth, tokens, tokens_scores):
    """Return the soft-recall style score of ``pred`` against ``ground_truth``.

    Walks ``tokens`` and ``tokens_scores`` in lockstep, adds up the score of
    every token present in both ``pred`` and ``ground_truth`` and divides by
    ``len(ground_truth)``. Each matching token and its score is printed.

    Args:
        pred: container of predicted tokens (tested with ``in``).
        ground_truth: sized container of reference tokens.
        tokens: iterable of candidate tokens.
        tokens_scores: iterable of scores aligned with ``tokens``.

    Returns:
        ``sum(matching scores) / len(ground_truth)``.

    Raises:
        ZeroDivisionError: if ``ground_truth`` is empty.
    """
    total = 0
    # Use the `tokens_scores` argument; the earlier code read a module-level
    # `token_scores` here and silently ignored the parameter.
    for tk, tk_score in zip(tokens, tokens_scores):
        if tk in pred and tk in ground_truth:
            total = total + tk_score
            print(tk, tk_score)
    return total / len(ground_truth)
# Sample inputs for the scoring functions: reference tokens, the full
# candidate token list, and the predicted subset.
ground_truth = ['Finland', 'University', 'UEF', 'Joensuu']
tokens = [
    'Finland', 'Departmet', 'School', 'Computing', 'University',
    'UEF', 'Science', 'Park', 'Joensuu',
]
pred = ['Finland', 'School', 'Park', 'Joensuu', 'Computing']

# Score every candidate against the reference list, then run both metrics.
# The return values are not stored; only the per-token prints inside the
# functions are emitted.
token_scores = get_score(tokens, ground_truth)
get_softP(pred, tokens, token_scores)
get_softR(pred, ground_truth, tokens, token_scores)
# No comments:
# Post a Comment