Tuesday, July 9, 2019

word2vec test

# Fix: original line read "aimport gensim..." — a typo that made the whole
# file a SyntaxError.
import gensim.models.keyedvectors as word2vec
import numpy as np

# Load the pre-trained Google News embeddings (binary word2vec format).
# NOTE: runs at import time and needs the ~3.5 GB model file on disk.
model = word2vec.KeyedVectors.load_word2vec_format(
    './model/GoogleNews-vectors-negative300.bin', binary=True)
def get_score(tokens, ground_truth):
    """Return, for each token, its best word2vec similarity to the ground truth.

    For every ``tk`` in ``tokens``, computes ``model.similarity(tk, gt)``
    against each ``gt`` in ``ground_truth`` and keeps the maximum.

    Relies on the module-level ``model`` (gensim KeyedVectors).
    Raises KeyError if a token is missing from the model vocabulary.
    Raises ValueError if ``ground_truth`` is empty (max of empty sequence).

    Cleanup: removed the unused counter ``i`` and the needless
    list -> np.array -> np.max round-trip for a 1-D max.
    """
    scores = []
    for tk in tokens:
        best = max(model.similarity(tk, gt) for gt in ground_truth)
        scores.append(best)
    return scores
def get_softP(pred, tokens, tokens_scores):
    """Soft precision: sum of scores of tokens found in ``pred``, over count+1.

    Walks ``tokens`` alongside ``tokens_scores``; every token that appears
    in ``pred`` contributes its score to the numerator and bumps the
    denominator. Prints each matched (token, score) pair as a side effect.

    Bug fix: the body previously read the *global* ``token_scores`` instead
    of the ``tokens_scores`` parameter — it only worked because the script
    happened to define that global before calling this function.

    NOTE(review): the counter starts at 1, so the denominator is
    (matches + 1) — presumably deliberate smoothing that also avoids
    ZeroDivisionError when nothing matches; confirm with the author.
    """
    count = 1  # original behavior: denominator starts at 1, not 0
    total = 0
    for tk, tk_score in zip(tokens, tokens_scores):
        if tk in pred:
            total = total + tk_score
            count = count + 1
            print(tk, tk_score)
    return total / count

def get_softR(pred, ground_truth, tokens, tokens_scores):
    """Soft recall: scores of tokens in both ``pred`` and ``ground_truth``.

    Sums the score of every token that appears in both ``pred`` and
    ``ground_truth`` and divides by ``len(ground_truth)``. Prints each
    matched (token, score) pair as a side effect.

    Bug fix: the body previously read the *global* ``token_scores`` instead
    of the ``tokens_scores`` parameter — it only worked because the script
    happened to define that global before calling this function.

    Raises ZeroDivisionError if ``ground_truth`` is empty.
    """
    total = 0
    for tk, tk_score in zip(tokens, tokens_scores):
        # Flattened the original nested ifs into one condition.
        if tk in pred and tk in ground_truth:
            total = total + tk_score
            print(tk, tk_score)
    return total / len(ground_truth)

# Toy example: ground-truth tokens, the full candidate token list
# ("Departmet" misspelling is in the original data), and a predicted subset.
ground_truth=['Finland','University','UEF','Joensuu']
tokens = ['Finland','Departmet','School','Computing','University', 'UEF', 'Science', 'Park', 'Joensuu']
pred=['Finland','School','Park','Joensuu','Computing']

# Score every candidate token against the ground truth via word2vec
# similarity, then compute soft precision/recall for the prediction.
# Return values are discarded; the functions print matched tokens.
token_scores= get_score(tokens,ground_truth)
get_softP(pred,tokens,token_scores)
get_softR(pred,ground_truth,tokens,token_scores)

No comments:

Post a Comment