File size: 1,726 Bytes
7e2e9ae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0b10eee
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6c2dbae
0b10eee
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import gradio as gr
import joblib
import pickle
import numpy as np
from janome.tokenizer import Tokenizer

# Load the model and lookup data once at import time so that every request
# served by the app reuses the same objects.
# NOTE(review): presumably a gensim Doc2Vec model, judging by the file name
# and the later infer_vector() call -- confirm against the training script.
model = joblib.load('./doc2vec.pkl')

# NOTE(review): unpickling runs arbitrary code, so these files must only
# ever come from a trusted source.
with open('document_vecs.pkl', 'rb') as vecs_file:
    document_vecs = pickle.load(vecs_file)

with open('Title.pkl', 'rb') as titles_file:
    Title = pickle.load(titles_file)

_tokenizer = None


def _get_tokenizer():
    """Return the shared janome Tokenizer, creating it on first use."""
    global _tokenizer
    if _tokenizer is None:
        _tokenizer = Tokenizer()
    return _tokenizer


def sep_by_janome(text):
    """Split text into a list of surface-form tokens using janome.

    Args:
        text: The string to tokenize.

    Returns:
        A list holding the ``surface`` string of every token janome
        produced, in the order they were produced.
    """
    # Constructing a Tokenizer is costly, so one instance is reused across
    # calls instead of building a new one for every request.
    return [token.surface for token in _get_tokenizer().tokenize(text)]

def cos_calc(text):
    """Rank the stored documents by cosine similarity to ``text``.

    The text is tokenized with ``sep_by_janome``, converted to a vector with
    ``model.infer_vector``, and compared against every vector in
    ``document_vecs``.

    Args:
        text: Query string to search for.

    Returns:
        A tuple of five strings, best match first. Each string holds the
        matching entry of ``Title`` followed by its cosine similarity. When
        fewer than five documents are available, the remaining slots are
        empty strings.
    """
    rank_size = 5

    tokens = sep_by_janome(text)
    input_vec = model.infer_vector(tokens)

    # The query norm does not change per document, so compute it once.
    input_norm = np.linalg.norm(input_vec)
    cos_sim = np.array([
        np.dot(input_vec, doc_vec) / (input_norm * np.linalg.norm(doc_vec))
        for doc_vec in document_vecs
    ])

    # Sort once and read both the title and the score through the same
    # indices, so a title can never be paired with a score taken from a
    # different ordering.
    top_indices = np.argsort(cos_sim)[::-1][:rank_size]
    results = [
        f'{Title[idx]} \nコサイン類似度:{cos_sim[idx]}'
        for idx in top_indices
    ]

    # Callers unpack exactly rank_size values; pad instead of raising when
    # there are fewer documents than ranks.
    results.extend([''] * (rank_size - len(results)))
    return tuple(results)

# One output box per rank, in the same order as the values cos_calc returns.
_rank_labels = ('1位', '2位', '3位', '4位', '5位')
_result_boxes = [gr.Textbox(label=rank_label) for rank_label in _rank_labels]

demo = gr.Interface(
    fn=cos_calc,
    title="プログラミング参考書推薦アプリ",
    inputs=gr.Textbox(label="検索ワード"),
    outputs=_result_boxes,
)

demo.launch()