gamza committed on
Commit
a35fa85
·
1 Parent(s): 9084911

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -0
app.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
import pandas as pd
import torch
from sklearn.metrics.pairwise import cosine_similarity
from transformers import BertTokenizerFast, BertModel
4
+
5
# Hub checkpoint shared by the tokenizer and the encoder, so that token ids
# produced by one match the embedding table of the other.
_BERT_CHECKPOINT = "kykim/bert-kor-base"
tokenizer_bert = BertTokenizerFast.from_pretrained(_BERT_CHECKPOINT)
model_bert = BertModel.from_pretrained(_BERT_CHECKPOINT)

# Pickled pandas objects expected next to this script: `df` supplies the book
# fields read by recommend(), `df_emb` the stored review embeddings.
# NOTE(review): unpickling can execute arbitrary code; these files must come
# from a trusted source.
df = pd.read_pickle("BookData_real_real_final.pkl")
df_emb = pd.read_pickle("review_emb.pkl")
10
+
11
# User-facing texts passed to the Gradio interface. The literals are UTF-8
# Korean (plus emoji), restored from a mis-decoded rendering of the same bytes.
title = "🍀고민 해결 도서 추천 챗봇🍀"
description = "당신의 고민 해결을 도와줄 책을 추천 해드립니다"
# One example row; each inner list holds the value for the single text input.
examples = [["요즘 잠이 안 와"]]
14
+
15
def embed_text(text):
    """Return a mean-pooled BERT embedding for ``text``.

    The text is tokenized with the module-level ``tokenizer_bert``, encoded
    by ``model_bert``, and the last hidden states are averaged over the token
    axis.

    Args:
        text: The string to embed.

    Returns:
        A NumPy array holding the first (only) row of the pooled output.
    """
    # Truncate to the encoder's position-embedding limit; longer inputs would
    # otherwise fail inside the model instead of producing an embedding.
    inputs = tokenizer_bert(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=model_bert.config.max_position_embeddings,
    )
    # Inference only: skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        outputs = model_bert(**inputs)
    embeddings = outputs.last_hidden_state.mean(dim=1)  # use the mean embedding
    return embeddings.numpy()[0]
20
+
21
def recommend(message):
    """Recommend the book whose stored review embedding best matches ``message``.

    The message is embedded with ``embed_text`` and compared, by cosine
    similarity, against every vector in ``df_emb['서평임베딩']``. The label of
    the best-scoring row is then looked up in ``df`` to read the book's title,
    author and publisher.

    Args:
        message: The user's free-text input.

    Returns:
        A formatted Korean recommendation sentence naming the author, title
        and publisher of the selected book.
    """
    query_vec = embed_text(message)

    # Score all stored embeddings in a single call rather than one sklearn
    # call per row; the result is a (1, n_rows) matrix of plain floats.
    review_vecs = df_emb['서평임베딩'].tolist()
    scores = pd.Series(
        cosine_similarity([query_vec], review_vecs)[0],
        index=df_emb.index,
    )

    # The lookup is by index label, so `df` must share `df_emb`'s index.
    answer = df.loc[scores.idxmax()]
    book_title = answer['제목']
    book_author = answer['작가']
    book_publi = answer['출판사']

    # An f-string formats non-string cell values instead of raising TypeError.
    return f"[{book_author}] 작가님의 「{book_title}」 추천합니다😊 (출판사:{book_publi})"
33
+
34
+
35
# Build a text-in / text-out Gradio UI around recommend() and start serving.
# `gr` is the gradio module alias (`import gradio as gr` at the top of the file).
iface = gr.Interface(
    fn=recommend,
    inputs="text",
    outputs="text",
    theme="finlaymacklon/boxy_violet",
    title=title,
    description=description,
    examples=examples,
)
iface.launch()