File size: 1,622 Bytes
a35fa85
 
 
a4aea41
a35fa85
 
 
 
 
 
 
b6ce7b1
 
 
a35fa85
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53841ab
a35fa85
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
from transformers import BertTokenizerFast, BertModel
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd
import gradio as gr

# The tokenizer and the encoder are loaded from the same Hugging Face
# checkpoint, so the identifier is spelled out once.
_BERT_CHECKPOINT = "kykim/bert-kor-base"
tokenizer_bert = BertTokenizerFast.from_pretrained(_BERT_CHECKPOINT)
model_bert = BertModel.from_pretrained(_BERT_CHECKPOINT)

# Pickled tables, resolved relative to the current working directory.
# `df` supplies the book fields read by recommend(); `df_emb` supplies the
# per-row embedding column it compares against.
# NOTE(review): unpickling can execute arbitrary code -- only ship pickle
# files that come from a trusted source.
df = pd.read_pickle("BookData_real_real_final.pkl")
df_emb = pd.read_pickle("review_emb.pkl")

# Static text handed to the Gradio interface: page title, description line,
# and the sample inputs offered to the user.
title = "πŸ€κ³ λ―Ό ν•΄κ²° λ„μ„œ μΆ”μ²œπŸ€"
description = "λ‹Ήμ‹ μ˜ κ³ λ―Ό 해결을 도와쀄 책을 μΆ”μ²œ ν•΄λ“œλ¦½λ‹ˆλ‹€β™₯"
# Each example is a one-element row because the interface has one input.
examples = [
    [sample]
    for sample in (
        "μš”μ¦˜ 잠이 μ•ˆ 와",
        "μ•žμœΌλ‘œ 뭘 ν•΄μ•Ό ν• κΉŒ?",
    )
]

def embed_text(text):
    """Return a mean-pooled BERT embedding for *text*.

    The text is tokenized with the module-level ``tokenizer_bert``, passed
    through ``model_bert``, and the last hidden state is averaged over
    dimension 1 (the token axis of the model output).

    Args:
        text: The string to embed. NOTE(review): only the first row of the
            pooled output is returned, so this is written for a single
            string, not a batch.

    Returns:
        A NumPy array holding the pooled vector of the first (only) input.
    """
    # Imported locally because the module does not import torch itself;
    # BertModel is a PyTorch model, so torch is already installed.
    import torch

    # Truncate to the encoder's position limit: longer inputs would
    # otherwise fail inside the model instead of producing an embedding.
    inputs = tokenizer_bert(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=model_bert.config.max_position_embeddings,
    )

    # Inference only -- skip autograd bookkeeping to save time and memory.
    with torch.no_grad():
        outputs = model_bert(**inputs)

    # Use the average of the token embeddings as the text embedding.
    embeddings = outputs.last_hidden_state.mean(dim=1)
    return embeddings.detach().numpy()[0]

def recommend(message):
    """Return a one-line recommendation for the best-matching book.

    The message is embedded with ``embed_text`` and compared by cosine
    similarity against every vector in the embedding column of ``df_emb``.
    The index label of the highest score is then looked up in ``df``.

    Args:
        message: Free-text input from the user.

    Returns:
        A formatted string containing the author, title and publisher
        fields of the selected ``df`` row.

    NOTE(review): the lookup assumes ``df`` and ``df_emb`` share the same
    index labels -- confirm against how the pickles were produced.
    """
    query_vec = embed_text(message)

    # Score all rows in a single vectorised call instead of one sklearn
    # call per row; the values are the same, without the per-call overhead.
    review_vecs = list(df_emb['μ„œν‰μž„λ² λ”©'])
    scores = pd.Series(
        cosine_similarity([query_vec], review_vecs)[0],
        index=df_emb.index,
    )

    # idxmax yields an index *label*, hence .loc rather than .iloc.
    answer = df.loc[scores.idxmax()]
    book_title = answer['제λͺ©']
    book_author = answer['μž‘κ°€']
    book_publi = answer['μΆœνŒμ‚¬']

    # f-string formatting also copes with non-string cells (e.g. a missing
    # value), where `+` concatenation would raise TypeError.
    return f"[{book_author}] μž‘κ°€λ‹˜μ˜ γ€Œ{book_title}」 μΆ”μ²œν•©λ‹ˆλ‹€πŸ˜Š (μΆœνŒμ‚¬:{book_publi})"


# Expose recommend() through a text-in / text-out Gradio interface and start
# it as soon as the module is executed.
_interface_options = {
    "fn": recommend,
    "inputs": "text",
    "outputs": "text",
    "theme": "finlaymacklon/boxy_violet",
    "title": title,
    "description": description,
    "examples": examples,
}
iface = gr.Interface(**_interface_options)
iface.launch()