"""Gradio app that recommends a book for a free-text message.

The message is embedded with a pretrained BERT model and compared, by cosine
similarity, against precomputed embeddings loaded from ``review_emb.pkl``.
The row of ``BookData_real_real_final.pkl`` that shares the index label of the
most similar embedding is returned as the recommendation.
"""
from transformers import BertTokenizerFast, BertModel
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd
import gradio as gr
import torch

tokenizer_bert = BertTokenizerFast.from_pretrained("kykim/bert-kor-base")
model_bert = BertModel.from_pretrained("kykim/bert-kor-base")

# NOTE: pickle files can execute arbitrary code while loading; only ship
# pickles produced by a trusted pipeline alongside this script.
df = pd.read_pickle('BookData_real_real_final.pkl')
df_emb = pd.read_pickle('review_emb.pkl')

# NOTE(review): the string literals in this file look like Korean text that
# was mis-decoded somewhere upstream. They are kept exactly as found because
# several of them are DataFrame column keys -- confirm the file is saved and
# read as UTF-8.
title = "πŸ€κ³ λ―Ό ν•΄κ²° λ„μ„œ μΆ”μ²œπŸ€"
description = "λ‹Ήμ‹ μ˜ κ³ λ―Ό 해결을 도와쀄 책을 μΆ”μ²œ ν•΄λ“œλ¦½λ‹ˆλ‹€β™₯"
examples = [["μš”μ¦˜ 잠이 μ•ˆ 와"], ["μ•žμœΌλ‘œ 뭘 ν•΄μ•Ό ν• κΉŒ?"]]


def embed_text(text: str):
    """Return a single embedding vector for *text*.

    The text is tokenized, passed through ``model_bert``, and the last hidden
    state is averaged over dimension 1 (the token axis of the model output).

    Args:
        text: The input string to embed.

    Returns:
        A NumPy array holding the first (only) row of the averaged hidden
        state.
    """
    inputs = tokenizer_bert(
        text,
        return_tensors="pt",
        # Without truncation an input longer than the model's position
        # embedding table makes the forward pass fail.
        truncation=True,
        max_length=model_bert.config.max_position_embeddings,
    )
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        outputs = model_bert(**inputs)
    # Mean-pool the hidden states into one vector.
    embeddings = outputs.last_hidden_state.mean(dim=1)
    return embeddings.detach().numpy()[0]


def recommend(message: str) -> str:
    """Recommend the book whose stored embedding is closest to *message*.

    Args:
        message: Free-text input from the user.

    Returns:
        A formatted sentence containing the author, title and publisher
        fields of the selected row of ``df``.
    """
    emb = embed_text(message)

    # One vectorised call instead of one cosine_similarity call per row.
    # Assumes every stored embedding is a 1-D vector of the same length as
    # `emb` (the original per-row call made the same assumption).
    similarities = cosine_similarity([emb], list(df_emb['μ„œν‰μž„λ² λ”©']))[0]

    # Keep df_emb's index so idxmax yields a label usable with df.loc.
    scores = pd.Series(similarities, index=df_emb.index)
    answer = df.loc[scores.idxmax()]

    book_title = answer['제λͺ©']
    book_author = answer['μž‘κ°€']
    book_publi = answer['μΆœνŒμ‚¬']
    return (
        f"[{book_author}] μž‘κ°€λ‹˜μ˜ γ€Œ{book_title}」 μΆ”μ²œν•©λ‹ˆλ‹€πŸ˜Š"
        f" (μΆœνŒμ‚¬:{book_publi})"
    )


iface = gr.Interface(
    fn=recommend,
    inputs="text",
    outputs="text",
    theme="finlaymacklon/boxy_violet",
    title=title,
    description=description,
    examples=examples,
)
iface.launch()