Spaces:
Sleeping
Sleeping
from transformers import BertTokenizerFast, BertModel
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd
import gradio as gr
import torch
# Pretrained checkpoint shared by the tokenizer and the encoder.
_BERT_CHECKPOINT = "kykim/bert-kor-base"

tokenizer_bert = BertTokenizerFast.from_pretrained(_BERT_CHECKPOINT)
model_bert = BertModel.from_pretrained(_BERT_CHECKPOINT)

# Book table (looked up by row label in recommend()) and the table of
# precomputed embeddings that queries are compared against.
# NOTE(review): unpickling can execute arbitrary code -- only load these
# files from a trusted source.
df = pd.read_pickle('BookData_real_real_final.pkl')
df_emb = pd.read_pickle('review_emb.pkl')

# Text shown in the Gradio UI.
# NOTE(review): these literals look mis-encoded in this copy of the file --
# confirm against the original before changing them.
title = "πκ³ λ―Ό ν΄κ²° λμ μΆμ²π"
description = "λΉμ μ κ³ λ―Ό ν΄κ²°μ λμμ€ μ± μ μΆμ² ν΄λ립λλ€β₯"
examples = [["μμ¦ μ μ΄ μ μ"], ["μμΌλ‘ λ ν΄μΌ ν κΉ?"]]
def embed_text(text):
    """Embed ``text`` as a single vector with the module-level BERT model.

    The text is tokenized with ``tokenizer_bert``, passed through
    ``model_bert``, and the last hidden states are averaged over the token
    axis (mean pooling).

    Args:
        text: The input string to embed.

    Returns:
        The mean-pooled embedding of the first (only) sequence in the batch,
        as a NumPy array.
    """
    # Truncate to the model's positional limit; a longer input would
    # otherwise fail inside the forward pass.
    inputs = tokenizer_bert(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=model_bert.config.max_position_embeddings,
    )
    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        outputs = model_bert(**inputs)
    embeddings = outputs.last_hidden_state.mean(dim=1)  # use the mean embedding
    return embeddings.detach().numpy()[0]
def recommend(message):
    """Recommend the book whose stored embedding is most similar to ``message``.

    The message is embedded with ``embed_text`` and compared, by cosine
    similarity, against every embedding stored in ``df_emb``. The row of
    ``df`` carrying the same index label as the best match is used to build
    the reply.

    Args:
        message: Free-text input from the user.

    Returns:
        A formatted recommendation string built from the author, title and
        publisher fields of the best-matching row.
    """
    emb = embed_text(message)

    # Score all stored embeddings in one call instead of one call per row;
    # the result is a plain float array, so idxmax works on a numeric Series.
    similarities = cosine_similarity([emb], df_emb['μνμλ² λ©'].tolist())[0]
    best_label = pd.Series(similarities, index=df_emb.index).idxmax()

    answer = df.loc[best_label]
    book_title = answer['μ λͺ©']
    book_author = answer['μκ°']
    book_publi = answer['μΆνμ¬']
    # f-string formatting also tolerates non-string field values, which
    # plain concatenation would reject with a TypeError.
    return f"[{book_author}] μκ°λμ γ{book_title}γ μΆμ²ν©λλ€π (μΆνμ¬:{book_publi})"
# Single text-in / text-out Gradio UI around recommend().
iface = gr.Interface(
    fn=recommend,
    inputs="text",
    outputs="text",
    title=title,
    description=description,
    examples=examples,
    theme="finlaymacklon/boxy_violet",
)

# Start the web app as soon as the module is executed.
iface.launch()