Spaces:
Runtime error
Runtime error
| ## Import | |
| ## ---------------- | |
| import pandas as pd | |
| import streamlit as st | |
| from sentence_transformers import SentenceTransformer, util | |
| ## Init | |
| ## ---------------- | |
# set config
# The title icon is written as an escape so it survives re-encoding of this
# file; the previous literal was mis-decoded text.
# NOTE(review): presumably U+1F575 (detective) -- confirm against the original app.
st.set_page_config(layout="wide", page_title="EmojiFinder \U0001F575")
# load the similarity model (cached so that script reruns reuse the loaded model)
@st.cache_resource
def load_similarity_model(model_name: str = 'all-MiniLM-L6-v2') -> SentenceTransformer:
    """Load a sentence-transformers model by name.

    The function is wrapped in ``st.cache_resource`` so that the model is
    constructed once per distinct ``model_name`` instead of on every call.

    Args:
        model_name: Name passed straight to ``SentenceTransformer``.

    Returns:
        The ``SentenceTransformer`` instance for ``model_name``.
    """
    return SentenceTransformer(model_name)
# list of supported models (offered in the sidebar model selector)
supported_models = [
    'all-MiniLM-L6-v2',
    'paraphrase-albert-small-v2',
    'paraphrase-MiniLM-L3-v2',
    'all-distilroberta-v1',
    'all-mpnet-base-v2',
]
# read the emoji table, parsing only the two columns the app uses
_EMOJI_COLUMNS = ['name', 'codepoints']
# `usecols` skips the unused columns while parsing; the trailing selection
# pins the column order to the one listed above.
emoji_df = pd.read_csv('EmojiCharts_unicodeorg.csv', usecols=_EMOJI_COLUMNS)[_EMOJI_COLUMNS]
# function to turn a "U+XXXX" code point label into the character it names
def encode_emoji(emoji):
    """Convert a ``U+<hex>`` code point label into its Unicode character.

    Any number of hex digits that forms a valid code point is accepted
    (e.g. ``U+A9``, ``U+2764``, ``U+1F600``, ``U+10FFFF``), not only the
    4- and 5-digit forms.

    Args:
        emoji: Code point label such as ``"U+1F600"``; the ``U+`` marker is
            optional.

    Returns:
        The single character for that code point, or ``""`` when the label
        contains no hex digits.

    Raises:
        ValueError: If the remaining text is not valid hexadecimal or lies
            outside the Unicode range.
    """
    hex_digits = emoji.replace("U+", "").strip()
    if not hex_digits:
        # Nothing to decode (e.g. an empty token from splitting on spaces).
        return ""
    return chr(int(hex_digits, 16))
# function to find the top similar sentences
def find_similar_sentences(query, target_sentences, n=5):
    """Rank ``target_sentences`` by cosine similarity to ``query``.

    Embeddings are computed with the module-level ``model``.

    Args:
        query: Text to compare against every target sentence.
        target_sentences: Sentences to rank. Any iterable of strings is
            accepted (it is copied into a list first, so a pandas Series is
            read by position rather than by index label); a single string is
            treated as one sentence.
        n: Maximum number of indices to return. ``None`` returns all of them.

    Returns:
        Positions into ``target_sentences``, ordered from most to least
        similar, at most ``n`` long. Empty when there are no targets or
        ``n`` is not positive.
    """
    if isinstance(target_sentences, str):
        candidates = [target_sentences]
    else:
        candidates = list(target_sentences)

    # Nothing to rank: skip the (expensive) encoding entirely.
    if not candidates or (n is not None and n <= 0):
        return []

    # compute embeddings
    query_embedding = model.encode([query], convert_to_tensor=True)
    candidate_embeddings = model.encode(candidates, convert_to_tensor=True)

    # cosine similarity of the query against every candidate; the result has
    # one row per query, and there is exactly one query
    scores = util.pytorch_cos_sim(query_embedding, candidate_embeddings).tolist()[0]

    # indices of the n highest scores, best first
    ranked = sorted(range(len(scores)), key=scores.__getitem__, reverse=True)
    return ranked[:n]
## App Development
## ----------------
# settings
selected_model_name = st.sidebar.selectbox('Similarity model', options=supported_models)
emoji_count = st.sidebar.slider('Emoji output count', min_value=1, max_value=10, value=5, step=1)

# title and headers
# The title icon is written as an escape so it survives re-encoding of this file.
# NOTE(review): presumably U+1F575 (detective) -- confirm against the original app.
st.title("EmojiFinder \U0001F575")
# NOTE(review): the trailing "π" below looks like a mis-decoded emoji whose
# remaining bytes were lost; restore it from the original source if available.
st.markdown("Want to find the *most relevant* emoji for your text? **EmojiFinder** is here to help! π")
query_text = st.text_area("Enter your text here: ", "I love walking on the beach")
find_button = st.button("EmojiFinder help!")

# load the model chosen in the sidebar
model = load_similarity_model(selected_model_name)

# callback: only does work on the run triggered by the button
if find_button:
    if not query_text.strip():
        # A blank query would still be ranked; ask for input instead.
        st.warning("Please enter some text first.")
    else:
        with st.spinner("EmojiFinder is looking for clues to find the best emoji...."):
            # find the top N emoji names most similar to the query;
            # a plain list is passed so positions line up with `iloc` below
            top_indices = find_similar_sentences(query_text, emoji_df['name'].tolist(), emoji_count)
            # print the emojis
            for i in top_indices:
                row = emoji_df.iloc[i]
                # an entry may consist of several space-separated code points
                glyphs = ' '.join(encode_emoji(code_point) for code_point in row['codepoints'].split())
                st.write(f'{row["name"]} - {glyphs}')