Spaces:

mohitmayank
/

EmojiFinder

Runtime error

EmojiFinder / app.py

4e79aa0 about 4 years ago

2.97 kB

	## Import
	## ----------------
	import pandas as pd
	import streamlit as st
	from sentence_transformers import SentenceTransformer, util

	## Init
	## ----------------
	# page-level configuration: wide layout and the app's page title
	st.set_page_config(
	    page_title="EmojiFinder 🕵",
	    layout="wide",
	)

	# load the sentence-similarity model (cache for faster loading)
	# Pick the caching decorator: newer Streamlit releases deprecate `st.cache`
	# in favour of `st.cache_resource` for shared objects such as ML models;
	# fall back to the legacy decorator when running on an older release.
	if hasattr(st, "cache_resource"):
	    _cache_model = st.cache_resource
	else:
	    _cache_model = st.cache(allow_output_mutation=True)


	@_cache_model
	def load_similarity_model(model_name='all-MiniLM-L6-v2'):
	    """Load the sentence-similarity model called `model_name`.

	    The result is cached by Streamlit, so the model is only constructed
	    once per distinct `model_name` instead of on every script rerun.

	    Args:
	        model_name: Name passed to `SentenceTransformer`.

	    Returns:
	        The `SentenceTransformer` instance for `model_name`.
	    """
	    return SentenceTransformer(model_name)

	# model names the user can choose between
	supported_models = [
	    'all-MiniLM-L6-v2',
	    'paraphrase-albert-small-v2',
	    'paraphrase-MiniLM-L3-v2',
	    'all-distilroberta-v1',
	    'all-mpnet-base-v2',
	]

	# load the emoji table, then keep only the name and codepoints columns
	emoji_df = pd.read_csv('EmojiCharts_unicodeorg.csv')
	emoji_df = emoji_df[['name', 'codepoints']]

	# function to encode and decode the emoji text
	def encode_emoji(emoji):
	    """Convert a code point label such as ``"U+1F600"`` into its character.

	    Works for hexadecimal code points of any length (e.g. ``"U+A9"``,
	    ``"U+2764"``, ``"U+1F600"``), not only the 4- and 5-digit forms.

	    Args:
	        emoji: A single code point written in hexadecimal, optionally
	            prefixed with ``"U+"``.

	    Returns:
	        The one-character string for that code point, or ``""`` when
	        nothing is left after removing the prefix and whitespace.

	    Raises:
	        ValueError: If the remaining text is not a valid hexadecimal
	            code point.
	    """
	    hex_digits = emoji.replace("U+", "").strip()
	    # empty pieces (e.g. produced by splitting on doubled spaces) map to ""
	    if not hex_digits:
	        return ""
	    # direct conversion; avoids building and decoding a "\UXXXXXXXX" escape
	    return chr(int(hex_digits, 16))

	# function to find the top similar sentences
	def find_similar_sentences(query, target_sentences, n=5):
	    """Return the positions of the `n` targets most similar to `query`.

	    Uses the module-level `model` to embed the query and every target
	    sentence, then ranks the targets by cosine similarity to the query.

	    Args:
	        query: The text to search with.
	        target_sentences: The candidate sentences to rank.
	        n: How many top-ranked positions to return.

	    Returns:
	        A list of integer positions into `target_sentences`, ordered from
	        most to least similar.
	    """
	    # embed the query (as a batch of one) and all candidates
	    query_embedding = model.encode([query], convert_to_tensor=True)
	    target_embeddings = model.encode(target_sentences, convert_to_tensor=True)

	    # cosine similarity of the query against every target; the query is the
	    # only row, so take row 0 as a plain list of scores
	    similarity_matrix = util.pytorch_cos_sim(query_embedding, target_embeddings)
	    similarities = similarity_matrix.tolist()[0]

	    # rank (position, score) pairs by descending score and keep the first n
	    ranked = sorted(enumerate(similarities), key=lambda pair: pair[1], reverse=True)
	    return [position for position, _ in ranked[:n]]

	## App Development
	## ----------------

	# sidebar settings: which model to use and how many emojis to show
	selected_model_name = st.sidebar.selectbox(
	    label='Similarity model',
	    options=supported_models,
	)
	emoji_count = st.sidebar.slider(
	    label='Emoji output count',
	    min_value=1,
	    max_value=10,
	    value=5,
	    step=1,
	)

	# main page: title, intro text, query box and search button
	st.title("EmojiFinder 🕵")
	st.markdown("Want to find the most relevant emoji for your text? EmojiFinder is here to help! 😎")
	query_text = st.text_area(
	    label="Enter your text here: ",
	    value="I love walking on the beach",
	)
	find_button = st.button(label="EmojiFinder help!")

	# load (or fetch from cache) the model picked in the sidebar
	model = load_similarity_model(selected_model_name)

	# callback: run the search (and show the spinner) only when the button was
	# pressed, instead of entering the spinner on every script rerun
	if find_button:
	    if not query_text.strip():
	        # nothing meaningful to search for
	        st.warning("Please enter some text to find emojis for.")
	    else:
	        with st.spinner("EmojiFinder is looking for clues to find the best emoji...."):
	            # find the top N emoji names most similar to the query; pass the
	            # names as a plain list of strings rather than a pandas Series
	            top_indices = find_similar_sentences(
	                query_text, emoji_df['name'].tolist(), emoji_count
	            )
	            # print the emojis
	            for i in top_indices:
	                emoji = emoji_df.iloc[i]
	                # an emoji can consist of several space-separated codepoints
	                glyphs = ' '.join(encode_emoji(x) for x in emoji['codepoints'].split(' '))
	                st.write(f'{emoji["name"]} - {glyphs}')