Spaces:

wendys-llc
/

example-multilingual-comparison

Build error

App Files Files Community

example-multilingual-comparison / app.py

wendys-llc

Update app.py

a74d5ea verified about 2 years ago

raw

history blame contribute delete

3.19 kB

	import gradio as gr
	import pandas as pd
	from sentence_transformers import SentenceTransformer, util

	# Default corpus shown in the "Sentences" textbox: a mix of English,
	# Russian, French and Japanese sentences, one per entry.
	sentences = [
	    "Molly ate a fish",
	    # Let's visit the seaside
	    "Давайте посетим море",
	    "I would like to sell you a house",
	    # I'm trying to buy a summer home
	    "Я пытаюсь купить дачу",
	    # I would like to rent a large apartment to you
	    "J'aimerais vous louer un grand appartement",
	    "This is a wonderful investment opportunity",
	    # This is a great investment opportunity
	    "これは素晴らしい投資機会です",
	    # Baseball can be more interesting than you think
	    "野球はあなたが思うよりも面白いことがあります",
	]

	# English glosses for the non-English sample sentences, keyed by the exact
	# sentence text so a result row can be annotated with its translation.
	translations = {
	    "Давайте посетим море":
	        "Let's visit the seaside",
	    "Я пытаюсь купить дачу":
	        "I'm trying to buy a summer home",
	    "J'aimerais vous louer un grand appartement":
	        "I would like to rent a large apartment to you",
	    "これは素晴らしい投資機会です":
	        "This is a great investment opportunity",
	    "野球はあなたが思うよりも面白いことがあります":
	        "Baseball can be more interesting than you think",
	}

	# Newline-separated form of the corpus, used as the textbox's initial value.
	samples = "\n".join(sentences)

	# Checkpoint identifiers of the two sentence-embedding models being compared.
	_MULTILINGUAL_CHECKPOINT = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
	_ENGLISH_CHECKPOINT = "sentence-transformers/all-MiniLM-L12-v2"

	# Both models are created once at import time and reused for every request.
	# `model` produces the 'score_multi' column and `model2` the 'score_en'
	# column of the results table built by do_action.
	model = SentenceTransformer(_MULTILINGUAL_CHECKPOINT)
	model2 = SentenceTransformer(_ENGLISH_CHECKPOINT)


	def _scores_by_corpus_id(encoder, query, corpus):
	    """Return ``{corpus index: similarity to query}`` for every corpus entry."""
	    query_embedding = encoder.encode([query])
	    corpus_embeddings = encoder.encode(corpus)
	    # semantic_search keeps only the 10 best hits unless told otherwise; ask
	    # for all of them so every sentence gets a score from this encoder.
	    hits = util.semantic_search(
	        query_embedding, corpus_embeddings, top_k=len(corpus)
	    )[0]
	    return {hit['corpus_id']: hit['score'] for hit in hits}


	def do_action(query, documents):
	    """Score every sentence in ``documents`` against ``query`` with both models.

	    Args:
	        query: The search text.
	        documents: Candidate sentences, one per line. Surrounding whitespace
	            is stripped from each line and blank lines are ignored.

	    Returns:
	        A ``(title, table)`` tuple. ``title`` is a Markdown heading naming
	        the query. ``table`` is a styled DataFrame with the columns
	        ``text``, ``score_multi`` (from ``model``) and ``score_en`` (from
	        ``model2``), sorted by ``score_multi`` in descending order. When
	        ``documents`` contains no non-blank line, ``table`` is an empty,
	        unstyled DataFrame with the same columns.
	    """
	    title = f"## Matches for \"{query}\""

	    # splitlines() copes with "\r\n" input; stripping keeps the lookup in
	    # `translations` working when a line carries stray whitespace.
	    corpus = [line.strip() for line in documents.splitlines()]
	    corpus = [line for line in corpus if line]
	    if not corpus:
	        return title, pd.DataFrame(columns=['text', 'score_multi', 'score_en'])

	    # Each model returns its hits sorted by its *own* score, so the two hit
	    # lists are not aligned position by position. Index the scores by
	    # corpus id so that both numbers in a row describe the same sentence.
	    multi_scores = _scores_by_corpus_id(model, query, corpus)
	    en_scores = _scores_by_corpus_id(model2, query, corpus)

	    results = []
	    for corpus_id, text in enumerate(corpus):
	        if text in translations:
	            text = f"{text} [english: {translations[text]}]"
	        results.append({
	            'text': text,
	            'score_multi': multi_scores[corpus_id],
	            'score_en': en_scores[corpus_id],
	        })

	    table = pd.DataFrame(results).sort_values(by='score_multi', ascending=False)
	    return title, table.style.format(precision=2).background_gradient('YlGnBu')

	# Text shown at the top of the page: heading plus a Markdown description.
	title = "Multilingual Semantic Search/Similarity Comparison"
	desc = """
	A small demo to compare the [all-MiniLM-L12-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L12-v2) and [paraphrase-multilingual-MiniLM-L12-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2) embedding models. Notice how the multilingual model scores similar sentences higher, even if they aren't in the same language!
	"""

	# Input widgets (pre-filled with a sample query and the sample corpus) and
	# the table component that displays the scores.
	query_input = gr.Textbox(value="Get rich quick by flipping cheap houses")
	docs_input = gr.Textbox(label="Sentences", value=samples)
	output_scores = gr.DataFrame(scale=2, wrap=True, label="")

	# do_action returns (markdown title, score table), matching the two outputs.
	demo = gr.Interface(
	    fn=do_action,
	    inputs=[query_input, docs_input],
	    outputs=["markdown", output_scores],
	    title=title,
	    description=desc,
	)
	demo.launch()