# Spaces: Build error
| import gradio as gr | |
| import pandas as pd | |
| from sentence_transformers import SentenceTransformer, util | |
# Demo corpus as (sentence, English gloss) pairs. The gloss is None for
# sentences that are already written in English.
_SAMPLE_PAIRS = [
    ("Molly ate a fish", None),
    ("Давайте посетим море", "Let's visit the seaside"),
    ("I would like to sell you a house", None),
    ("Я пытаюсь купить дачу", "I'm trying to buy a summer home"),
    ("J'aimerais vous louer un grand appartement", "I would like to rent a large apartment to you"),
    ("This is a wonderful investment opportunity", None),
    ("これは素晴らしい投資機会です", "This is a great investment opportunity"),
    ("野球はあなたが思うよりも面白いことがあります", "Baseball can be more interesting than you think"),
]

# Sentences in display order; used to build the default textbox content.
sentences = [sentence for sentence, _ in _SAMPLE_PAIRS]

# Maps each non-English sample sentence to its English gloss.
translations = {
    sentence: gloss
    for sentence, gloss in _SAMPLE_PAIRS
    if gloss is not None
}

# One sentence per line, as the "Sentences" textbox expects.
samples = "\n".join(sentences)
# Hugging Face model ids of the two sentence encoders being compared.
MULTILINGUAL_MODEL_ID = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
ENGLISH_MODEL_ID = "sentence-transformers/all-MiniLM-L12-v2"

# Both encoders are loaded once at import time and reused for every request.
# `model` produces the scores reported as "score_multi", `model2` the ones
# reported as "score_en".
model = SentenceTransformer(MULTILINGUAL_MODEL_ID)
model2 = SentenceTransformer(ENGLISH_MODEL_ID)
def do_action(query, documents):
    """Score newline-separated sentences against a query with both models.

    Args:
        query: The search text typed by the user.
        documents: Candidate sentences, one per line. Blank lines are skipped
            and surrounding whitespace is stripped from each sentence.

    Returns:
        A ``(title, table)`` tuple. ``title`` is a Markdown heading naming the
        query. ``table`` is a styled pandas table with the columns ``text``,
        ``score_multi`` (score from ``model``) and ``score_en`` (score from
        ``model2``), sorted by ``score_multi`` in descending order. When
        ``documents`` contains no non-blank line, ``table`` is an empty
        DataFrame with those three columns.
    """
    title = f"## Matches for \"{query}\""

    # Drop blank lines (for example a trailing newline in the textbox) so they
    # are not embedded and ranked as if they were real sentences.
    sentences = [line.strip() for line in documents.split("\n") if line.strip()]
    if not sentences:
        # Nothing to rank; sorting an empty frame by 'score_multi' would raise.
        return title, pd.DataFrame(columns=['text', 'score_multi', 'score_en'])

    query_embedding = model.encode([query])
    corpus_embeddings = model.encode(sentences)
    rankings = util.semantic_search(query_embedding, corpus_embeddings)[0]

    query_embedding2 = model2.encode([query])
    corpus_embeddings2 = model2.encode(sentences)
    # Ask the second model for a score for *every* sentence: each search
    # returns its own best hits (library default top_k), so with the default
    # limit a sentence ranked by the first model could be missing here.
    rankings2 = util.semantic_search(
        query_embedding2, corpus_embeddings2, top_k=len(sentences)
    )[0]

    # Each model orders its hits by its own scores, so the two hit lists must
    # be joined on 'corpus_id' rather than paired position by position.
    score_en_by_id = {hit['corpus_id']: hit['score'] for hit in rankings2}

    results = []
    for ranking in rankings:
        corpus_id = ranking['corpus_id']
        text = sentences[corpus_id]
        if text in translations:
            # Show the English gloss next to known non-English samples.
            text = f"{text} [english: {translations[text]}]"
        results.append({
            'text': text,
            'score_multi': ranking['score'],
            'score_en': score_en_by_id[corpus_id],
        })

    return title, pd.DataFrame(results) \
        .sort_values(by='score_multi', ascending=False) \
        .style.format(precision=2) \
        .background_gradient('YlGnBu')
# Page heading and Markdown blurb shown above the interface.
title = "Multilingual Semantic Search/Similarity Comparison"
desc = """
A small demo to compare the [all-MiniLM-L12-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L12-v2) and [paraphrase-multilingual-MiniLM-L12-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2) embedding models. Notice how the multilingual model scores similar sentences higher, even if they aren't in the same language!
"""

# Inputs: the query text and the sentence list, pre-filled with the samples.
query_input = gr.Textbox(value="Get rich quick by flipping cheap houses")
docs_input = gr.Textbox(value=samples, label="Sentences")

# Output table that receives the scored sentences returned by do_action.
output_scores = gr.DataFrame(label="", wrap=True, scale=2)

# do_action returns (markdown title, table), matching the two outputs below.
demo = gr.Interface(
    fn=do_action,
    inputs=[query_input, docs_input],
    outputs=["markdown", output_scores],
    title=title,
    description=desc,
)
demo.launch()