Spaces:
Sleeping
Sleeping
Upload 2 files
Browse files- app.py +57 -0
- requirements.txt +3 -0
app.py
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
from sentence_transformers import SentenceTransformer, util

# Fast, CPU-friendly defaults (you can add/remove)
# Hugging Face Hub model ids offered in the UI checkbox group below.
# NOTE(review): "Alibaba-NLP/gte-small" — verify this repo id exists on the
# Hub; gte-small is published under "thenlper/gte-small". TODO confirm.
MODEL_CHOICES = [
    "sentence-transformers/all-MiniLM-L6-v2",
    "sentence-transformers/all-mpnet-base-v2",
    "jinaai/jina-embeddings-v2-base-en",
    "Alibaba-NLP/gte-small",
    "intfloat/e5-small-v2",
]

# Simple cache so we don't reload models repeatedly
# Maps model name (str) -> loaded SentenceTransformer instance.
_model_cache = {}
|
| 15 |
+
|
| 16 |
+
def get_model(name: str) -> SentenceTransformer:
    """Return the SentenceTransformer for *name*, loading it on first use.

    Loaded models are kept in the module-level ``_model_cache`` so repeated
    requests for the same model do not trigger a reload.
    """
    try:
        return _model_cache[name]
    except KeyError:
        model = SentenceTransformer(name)
        _model_cache[name] = model
        return model
|
| 20 |
+
|
| 21 |
+
def compare(text_a: str, text_b: str, models: list[str]):
    """Score the cosine similarity of two texts under each selected model.

    Args:
        text_a: First input text (whitespace-trimmed; may be None/empty).
        text_b: Second input text (whitespace-trimmed; may be None/empty).
        models: Hugging Face model ids to evaluate with.

    Returns:
        A list of ``[model_name, similarity]`` rows sorted by similarity,
        highest first. Returns an empty list when either text or the model
        selection is empty (Gradio renders that as an empty table).
    """
    text_a = (text_a or "").strip()
    text_b = (text_b or "").strip()
    models = models or []

    if not text_a or not text_b or not models:
        return []

    rows = []
    for m in models:
        model = get_model(m)
        # Encode both texts in one batched call (one forward pass instead of
        # two); unpacking the (2, dim) tensor yields one row per text.
        # NOTE(review): e5-family models normally expect "query: "/"passage: "
        # prefixes — scores may be suboptimal without them; confirm intent.
        a, b = model.encode(
            [text_a, text_b], convert_to_tensor=True, normalize_embeddings=True
        )
        sim = util.cos_sim(a, b).item()
        rows.append([m, round(sim, 6)])
    # Highest similarity first
    rows.sort(key=lambda r: r[1], reverse=True)
    return rows
|
| 39 |
+
|
| 40 |
+
with gr.Blocks(title="Embedding Similarity (Two Texts)") as demo:
    # Header / usage instructions.
    gr.Markdown(
        "## 🔎 Embedding Similarity\n"
        "Enter two texts. Pick one or more embedding models. "
        "Get cosine similarity scores per model."
    )

    # Side-by-side inputs for the two texts being compared.
    with gr.Row():
        box_a = gr.Textbox(label="Text A", placeholder="Type or paste text A here", lines=3)
        box_b = gr.Textbox(label="Text B", placeholder="Type or paste text B here", lines=3)

    # Multi-select of embedding models; first three are pre-checked.
    model_picker = gr.CheckboxGroup(MODEL_CHOICES, value=MODEL_CHOICES[:3], label="Embedding models")

    compute_btn = gr.Button("Compute similarity")
    results = gr.Dataframe(headers=["model", "cosine_similarity"], datatype=["str", "number"], wrap=True)

    # Wire the button to the scoring function; output fills the table.
    compute_btn.click(compare, [box_a, box_b, model_picker], results)

if __name__ == "__main__":
    demo.launch()
|
requirements.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio
|
| 2 |
+
sentence-transformers
|
| 3 |
+
--extra-index-url https://download.pytorch.org/whl/cpu
torch
|