Spaces:
Sleeping
Sleeping
Upload 2 files
Browse files- app.py +57 -0
- requirements.txt +3 -0
app.py
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
from sentence_transformers import SentenceTransformer, util

# Fast, CPU-friendly defaults (you can add/remove)
# Hugging Face Hub model ids offered in the UI checkbox group below.
# NOTE(review): "Alibaba-NLP/gte-small" — verify this repo id exists on the
# Hub; gte-small is published under "thenlper/gte-small". TODO confirm.
MODEL_CHOICES = [
    "sentence-transformers/all-MiniLM-L6-v2",
    "sentence-transformers/all-mpnet-base-v2",
    "jinaai/jina-embeddings-v2-base-en",
    "Alibaba-NLP/gte-small",
    "intfloat/e5-small-v2",
]

# Simple cache so we don't reload models repeatedly
# Maps model name (str) -> loaded SentenceTransformer instance.
_model_cache = {}
|
| 15 |
+
|
| 16 |
+
def get_model(name: str) -> SentenceTransformer:
    """Return the SentenceTransformer for *name*, loading it on first use.

    Loaded models are kept in the module-level ``_model_cache`` so repeated
    requests for the same model do not trigger a reload.
    """
    try:
        return _model_cache[name]
    except KeyError:
        model = SentenceTransformer(name)
        _model_cache[name] = model
        return model
|
| 20 |
+
|
| 21 |
+
def compare(text_a: str, text_b: str, models: list[str]):
    """Score the cosine similarity of two texts under each selected model.

    Args:
        text_a: First input text (whitespace-trimmed; may be None/empty).
        text_b: Second input text (whitespace-trimmed; may be None/empty).
        models: Hugging Face model ids to evaluate with.

    Returns:
        A list of ``[model_name, similarity]`` rows sorted by similarity,
        highest first. Returns an empty list when either text or the model
        selection is empty (Gradio renders that as an empty table).
    """
    text_a = (text_a or "").strip()
    text_b = (text_b or "").strip()
    models = models or []

    if not text_a or not text_b or not models:
        return []

    rows = []
    for m in models:
        model = get_model(m)
        # Encode both texts in one batched call (one forward pass instead of
        # two); unpacking the (2, dim) tensor yields one row per text.
        # NOTE(review): e5-family models normally expect "query: "/"passage: "
        # prefixes — scores may be suboptimal without them; confirm intent.
        a, b = model.encode(
            [text_a, text_b], convert_to_tensor=True, normalize_embeddings=True
        )
        sim = util.cos_sim(a, b).item()
        rows.append([m, round(sim, 6)])
    # Highest similarity first
    rows.sort(key=lambda r: r[1], reverse=True)
    return rows
|
| 39 |
+
|
| 40 |
+
with gr.Blocks(title="Embedding Similarity (Two Texts)") as demo:
    # Header / usage instructions.
    gr.Markdown(
        "## 🔎 Embedding Similarity\n"
        "Enter two texts. Pick one or more embedding models. "
        "Get cosine similarity scores per model."
    )

    # Side-by-side inputs for the two texts being compared.
    with gr.Row():
        box_a = gr.Textbox(label="Text A", placeholder="Type or paste text A here", lines=3)
        box_b = gr.Textbox(label="Text B", placeholder="Type or paste text B here", lines=3)

    # Multi-select of embedding models; first three are pre-checked.
    model_picker = gr.CheckboxGroup(MODEL_CHOICES, value=MODEL_CHOICES[:3], label="Embedding models")

    compute_btn = gr.Button("Compute similarity")
    results = gr.Dataframe(headers=["model", "cosine_similarity"], datatype=["str", "number"], wrap=True)

    # Wire the button to the scoring function; output fills the table.
    compute_btn.click(compare, [box_a, box_b, model_picker], results)

if __name__ == "__main__":
    demo.launch()
|
requirements.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio
|
| 2 |
+
sentence-transformers
|
| 3 |
+
--extra-index-url https://download.pytorch.org/whl/cpu
torch
|