zazou2552 committed on
Commit
182ca5a
·
verified ·
1 Parent(s): c60f47f

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +57 -0
  2. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from sentence_transformers import SentenceTransformer, util
3
+
4
# Hugging Face model IDs offered in the UI. The defaults are meant to be
# small enough to run on CPU; add or remove entries as needed.
# NOTE(review): confirm every ID resolves on the Hub and loads with a plain
# SentenceTransformer(name) call -- some repos ship custom modeling code.
MODEL_CHOICES: list[str] = [
    "sentence-transformers/all-MiniLM-L6-v2",
    "sentence-transformers/all-mpnet-base-v2",
    "jinaai/jina-embeddings-v2-base-en",
    "Alibaba-NLP/gte-small",
    "intfloat/e5-small-v2",
]

# Loaded models keyed by model ID, so each one is constructed only once per
# process instead of on every request.
_model_cache: dict[str, "SentenceTransformer"] = {}
15
+
16
def get_model(name: str) -> SentenceTransformer:
    """Return the model registered under *name*, loading it on first use.

    Args:
        name: Model identifier passed straight to ``SentenceTransformer``.

    Returns:
        The cached ``SentenceTransformer`` instance for ``name``.
    """
    if name in _model_cache:
        return _model_cache[name]

    # First request for this model: construct it and remember the instance.
    loaded = SentenceTransformer(name)
    _model_cache[name] = loaded
    return loaded
20
+
21
def compare(text_a: str, text_b: str, models: list[str]):
    """Score the similarity of two texts under each selected model.

    Args:
        text_a: First text; ``None`` is treated as empty and surrounding
            whitespace is stripped.
        text_b: Second text, normalised the same way as ``text_a``.
        models: Model IDs to evaluate; ``None`` is treated as no selection.

    Returns:
        A list of ``[model_id, cosine_similarity]`` rows, with the similarity
        rounded to six decimals and the rows ordered from highest to lowest
        similarity. An empty list is returned when either text is blank or no
        model is selected.
    """
    first = (text_a or "").strip()
    second = (text_b or "").strip()
    selected = models or []

    # Nothing to compute unless both texts and at least one model are present.
    if not (first and second and selected):
        return []

    def _similarity(model_id):
        # Embed both texts with the same model, then compare the two vectors.
        encoder = get_model(model_id)
        vec_a = encoder.encode(first, convert_to_tensor=True, normalize_embeddings=True)
        vec_b = encoder.encode(second, convert_to_tensor=True, normalize_embeddings=True)
        return round(util.cos_sim(vec_a, vec_b).item(), 6)

    scored = [[model_id, _similarity(model_id)] for model_id in selected]

    # Highest similarity first.
    return sorted(scored, key=lambda row: row[1], reverse=True)
39
+
40
# Build the page: a short intro, two text inputs side by side, the model
# picker, a trigger button and a results table.
with gr.Blocks(title="Embedding Similarity (Two Texts)") as demo:
    gr.Markdown(
        value=(
            "## 🔎 Embedding Similarity\n"
            "Enter two texts. Pick one or more embedding models. Get cosine similarity scores per model."
        )
    )

    with gr.Row():
        text_a = gr.Textbox(
            label="Text A",
            placeholder="Type or paste text A here",
            lines=3,
        )
        text_b = gr.Textbox(
            label="Text B",
            placeholder="Type or paste text B here",
            lines=3,
        )

    # The first three entries of MODEL_CHOICES are ticked by default.
    models = gr.CheckboxGroup(
        choices=MODEL_CHOICES,
        value=MODEL_CHOICES[:3],
        label="Embedding models",
    )

    btn = gr.Button(value="Compute similarity")
    out = gr.Dataframe(
        headers=["model", "cosine_similarity"],
        datatype=["str", "number"],
        wrap=True,
    )

    # Clicking the button runs compare() on the current inputs and shows the
    # returned rows in the table.
    btn.click(fn=compare, inputs=[text_a, text_b, models], outputs=out)

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
# Index options apply to the whole requirements file, not to a single
# requirement, so the CPU-only PyTorch wheel index is declared on its own line.
# It is added as an extra index so the other packages still resolve from PyPI.
--extra-index-url https://download.pytorch.org/whl/cpu
gradio
sentence-transformers
torch