Spaces:

ccocks-deca
/

embed-this

Runtime error

App Files Files Community

ccocks-deca committed on Apr 4

Commit

a6b7976

verified ·

1 Parent(s): 13a3776

Update app.py

Browse files

Files changed (1) hide show

app.py +24 -35

app.py CHANGED Viewed

@@ -2,66 +2,55 @@ import gradio as gr
 import os
 from model2vec import StaticModel
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
-# Distilled Qwen3-Embedding-0.6B (static + ultra-fast on CPU)
-model = StaticModel.from_pretrained("futur/Qwen3-Embedding-0.6B-model2vec-onnx")
-def generate_embedding(text: str, use_query_mode: bool):
-    """Single text input → returns one embedding vector."""
     if not text or not text.strip():
         return {
             "embedding": [],
             "text": "",
-            "dimension": 1024,
-            "mode": "query (with instruction)" if use_query_mode else "document",
             "note": "Empty input"
         }
     cleaned_text = text.strip()
-    # Query mode adds instruction (keeps Qwen3 behavior for better retrieval)
-    prompt_name = "query" if use_query_mode else None
     embedding = model.encode(
-        [cleaned_text],                    # single text wrapped as list
-        prompt_name=prompt_name,
         convert_to_numpy=True,
-        normalize_embeddings=True,         # cosine similarity ready
-    )[0].tolist()                          # take first (and only) vector
     return {
-        "embedding": embedding,            # single list of floats (1024-dim)
         "text": cleaned_text,
         "dimension": len(embedding),
-        "mode": "query (with instruction)" if use_query_mode else "document (raw)",
-        "note": "Distilled Qwen3-Embedding-0.6B (Model2Vec) • <1s even for 500–1000+ tokens on free CPU"
     }
-# Simple Gradio 5+ interface + clean REST API
 demo = gr.Interface(
     fn=generate_embedding,
-    inputs=[
-        gr.Textbox(
-            lines=10,
-            placeholder="Paste your text here (500–1000+ tokens is fine – it will be fast)...",
-            label="Input Text",
-        ),
-        gr.Checkbox(
-            label="Use Query Mode (recommended for search / RAG)",
-            value=True,
-            info="Adds instruction automatically for much better retrieval performance",
-        ),
-    ],
     outputs=gr.JSON(label="Embedding Response"),
-    title="🚀 Qwen3-Embedding-0.6B (Distilled) – Single Text API",
-    description="""Distilled version of Qwen/Qwen3-Embedding-0.6B using Model2Vec.
-    Same family & quality • 500× faster on CPU • Handles long texts instantly.
-    Returns **one** embedding vector per call.""",
     examples=[
-        ["What is the capital of France? Explain it in detail with historical context.", True],
-        ["A very long document text repeated many times to simulate 800 tokens... " * 40, False],
     ],
 )

 import os
 from model2vec import StaticModel
# Suppress tokenizer warnings by switching tokenizers parallelism off.
os.environ.update({"TOKENIZERS_PARALLELISM": "false"})
# Static Model2Vec checkpoint, loaded once when the module is imported.
model = StaticModel.from_pretrained("minishlab/potion-base-32M")
def generate_embedding(text: str) -> dict:
    """Embed a single text and return the vector together with metadata.

    Args:
        text: Raw input text. Leading/trailing whitespace is stripped
            before the text is encoded.

    Returns:
        A dict with the keys ``embedding`` (list of floats; empty for
        blank input), ``text`` (the stripped input), ``dimension`` and
        ``note``.
    """
    if not text or not text.strip():
        return {
            "embedding": [],
            "text": "",
            # Ask the loaded model for its output width: a hard-coded
            # number goes stale whenever the checkpoint is swapped.
            "dimension": int(model.dim),
            "note": "Empty input"
        }
    cleaned_text = text.strip()
    # NOTE(review): convert_to_numpy / normalize_embeddings are
    # sentence-transformers-style keywords; model2vec's encode() appears to
    # accept them only for compatibility, and normalization is presumably
    # governed by the model's own setting -- confirm against the
    # model2vec documentation.
    embedding = model.encode(
        [cleaned_text],              # single text wrapped in a list
        convert_to_numpy=True,
        normalize_embeddings=True,
    )[0].tolist()                    # take the first (and only) vector
    return {
        "embedding": embedding,      # flat list of floats
        "text": cleaned_text,
        "dimension": len(embedding),
        "note": "minishlab/potion-base-32M (static) • <0.5s even for 500–1000+ tokens on free CPU"
    }
# Single-text Gradio interface around generate_embedding (UI + REST API).
# The description is an f-string so the advertised vector size comes from the
# loaded model instead of a hard-coded number that can drift out of date.
demo = gr.Interface(
    fn=generate_embedding,
    inputs=gr.Textbox(
        lines=12,
        placeholder="Paste your text here (500–1000+ tokens works instantly)...",
        label="Input Text",
    ),
    outputs=gr.JSON(label="Embedding Response"),
    title="⚡ Qwen3-Style Fast Embedding API (Single Text)",
    description=f"""Ultra-fast static embedding model (potion-base-32M).
    Best reliable CPU option • 500× faster than transformers • Handles long texts instantly.
    Returns **one** {model.dim}-dim embedding vector per call.""",
    examples=[
        ["What is the capital of France? Explain it in detail with historical context and why it matters today."],
        ["A very long document with many tokens to test speed... " * 50],
    ],
)