Spaces:
Running
Running
File size: 1,777 Bytes
import gradio as gr
import os
from model2vec import StaticModel

# Silence the HuggingFace tokenizers fork-parallelism warning that is
# emitted when the process forks after a tokenizer has been used.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Load a Model2Vec static embedding model once at module import time.
# Static models do a lookup + pooling instead of a transformer forward
# pass, so inference stays fast on CPU even for long inputs.
model = StaticModel.from_pretrained("minishlab/potion-base-32M")
def generate_embedding(text: str) -> dict:
    """Embed a single text and return a JSON-serializable response dict.

    Args:
        text: Input text. Long inputs are fine — static Model2Vec models
            need no transformer forward pass. ``None`` or whitespace-only
            input is treated as empty.

    Returns:
        dict with:
            - "embedding": list[float] — one normalized embedding vector
              (empty list for empty input)
            - "dimension": int — length of the vector (0 for empty input)
            - "note": str — present only on the empty-input path
    """
    if not text or not text.strip():
        # Mirror the success-path keys so API consumers see one schema.
        return {
            "embedding": [],
            "dimension": 0,
            "note": "Empty input",
        }

    cleaned_text = text.strip()

    # NOTE(review): convert_to_numpy / normalize_embeddings are
    # sentence-transformers-style kwargs — confirm model2vec's
    # StaticModel.encode accepts them (it returns numpy by default).
    embedding = model.encode(
        [cleaned_text],
        convert_to_numpy=True,
        normalize_embeddings=True,  # unit-norm, ready for cosine similarity
    )[0].tolist()

    return {
        "embedding": embedding,
        # Derive the dimension from the vector instead of hard-coding a
        # constant, so the response stays correct if the model is swapped.
        "dimension": len(embedding),
    }
# Single-text Gradio interface. gr.Interface also exposes the wrapped
# function as a REST API endpoint automatically.
demo = gr.Interface(
    fn=generate_embedding,
    inputs=gr.Textbox(
        lines=12,
        placeholder="Paste your text here (500-1000+ tokens works instantly)...",
        label="Input Text",
    ),
    outputs=gr.JSON(label="Embedding Response"),
    title="⚡ Qwen3-Style Fast Embedding API (Single Text)",
    description="""Ultra-fast static embedding model (potion-base-32M).
Best reliable CPU option • 500× faster than transformers • Handles long texts instantly.
Returns **one** embedding vector per call.""",
    examples=[
        ["What is the capital of France? Explain it in detail with historical context and why it matters today."],
        # Long repeated text to demonstrate throughput on big inputs.
        ["A very long document with many tokens to test speed... " * 50],
    ],
)

demo.launch()