Spaces:
Running
Running
File size: 1,777 Bytes
import gradio as gr
import os
from model2vec import StaticModel

# Silence the HuggingFace tokenizers fork-parallelism warning that is
# emitted when the process forks after a tokenizer has been used.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Load a Model2Vec static embedding model once at module import time.
# Static models do a lookup + pooling instead of a transformer forward
# pass, so inference stays fast on CPU even for long inputs.
model = StaticModel.from_pretrained("minishlab/potion-base-32M")
def generate_embedding(text: str) -> dict:
    """Embed a single text and return a JSON-serializable response dict.

    Args:
        text: Input text. Long inputs are fine — static Model2Vec models
            need no transformer forward pass. ``None`` or whitespace-only
            input is treated as empty.

    Returns:
        dict with:
            - "embedding": list[float] — one normalized embedding vector
              (empty list for empty input)
            - "dimension": int — length of the vector (0 for empty input)
            - "note": str — present only on the empty-input path
    """
    if not text or not text.strip():
        # Mirror the success-path keys so API consumers see one schema.
        return {
            "embedding": [],
            "dimension": 0,
            "note": "Empty input",
        }

    cleaned_text = text.strip()

    # NOTE(review): convert_to_numpy / normalize_embeddings are
    # sentence-transformers-style kwargs — confirm model2vec's
    # StaticModel.encode accepts them (it returns numpy by default).
    embedding = model.encode(
        [cleaned_text],
        convert_to_numpy=True,
        normalize_embeddings=True,  # unit-norm, ready for cosine similarity
    )[0].tolist()

    return {
        "embedding": embedding,
        # Derive the dimension from the vector instead of hard-coding a
        # constant, so the response stays correct if the model is swapped.
        "dimension": len(embedding),
    }
# Single-text Gradio interface. gr.Interface also exposes the wrapped
# function as a REST API endpoint automatically.
demo = gr.Interface(
    fn=generate_embedding,
    inputs=gr.Textbox(
        lines=12,
        placeholder="Paste your text here (500-1000+ tokens works instantly)...",
        label="Input Text",
    ),
    outputs=gr.JSON(label="Embedding Response"),
    title="⚡ Qwen3-Style Fast Embedding API (Single Text)",
    description="""Ultra-fast static embedding model (potion-base-32M).
Best reliable CPU option • 500× faster than transformers • Handles long texts instantly.
Returns **one** embedding vector per call.""",
    examples=[
        ["What is the capital of France? Explain it in detail with historical context and why it matters today."],
        # Long repeated text to demonstrate throughput on big inputs.
        ["A very long document with many tokens to test speed... " * 50],
    ],
)

demo.launch()