File size: 804 Bytes
6a6492b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModel

# Model identifier passed to both from_pretrained() calls below, so the
# tokenizer and the model always come from the same checkpoint.
MODEL_NAME = "BAAI/bge-multilingual-gemma2"

# Created once at import time; embed() reuses these module-level objects on
# every call instead of loading them per request.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModel.from_pretrained(MODEL_NAME)

def last_token_pool(last_hidden_states, attention_mask):
    """Return the hidden state of the last non-padding token of each sequence.

    Args:
        last_hidden_states: Tensor indexed as (batch, position, hidden).
        attention_mask: Tensor indexed as (batch, position); non-zero marks a
            real token and zero marks padding.

    Returns:
        Tensor indexed as (batch, hidden) holding one vector per sequence.
    """
    # When the final position is a real token in every row, the batch is
    # either unpadded or left-padded, so the last column is the answer.
    if bool((attention_mask[:, -1] != 0).all()):
        return last_hidden_states[:, -1]

    # Right-padded batch: the last real token of each row sits at
    # (number of real tokens - 1).
    last_positions = attention_mask.sum(dim=1) - 1
    rows = torch.arange(
        last_hidden_states.shape[0], device=last_hidden_states.device
    )
    return last_hidden_states[rows, last_positions]


def embed(text):
    """Embed ``text`` and return the vector as a plain list of floats.

    The model named by ``MODEL_NAME`` is a decoder-only (Gemma2-based)
    embedding model, so there is no CLS token: position 0 is the leading
    token and, with causal attention, does not see the rest of the input.
    Following the model card, the sentence vector is taken from the last
    non-padding token and then L2-normalized, so dot products between
    returned vectors are cosine similarities.

    Args:
        text: The text to embed. If a list of texts is passed, all of them
            are encoded but only the first embedding is returned.

    Returns:
        The first embedding in the batch, converted with ``Tensor.tolist()``.
    """
    inputs = tokenizer(
        text,
        return_tensors="pt",
        padding=True,
        truncation=True,
    )
    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        outputs = model(**inputs)
        pooled = last_token_pool(
            outputs.last_hidden_state, inputs["attention_mask"]
        )
        embeddings = torch.nn.functional.normalize(pooled, p=2, dim=1)
    return embeddings[0].tolist()

# Input widget: a four-line text box with a hint about multilingual input.
_text_box = gr.Textbox(lines=4, placeholder="Enter text in any language")

# Wire the text box to embed() and show the returned list as JSON.
demo = gr.Interface(
    fn=embed,
    inputs=_text_box,
    outputs="json",
    title="BAAI/bge-multilingual-gemma2 Embedding Space",
    description="Multilingual embedding model for semantic search & RAG",
)

# Start serving the interface.
demo.launch()