# NOTE(review): the three lines below were web-page scrape residue (commit-page
# header: username / commit message / short hash) pasted above the code; they
# are commented out here so the module is importable. Original text:
#   tymbarq123's picture
#   change files
#   6213a2a
from typing import Optional

import requests
from fastapi import FastAPI
from pydantic import BaseModel
# FastAPI application exposing an OpenAI-style embeddings endpoint.
app = FastAPI()
# Base URL of the local Ollama server that actually computes the embeddings.
OLLAMA_API_BASE = "http://localhost:11434"
class EmbeddingRequest(BaseModel):
    """Request body for POST /api/embeddings.

    `instruction` is prepended to `input` before the prompt is forwarded to
    Ollama; `normalize` requests L2 normalization of the returned vector.
    """

    input: str
    model: str = "dengcao/qwen3-embedding-0.6b:q8_0"
    # Instruction prefix, as recommended by the model's documentation.
    instruction: str = "Represent this sentence for retrieval: "
    # Recommended for Qwen3 embeddings.
    normalize: bool = True
    # Extra Ollama options; was annotated `dict = None`, which pydantic v2
    # rejects (non-optional field with a None default) — must be Optional.
    options: Optional[dict] = None
@app.post("/api/embeddings")
def get_embedding(request: EmbeddingRequest):
    """Proxy an embedding request to the local Ollama server.

    Builds the prompt as instruction + input, forwards it to Ollama's
    /api/embeddings, optionally L2-normalizes the vector, and returns an
    OpenAI-style response dict. On any failure, returns a JSON error body
    instead of raising (so the client always gets a structured response).
    """
    full_prompt = request.instruction + request.input
    payload = {
        "model": request.model,
        "prompt": full_prompt,
        "options": request.options or {},
    }
    try:
        # Timeout so a hung Ollama server cannot stall this endpoint forever
        # (the original call had no timeout and could block indefinitely).
        response = requests.post(
            f"{OLLAMA_API_BASE}/api/embeddings", json=payload, timeout=60
        )
        # Surface HTTP errors instead of parsing an error body as a result.
        response.raise_for_status()
        data = response.json()
        embedding = data.get("embedding", [])
        if request.normalize and embedding:
            # Optional normalization (the model docs suggest using it).
            embedding = _l2_normalize(embedding)
        prompt_tokens = data.get("prompt_eval_count", 0)
        return {
            "object": "list",
            "data": [
                {
                    "object": "embedding",
                    "embedding": embedding,
                    "index": 0,
                }
            ],
            "model": request.model,
            "usage": {
                # Ollama reports only prompt tokens for embeddings, so both
                # counters carry the same value.
                "prompt_tokens": prompt_tokens,
                "total_tokens": prompt_tokens,
            },
        }
    except Exception as e:
        # Top-level boundary: report the failure as JSON rather than a 500.
        return {
            "error": str(e),
            "message": "Failed to generate embedding. Check if Ollama is running.",
        }


def _l2_normalize(vec):
    """Return *vec* scaled to unit L2 norm (returned unchanged if norm is 0)."""
    from math import sqrt

    norm = sqrt(sum(x * x for x in vec))
    return [x / norm for x in vec] if norm > 0 else vec
@app.get("/")
def home():
    """Health/info endpoint describing how to call the embeddings API."""
    example_body = {
        "input": "Your text here",
        "instruction": "Optional task-specific instruction",
    }
    return {
        "status": "running",
        "model": "dengcao/qwen3-embedding-0.6b:q8_0",
        "endpoint": "/api/embeddings",
        "method": "POST",
        "body": example_body,
        "info": "Qwen3-Embedding-0.6B supports instructions for better performance.",
    }