# NOTE(review): the three lines below were web-page scrape residue (commit-page
# header: username / commit message / short hash) pasted above the code; they
# are commented out here so the module is importable. Original text:
#   tymbarq123's picture
#   change files
#   6213a2a
from typing import Optional

import requests
from fastapi import FastAPI
from pydantic import BaseModel
# FastAPI application exposing an OpenAI-style embeddings endpoint.
app = FastAPI()
# Base URL of the local Ollama server that actually computes the embeddings.
OLLAMA_API_BASE = "http://localhost:11434"
class EmbeddingRequest(BaseModel):
    """Request body for POST /api/embeddings.

    `instruction` is prepended to `input` before the prompt is forwarded to
    Ollama; `normalize` requests L2 normalization of the returned vector.
    """

    input: str
    model: str = "dengcao/qwen3-embedding-0.6b:q8_0"
    # Instruction prefix, as recommended by the model's documentation.
    instruction: str = "Represent this sentence for retrieval: "
    # Recommended for Qwen3 embeddings.
    normalize: bool = True
    # Extra Ollama options; was annotated `dict = None`, which pydantic v2
    # rejects (non-optional field with a None default) — must be Optional.
    options: Optional[dict] = None
@app.post("/api/embeddings")
def get_embedding(request: EmbeddingRequest):
    """Proxy an embedding request to the local Ollama server.

    Builds the prompt as instruction + input, forwards it to Ollama's
    /api/embeddings, optionally L2-normalizes the vector, and returns an
    OpenAI-style response dict. On any failure, returns a JSON error body
    instead of raising (so the client always gets a structured response).
    """
    full_prompt = request.instruction + request.input
    payload = {
        "model": request.model,
        "prompt": full_prompt,
        "options": request.options or {},
    }
    try:
        # Timeout so a hung Ollama server cannot stall this endpoint forever
        # (the original call had no timeout and could block indefinitely).
        response = requests.post(
            f"{OLLAMA_API_BASE}/api/embeddings", json=payload, timeout=60
        )
        # Surface HTTP errors instead of parsing an error body as a result.
        response.raise_for_status()
        data = response.json()
        embedding = data.get("embedding", [])
        if request.normalize and embedding:
            # Optional normalization (the model docs suggest using it).
            embedding = _l2_normalize(embedding)
        prompt_tokens = data.get("prompt_eval_count", 0)
        return {
            "object": "list",
            "data": [
                {
                    "object": "embedding",
                    "embedding": embedding,
                    "index": 0,
                }
            ],
            "model": request.model,
            "usage": {
                # Ollama reports only prompt tokens for embeddings, so both
                # counters carry the same value.
                "prompt_tokens": prompt_tokens,
                "total_tokens": prompt_tokens,
            },
        }
    except Exception as e:
        # Top-level boundary: report the failure as JSON rather than a 500.
        return {
            "error": str(e),
            "message": "Failed to generate embedding. Check if Ollama is running.",
        }


def _l2_normalize(vec):
    """Return *vec* scaled to unit L2 norm (returned unchanged if norm is 0)."""
    from math import sqrt

    norm = sqrt(sum(x * x for x in vec))
    return [x / norm for x in vec] if norm > 0 else vec
@app.get("/")
def home():
    """Health/info endpoint describing how to call the embeddings API."""
    example_body = {
        "input": "Your text here",
        "instruction": "Optional task-specific instruction",
    }
    return {
        "status": "running",
        "model": "dengcao/qwen3-embedding-0.6b:q8_0",
        "endpoint": "/api/embeddings",
        "method": "POST",
        "body": example_body,
        "info": "Qwen3-Embedding-0.6B supports instructions for better performance.",
    }