# Hugging Face Spaces app — the Space was reporting "Runtime error" at startup.
import os

# Point the Hugging Face cache at a writable directory inside the Space.
# NOTE: this must happen before any transformers / sentence_transformers import,
# which is why `os` is imported first and the env var set immediately.
os.environ["HF_HOME"] = "/tmp/hf"

import json

import faiss
import torch
from fastapi import FastAPI, Request
from sentence_transformers import SentenceTransformer
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

app = FastAPI()
# ---------------------------
# 1. Load the generation model
# ---------------------------
MODEL_NAME = "google/flan-t5-small"  # public and small enough for a free Space

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
# ---------------------------
# 2. Load the articles and build the embedding index
# ---------------------------
with open("articles.json", "r", encoding="utf-8") as f:
    articles = json.load(f)

# Each article contributes its "content" field as one searchable document.
sentences = [article["content"] for article in articles]

# Fast sentence embeddings; encode() returns a float32 numpy matrix,
# which is exactly what faiss expects.
embedder = SentenceTransformer("all-MiniLM-L6-v2")
embeddings = embedder.encode(sentences)

# Flat L2 index: exact nearest-neighbour search over all articles.
index = faiss.IndexFlatL2(embeddings.shape[1])
index.add(embeddings)
# ---------------------------
# 3. Question-answering endpoint
# ---------------------------
@app.post("/ask")  # FIX: the route decorator was missing, so the endpoint was never registered
async def ask(request: Request):
    """Answer a question with retrieval-augmented generation.

    Expects a JSON body like {"question": "..."}; retrieves the top
    matching articles from the faiss index, builds a prompt around them,
    and returns {"answer": "..."} generated by the seq2seq model.
    """
    data = await request.json()
    question = data.get("question", "")

    # Semantic search: embed the question and fetch nearest articles.
    q_emb = embedder.encode([question])
    # Never ask faiss for more neighbours than there are documents.
    k = min(3, len(sentences))
    D, I = index.search(q_emb, k=k)
    # faiss pads missing results with -1; skip those instead of letting
    # sentences[-1] silently pick the last article.
    context = " ".join(sentences[i] for i in I[0] if i >= 0)

    # Prompt for the model (user-facing Romanian labels kept as-is).
    prompt = f"Context: {context}\nÎntrebare: {question}\nRăspuns:"
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(**inputs, max_new_tokens=150)
    answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return {"answer": answer}
# ---------------------------
# 4. Run server
# ---------------------------
if __name__ == "__main__":
    # Port 7860 is the default port exposed by Hugging Face Spaces.
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)