"""Minimal retrieval-augmented question-answering service.

At import time the module loads a seq2seq model, reads ``articles.json``,
embeds every article's ``content`` field and stores the vectors in a flat
L2 FAISS index.  The ``POST /ask`` endpoint retrieves the nearest articles
for a question and asks the model to answer using them as context.
"""
import os

# Set before the Hugging Face libraries are imported so that they pick up
# the cache location (writable cache directory inside the Space).
os.environ["HF_HOME"] = "/tmp/hf"

import json

import faiss
import torch
from fastapi import FastAPI, HTTPException, Request
from sentence_transformers import SentenceTransformer
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

app = FastAPI()

# ---------------------------
# 1. Load the generation model
# ---------------------------
MODEL_NAME = "google/flan-t5-small"  # public and small
TOP_K = 3  # number of articles retrieved per question
MAX_NEW_TOKENS = 150  # upper bound on the generated answer length

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)

# ---------------------------
# 2. Load the articles and build the embedding index
# ---------------------------
with open("articles.json", "r", encoding="utf-8") as f:
    articles = json.load(f)

# One text per article.
sentences = [a["content"] for a in articles]
if not sentences:
    # Without this check the failure would be an opaque IndexError on
    # ``embeddings.shape[1]`` below.
    raise RuntimeError("articles.json contains no articles to index")

# Fast sentence embeddings.
embedder = SentenceTransformer("all-MiniLM-L6-v2")
embeddings = embedder.encode(sentences)

index = faiss.IndexFlatL2(embeddings.shape[1])
index.add(embeddings)


# ---------------------------
# 3. Question endpoint
# ---------------------------
@app.post("/ask")
async def ask(request: Request):
    """Answer a question using the most similar indexed articles as context.

    Expects a JSON object body with a non-empty string field ``question``.

    Returns:
        ``{"answer": <generated text>}``.

    Raises:
        HTTPException: 400 when the body is not valid JSON, is not a JSON
            object, or does not carry a non-empty string ``question``.
    """
    try:
        data = await request.json()
    except ValueError as exc:  # json.JSONDecodeError / UnicodeDecodeError
        raise HTTPException(
            status_code=400, detail="Request body must be valid JSON"
        ) from exc

    if not isinstance(data, dict):
        raise HTTPException(
            status_code=400, detail="Request body must be a JSON object"
        )

    question = data.get("question", "")
    if not isinstance(question, str) or not question.strip():
        raise HTTPException(
            status_code=400,
            detail="Field 'question' must be a non-empty string",
        )

    # Semantic search over the article embeddings.
    q_emb = embedder.encode([question])
    _distances, neighbor_ids = index.search(q_emb, k=TOP_K)

    # FAISS pads the result with -1 when the index holds fewer than k
    # vectors; a negative id would otherwise silently select the last
    # article through Python's negative indexing.
    context = " ".join(sentences[i] for i in neighbor_ids[0] if i >= 0)

    # Prompt for the model (runtime text intentionally left in Romanian).
    prompt = f"Context: {context}\nÎntrebare: {question}\nRăspuns:"

    # NOTE(review): the prompt is tokenized without truncation, so very long
    # articles yield very long inputs -- confirm this is acceptable.
    # NOTE(review): embedding and generation run synchronously inside this
    # coroutine -- confirm blocking the event loop is acceptable here.
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(**inputs, max_new_tokens=MAX_NEW_TOKENS)
    answer = tokenizer.decode(outputs[0], skip_special_tokens=True)

    return {"answer": answer}


# ---------------------------
# 4. Run the server
# ---------------------------
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)