from fastapi import FastAPI
from pydantic import BaseModel
from huggingface_hub import InferenceClient

# Initialize FastAPI
app = FastAPI(title="Chat FastAPI", version="1.0")

# Inference client for the hosted model
MODEL_ID = "google/gemma-2b-it"
client = InferenceClient(model=MODEL_ID)


class ChatRequest(BaseModel):
    """Request body for POST /chat.

    message: the new user message.
    history: optional prior turns as [user_msg, bot_msg] pairs.
    """

    message: str
    history: list[list[str]] | None = None


def _build_prompt(message: str, history: list[list[str]]) -> str:
    """Flatten prior [user, bot] turns plus the new message into one prompt string."""
    turns = [f"User: {user_msg}\nAssistant: {bot_msg}\n" for user_msg, bot_msg in history]
    turns.append(f"User: {message}\nAssistant:")
    # "".join avoids repeated string concatenation in a loop.
    return "".join(turns)


@app.get("/")
def home():
    """Health-check endpoint confirming the service is up."""
    return {"message": "✅ Chat FastAPI está corriendo correctamente"}


@app.post("/chat")
def chat_endpoint(request: ChatRequest):
    """Generate an assistant reply for *request.message* given the chat history.

    Returns a JSON object with the reply and the updated history. Errors are
    surfaced as a 200 payload with an error message (original behavior kept).
    """
    history = request.history or []
    prompt = _build_prompt(request.message, history)
    try:
        # BUG FIX: with stream=True (and details left False), text_generation
        # yields plain str chunks, not objects with a .token attribute — the
        # original `chunk.token.text` raised AttributeError on every chunk.
        response = "".join(
            chunk
            for chunk in client.text_generation(prompt, max_new_tokens=128, stream=True)
        )
        reply = response.strip()
        history.append([request.message, reply])
        return {"response": reply, "history": history}
    except Exception as e:
        # Best-effort error surface; kept as a normal response rather than a
        # 500 so existing clients keep working.
        return {"response": f"⚠️ Error interno: {e}", "history": history}