from fastapi import FastAPI
from pydantic import BaseModel
from huggingface_hub import InferenceClient

# Initialize FastAPI
app = FastAPI(title="Chat FastAPI", version="1.0")

# Inference client for the hosted model
MODEL_ID = "google/gemma-2b-it"
client = InferenceClient(model=MODEL_ID)


class ChatRequest(BaseModel):
    """Request body for POST /chat.

    message: the new user message.
    history: optional prior turns as [user_msg, bot_msg] pairs.
    """

    message: str
    history: list[list[str]] | None = None


def _build_prompt(message: str, history: list[list[str]]) -> str:
    """Flatten prior [user, bot] turns plus the new message into one prompt string."""
    turns = [f"User: {user_msg}\nAssistant: {bot_msg}\n" for user_msg, bot_msg in history]
    turns.append(f"User: {message}\nAssistant:")
    # "".join avoids repeated string concatenation in a loop.
    return "".join(turns)


@app.get("/")
def home():
    """Health-check endpoint confirming the service is up."""
    return {"message": "✅ Chat FastAPI está corriendo correctamente"}


@app.post("/chat")
def chat_endpoint(request: ChatRequest):
    """Generate an assistant reply for *request.message* given the chat history.

    Returns a JSON object with the reply and the updated history. Errors are
    surfaced as a 200 payload with an error message (original behavior kept).
    """
    history = request.history or []
    prompt = _build_prompt(request.message, history)
    try:
        # BUG FIX: with stream=True (and details left False), text_generation
        # yields plain str chunks, not objects with a .token attribute — the
        # original `chunk.token.text` raised AttributeError on every chunk.
        response = "".join(
            chunk
            for chunk in client.text_generation(prompt, max_new_tokens=128, stream=True)
        )
        reply = response.strip()
        history.append([request.message, reply])
        return {"response": reply, "history": history}
    except Exception as e:
        # Best-effort error surface; kept as a normal response rather than a
        # 500 so existing clients keep working.
        return {"response": f"⚠️ Error interno: {e}", "history": history}