File size: 1,167 Bytes
8fc0df9
 
2ac7db4
aa078c9
3476852
 
 
 
77fdb6e
2ac7db4
aa078c9
3476852
8fc0df9
 
 
 
3476852
 
 
 
8fc0df9
 
 
77fdb6e
 
 
8fc0df9
77fdb6e
2ac7db4
77fdb6e
 
 
3476852
8fc0df9
2ac7db4
8fc0df9
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
from fastapi import FastAPI
from pydantic import BaseModel
from huggingface_hub import InferenceClient

# Initialize the FastAPI application (title/version show up in the auto-generated
# OpenAPI docs at /docs).
app = FastAPI(title="Chat FastAPI", version="1.0")

# Hugging Face Inference client for the chat model.
# NOTE(review): InferenceClient with no token relies on anonymous/ambient
# credentials — confirm rate limits and auth are acceptable for deployment.
MODEL_ID = "google/gemma-2b-it"
client = InferenceClient(model=MODEL_ID)

# Request-body schema accepted by the /chat endpoint.
class ChatRequest(BaseModel):
    """Body of a POST /chat request: the new user message plus prior turns."""
    # The new user message to answer.
    message: str
    # Prior conversation as [user_msg, bot_msg] pairs; None/absent means a
    # fresh conversation.
    history: list[list[str]] | None = None

@app.get("/")
def home():
    """Liveness probe: confirms the service is up and responding."""
    status = {"message": "✅ Chat FastAPI está corriendo correctamente"}
    return status

@app.post("/chat")
def chat_endpoint(request: ChatRequest):
    """Generate a chat reply for ``request.message`` conditioned on history.

    Flattens the [user, assistant] turn pairs into a plain-text prompt,
    asks the model for a completion, and returns the reply together with
    the updated history. On any failure the error is reported inside the
    response body (HTTP 200) so a simple client UI can display it.
    """
    history = request.history or []

    # Build the prompt with a single join instead of repeated += concatenation.
    turns = [f"User: {user_msg}\nAssistant: {bot_msg}\n" for user_msg, bot_msg in history]
    turns.append(f"User: {request.message}\nAssistant:")
    prompt = "".join(turns)

    try:
        # BUG FIX: with stream=True and the default details=False,
        # InferenceClient.text_generation yields plain `str` chunks, so the
        # old `chunk.token.text` raised AttributeError on every call and the
        # endpoint always fell into the error branch. The handler returns the
        # full reply anyway, so request the complete (non-streamed) generation.
        response = client.text_generation(prompt, max_new_tokens=128)
        history.append([request.message, response.strip()])
        return {"response": response.strip(), "history": history}
    except Exception as e:
        # Best-effort fallback: surface the error in-band rather than as a 500.
        return {"response": f"⚠️ Error interno: {e}", "history": history}