"""Minimal chat API: proxies a prompt to a Hugging Face hosted Mistral model."""

import asyncio
import os

from fastapi import FastAPI, HTTPException, Request
from huggingface_hub import InferenceClient

# Token is read once at import time. InferenceClient accepts token=None, but
# every call would then fail with an auth error — HF_API_KEY must be set in
# the deployment environment.
api_key = os.getenv("HF_API_KEY")

client = InferenceClient(
    model="mistralai/Mistral-7B-Instruct-v0.1",
    token=api_key,
)

app = FastAPI()


@app.post("/chat")
async def chat(request: Request):
    """Generate a completion for the ``prompt`` field of the JSON body.

    Request body: ``{"prompt": "<text>"}``.
    Returns ``{"response": "<generated text>"}`` on success.

    Raises:
        HTTPException 400: body is not a JSON object, or ``prompt`` is
            missing, non-string, or blank.
        HTTPException 502: the upstream inference call failed.
    """
    data = await request.json()
    # request.json() can return any JSON value (list, string, ...); guard
    # before calling .get so malformed bodies become a 400, not a 500.
    if not isinstance(data, dict):
        raise HTTPException(status_code=400, detail="request body must be a JSON object")

    prompt = data.get("prompt", "")
    if not isinstance(prompt, str) or not prompt.strip():
        raise HTTPException(status_code=400, detail="'prompt' must be a non-empty string")

    try:
        # text_generation is a blocking HTTP call; run it in a worker thread
        # so it does not stall the event loop for other concurrent requests.
        response = await asyncio.to_thread(
            client.text_generation, prompt=prompt, max_new_tokens=300
        )
    except Exception as exc:
        # Upstream/network failure -> surface as a gateway error with the
        # original cause chained for server-side logs.
        raise HTTPException(status_code=502, detail="inference request failed") from exc

    return {"response": response}