"""Small FastAPI front end that forwards prompts to a local Ollama server."""

import logging

import requests
from fastapi import FastAPI
from pydantic import BaseModel

logger = logging.getLogger(__name__)

# Non-streaming text generation endpoint of the local Ollama server.
OLLAMA_GENERATE_URL = "http://localhost:11434/api/generate"

# (connect, read) timeouts in seconds. Without a timeout `requests` waits
# forever on a hung server. The read limit is deliberately generous because a
# non-streaming generation only answers once the whole completion is ready.
# NOTE(review): tune the read limit to the slowest model you expect to serve.
OLLAMA_TIMEOUT = (10, 600)

app = FastAPI()


class PromptRequest(BaseModel):
    """Request body accepted by ``POST /generate``."""

    # The user's prompt; placed after the persona in the final prompt.
    text: str
    # Preamble placed before the user's text, separated by a blank line.
    persona: str = "You are a knowledgable and helpful assistant."
    # Forwarded to Ollama as the ``num_predict`` option
    # (the user's "Max Length" slider).
    max_tokens: int = 500
    # Ollama model tag to run the prompt against.
    model: str = "granite4:latest"


@app.get("/")
def read_root():
    """Health check: return a static status payload.

    This does not contact the Ollama server; it only confirms that the
    FastAPI application itself is up.
    """
    return {"status": "System Ready", "model": "Granite 4"}


@app.post("/generate")
def generate(req: PromptRequest):
    """Send the prompt to Ollama and return the completion with token usage.

    Args:
        req: Validated request carrying the prompt text, persona, token
            limit and model tag.

    Returns:
        On success, ``{"response": <str>, "usage": {"input": <int>,
        "output": <int>}}``. On any failure, ``{"error": <str>}`` -- the
        failure is reported in the body rather than raised, so existing
        clients that check for the ``"error"`` key keep working.
    """
    payload = {
        "model": req.model,
        "prompt": f"{req.persona}\n\n{req.text}",
        "stream": False,
        "options": {
            # `max_tokens` is a declared field with a default, so it is
            # always present on a validated request.
            "num_predict": req.max_tokens,
        },
    }

    try:
        res = requests.post(
            OLLAMA_GENERATE_URL,
            json=payload,
            timeout=OLLAMA_TIMEOUT,
        )
        json_res = res.json()
    except (requests.RequestException, ValueError) as e:
        # RequestException: connection refused, timeout, etc.
        # ValueError: the reply body was not valid JSON.
        logger.exception("Request to Ollama failed")
        return {"error": str(e)}

    if not isinstance(json_res, dict):
        return {"error": f"Unexpected reply from Ollama (HTTP {res.status_code})"}

    # Ollama reports failures (e.g. unknown model) as {"error": "..."};
    # surface that message instead of failing on the missing "response" key.
    if "error" in json_res:
        return {"error": str(json_res["error"])}

    if not res.ok or "response" not in json_res:
        return {"error": f"Unexpected reply from Ollama (HTTP {res.status_code})"}

    # prompt_eval_count = input tokens, eval_count = output tokens.
    usage = {
        "input": json_res.get("prompt_eval_count", 0),
        "output": json_res.get("eval_count", 0),
    }
    return {"response": json_res["response"], "usage": usage}