Spaces:
Sleeping
Sleeping
File size: 1,354 Bytes
from fastapi import FastAPI
from pydantic import BaseModel
import requests
# Application instance; the routes below are registered against this object.
app = FastAPI()
class PromptRequest(BaseModel):
    """Request body for POST /generate.

    Attributes:
        text: The user's prompt text.
        persona: System-style preamble prepended to the prompt
            (typo "knowledgable" fixed to "knowledgeable").
        max_tokens: Generation cap; maps to the UI's "Max Length" slider.
        model: Ollama model tag to run the prompt against.
    """
    text: str
    persona: str = "You are a knowledgeable and helpful assistant."
    max_tokens: int = 500
    model: str = "granite4:latest"
# Root route doubles as a lightweight health check.
@app.get("/")
def read_root():
    """Liveness probe: confirms the API is up and names the backing model."""
    payload = {"status": "System Ready", "model": "Granite 4"}
    return payload
@app.post("/generate")
def generate(req: PromptRequest):
    """Proxy a prompt to the local Ollama server and return the completion.

    Args:
        req: Validated request body (text, persona, max_tokens, model).

    Returns:
        {"response": str, "usage": {"input": int, "output": int}} on success,
        or {"error": str} if the upstream call or payload handling fails.
    """
    try:
        res = requests.post(
            "http://localhost:11434/api/generate",
            json={
                "model": req.model,
                # Persona acts as a system-style preamble ahead of the user text.
                "prompt": f"{req.persona}\n\n{req.text}",
                "stream": False,
                "options": {
                    # This maps to the user's "Max Length" slider.
                    # (Removed dead hasattr() check: pydantic guarantees the
                    # field exists, with its declared default of 500.)
                    "num_predict": req.max_tokens
                },
            },
            # Fix: without a timeout, a stalled Ollama hangs this worker forever.
            timeout=120,
        )
        # Fix: surface upstream HTTP errors instead of mis-parsing an error body.
        res.raise_for_status()
        json_res = res.json()
        # Capture token counts:
        #   prompt_eval_count = input tokens, eval_count = output tokens.
        usage = {
            "input": json_res.get("prompt_eval_count", 0),
            "output": json_res.get("eval_count", 0),
        }
        return {"response": json_res["response"], "usage": usage}
    except (requests.RequestException, KeyError, ValueError) as e:
        # Narrowed from bare `except Exception`: network/HTTP failures,
        # a missing "response" key, and JSON decode errors are the only
        # failure modes this body produces. Error shape kept for callers.
        return {"error": str(e)}