File size: 1,354 Bytes
051936d
 
 
 
 
 
 
 
859d9ed
 
a51b178
051936d
 
 
 
e8a2779
051936d
 
 
 
 
 
 
299c5b1
051936d
8a4b599
 
 
 
 
051936d
 
8a4b599
 
 
 
 
 
 
 
 
 
 
051936d
98262d5
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
from fastapi import FastAPI
from pydantic import BaseModel
import requests

# FastAPI application instance; routes below register "/" (health check)
# and "/generate" (Ollama proxy) against it.
app = FastAPI()

class PromptRequest(BaseModel):
    """Request body for the /generate endpoint.

    Fields mirror the options forwarded to the local Ollama
    /api/generate call.
    """

    # The user's prompt text (required).
    text: str
    # System persona prepended to the prompt.
    # Fix: corrected typo "knowledgable" -> "knowledgeable" in the default.
    persona: str = "You are a knowledgeable and helpful assistant."
    # Upper bound on generated tokens; mapped to Ollama's "num_predict".
    max_tokens: int = 500
    # Ollama model tag to run the prompt against.
    model: str = "granite4:latest"

# Root route doubling as a health check for the service.
@app.get("/")
def read_root():
    """Report that the service is up and which model family it fronts."""
    payload = {"status": "System Ready", "model": "Granite 4"}
    return payload

@app.post("/generate")
def generate(req: PromptRequest):
    """Proxy a prompt to the local Ollama server and return its reply.

    Returns {"response": ..., "usage": {"input": ..., "output": ...}} on
    success, or {"error": "..."} on any failure (network error, non-2xx
    status, timeout, or a malformed Ollama reply).
    """
    try:
        res = requests.post(
            "http://localhost:11434/api/generate",
            json={
                "model": req.model,
                "prompt": f"{req.persona}\n\n{req.text}",
                "stream": False,
                "options": {
                    # This maps to the user's "Max Length" slider.
                    # req is a validated pydantic model, so max_tokens is
                    # always present -- the old hasattr() fallback was dead code.
                    "num_predict": req.max_tokens,
                },
            },
            # Don't let an unresponsive Ollama hang the API worker forever.
            timeout=120,
        )
        # Surface HTTP-level failures (e.g. 404 for an unknown model) as
        # an exception instead of trying to parse an error page as JSON.
        res.raise_for_status()
        json_res = res.json()

        # Capture token counts reported by Ollama:
        #   prompt_eval_count = input tokens, eval_count = output tokens.
        usage = {
            "input": json_res.get("prompt_eval_count", 0),
            "output": json_res.get("eval_count", 0),
        }

        if "response" not in json_res:
            # Ollama reports some failures in-body; avoid the KeyError the
            # original code would raise here.
            return {"error": json_res.get("error", "malformed Ollama response")}

        return {"response": json_res["response"], "usage": usage}
    except Exception as e:
        # Broad catch keeps the endpoint returning a JSON error payload
        # (the shape callers already expect) rather than a bare 500.
        return {"error": str(e)}