Create main.py
main.py
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from typing import List, Dict, Any
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import datetime

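# Dependencies (inferred from the imports above; the service is typically run
# with uvicorn, which is an assumption, not something this file pins):
#   pip install fastapi uvicorn torch transformers
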
# 1. Initialize App
app = FastAPI(title="FunctionGemma Brain API")

# 2. Global Variables for Model (Loaded on Startup)
MODEL_ID = "google/functiongemma-270m-it"
tokenizer = None
model = None

# 3. Request Schema
# This is what your Go backend will send to this Python service
class ChatRequest(BaseModel):
    query: str
    tools: List[Dict[str, Any]]  # The JSON schema of tools
    include_date: bool = True  # Option to inject today's date

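# Example request body (illustrative only; "get_weather" is a hypothetical
# tool, written in the OpenAI-style schema that transformers chat templates
# commonly accept — verify against the FunctionGemma model card):
# {
#   "query": "What is the weather in Paris?",
#   "tools": [{
#     "type": "function",
#     "function": {
#       "name": "get_weather",
#       "description": "Get the current weather for a city",
#       "parameters": {
#         "type": "object",
#         "properties": {"city": {"type": "string"}},
#         "required": ["city"]
#       }
#     }
#   }],
#   "include_date": true
# }
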
# 4. Load Model on Startup
@app.on_event("startup")
async def load_model():
    global tokenizer, model
    print("🧠 Loading FunctionGemma 270M...")
    try:
        tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
        # Run on CPU (it's fast enough for 270M)
        model = AutoModelForCausalLM.from_pretrained(MODEL_ID, device_map="cpu")
        print("✅ Model loaded successfully!")
    except Exception as e:
        print(f"❌ Failed to load model: {e}")

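# Note: @app.on_event("startup") is deprecated in recent FastAPI releases in
# favor of lifespan handlers. A minimal lifespan-based sketch (optional; the
# handler above still works as written):
#
#   from contextlib import asynccontextmanager
#
#   @asynccontextmanager
#   async def lifespan(app: FastAPI):
#       global tokenizer, model
#       tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
#       model = AutoModelForCausalLM.from_pretrained(MODEL_ID, device_map="cpu")
#       yield  # the app serves requests while suspended here
#
#   app = FastAPI(title="FunctionGemma Brain API", lifespan=lifespan)
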
# 5. The Endpoint
@app.post("/generate")
async def generate_function_call(request: ChatRequest):
    if not model or not tokenizer:
        raise HTTPException(status_code=503, detail="Model not loaded yet")

    try:
        # A. Prepare System Prompt
        today = datetime.date.today().strftime("%Y-%m-%d")
        system_content = "You are a model that can do function calling with the following functions."
        if request.include_date:
            system_content += f" Today is {today}."

        # B. Construct Messages
        messages = [
            {"role": "system", "content": system_content},
            {"role": "user", "content": request.query}
        ]

        # C. Apply Chat Template (this handles the JSON schema formatting automatically)
        inputs = tokenizer.apply_chat_template(
            messages,
            tools=request.tools,
            add_generation_prompt=True,
            return_dict=True,
            return_tensors="pt"
        )

        # D. Generate
        # We limit tokens because we only want the function call, not a long story.
        # no_grad avoids building autograd state during inference.
        with torch.no_grad():
            outputs = model.generate(**inputs, max_new_tokens=128)

        # E. Decode
        # We skip the prompt tokens to return only the newly generated text.
        prompt_len = inputs["input_ids"].shape[-1]
        generated_text = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)

        return {"response": generated_text}

    except Exception as e:
        print(f"Error during generation: {e}")
        raise HTTPException(status_code=500, detail=str(e))

# Health check endpoint
@app.get("/")
def health_check():
    return {"status": "running", "model": MODEL_ID}
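
# To run locally (a sketch; port 7860 is an assumption, following the usual
# Hugging Face Spaces convention):
#   uvicorn main:app --host 0.0.0.0 --port 7860
#
# Example call, with tools filled in as in the schema shown above ChatRequest:
#   curl -X POST http://localhost:7860/generate \
#        -H "Content-Type: application/json" \
#        -d '{"query": "What is the weather in Paris?", "tools": [...]}'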