Spaces:

triflix
/

brainfuncall

Running

App Files Community

triflix committed 18 days ago

Commit

2a263c0

verified ·

1 Parent(s): 221c179

Update main.py

Browse files

Files changed (1) hide show

main.py +105 -41

main.py CHANGED Viewed

@@ -1,83 +1,147 @@
 from fastapi import FastAPI, HTTPException
-from pydantic import BaseModel
-from typing import List, Optional, Dict, Any
 import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
-import datetime
-# 1. Initialize App
-app = FastAPI(title="FunctionGemma Brain API")
-# 2. Global Variables for Model (Loaded on Startup)
 MODEL_ID = "google/functiongemma-270m-it"
 tokenizer = None
 model = None
-# 3. Request Schema
-# This is what your Go Backend will send to this Python Service
 class ChatRequest(BaseModel):
-    query: str
-    tools: List[Dict[str, Any]]  # The JSON schema of tools
-    include_date: bool = True    # Option to inject today's date
-# 4. Load Model on Startup
 @app.on_event("startup")
-async def load_model():
     global tokenizer, model
-    print("🧠 Loading FunctionGemma 270M...")
     try:
         tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
-        # Run on CPU (It's fast enough for 270M)
-        model = AutoModelForCausalLM.from_pretrained(MODEL_ID, device_map="cpu")
-        print("✅ Model Loaded Successfully!")
     except Exception as e:
-        print(f"❌ Failed to load model: {e}")
-# 5. The Endpoint
 @app.post("/generate")
 async def generate_function_call(request: ChatRequest):
-    global tokenizer, model
-    if not model or not tokenizer:
-        raise HTTPException(status_code=503, detail="Model not loaded yet")
     try:
-        # A. Prepare System Prompt
-        today = datetime.date.today().strftime("%Y-%m-%d")
-        system_content = "You are a model that can do function calling with the following functions."
         if request.include_date:
             system_content += f" Today is {today}."
-        # B. Construct Messages
         messages = [
             {"role": "system", "content": system_content},
-            {"role": "user", "content": request.query}
         ]
-        # C. Apply Chat Template (This handles the JSON Schema formatting automatically)
         inputs = tokenizer.apply_chat_template(
             messages,
             tools=request.tools,
             add_generation_prompt=True,
             return_dict=True,
-            return_tensors="pt"
         )
-        # D. Generate
-        # We limit tokens because we only want the function call, not a long story
-        outputs = model.generate(**inputs, max_new_tokens=128)
-        # E. Decode
-        # We skip the input tokens to only get the new generated text
-        generated_text = tokenizer.decode(outputs[0][len(inputs["input_ids"][0]):], skip_special_tokens=True)
         return {"response": generated_text}
     except Exception as e:
-        print(f"Error during generation: {e}")
         raise HTTPException(status_code=500, detail=str(e))
-# Health check endpoint
-@app.get("/")
 def health_check():
-    return {"status": "running", "model": MODEL_ID}

+# main.py
 from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel, Field
+from typing import List, Dict, Any
+import os
+import datetime
 import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
+from huggingface_hub import login, HfHubHTTPError
+# ==========================================
+# 1. CONFIGURATION (Secure Defaults)
+# ==========================================
+# Hub repository id passed to both from_pretrained() calls in the startup hook
+# and echoed back by the health check.
 MODEL_ID = "google/functiongemma-270m-it"
+# Name of the environment variable that get_hf_token() reads the access token from.
+HF_TOKEN_ENV = "HF_TOKEN"
+def get_hf_token() -> str:
+    """
+    Fetch Hugging Face token from environment.
+    Raises:
+        RuntimeError: if token is missing
+    """
+    token = os.getenv(HF_TOKEN_ENV)
+    if not token:
+        raise RuntimeError(
+            f"Missing required environment variable: {HF_TOKEN_ENV}"
+        )
+    return token
+# ==========================================
+# 2. APP SETUP
+# ==========================================
+# The ASGI application object; routes and the startup hook below attach to it.
+app = FastAPI(
+    title="FunctionGemma Brain API",
+    version="1.0.0",
+)
+# Module-level handles assigned by the startup hook. While either is still
+# None, the /generate endpoint answers 503 instead of running inference.
 tokenizer = None
 model = None
+# ==========================================
+# 3. DATA MODELS
+# ==========================================
 class ChatRequest(BaseModel):
+    """
+    Request schema for function-call generation.
+    """
+    # User message; sent as the content of the "user" turn. Length 1-4096.
+    query: str = Field(..., min_length=1, max_length=4096)
+    # Tool definitions, forwarded unchanged as `tools=` to apply_chat_template.
+    # NOTE(review): presumably JSON-schema style function descriptions — confirm
+    # the exact shape the model's chat template expects.
+    tools: List[Dict[str, Any]]
+    # When True (the default), " Today is <ISO date>." is appended to the
+    # system prompt.
+    include_date: bool = True
+# Response schema declared as `response_model` for the GET "/" health check.
+class HealthResponse(BaseModel):
+    # The health check always reports the literal "running".
+    status: str
+    # Filled with MODEL_ID by the health check.
+    model: str
+    # The health check reports the literal "env" here.
+    auth: str
+# ==========================================
+# 4. STARTUP (Auth + Load Model)
+# ==========================================
 @app.on_event("startup")
+async def startup():
     global tokenizer, model
+    # A. Authenticate (fail-fast)
+    try:
+        hf_token = get_hf_token()
+        login(token=hf_token)
+    except (RuntimeError, HfHubHTTPError) as e:
+        raise RuntimeError(f"Hugging Face authentication failed: {e}")
+    # B. Load Model
     try:
         tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+        model = AutoModelForCausalLM.from_pretrained(
+            MODEL_ID,
+            device_map="cpu",
+            torch_dtype=torch.float32,
+        )
     except Exception as e:
+        raise RuntimeError(f"Model load failed: {e}")
+# ==========================================
+# 5. API ENDPOINT
+# ==========================================
 @app.post("/generate")
 async def generate_function_call(request: ChatRequest):
+    if model is None or tokenizer is None:
+        raise HTTPException(status_code=503, detail="Model not ready")
     try:
+        # System context
+        system_content = (
+            "You are a model that can do function calling with the following functions."
+        )
         if request.include_date:
+            today = datetime.date.today().isoformat()
             system_content += f" Today is {today}."
         messages = [
             {"role": "system", "content": system_content},
+            {"role": "user", "content": request.query},
         ]
         inputs = tokenizer.apply_chat_template(
             messages,
             tools=request.tools,
             add_generation_prompt=True,
+            return_tensors="pt",
             return_dict=True,
         )
+        outputs = model.generate(
+            **inputs,
+            max_new_tokens=128,
+            do_sample=False,  # deterministic
+        )
+        generated_text = tokenizer.decode(
+            outputs[0][len(inputs["input_ids"][0]):],
+            skip_special_tokens=True,
+        )
         return {"response": generated_text}
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
+@app.get("/", response_model=HealthResponse)
 def health_check():
+    return {
+        "status": "running",
+        "model": MODEL_ID,
+        "auth": "env",
+    }