Spaces:

KGNINJA
/

FunctionGemmabotdemo

Runtime error

App Files Files Community

KGNINJA committed on Dec 21, 2025

Commit

8b4b273

verified ·

1 Parent(s): c469103

Update app.py

Browse files

Files changed (1) hide show

app.py +145 -150

app.py CHANGED Viewed

@@ -1,217 +1,212 @@
 import os
 import re
-from typing import Any, Dict, Tuple
 from fastapi import FastAPI
 from pydantic import BaseModel
 from transformers import AutoProcessor, AutoModelForCausalLM
-# -------------------------
-# 1) Function call parser
-# -------------------------
-_ESCAPE = "<escape>"
-_CALL_START = "<start_function_call>"
-_CALL_END = "<end_function_call>"
-def _split_top_level_commas(s: str) -> list[str]:
-    """
-    Split "k1:v1,k2:<escape>v,2<escape>,k3:3" by commas, but ignore commas inside <escape> ... <escape>.
-    """
-    parts = []
-    buf = []
     i = 0
-    in_escape = False
     while i < len(s):
-        if s.startswith(_ESCAPE, i):
-            in_escape = not in_escape
-            buf.append(_ESCAPE)
-            i += len(_ESCAPE)
             continue
-        ch = s[i]
-        if ch == "," and not in_escape:
             parts.append("".join(buf).strip())
             buf = []
         else:
-            buf.append(ch)
         i += 1
     if buf:
         parts.append("".join(buf).strip())
-    return [p for p in parts if p]
-def _parse_value(raw: str) -> Any:
-    raw = raw.strip()
-    # string wrapped with <escape> ... <escape>
-    if raw.startswith(_ESCAPE) and raw.endswith(_ESCAPE) and len(raw) >= 2 * len(_ESCAPE):
-        return raw[len(_ESCAPE):-len(_ESCAPE)]
-    # bool
-    if raw.lower() in ("true", "false"):
-        return raw.lower() == "true"
-    # int / float
     try:
-        if "." in raw:
-            return float(raw)
-        return int(raw)
     except ValueError:
-        # fallback: plain string
-        return raw
-def parse_function_call(text: str) -> Tuple[Dict[str, Any] | None, str]:
-    """
-    Returns (call, raw_text).
-    call = {"name": "...", "arguments": {...}} if a function call exists, else None.
-    """
-    if _CALL_START not in text:
-        return None, text.strip()
-    # Grab the first function call block
-    m = re.search(rf"{re.escape(_CALL_START)}(.*?){re.escape(_CALL_END)}", text, re.DOTALL)
     if not m:
-        return None, text.strip()
-    inside = m.group(1).strip()  # ex: "call:move_robot{direction:<escape>forward<escape>,meters:1}"
-    m2 = re.match(r"call:([A-Za-z0-9_\-]+)\{(.*)\}$", inside, re.DOTALL)
     if not m2:
-        return None, text.strip()
     name = m2.group(1)
-    args_blob = m2.group(2).strip()
-    arguments: Dict[str, Any] = {}
-    if args_blob:
-        for kv in _split_top_level_commas(args_blob):
-            if ":" not in kv:
-                continue
-            k, v = kv.split(":", 1)
-            arguments[k.strip()] = _parse_value(v)
-    return {"name": name, "arguments": arguments}, text.strip()
-# -------------------------
-# 2) FastAPI + Model
-# -------------------------
-GEMMA_MODEL_ID = os.getenv("GEMMA_MODEL_ID", "google/functiongemma-270m-it")
-app = FastAPI(title="FunctionGemma FastAPI Minimal")
-processor = None
-model = None
-# Tool schemas (simulation actions)
 TOOLS = [
     {
         "type": "function",
         "function": {
-            "name": "move_robot",
-            "description": "Move the robot in the simulator.",
             "parameters": {
                 "type": "object",
                 "properties": {
-                    "direction": {"type": "string", "description": "forward|backward|left|right"},
-                    "meters": {"type": "number", "description": "distance in meters"},
                 },
-                "required": ["direction", "meters"],
-            },
-        },
     },
     {
         "type": "function",
         "function": {
-            "name": "turn_robot",
-            "description": "Turn the robot in place in the simulator.",
             "parameters": {
                 "type": "object",
                 "properties": {
-                    "angle_deg": {"type": "number", "description": "positive=right, negative=left"},
                 },
-                "required": ["angle_deg"],
-            },
-        },
     },
     {
         "type": "function",
         "function": {
-            "name": "speak",
-            "description": "Make the robot speak (subtitle in the simulator).",
             "parameters": {
                 "type": "object",
-                "properties": {
-                    "text": {"type": "string", "description": "utterance"},
-                },
-                "required": ["text"],
-            },
-        },
-    },
 ]
-class PlanRequest(BaseModel):
-    prompt: str
-    # simulator state/observation from browser (optional but recommended)
-    observation: Dict[str, Any] | None = None
-class PlanResponse(BaseModel):
-    tool_call: Dict[str, Any] | None = None
-    raw_output: str
-    note: str | None = None
-@app.on_event("startup")
-def _load():
     global processor, model
-    # If model is gated, HF_TOKEN must be set in Space Secrets.
-    processor = AutoProcessor.from_pretrained(GEMMA_MODEL_ID, device_map="auto")
-    model = AutoModelForCausalLM.from_pretrained(GEMMA_MODEL_ID, dtype="auto", device_map="auto")
 @app.get("/health")
 def health():
-    return {"ok": True, "model": GEMMA_MODEL_ID}
-@app.post("/plan", response_model=PlanResponse)
-def plan(req: PlanRequest):
-    """
-    Browser sends: user prompt + observation (e.g., lidar-like distances, detected labels, etc.)
-    Server returns: one tool call (move/turn/speak) as FunctionGemma formatted output parsed into JSON.
-    """
-    # Essential developer instruction is required to activate function calling behavior. :contentReference[oaicite:4]{index=4}
     messages = [
-        {"role": "developer", "content": "You are a model that can do function calling with the following functions"},
-        {"role": "user", "content": _build_user_content(req.prompt, req.observation)},
     ]
     inputs = processor.apply_chat_template(
         messages,
         tools=TOOLS,
         add_generation_prompt=True,
-        return_dict=True,
-        return_tensors="pt",
     )
-    out = model.generate(
         **inputs.to(model.device),
-        pad_token_id=processor.eos_token_id,
         max_new_tokens=128,
     )
-    # decode only newly generated tokens
-    gen = out[0][len(inputs["input_ids"][0]):]
-    raw = processor.decode(gen, skip_special_tokens=True)
-    tool_call, _ = parse_function_call(raw)
-    note = None
-    if tool_call is None:
-        note = "No structured function call produced. Consider enriching tool descriptions or simplifying the prompt."
-    return PlanResponse(tool_call=tool_call, raw_output=raw, note=note)
-def _build_user_content(prompt: str, observation: Dict[str, Any] | None) -> str:
-    # Keep it single-step: decide ONLY the next action.
-    # FunctionGemma is strongest in single-turn / parallel calls, not multi-step chaining. :contentReference[oaicite:5]{index=5}
-    obs_txt = ""
-    if observation:
-        obs_txt = f"\n\n[OBSERVATION]\n{observation}\n"
-    return (
-        f"{prompt}\n"
-        f"{obs_txt}\n"
-        "Decide ONLY the next simulator action. "
-        "Use exactly one function call from the provided tools."
     )

 import os
 import re
+from typing import Any, Dict, Optional
+from contextlib import asynccontextmanager
 from fastapi import FastAPI
 from pydantic import BaseModel
 from transformers import AutoProcessor, AutoModelForCausalLM
+import torch
+# =========================
+# Configuration
+# =========================
+MODEL_ID = os.getenv("GEMMA_MODEL_ID", "google/functiongemma-270m-it")
+processor = None
+model = None
+# =========================
+# Function Call Parser
+# =========================
+ESC = "<escape>"
+START = "<start_function_call>"
+END = "<end_function_call>"
+def _split_commas(s: str):
+    parts, buf, esc = [], [], False
     i = 0
     while i < len(s):
+        if s.startswith(ESC, i):
+            esc = not esc
+            buf.append(ESC)
+            i += len(ESC)
             continue
+        if s[i] == "," and not esc:
             parts.append("".join(buf).strip())
             buf = []
         else:
+            buf.append(s[i])
         i += 1
     if buf:
         parts.append("".join(buf).strip())
+    return parts
+def _parse_value(v: str):
+    v = v.strip()
+    if v.startswith(ESC) and v.endswith(ESC):
+        return v[len(ESC):-len(ESC)]
+    if v.lower() in ("true", "false"):
+        return v.lower() == "true"
     try:
+        if "." in v:
+            return float(v)
+        return int(v)
     except ValueError:
+        return v
+def parse_function_call(text: str):
+    if START not in text:
+        return None
+    m = re.search(rf"{START}(.*?){END}", text, re.DOTALL)
     if not m:
+        return None
+    body = m.group(1).strip()
+    m2 = re.match(r"call:([a-zA-Z0-9_]+)\{(.*)\}$", body, re.DOTALL)
     if not m2:
+        return None
     name = m2.group(1)
+    args_raw = m2.group(2).strip()
+    args = {}
+    if args_raw:
+        for kv in _split_commas(args_raw):
+            if ":" in kv:
+                k, v = kv.split(":", 1)
+                args[k.strip()] = _parse_value(v)
+    return {"name": name, "arguments": args}
+# =========================
+# Tools (Robot Actions)
+# =========================
 TOOLS = [
     {
         "type": "function",
         "function": {
+            "name": "move",
+            "description": "Move forward or backward",
             "parameters": {
                 "type": "object",
                 "properties": {
+                    "direction": {"type": "string"},
+                    "speed": {"type": "number"}
                 },
+                "required": ["direction", "speed"]
+            }
+        }
     },
     {
         "type": "function",
         "function": {
+            "name": "turn",
+            "description": "Rotate left or right",
             "parameters": {
                 "type": "object",
                 "properties": {
+                    "angle": {"type": "number"}
                 },
+                "required": ["angle"]
+            }
+        }
     },
     {
         "type": "function",
         "function": {
+            "name": "pause",
+            "description": "Stop and observe",
             "parameters": {
                 "type": "object",
+                "properties": {}
+            }
+        }
+    }
 ]
+# =========================
+# FastAPI Lifespan
+# =========================
+@asynccontextmanager
+async def lifespan(app: FastAPI):
     global processor, model
+    processor = AutoProcessor.from_pretrained(MODEL_ID)
+    model = AutoModelForCausalLM.from_pretrained(
+        MODEL_ID,
+        torch_dtype=torch.float32,
+        device_map="auto"
+    )
+    yield
+    # shutdown処理（今回は不要）
+app = FastAPI(
+    title="FunctionGemma Robot Brain",
+    lifespan=lifespan
+)
+# =========================
+# API Schema
+# =========================
+class DecideRequest(BaseModel):
+    observation: Dict[str, Any]
+    persona: Optional[str] = "curious"
+class DecideResponse(BaseModel):
+    action: Optional[Dict[str, Any]]
+    raw: str
+# =========================
+# Endpoints
+# =========================
 @app.get("/health")
 def health():
+    return {"status": "ok", "model": MODEL_ID}
+@app.post("/decide", response_model=DecideResponse)
+def decide(req: DecideRequest):
+    system = (
+        "You are a small exploration robot.\n"
+        "You must choose exactly ONE action function.\n"
+        "You are curious but avoid real danger.\n"
+        f"Persona: {req.persona}"
+    )
+    user = f"""
+Observation:
+{req.observation}
+Choose the next action.
+"""
     messages = [
+        {"role": "developer", "content": "You can call functions."},
+        {"role": "system", "content": system},
+        {"role": "user", "content": user}
     ]
     inputs = processor.apply_chat_template(
         messages,
         tools=TOOLS,
         add_generation_prompt=True,
+        return_tensors="pt"
     )
+    outputs = model.generate(
         **inputs.to(model.device),
         max_new_tokens=128,
+        pad_token_id=processor.eos_token_id
     )
+    decoded = processor.decode(
+        outputs[0][inputs["input_ids"].shape[-1]:],
+        skip_special_tokens=True
     )
+    action = parse_function_call(decoded)
+    return {
+        "action": action,
+        "raw": decoded
+    }