Spaces:

build-small-hackathon
/

hackathon-advisor

Running on Zero

App Files Files Community

JacobLinCool Codex committed 3 days ago

Commit

73b4c3f

verified ·

1 Parent(s): 8fb1ae9

feat: add model tool-call contracts

Browse files

Co-authored-by: Codex <noreply@openai.com>

Files changed (5) hide show

README.md +6 -0
app.py +14 -0
hackathon_advisor/tool_contracts.py +236 -0
tests/test_app.py +15 -1
tests/test_tool_contracts.py +60 -0

README.md CHANGED Viewed

@@ -63,6 +63,12 @@ source, project order, and digest before the app starts.
 The app exposes a `trace_artifact` Gradio API endpoint and a `JSONL` button in the UI. Both emit the same JSONL schema:
 a manifest row followed by one row per agent turn. `data/sample_trace.jsonl` is a checked-in, Hub-published sample trace.
 ## Test
 ```bash

 The app exposes a `trace_artifact` Gradio API endpoint and a `JSONL` button in the UI. Both emit the same JSONL schema:
 a manifest row followed by one row per agent turn. `data/sample_trace.jsonl` is a checked-in, Hub-published sample trace.
+## Tool-Call Contract
+`/api/tool-contracts` exposes the JSON schemas intended for MiniCPM-style tool calling. `tool_contract_check` accepts a
+MiniCPM XML call such as `<function name="search_projects">{"query":"lullaby audio"}</function>`, validates it against
+the schemas, and returns either the valid call or a safe default call for the UI watchdog path.
 ## Test
 ```bash

app.py CHANGED Viewed

@@ -10,6 +10,7 @@ from gradio import Server
 from hackathon_advisor.agent import AdvisorEngine
 from hackathon_advisor.data import ProjectIndex
 from hackathon_advisor.trace_export import build_trace_jsonl, trace_metadata
@@ -59,6 +60,19 @@ def bootstrap() -> dict:
     }
 @app.api(name="trace_artifact", concurrency_limit=8)
 def trace_artifact(session_json: str = "{}") -> str:
     try:

 from hackathon_advisor.agent import AdvisorEngine
 from hackathon_advisor.data import ProjectIndex
+from hackathon_advisor.tool_contracts import resolve_tool_call, tool_schemas
 from hackathon_advisor.trace_export import build_trace_jsonl, trace_metadata
     }
+@app.get("/api/tool-contracts")
+def tool_contracts() -> dict:
+    """Return the tool-call JSON schemas together with their count.
+
+    Returns:
+        A dict with ``tool_count`` (number of schemas) and ``tools`` (the schema list).
+    """
+    # Build the schema list once so the count and the payload come from the same object
+    # instead of serialising every tool spec twice per request.
+    schemas = tool_schemas()
+    return {
+        "tool_count": len(schemas),
+        "tools": schemas,
+    }
+@app.api(name="tool_contract_check", concurrency_limit=8)
+def tool_contract_check(model_output: str, fallback_query: str = "") -> dict:
+    """Resolve a model-emitted tool call and return the resolution as a plain dict.
+
+    ``model_output`` and ``fallback_query`` are passed to ``resolve_tool_call``; the resulting
+    resolution object is serialised through its ``to_dict()`` method.
+    """
+    resolution = resolve_tool_call(model_output, fallback_query=fallback_query)
+    return resolution.to_dict()
 @app.api(name="trace_artifact", concurrency_limit=8)
 def trace_artifact(session_json: str = "{}") -> str:
     try:

hackathon_advisor/tool_contracts.py ADDED Viewed

	@@ -0,0 +1,236 @@

+from __future__ import annotations
+from dataclasses import dataclass
+import json
+from typing import Any, Literal
+from xml.etree import ElementTree
+JsonType = Literal["string", "integer", "number", "boolean", "array", "object"]
+@dataclass(frozen=True)
+class ToolField:
+    """Describe one tool argument: its JSON type plus optional constraints."""
+    type: JsonType  # JSON type name emitted as the schema "type"
+    description: str  # text emitted as the schema "description"
+    required: bool = False  # not part of the fragment produced by to_schema()
+    enum: tuple[str, ...] = ()  # allowed values; an empty tuple emits no "enum" key
+    items_type: JsonType | None = None  # element type; None emits no "items" key
+    def to_schema(self) -> dict[str, Any]:
+        """Return the JSON-schema fragment for this field.
+
+        ``enum`` and ``items`` are included only when the matching attribute is set.
+        """
+        fragment: dict[str, Any] = {"type": self.type, "description": self.description}
+        if self.enum:
+            fragment["enum"] = [*self.enum]
+        if self.items_type:
+            fragment["items"] = {"type": self.items_type}
+        return fragment
+@dataclass(frozen=True)
+class ToolSpec:
+    """Bundle a tool's name, description, and argument fields into one contract."""
+    name: str
+    description: str
+    fields: dict[str, ToolField]
+    def to_schema(self) -> dict[str, Any]:
+        """Return the function-calling schema for this tool.
+
+        The result has ``type: "function"`` and a ``function`` entry whose ``parameters`` object
+        lists every field under ``properties``, names the required ones under ``required``, and
+        sets ``additionalProperties`` to ``False``.
+        """
+        properties: dict[str, Any] = {}
+        required: list[str] = []
+        for key, spec_field in self.fields.items():
+            properties[key] = spec_field.to_schema()
+            if spec_field.required:
+                required.append(key)
+        parameters = {
+            "type": "object",
+            "additionalProperties": False,
+            "properties": properties,
+            "required": required,
+        }
+        function = {"name": self.name, "description": self.description, "parameters": parameters}
+        return {"type": "function", "function": function}
+@dataclass(frozen=True)
+class ToolCall:
+    """A single tool invocation: the tool name and its argument mapping."""
+    name: str
+    arguments: dict[str, Any]
+    def to_dict(self) -> dict[str, Any]:
+        """Return the call as a ``name``/``arguments`` dict; the arguments mapping is not copied."""
+        return dict(name=self.name, arguments=self.arguments)
+@dataclass(frozen=True)
+class ToolResolution:
+    """Outcome of resolving model output: a status, the call to run, and any error messages."""
+    status: Literal["valid", "defaulted"]
+    call: ToolCall
+    errors: tuple[str, ...]
+    def to_dict(self) -> dict[str, Any]:
+        """Return a plain-dict form: ``call`` via its ``to_dict()`` and ``errors`` as a list."""
+        payload: dict[str, Any] = {"status": self.status}
+        payload["call"] = self.call.to_dict()
+        payload["errors"] = [*self.errors]
+        return payload
+class ToolContractError(ValueError):
+    """Raised when a tool call cannot be parsed or does not satisfy its contract."""
+# Registry of every tool contract, keyed by tool name (each key repeats its ToolSpec.name).
+# Tools declared with ``fields={}`` define no arguments at all.
+TOOL_SPECS: dict[str, ToolSpec] = {
+    "list_projects": ToolSpec(
+        name="list_projects",
+        description="Read prominent Build Small project cards from the offline snapshot.",
+        fields={
+            "track": ToolField("string", "Optional prize, badge, model, or topic filter."),
+            "sort": ToolField("string", "Sort key.", enum=("likes", "recent", "title")),
+        },
+    ),
+    "search_projects": ToolSpec(
+        name="search_projects",
+        description="Find existing Spaces that echo the user's project idea.",
+        fields={"query": ToolField("string", "The user idea or topic to search.", required=True)},
+    ),
+    "get_project": ToolSpec(
+        name="get_project",
+        description="Read one project card by full Space id or slug.",
+        fields={"id": ToolField("string", "Project id such as build-small-hackathon/lolaby.", required=True)},
+    ),
+    "find_whitespace": ToolSpec(
+        name="find_whitespace",
+        description="Return under-explored project regions from the offline index.",
+        fields={},
+    ),
+    "save_idea": ToolSpec(
+        name="save_idea",
+        description="Write or update the current idea page.",
+        fields={
+            "title": ToolField("string", "Short idea title.", required=True),
+            "pitch": ToolField("string", "One-sentence idea pitch.", required=True),
+            "track": ToolField("string", "Primary target track or award."),
+            "models": ToolField("array", "Model ids the idea may use.", items_type="string"),
+            "side_quests": ToolField("array", "Badge or side quest targets.", items_type="string"),
+        },
+    ),
+    "score_idea": ToolSpec(
+        name="score_idea",
+        description="Score the current idea against the fixed hackathon rubric.",
+        fields={"id": ToolField("string", "Idea id; omit to score the current idea.")},
+    ),
+    "compare_ideas": ToolSpec(
+        name="compare_ideas",
+        description="Rank the current idea board and explain tradeoffs.",
+        fields={},
+    ),
+    "make_plan": ToolSpec(
+        name="make_plan",
+        description="Draft the next build steps for the current idea.",
+        fields={"id": ToolField("string", "Idea id; omit to plan the current idea.")},
+    ),
+    "update_profile": ToolSpec(
+        name="update_profile",
+        description="Remember a user skill, constraint, preference, or available time.",
+        fields={
+            "field": ToolField(
+                "string",
+                "Profile field to update.",
+                required=True,
+                enum=("skills", "time", "preferences", "constraints"),
+            ),
+            "value": ToolField("string", "Profile value to remember.", required=True),
+        },
+    ),
+    "set_target": ToolSpec(
+        name="set_target",
+        description="Change the badge, model, or award targets used to bias ideation.",
+        fields={"side_quests": ToolField("array", "Targets to prioritize.", required=True, items_type="string")},
+    ),
+}
+def tool_schemas() -> list[dict[str, Any]]:
+    """Return the schema of every registered tool, in ``TOOL_SPECS`` insertion order."""
+    return [TOOL_SPECS[tool_name].to_schema() for tool_name in TOOL_SPECS]
+def parse_xml_tool_call(text: str) -> ToolCall:
+    """Extract the single ``<function name="...">{json}</function>`` call found in ``text``.
+
+    Args:
+        text: Raw text containing the XML function call.
+
+    Returns:
+        The tool name and decoded argument object as a ``ToolCall``; no schema validation is done.
+
+    Raises:
+        ToolContractError: if the text is not well-formed XML once wrapped in a root element,
+            does not hold exactly one top-level ``function`` element, lacks a non-blank ``name``
+            attribute, or carries arguments that are not a JSON object.
+    """
+    # A synthetic root lets surrounding text and sibling elements parse as one document.
+    try:
+        document = ElementTree.fromstring(f"<root>{text.strip()}</root>")
+    except ElementTree.ParseError as error:
+        raise ToolContractError(f"invalid XML tool call: {error}") from error
+    calls = [child for child in document if child.tag == "function"]
+    if len(calls) != 1:
+        raise ToolContractError(f"expected exactly one function call, got {len(calls)}")
+    (element,) = calls
+    tool_name = str(element.attrib.get("name") or "").strip()
+    if not tool_name:
+        raise ToolContractError("function call is missing a name")
+    # An empty element body is treated as an empty argument object.
+    payload = (element.text or "").strip() or "{}"
+    try:
+        decoded = json.loads(payload)
+    except json.JSONDecodeError as error:
+        raise ToolContractError(f"function arguments are not valid JSON: {error.msg}") from error
+    if not isinstance(decoded, dict):
+        raise ToolContractError("function arguments must be a JSON object")
+    return ToolCall(name=tool_name, arguments=decoded)
+def validate_tool_call(call: ToolCall, specs: dict[str, ToolSpec] = TOOL_SPECS) -> ToolCall:
+    """Check ``call`` against the matching tool spec and return it unchanged when it conforms.
+
+    Checks run in a fixed order: unknown tool, unexpected argument names, missing required
+    arguments, then the type/enum check of each supplied value.
+
+    Raises:
+        ToolContractError: on the first failed check.
+    """
+    spec = specs.get(call.name)
+    if spec is None:
+        raise ToolContractError(f"unknown tool: {call.name}")
+    supplied = call.arguments
+    declared = spec.fields
+    unexpected = sorted(set(supplied).difference(declared))
+    if unexpected:
+        raise ToolContractError(f"unexpected arguments for {call.name}: {', '.join(unexpected)}")
+    required = [key for key, definition in declared.items() if definition.required]
+    absent = sorted(key for key in required if key not in supplied)
+    if absent:
+        raise ToolContractError(f"missing required arguments for {call.name}: {', '.join(absent)}")
+    for key, value in supplied.items():
+        _validate_value(call.name, key, value, declared[key])
+    return call
+def resolve_tool_call(model_output: str, fallback_query: str = "") -> ToolResolution:
+    """Resolve raw model output into a tool call, falling back to a default call on failure.
+
+    Args:
+        model_output: Text expected to contain one XML function call.
+        fallback_query: Query used for the default ``search_projects`` call when resolution fails.
+
+    Returns:
+        A ``"valid"`` resolution carrying the parsed call, or a ``"defaulted"`` resolution that
+        carries ``search_projects`` (non-blank fallback query) or ``find_whitespace`` (otherwise)
+        together with the collected error messages.
+    """
+    errors: list[str] = []
+    # Non-string input (for example a null payload) must take the default path rather than
+    # escape as an AttributeError from ``.strip()``; returning a default is this function's job.
+    if isinstance(model_output, str):
+        try:
+            call = validate_tool_call(parse_xml_tool_call(model_output))
+        except ToolContractError as error:
+            errors.append(str(error))
+        else:
+            return ToolResolution(status="valid", call=call, errors=())
+    else:
+        errors.append("model output must be a string")
+    # Same guard for the fallback query: anything that is not a string counts as "no query".
+    query = fallback_query.strip() if isinstance(fallback_query, str) else ""
+    if query:
+        call = ToolCall("search_projects", {"query": query})
+    else:
+        call = ToolCall("find_whitespace", {})
+    return ToolResolution(status="defaulted", call=call, errors=tuple(errors))
+def _validate_value(tool_name: str, field_name: str, value: Any, field: ToolField) -> None:
+    """Check one argument value against its field definition.
+
+    Args:
+        tool_name: Tool the argument belongs to (used in error messages).
+        field_name: Argument name, or ``name[index]`` when checking an array item.
+        value: Decoded JSON value to check.
+        field: Expected JSON type, optional enum, and optional array item type.
+
+    Raises:
+        ToolContractError: if the value has the wrong JSON type, is not one of the enum values,
+            or an array item has the wrong type.
+    """
+    # bool is a subclass of int, so booleans are excluded from the numeric types explicitly.
+    is_bool = isinstance(value, bool)
+    if field.type == "string":
+        valid = isinstance(value, str)
+    elif field.type == "integer":
+        valid = isinstance(value, int) and not is_bool
+    elif field.type == "number":
+        # Tuple form rather than ``int | float``: isinstance() only accepts PEP 604 unions on
+        # Python 3.10+, while the tuple behaves identically on every version.
+        valid = isinstance(value, (int, float)) and not is_bool
+    elif field.type == "boolean":
+        valid = is_bool
+    elif field.type == "array":
+        valid = isinstance(value, list)
+    elif field.type == "object":
+        valid = isinstance(value, dict)
+    else:
+        valid = False
+    if not valid:
+        raise ToolContractError(f"{tool_name}.{field_name} must be {field.type}")
+    if field.enum and value not in field.enum:
+        raise ToolContractError(f"{tool_name}.{field_name} must be one of: {', '.join(field.enum)}")
+    if field.items_type and isinstance(value, list):
+        # The item definition is the same for every element, so build it once outside the loop.
+        item_field = ToolField(field.items_type, "array item")
+        for index, item in enumerate(value):
+            _validate_value(tool_name, f"{field_name}[{index}]", item, item_field)

tests/test_app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import json
-from app import bootstrap, engine, health, index, trace_artifact
 def test_health_exposes_index_metadata() -> None:
@@ -29,3 +29,17 @@ def test_trace_artifact_endpoint_exports_jsonl() -> None:
     assert lines[0]["type"] == "trace_manifest"
     assert lines[0]["turn_count"] == 1
     assert lines[1]["type"] == "agent_turn"

 import json
+from app import bootstrap, engine, health, index, tool_contract_check, tool_contracts, trace_artifact
 def test_health_exposes_index_metadata() -> None:
     assert lines[0]["type"] == "trace_manifest"
     assert lines[0]["turn_count"] == 1
     assert lines[1]["type"] == "agent_turn"
+def test_tool_contracts_endpoint_exposes_schemas() -> None:
+    """The tool-contracts endpoint reports at least eight tools, including ``search_projects``."""
+    response = tool_contracts()
+    assert response["tool_count"] >= 8
+    tool_names = [tool["function"]["name"] for tool in response["tools"]]
+    assert "search_projects" in tool_names
+def test_tool_contract_check_endpoint_defaults_safely() -> None:
+    """Model output without a function call falls back to a ``search_projects`` call."""
+    result = tool_contract_check("broken", "family archive")
+    assert result["status"] == "defaulted"
+    assert result["call"]["name"] == "search_projects"

tests/test_tool_contracts.py ADDED Viewed

	@@ -0,0 +1,60 @@

+import pytest
+from hackathon_advisor.tool_contracts import (
+    ToolCall,
+    ToolContractError,
+    parse_xml_tool_call,
+    resolve_tool_call,
+    tool_schemas,
+    validate_tool_call,
+)
+def test_tool_schemas_are_model_ready() -> None:
+    """Schemas come back as function entries and cover the core advisor tools."""
+    schemas = tool_schemas()
+    assert len(schemas) >= 8
+    assert schemas[0]["type"] == "function"
+    expected = {"search_projects", "find_whitespace", "save_idea", "score_idea", "make_plan"}
+    exposed = {schema["function"]["name"] for schema in schemas}
+    assert expected.issubset(exposed)
+def test_parse_and_validate_minicpm_xml_tool_call() -> None:
+    """A well-formed XML call parses and validates to the equivalent ``ToolCall``."""
+    raw = '<function name="search_projects">{"query":"lullaby audio"}</function>'
+    parsed = parse_xml_tool_call(raw)
+    expected = ToolCall("search_projects", {"query": "lullaby audio"})
+    assert validate_tool_call(parsed) == expected
+def test_validate_rejects_unknown_tool() -> None:
+    """A call naming a tool outside the registry is rejected."""
+    bogus = ToolCall("invent_project", {})
+    with pytest.raises(ToolContractError, match="unknown tool"):
+        validate_tool_call(bogus)
+def test_validate_rejects_bad_argument_type() -> None:
+    """A non-string ``query`` for ``search_projects`` is rejected with a type error message."""
+    mistyped = ToolCall("search_projects", {"query": 47})
+    with pytest.raises(ToolContractError, match="search_projects.query must be string"):
+        validate_tool_call(mistyped)
+def test_validate_rejects_extra_arguments() -> None:
+    """Arguments that the tool does not declare are rejected."""
+    overloaded = ToolCall("find_whitespace", {"query": "unused"})
+    with pytest.raises(ToolContractError, match="unexpected arguments"):
+        validate_tool_call(overloaded)
+def test_resolve_defaults_to_search_when_output_is_broken() -> None:
+    """Malformed XML with a fallback query resolves to a defaulted ``search_projects`` call."""
+    outcome = resolve_tool_call("<function", fallback_query="offline archive")
+    expected_call = ToolCall("search_projects", {"query": "offline archive"})
+    assert outcome.status == "defaulted"
+    assert outcome.call == expected_call
+    assert outcome.errors
+def test_resolve_defaults_to_whitespace_without_query() -> None:
+    """Without a fallback query, a failed resolution defaults to ``find_whitespace``."""
+    outcome = resolve_tool_call("no function here")
+    expected_call = ToolCall("find_whitespace", {})
+    assert outcome.status == "defaulted"
+    assert outcome.call == expected_call