Spaces:
Sleeping
Sleeping
| """Hermes tool-calling: let the model write its own long-term memory. | |
| Hermes is a tool-calling fine-tune. When `HERMES_TOOLS=1`, the remote inference | |
| path (server/model.py) advertises these tools so the model can call `remember` | |
| mid-run to save durable facts ("Dana is the soccer coach", "you decline Mondays") | |
| — the active half of "grows with you". Kept separate + small so the round-trip | |
| logic is unit-testable without a live server. | |
| """ | |
| from __future__ import annotations | |
| import json | |
| from . import memory | |
# Tool definitions in the OpenAI function-calling format; llama-server
# understands these when started with --jinja.

# Guidance shown to the model describing when the `remember` tool applies.
_REMEMBER_DESCRIPTION = (
    "Save a durable fact or preference about the user to long-term memory "
    "so future scheduling is more personal. Use for stable facts only "
    "(roles, recurring preferences, default locations), not one-off details."
)

# JSON Schema for the `remember` arguments: `text` is required, `kind` is an
# optional tag restricted to the listed values.
_REMEMBER_PARAMETERS = {
    "type": "object",
    "properties": {
        "text": {
            "type": "string",
            "description": "the fact, e.g. 'Dana is the soccer coach'",
        },
        "kind": {
            "type": "string",
            "enum": ["contact", "preference", "location", "note"],
        },
    },
    "required": ["text"],
}

# The list of tools advertised to the model (currently only `remember`).
TOOL_SPECS = [
    {
        "type": "function",
        "function": {
            "name": "remember",
            "description": _REMEMBER_DESCRIPTION,
            "parameters": _REMEMBER_PARAMETERS,
        },
    },
]
def dispatch(name: str, arguments) -> str:
    """Execute one tool call and return a short result string for the tool message.

    The arguments originate from the model and are treated as untrusted: any
    malformed payload degrades to "no text provided" instead of raising.

    Args:
        name: Tool name requested by the model. Only ``"remember"`` is handled.
        arguments: The call's arguments, either a JSON-encoded string or an
            already-decoded mapping. ``None`` is treated as no arguments.

    Returns:
        ``"unknown tool: <name>"`` for any other tool name,
        ``"no text provided"`` when there is no usable non-blank string
        ``text`` argument, otherwise ``"remembered: <text>"`` after calling
        ``memory.remember(text, kind)``.
    """
    if name != "remember":
        return f"unknown tool: {name}"

    try:
        args = json.loads(arguments) if isinstance(arguments, str) else arguments
    except (ValueError, TypeError):
        args = None
    # Valid JSON is not necessarily an object (it may be a list, string or
    # number), and a caller may hand over a non-mapping; fall back to "no
    # arguments" rather than failing on `.get`.
    if not isinstance(args, dict):
        args = {}

    raw_text = args.get("text")
    # Only a string can be stored; anything else (null, number, list) counts
    # as missing.
    text = raw_text.strip() if isinstance(raw_text, str) else ""
    if not text:
        return "no text provided"

    kind = args.get("kind")
    # An explicit null / empty / non-string kind gets the same default as an
    # absent one.
    if not isinstance(kind, str) or not kind:
        kind = "note"

    memory.remember(text, kind)
    return f"remembered: {text}"
def run_with_tools(messages: list[dict], post_fn, max_rounds: int = 3):
    """Drive a tool-calling loop until the model answers without tool calls.

    Args:
        messages: Initial chat messages. The list is copied, so the caller's
            list is never appended to.
        post_fn: ``post_fn(messages) -> openai_response_dict``; does the
            actual HTTP POST (tools already configured by the caller).
            Injectable so the loop is testable.
        max_rounds: Maximum number of rounds in which tool calls are executed.
            If every round still asks for tools, one extra request is made
            and its content is returned; tool calls in that last response
            are not executed.

    Returns:
        ``(final_content, last_response)``. ``final_content`` is always a
        string: a missing or null ``content`` field is returned as ``""``.
    """
    msgs = list(messages)
    for _ in range(max_rounds):
        resp = post_fn(msgs)
        msg = resp["choices"][0]["message"]
        tool_calls = msg.get("tool_calls") or []
        if not tool_calls:
            # `content` may be present but null; normalise to a string.
            return msg.get("content") or "", resp

        msgs.append(msg)  # assistant turn carrying the tool_calls
        for tc in tool_calls:
            # Tolerate null fields in the tool call as well as absent ones.
            fn = tc.get("function") or {}
            result = dispatch(fn.get("name") or "", fn.get("arguments") or "{}")
            msgs.append(
                {"role": "tool", "tool_call_id": tc.get("id") or "", "content": result}
            )

    # Ran out of rounds: one final call to get content.
    resp = post_fn(msgs)
    return resp["choices"][0]["message"].get("content") or "", resp