Commit ·
0366d65
0
Parent(s):
Initial Commit
Browse files · This view is limited to 50 files because it contains too many changes. See raw diff
- .gitattributes +4 -0
- .gitignore +43 -0
- Dockerfile +41 -0
- FIELD_NOTES.md +145 -0
- PLAN.md +182 -0
- README.md +435 -0
- app.py +298 -0
- calendar_out/__init__.py +0 -0
- calendar_out/freebusy.py +141 -0
- calendar_out/gcal.py +313 -0
- calendar_out/ics.py +60 -0
- calendar_out/tzconfig.py +46 -0
- collector/.env.example +13 -0
- collector/collector.py +175 -0
- deploy/launchd/com.offgrid.backend.plist +33 -0
- deploy/launchd/com.offgrid.collector.plist +28 -0
- deploy/launchd/com.offgrid.hermes.plist +23 -0
- docs/android-tasker.md +37 -0
- docs/architecture.md +121 -0
- docs/automations.md +83 -0
- docs/blog-eval-gated-finetuning.md +187 -0
- docs/build-small-submission.md +68 -0
- docs/eval-roadmap.md +337 -0
- docs/gcal-verify.md +73 -0
- docs/hermes.md +48 -0
- docs/on-device.md +54 -0
- requirements-ci.txt +16 -0
- requirements-docker.txt +24 -0
- requirements.txt +47 -0
- scripts/setup_mac.sh +60 -0
- scripts/start_space.sh +85 -0
- scripts/verify_gcal_e2e.py +159 -0
- server/__init__.py +0 -0
- server/agent.py +475 -0
- server/dedup.py +84 -0
- server/events.py +116 -0
- server/health.py +54 -0
- server/imageutil.py +61 -0
- server/impact.py +87 -0
- server/mcp_tools.py +117 -0
- server/memory.py +174 -0
- server/model.py +317 -0
- server/orchestrator.py +191 -0
- server/pipeline.py +98 -0
- server/schema.py +43 -0
- server/threads.py +59 -0
- server/tools.py +81 -0
- server/trace.py +98 -0
- static/app.css +961 -0
- static/logo.png +0 -0
.gitattributes
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Shell scripts must use LF so bash on Linux (HF Space, Modal) doesn't choke on
|
| 2 |
+
# carriage returns (e.g. "set: pipefail: invalid option name") when the repo is
|
| 3 |
+
# committed/edited from Windows.
|
| 4 |
+
*.sh text eol=lf
|
.gitignore
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Secrets / config
|
| 2 |
+
.env
|
| 3 |
+
*.env
|
| 4 |
+
!*.env.example
|
| 5 |
+
collector/.env
|
| 6 |
+
|
| 7 |
+
# Models & data (large; live on HF, not git)
|
| 8 |
+
*.gguf
|
| 9 |
+
*.bin
|
| 10 |
+
*.safetensors
|
| 11 |
+
models/
|
| 12 |
+
training/data/*
|
| 13 |
+
!training/data/dataset.jsonl
|
| 14 |
+
!training/data/eval.jsonl
|
| 15 |
+
!training/data/eval_unstructured.jsonl
|
| 16 |
+
!training/data/ab_results.md
|
| 17 |
+
# screenshots/ stays ignored: regenerate with training/render_screenshots.py
|
| 18 |
+
training/outputs/
|
| 19 |
+
checkpoints/
|
| 20 |
+
|
| 21 |
+
# Generated calendar files
|
| 22 |
+
*.ics
|
| 23 |
+
out/
|
| 24 |
+
|
| 25 |
+
# Google OAuth
|
| 26 |
+
token.json
|
| 27 |
+
credentials.json
|
| 28 |
+
client_secret*.json
|
| 29 |
+
|
| 30 |
+
# Python
|
| 31 |
+
__pycache__/
|
| 32 |
+
*.py[cod]
|
| 33 |
+
.venv/
|
| 34 |
+
venv/
|
| 35 |
+
.ipynb_checkpoints/
|
| 36 |
+
*.egg-info/
|
| 37 |
+
|
| 38 |
+
# OS / editor
|
| 39 |
+
.DS_Store
|
| 40 |
+
Thumbs.db
|
| 41 |
+
.vscode/
|
| 42 |
+
.idea/
|
| 43 |
+
tok.json
|
Dockerfile
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Dedicated paid-GPU Space (Docker SDK) — real Gemma 4 on the OFFICIAL llama.cpp.
|
| 2 |
+
# Compiling llama.cpp in the HF build exceeds the build time limit, so we base on the
|
| 3 |
+
# llama.cpp project's own prebuilt CUDA image (trusted, current → supports Gemma 4).
|
| 4 |
+
# It runs `llama-server`; our app (UI + /agent) calls it via INFERENCE_BASE_URL.
|
| 5 |
+
# Pick a CUDA GPU in Space settings (e.g. 1x A100). Llama Champion = the llama.cpp server.
|
| 6 |
+
FROM ghcr.io/ggml-org/llama.cpp:server-cuda
|
| 7 |
+
|
| 8 |
+
ENV PYTHONUNBUFFERED=1 \
|
| 9 |
+
DEBIAN_FRONTEND=noninteractive \
|
| 10 |
+
PORT=7860 \
|
| 11 |
+
SERVE=uvicorn \
|
| 12 |
+
HF_HOME=/tmp/hf \
|
| 13 |
+
LLAMA_CACHE=/tmp/llama-cache \
|
| 14 |
+
INFERENCE_BASE_URL="http://127.0.0.1:8080/v1" \
|
| 15 |
+
INFERENCE_MODEL="gemma-4" \
|
| 16 |
+
MODEL_HF_REPO="ParetoOptimal/gemma-4-cal-gguf" \
|
| 17 |
+
MODEL_FILE="gemma-cal-e4b-Q4_K_M.gguf" \
|
| 18 |
+
MMPROJ_REPO="unsloth/gemma-4-E4B-it-GGUF" \
|
| 19 |
+
MMPROJ_FILE="mmproj-F16.gguf"
|
| 20 |
+
# Agent-tab planner (OFF by default — set as Space variables to enable):
|
| 21 |
+
# PLANNER_HF_REPO="openbmb/MiniCPM4.1-8B-GGUF" PLANNER_FILE="MiniCPM4.1-8B-Q4_K_M.gguf"
|
| 22 |
+
# (tiny <=4B variant: openbmb/MiniCPM5-1B-GGUF / MiniCPM5-1B-Q4_K_M.gguf)
|
| 23 |
+
# PLANNER_PORT=8081 PLANNER_NGL=999 PLANNER_BASE_URL=http://127.0.0.1:8081/v1
|
| 24 |
+
|
| 25 |
+
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 26 |
+
python3 python3-pip curl ca-certificates && \
|
| 27 |
+
rm -rf /var/lib/apt/lists/*
|
| 28 |
+
|
| 29 |
+
# Keep our app out of the image's /app (where the llama-server binary lives).
|
| 30 |
+
WORKDIR /srv
|
| 31 |
+
|
| 32 |
+
COPY requirements-docker.txt .
|
| 33 |
+
# --break-system-packages: the base image's Python is PEP 668 externally-managed.
|
| 34 |
+
RUN pip3 install --no-cache-dir --break-system-packages -r requirements-docker.txt
|
| 35 |
+
|
| 36 |
+
COPY . .
|
| 37 |
+
|
| 38 |
+
# The base image's entrypoint is llama-server; we run our launcher instead.
|
| 39 |
+
ENTRYPOINT []
|
| 40 |
+
EXPOSE 7860
|
| 41 |
+
CMD ["bash", "scripts/start_space.sh"]
|
FIELD_NOTES.md
ADDED
|
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Field Notes — building the iMessage → Calendar agent
|
| 2 |
+
|
| 3 |
+
What I set out to build, where reality bent the plan, and what I'd do next. This is
|
| 4 |
+
the "what I learned" companion to the product docs ([README](./README.md)) and the
|
| 5 |
+
design doc ([PLAN](./PLAN.md)).
|
| 6 |
+
|
| 7 |
+
## The goal in one line
|
| 8 |
+
|
| 9 |
+
Turn the calendar logistics buried in a chat thread — *"picture day moved to
|
| 10 |
+
Thursday 9am", "soccer is Tuesday now"* — into reviewed calendar events, from a
|
| 11 |
+
phone, with the data staying private.
|
| 12 |
+
|
| 13 |
+
## 1. "Read my iMessages" is impossible as literally asked — and that shaped everything
|
| 14 |
+
|
| 15 |
+
iOS exposes **no API** for iMessage/SMS *content*. There is no on-device path. The
|
| 16 |
+
only place the messages exist in a queryable form is a Mac, where they sync to
|
| 17 |
+
`~/Library/Messages/chat.db`. So the architecture forked early:
|
| 18 |
+
|
| 19 |
+
- A Mac-side collector ([`collector/collector.py`](./collector/collector.py)) reads
|
| 20 |
+
`chat.db` (read-only, `mode=ro`) and POSTs new rows to the Space.
|
| 21 |
+
- "On my phone" was reinterpreted honestly as **used from** a phone browser — the
|
| 22 |
+
Space is hosted, the UI is mobile-friendly, but the model runs in the Space.
|
| 23 |
+
|
| 24 |
+
The biggest adoption lesson came later: requiring a Mac collector + Full Disk
|
| 25 |
+
Access is a wall for a non-technical user. The fix was to make **paste-from-phone**
|
| 26 |
+
the hero path (the collector is now strictly optional) — no install, no DB, no
|
| 27 |
+
permissions. Most of that capability already existed in the Schedule tab; it was just
|
| 28 |
+
framed as secondary.
|
| 29 |
+
|
| 30 |
+
## 2. `attributedBody` is the iMessage parsing trap
|
| 31 |
+
|
| 32 |
+
Modern Messages often stores the body in `attributedBody` (an `NSAttributedString`
|
| 33 |
+
binary blob), **not** the `text` column. The collector reads `text` directly for
|
| 34 |
+
simplicity and **skips messages that only have `attributedBody`**
|
| 35 |
+
([`collector/collector.py:88-94`](./collector/collector.py)) — a deliberate, called-out
|
| 36 |
+
gap. The right move for production is to not hand-roll this: use `imessage-exporter`
|
| 37 |
+
(ReagentX) or `imessage_reader`. Noting the limitation in code beat pretending the
|
| 38 |
+
naive SQL was complete.
|
| 39 |
+
|
| 40 |
+
## 3. Relative dates are the real accuracy battleground
|
| 41 |
+
|
| 42 |
+
The hard part isn't "is there an event" — it's *when*. "Next Thursday", "the 14th",
|
| 43 |
+
"in two weeks" only resolve against a reference time. Two design responses:
|
| 44 |
+
|
| 45 |
+
- The system prompt pins **"Current datetime"** into every request and instructs the
|
| 46 |
+
model to resolve relative dates from it ([`server/agent.py:21-34`](./server/agent.py)).
|
| 47 |
+
- **Conflict math is deterministic, not model-driven.** Overlap/adjacent/tight
|
| 48 |
+
detection and alternative-time proposals live in
|
| 49 |
+
[`calendar_out/freebusy.py`](./calendar_out/freebusy.py), because once you have ISO
|
| 50 |
+
datetimes, interval math should never be left to an LLM. The model decides *what*;
|
| 51 |
+
code decides *when-it-clashes*.
|
| 52 |
+
|
| 53 |
+
The stub extractor's naive "match a time → 1h event tomorrow"
|
| 54 |
+
([`server/agent.py:152-175`](./server/agent.py)) is intentionally dumb — it exists to
|
| 55 |
+
prove the pipeline, and its dumbness is a good reminder of exactly how much the
|
| 56 |
+
fine-tune has to get right.
|
| 57 |
+
|
| 58 |
+
## 4. Stub-first was the best architectural call
|
| 59 |
+
|
| 60 |
+
`USE_STUB_EXTRACTOR=1` swaps the model for a regex heuristic
|
| 61 |
+
([`server/agent.py:85,124`](./server/agent.py)), forced on in tests
|
| 62 |
+
([`tests/conftest.py`](./tests/conftest.py)). Payoffs:
|
| 63 |
+
|
| 64 |
+
- The whole app — paste → events → conflicts → `.ics` download → impact panel —
|
| 65 |
+
**works end-to-end with no GPU**, so a demo (and CI) never depends on a model load.
|
| 66 |
+
- `llama_cpp` and the Google libs are **lazy-imported**, so `requirements-ci.txt` can
|
| 67 |
+
exclude them and the test suite runs in seconds, offline.
|
| 68 |
+
|
| 69 |
+
Lesson: make the expensive dependency optional from day one and the cheap path
|
| 70 |
+
becomes your test harness, your demo, and your free tier all at once.
|
| 71 |
+
|
| 72 |
+
## 5. Reframing around one person changed the scope more than any feature
|
| 73 |
+
|
| 74 |
+
The project started as a four-track hackathon checklist. Rewriting it around a single
|
| 75 |
+
named person — a **busy parent** whose kid's events are buried in a class group chat —
|
| 76 |
+
forced three concrete changes: phone-paste as the default, a one-tap **Try a sample**
|
| 77 |
+
class-chat ([`ui/blocks.py`](./ui/blocks.py)), and a **"This week"** impact panel.
|
| 78 |
+
|
| 79 |
+
On measurement: `minutes_saved` ([`server/impact.py`](./server/impact.py)) is a
|
| 80 |
+
**configurable estimate, not a measurement** (default 8 min/event + 15 min/conflict).
|
| 81 |
+
Saying that plainly — in the UI, the README, and here — matters more than a
|
| 82 |
+
bigger-looking number. A capture is only counted when the parent *accepts* events by
|
| 83 |
+
exporting them, so the metric tracks value taken, not previews shown.
|
| 84 |
+
|
| 85 |
+
## 6. Fine-tuning economics: Modal credits + honest scope
|
| 86 |
+
|
| 87 |
+
QLoRA on a 31B needs an 80GB GPU. [`training/modal_train.py`](./training/modal_train.py)
|
| 88 |
+
wraps the existing `train_qlora.py` + `export_gguf.sh` to run on a serverless
|
| 89 |
+
A100/H100 and publish the GGUF to HF — roughly **$5–15 per run**, so ~$250 of credit
|
| 90 |
+
is roughly 16–50 iterations. The "Well-Tuned" track went the distance: the eval-gated **E4B** fine-tune is
|
| 91 |
+
published and is what production serves —
|
| 92 |
+
[`build-small-hackathon/gemma-4-cal-gguf`](https://huggingface.co/build-small-hackathon/gemma-4-cal-gguf)
|
| 93 |
+
— after clearing the gate over six runs at zero quality cost vs. stock E4B. (Re-running the pipeline
|
| 94 |
+
still spends your own Modal credits; the turnkey path is there whenever you want to retrain.)
|
| 95 |
+
|
| 96 |
+
A small rule that paid off: training-data generation can use *any* offline tooling —
|
| 97 |
+
the "no cloud AI API" rule applies only to the **running app's inference**, not to
|
| 98 |
+
dataset prep.
|
| 99 |
+
|
| 100 |
+
## 7. Two models, not one — a 1B planner over the same tools
|
| 101 |
+
|
| 102 |
+
What shipped is two small local models, not one. The fine-tuned **gemma-cal E4B** does the
|
| 103 |
+
*reading* (thread → validated `ActionPlan`); a 1B **OpenBMB MiniCPM** does the *orchestrating*.
|
| 104 |
+
Clicking **Run the agents** hands the job to MiniCPM, which drives the Space's own MCP tools —
|
| 105 |
+
`extract_events → check_conflicts → make_ics` — as a visible multi-step agent
|
| 106 |
+
([`server/orchestrator.py`](./server/orchestrator.py)), consuming the *public* tool contract
|
| 107 |
+
instead of calling internals. Two things I'd underline: keep the planner **optional** (a
|
| 108 |
+
deterministic scripted plan is the fallback, so the agentic path never hard-depends on a second
|
| 109 |
+
model load), and don't let "agent" become a separate destination — the same **Run the agents**
|
| 110 |
+
action drives both the home workflow and the orchestrated trace, so it stays one engine, not a
|
| 111 |
+
second UI to keep in sync.
|
| 112 |
+
|
| 113 |
+
## 8. The Off-the-Grid tension
|
| 114 |
+
|
| 115 |
+
"No cloud AI APIs" and "serve a 31B" pull against each other: a Q4 31B GGUF is
|
| 116 |
+
~18–20GB and needs a GPU. Keeping inference **in the Space via `llama.cpp`** preserves
|
| 117 |
+
the privacy story but costs GPU. The honest compromise is the **E4B edge variant** for
|
| 118 |
+
the free tier, with the 31B as the headline. I deliberately did **not** offload
|
| 119 |
+
inference to a third-party endpoint, because "your own Modal GPU" and "a cloud AI API"
|
| 120 |
+
are easy to conflate and a purist judge would be right to dock it.
|
| 121 |
+
|
| 122 |
+
The same principle drove the trace-sharing design (below): the hosted Space holds **no
|
| 123 |
+
HF token** — it only offers a **local download**, and a separate local CLI does the
|
| 124 |
+
upload with your own auth.
|
| 125 |
+
|
| 126 |
+
## 9. What I'd do next
|
| 127 |
+
|
| 128 |
+
- **Durable trace/metrics store.** The activity bus is an 800-entry in-memory ring
|
| 129 |
+
buffer ([`server/events.py`](./server/events.py)) — runs are lost on restart, so only
|
| 130 |
+
recent runs are exportable. A small append-only store (the impact log already shows
|
| 131 |
+
the pattern) would fix it.
|
| 132 |
+
- **Decode `attributedBody`** (or adopt `imessage-exporter`) so text-less messages stop
|
| 133 |
+
being dropped.
|
| 134 |
+
- **A real eval set** from the expanded dataset — measure JSON validity + field
|
| 135 |
+
accuracy, especially relative-date resolution and empty-list-on-chitchat.
|
| 136 |
+
- **Trace redaction as a tested invariant.** Today it's an allowlist over current emit
|
| 137 |
+
sites ([`server/trace.py`](./server/trace.py)); a lint/test that fails when a new
|
| 138 |
+
`emit(...)` puts free text on a non-`ingest` stage would keep it honest as the code
|
| 139 |
+
grows.
|
| 140 |
+
|
| 141 |
+
## Publishing these notes
|
| 142 |
+
|
| 143 |
+
This file is linked from the README. It can also be pasted into the Space's README
|
| 144 |
+
(the Space card renders Markdown) or posted to the model/dataset repo's **Community**
|
| 145 |
+
tab on the Hub so others can learn from the build.
|
PLAN.md
ADDED
|
@@ -0,0 +1,182 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Plan: Local-First iMessage → Calendar Agent (Gradio + llama.cpp + fine-tuned Gemma + an OpenBMB MiniCPM-planned agent)
|
| 2 |
+
|
| 3 |
+
## Who this is for
|
| 4 |
+
|
| 5 |
+
One named person: **a busy parent** whose kid's school/activity events are buried in a noisy class
|
| 6 |
+
group chat (picture day, the practice that moved, the RSVP). They read it once, mean to add it later,
|
| 7 |
+
and miss it. Success = *their* day measurably improves — events captured from the chat, conflicts
|
| 8 |
+
caught against their calendar, minutes saved — with **zero setup**: paste the thread or a screenshot
|
| 9 |
+
from a phone browser. The local-LLM / fine-tune work below is a **means** to better extraction, not
|
| 10 |
+
the point; the app must deliver value with no GPU (stub agent) first.
|
| 11 |
+
|
| 12 |
+
## Context
|
| 13 |
+
|
| 14 |
+
You want an agent that reads iMessage-style threads, understands the conversation, and turns them
|
| 15 |
+
into calendar events/reminders — exposed through a custom Gradio UI deployed as a Hugging Face Space.
|
| 16 |
+
Two local models share the work: our fine-tuned Gemma does the *reading* (thread → validated
|
| 17 |
+
ActionPlan), and an **OpenBMB MiniCPM** planner does the *orchestrating* — the brain behind **Run the agents**,
|
| 18 |
+
driving the Space's own MCP tools (`extract_events → check_conflicts → make_ics`) as a visible
|
| 19 |
+
multi-step agent. The build competes in the **Backyard AI** track (general and OpenBMB prizes are
|
| 20 |
+
awarded per track) and satisfies these quests, all secondary to the user story above: **Off the Grid** (no
|
| 21 |
+
cloud AI APIs, local-first), **Well-Tuned** (a fine-tuned model on HF), **Off-Brand** (custom UI),
|
| 22 |
+
and **Llama Champion** (both Gemma and MiniCPM are served through llama.cpp).
|
| 23 |
+
|
| 24 |
+
### Feasibility verdict: YES, with one re-architecture
|
| 25 |
+
|
| 26 |
+
The request as *literally* worded has two impossibilities, both solvable:
|
| 27 |
+
|
| 28 |
+
1. **No app or cloud can read iMessage on iOS.** Apple exposes no API for iMessage/SMS content.
|
| 29 |
+
→ **Solved:** you have a Mac. iMessages sync to `~/Library/Messages/chat.db`; a small local
|
| 30 |
+
collector reads it. This is the *only* supported path and it keeps data local ("off the grid").
|
| 31 |
+
2. **A model cannot "run on your phone," and a HF Space runs in the cloud, not on-device.**
|
| 32 |
+
→ **Solved:** "on my phone" = *used from* your phone's browser. The Space does its own llama.cpp
|
| 33 |
+
inference and calls no external AI service, so "hosted Space" and "off the grid" reconcile.
|
| 34 |
+
|
| 35 |
+
Confirmed decisions:
|
| 36 |
+
- **Ingestion:** Mac collector reading `chat.db`.
|
| 37 |
+
- **Calendar output:** local `.ics` files first (strictly off-grid), with an *optional* Google
|
| 38 |
+
Calendar push toggle as a bonus.
|
| 39 |
+
- **Extraction model:** fine-tune Gemma, serve as GGUF via llama.cpp (production serves the
|
| 40 |
+
**E4B** edge fine-tune, `build-small-hackathon/gemma-4-cal-gguf`).
|
| 41 |
+
- **Agent planner:** **OpenBMB MiniCPM** (`openbmb/MiniCPM4.1-8B-GGUF`, Q4; the 1B variant is a
|
| 42 |
+
config switch) on a second llama-server — it plans, the MCP tools execute, every step visible.
|
| 43 |
+
|
| 44 |
+
---
|
| 45 |
+
|
| 46 |
+
## Architecture
|
| 47 |
+
|
| 48 |
+
```
|
| 49 |
+
┌────────── Your Mac (local) ──────────┐ ┌──────── Hugging Face Space (Docker) ────────┐
|
| 50 |
+
│ collector.py (Full Disk Access) │ HTTPS │ Gradio (custom theme/CSS) ── Off-Brand │
|
| 51 |
+
│ • polls chat.db for new messages │ +token │ │ │
|
| 52 |
+
│ • parses text / attributedBody ├────────▶│ FastAPI /ingest ──▶ extraction pipeline │
|
| 53 |
+
│ • POSTs new msgs to Space /ingest │ │ │ │
|
| 54 |
+
└───────────────────────────────────────┘ │ llama.cpp (llama-cpp-python) ── Llama Champ │
|
| 55 |
+
│ running YOUR fine-tuned gemma-4-31B GGUF │
|
| 56 |
+
View/approve from phone browser ───────────────▶│ │ ── Off the Grid (local) │
|
| 57 |
+
│ JSON events → pydantic validate │
|
| 58 |
+
│ ├──▶ .ics file (download) │
|
| 59 |
+
│ └──▶ optional Google Calendar push │
|
| 60 |
+
└──────────────────────────────────────────────┘
|
| 61 |
+
```
|
| 62 |
+
|
| 63 |
+
Flow: messages → extraction prompt → model emits structured JSON of candidate events →
|
| 64 |
+
validated → shown in UI for review → user approves → `.ics` generated (and/or pushed to GCal).
|
| 65 |
+
|
| 66 |
+
**Run the agents** runs the same flow agentically: an **OpenBMB MiniCPM** planner (second local
|
| 67 |
+
llama-server, OpenAI-compatible) consumes the Space's own **MCP tool surface** —
|
| 68 |
+
`extract_events → check_conflicts → make_ics` — through smolagents, so the pipeline above is
|
| 69 |
+
demonstrated as multi-step tool use over the public tool contract, with the planner's trace on
|
| 70 |
+
screen (`server/orchestrator.py`). Stub/CI falls back to a scripted planner so the tab always works.
|
| 71 |
+
|
| 72 |
+
---
|
| 73 |
+
|
| 74 |
+
## Components
|
| 75 |
+
|
| 76 |
+
### 1. Mac-side iMessage collector (`collector/collector.py`)
|
| 77 |
+
- **Reuse, don't reinvent the DB parsing.** Modern macOS stores message text in the
|
| 78 |
+
`attributedBody` (NSAttributedString) blob, not always the `text` column. Use the battle-tested
|
| 79 |
+
**`imessage-exporter`** (ReagentX, Rust) or the Python **`imessage_reader`** lib rather than
|
| 80 |
+
hand-rolling SQL. If hand-querying: join `message` ⨝ `handle` ⨝ `chat_message_join` ⨝ `chat`,
|
| 81 |
+
track last seen `ROWID`, poll on an interval.
|
| 82 |
+
- Requires **Full Disk Access** for the running process (System Settings → Privacy & Security).
|
| 83 |
+
- Sends only new messages to the Space `/ingest` endpoint over HTTPS with a shared bearer token.
|
| 84 |
+
- Config: which chats to watch, poll interval, Space URL, token (`.env`, never committed).
|
| 85 |
+
|
| 86 |
+
### 2. HF Space backend (`app.py`, `server/`)
|
| 87 |
+
- **Docker SDK Space** (`README.md` frontmatter: `sdk: docker`, `app_port: 7860`).
|
| 88 |
+
- **llama.cpp** loads the fine-tuned GGUF and serves chat completions — satisfies
|
| 89 |
+
*Llama Champion*; no external AI call satisfies *Off the Grid*.
|
| 90 |
+
- **Agent orchestrator** (`server/orchestrator.py`): the **OpenBMB MiniCPM** planner behind
|
| 91 |
+
**Run the agents** (its own llama-server) drives the Space's MCP tools as a multi-step agent — the OpenBMB
|
| 92 |
+
per-track prize case, and the same extraction pipeline exercised through the public tool
|
| 93 |
+
contract rather than private imports.
|
| 94 |
+
- `/ingest` (FastAPI, mounted alongside Gradio) receives messages, runs the extraction prompt,
|
| 95 |
+
returns candidate events; results surface in the Gradio UI for review.
|
| 96 |
+
- **Compute:** Q4_K_M GGUF of a 31B ≈ 18–20 GB → does **not** fit the free CPU tier (16 GB / 2 cores).
|
| 97 |
+
Serve on a GPU: **ZeroGPU** (free, H200/70 GB — but cold GGUF load per acquisition; document the
|
| 98 |
+
caveat) or a **paid GPU Space** (e.g. L4/L40S) for a smooth always-warm demo. See Fallback.
|
| 99 |
+
|
| 100 |
+
### 3. Fine-tuning pipeline (`training/`)
|
| 101 |
+
- **Task:** conversation snippet → strict JSON list of events
|
| 102 |
+
`{title, start, end, location, attendees, reminder_minutes, notes}`.
|
| 103 |
+
- **Data:** build a synthetic instruction dataset (~500–2000 examples) of realistic chat threads
|
| 104 |
+
paired with the target JSON. Generation/augmentation for *training data* can use any tooling
|
| 105 |
+
offline — the "no cloud API" rule applies to the *running app's inference*, not dataset prep.
|
| 106 |
+
Include hard cases: relative dates ("next Thurs"), ranges, no-event chitchat (empty list),
|
| 107 |
+
timezones, multiple events per thread.
|
| 108 |
+
- **Method:** QLoRA via **Unsloth** (Qwen3-0.6B GRPO experience applies), 4-bit, r=16,
|
| 109 |
+
1–3 epochs. 31B QLoRA needs an A100/H100 80 GB (Colab Pro+/RunPod/Lambda, ~hours).
|
| 110 |
+
- **Export:** merge LoRA → `convert_hf_to_gguf.py` (llama.cpp) → `llama-quantize` to Q4_K_M →
|
| 111 |
+
**publish GGUF to your HF repo** (satisfies *Well-Tuned*). Space downloads it at startup via
|
| 112 |
+
`huggingface_hub`.
|
| 113 |
+
|
| 114 |
+
### 4. Custom Gradio UI (`ui/`, `static/`) — *Off-Brand*
|
| 115 |
+
- `gr.Blocks` with a custom `gr.themes.Base(...)` palette + injected `css=` (custom fonts, layout,
|
| 116 |
+
cards) to push well past the default look.
|
| 117 |
+
- Screens: connection/status, incoming-message feed, **review queue** (edit candidate events
|
| 118 |
+
inline, approve/reject), download `.ics`, optional "Push to Google Calendar" toggle, settings.
|
| 119 |
+
|
| 120 |
+
### 5. Calendar output (`calendar_out/`)
|
| 121 |
+
- **`.ics` (default, off-grid):** generate with the `icalendar` lib; offer as a download in the UI.
|
| 122 |
+
- **Google Calendar (optional bonus):** `google-api-python-client` OAuth; behind a toggle so the
|
| 123 |
+
off-grid demo path stays pure. Clearly labeled as the one optional cloud touchpoint.
|
| 124 |
+
|
| 125 |
+
---
|
| 126 |
+
|
| 127 |
+
## Hackathon requirement mapping
|
| 128 |
+
|
| 129 |
+
| Track | How it's satisfied |
|
| 130 |
+
|---|---|
|
| 131 |
+
| Off the Grid (local-first, no cloud AI APIs) | All inference is local llama.cpp in the Space; data originates on your Mac; `.ics` is the default output. |
|
| 132 |
+
| Well-Tuned (fine-tuned model on HF) | QLoRA fine-tune of `gemma-4-31B-it`, GGUF published to your HF repo. |
|
| 133 |
+
| Off-Brand (custom UI) | Custom Gradio theme + CSS, not the stock look. |
|
| 134 |
+
| Llama Champion (llama.cpp) | Inference via `llama-cpp-python`. |
|
| 135 |
+
| Gradio app on HF Space | Docker Space serving Gradio + FastAPI `/ingest`. |
|
| 136 |
+
|
| 137 |
+
---
|
| 138 |
+
|
| 139 |
+
## Build phases
|
| 140 |
+
|
| 141 |
+
1. **Hero path (no GPU):** Docker Space with custom-themed Gradio + the *stub* extractor → paste /
|
| 142 |
+
"Try a sample" / screenshot → review → `.ics` download, working end-to-end on a phone browser.
|
| 143 |
+
This is the parent's whole experience and must stand alone with no model.
|
| 144 |
+
2. **Measure impact:** persisted **This week** panel (events captured, conflicts caught, minutes
|
| 145 |
+
saved) via `server/impact.py`, recorded when the parent exports. Proves *their* day got better.
|
| 146 |
+
3. **Accuracy upgrade (optional):** wire `llama-cpp-python` with a community `gemma-4-31B-it` GGUF on
|
| 147 |
+
a GPU Space; swap the stub for the model + JSON-schema prompt + pydantic validation.
|
| 148 |
+
4. **Fine-tune (optional):** dataset → Unsloth QLoRA → GGUF → publish to HF → point the Space at it.
|
| 149 |
+
5. **Optional auto-feed:** Mac `collector.py` reading `chat.db` → POST `/ingest` (power users only).
|
| 150 |
+
|
| 151 |
+
---
|
| 152 |
+
|
| 153 |
+
## Verification
|
| 154 |
+
|
| 155 |
+
- **End-to-end (stub, phase 1):** open Space in phone browser → tap **Try a sample** (or paste a
|
| 156 |
+
chat) → event appears in review queue → download `.ics` → import to a calendar, confirm date/time.
|
| 157 |
+
- **Impact (phase 2):** after exporting, **Activity → This week** shows events captured and time
|
| 158 |
+
saved > 0; restart the app (same `IMPACT_PATH`) and confirm the weekly numbers persist while the
|
| 159 |
+
live tiles reset. `minutes_saved` is a stated estimate (`IMPACT_MIN_PER_EVENT`=8,
|
| 160 |
+
`IMPACT_MIN_PER_CONFLICT`=15, env-overridable), not a measurement.
|
| 161 |
+
- **Collector (phase 5):** send yourself a test iMessage ("lunch Tuesday 1pm") → confirm it reaches
|
| 162 |
+
`/ingest` and surfaces in the feed.
|
| 163 |
+
- **Model (phase 3+):** curated eval set of chats with known expected events; measure JSON validity
|
| 164 |
+
rate + field accuracy (esp. relative-date resolution); confirm empty-list on non-event chats.
|
| 165 |
+
- **llama.cpp:** confirm the Space logs show llama.cpp loading *your* GGUF, no external AI calls.
|
| 166 |
+
|
| 167 |
+
---
|
| 168 |
+
|
| 169 |
+
## Risks & fallbacks
|
| 170 |
+
|
| 171 |
+
- **31B serving cost/latency.** Q4 31B needs a GPU; ZeroGPU has cold-load + quota friction, paid GPU
|
| 172 |
+
has cost. **Fallback:** fine-tune **Gemma 4 E4B** (edge variant) — runs on free CPU tier / fast on
|
| 173 |
+
small GPU, far cheaper to fine-tune, and arguably *more* on-theme for "local-first." Keep 31B as
|
| 174 |
+
the headline, E4B as the safety net for a reliable live demo.
|
| 175 |
+
- **`chat.db` schema / `attributedBody`.** Mitigated by using `imessage-exporter`/`imessage_reader`.
|
| 176 |
+
- **Full Disk Access** must be granted to the collector's process or reads return empty.
|
| 177 |
+
- **Privacy:** the autonomous Mac-collector path sends messages to the Space (token-gated); the
|
| 178 |
+
hero phone-paste path keeps data client-side (calendar tokens live in the browser, nothing
|
| 179 |
+
persists server-side). The Space now lives in the public **`build-small-hackathon`** submission
|
| 180 |
+
org, so the *source* is public — but user data still never lands on the server.
|
| 181 |
+
- **Relative-date accuracy** is the main quality risk — pass the current datetime into the prompt
|
| 182 |
+
and weight the dataset toward relative-date examples.
|
README.md
ADDED
|
@@ -0,0 +1,435 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: OffGridSchedula
|
| 3 |
+
emoji: 🗓️
|
| 4 |
+
colorFrom: indigo
|
| 5 |
+
colorTo: purple
|
| 6 |
+
sdk: docker
|
| 7 |
+
app_port: 7860
|
| 8 |
+
pinned: false
|
| 9 |
+
license: apache-2.0
|
| 10 |
+
short_description: Local-first chat-to-calendar agent (Gemma-4 E4B + MiniCPM)
|
| 11 |
+
tags:
|
| 12 |
+
- track:backyard
|
| 13 |
+
- sponsor:openbmb
|
| 14 |
+
- sponsor:modal
|
| 15 |
+
- achievement:offgrid
|
| 16 |
+
- achievement:welltuned
|
| 17 |
+
- achievement:offbrand
|
| 18 |
+
- achievement:llama
|
| 19 |
+
- achievement:sharing
|
| 20 |
+
- achievement:fieldnotes
|
| 21 |
+
models:
|
| 22 |
+
- build-small-hackathon/gemma-4-cal-gguf
|
| 23 |
+
- openbmb/MiniCPM5-1B-GGUF
|
| 24 |
+
demo_video:
|
| 25 |
+
- https://youtu.be/m-o0u9X3tI4
|
| 26 |
+
social_posts:
|
| 27 |
+
- https://x.com/nate_mauer/status/2065973341651882386
|
| 28 |
+
- https://x.com/nate_mauer/status/2064920352845709419
|
| 29 |
+
- https://x.com/nate_mauer/status/2065661878441750916
|
| 30 |
+
- https://www.linkedin.com/feed/update/urn:li:ugcPost:7471440639969132545
|
| 31 |
+
blog_post:
|
| 32 |
+
- https://huggingface.co/blog/build-small-hackathon/offgridschedula
|
| 33 |
+
made_by:
|
| 34 |
+
- ParetoOptimal - a.k.a., Nate Mauer
|
| 35 |
+
---
|
| 36 |
+
|
| 37 |
+
# 🗓️ Message Scheduling Agent
|
| 38 |
+
|
| 39 |
+
**OffGridSchedula turns a pasted chat (or a flyer screenshot) into calendar events, catches conflicts, and drafts the reply — right from your phone, no app, no account,
|
| 40 |
+
no setup. iOS allows neither background iMessage access nor a persistent on-device LLM server, so there's no autonomous on-device agent to install; instead,
|
| 41 |
+
a foreground Shortcut ([docs/automations.md](./docs/automations.md)) hands a thread or screenshot to the agent in two taps (optionally using a remote model via `INFERENCE_BASE_URL`).**
|
| 42 |
+
|
| 43 |
+
The model runs on **your own server or even on the phone itself**, not on a cloud AI service. Your chats aren't shipped off to a third-party AI to be read; the agent reads your snippet in memory and
|
| 44 |
+
discards it after replying. The run trace you can optionally share is redacted, and your snippet goes only to the agent you control, which turns it into ready-to-add calendar events.
|
| 45 |
+
|
| 46 |
+
**Hardware-aware.** On under-powered hardware, the app warns users with an upgrade banner rather than hanging; the real model needs a small GPU.
|
| 47 |
+
|
| 48 |
+
## Build Small submission — the idea & the tech
|
| 49 |
+
|
| 50 |
+
**The idea.** A busy parent's calendar lives in other people's messages — picture day in the
|
| 51 |
+
class chat, the practice that moved, the party flyer. OffGridSchedula turns those into calendar
|
| 52 |
+
events: paste the chat (or snap the flyer) from a phone browser, review the extracted events, the
|
| 53 |
+
conflicts against your own `.ics`, and a drafted reply — then add to Apple/Google Calendar in a tap.
|
| 54 |
+
|
| 55 |
+
**The tech.** Two small local models do the work. Extraction is [`gemma-cal` E4B](https://huggingface.co/build-small-hackathon/gemma-4-cal-gguf)
|
| 56 |
+
(~4B effective params), our QLoRA fine-tune of Gemma-4 E4B that emits a single validated
|
| 57 |
+
**ActionPlan** (events · conflicts · reply · clarifying question), served with **vision** through
|
| 58 |
+
the official **llama.cpp** server inside this Docker Gradio Space — no cloud AI APIs. The
|
| 59 |
+
fine-tune + its 60-example task eval ran entirely on **Modal** serverless GPUs, behind an
|
| 60 |
+
eval gate that rejected eight regressed models before this one shipped. Conflict math is
|
| 61 |
+
deterministic Python, the UI is fully custom, the agent doubles as an **MCP tool server**, and
|
| 62 |
+
redacted run traces are public on the [Hub](https://huggingface.co/datasets/ParetoOptimal/offgridschedula-traces).
|
| 63 |
+
Click **Run the agents** and a local **OpenBMB MiniCPM** planner (a second local llama-server)
|
| 64 |
+
drives this same Space's MCP tools as a multi-step agent — extract → check conflicts → render
|
| 65 |
+
`.ics` — with every step visible. Still zero cloud AI; every model under 32B.
|
| 66 |
+
|
| 67 |
+
**What's new.** Extraction now reads the *logistics*, not just the date (see below): arrival-aware
|
| 68 |
+
start times, duration→end conversion, type-based reminders, and calendar-ready titles — each
|
| 69 |
+
guaranteed by deterministic post-processing even when the model wobbles, and each shipped through
|
| 70 |
+
a measured A/B eval ([full result tables](./training/data/ab_results.md): regex vs text-LLM vs
|
| 71 |
+
**vision-LLM reading rendered screenshots only**). Calendar out got one-click too: a unified
|
| 72 |
+
**Connect your calendar** block (Google OAuth — the token lives in *your* browser, never on the
|
| 73 |
+
server; Outlook/Apple need no sign-in) and per-event **Google · Outlook · iCal** links, with the
|
| 74 |
+
Google push verified end-to-end (push → readback → delete, 11/11).
|
| 75 |
+
**The UX.** One decision — **Offline or Online** — re-themes the whole workflow card and sets the
|
| 76 |
+
path: off-grid `.ics` only, or a **one-click "Connect your calendar"** whose Google OAuth token
|
| 77 |
+
lives *only in the browser* (server-verified each visit; the client secret never leaves the
|
| 78 |
+
server). Results land in a single card: events, conflicts, the drafted reply, and per-event
|
| 79 |
+
**Google · Outlook · iCal · .ics** quick-add links. **Activity → This week** tallies events
|
| 80 |
+
captured, conflicts caught, and time saved; a per-device **Memory** (localStorage, one-click
|
| 81 |
+
samples) feeds names and preferences back into extraction.
|
| 82 |
+
|
| 83 |
+
**Submission links:** [requirement-by-requirement mapping](./docs/build-small-submission.md) ·
|
| 84 |
+
[demo video](https://youtu.be/m-o0u9X3tI4) ·
|
| 85 |
+
social posts [1](https://x.com/nate_mauer/status/2064920352845709419) ·
|
| 86 |
+
[2](https://x.com/nate_mauer/status/2065661878441750916)
|
| 87 |
+
|
| 88 |
+
## Who this is for
|
| 89 |
+
|
| 90 |
+
A busy parent whose kid's school and activity events are buried in a noisy class group chat —
|
| 91 |
+
picture day Thursday, the practice that moved to Tuesday, the birthday-party RSVP. They read it once,
|
| 92 |
+
mean to add it later, and miss it. With this, they **paste the chat** (or a **screenshot** of a flyer
|
| 93 |
+
or invite) from their phone's browser and get back: the events, a **conflict check** against their
|
| 94 |
+
calendar, and a **ready-to-send reply** — all surfaced for review before anything is saved. Output is
|
| 95 |
+
a local `.ics` they can add to any calendar, with optional Google Calendar push.
|
| 96 |
+
|
| 97 |
+
No app to install and no account. It reads nothing automatically — the parent pastes only what they
|
| 98 |
+
choose. Inference runs **in the Space** via `llama.cpp` (no cloud AI APIs), and works out of the box
|
| 99 |
+
with no GPU (see *Accuracy upgrade* below).
|
| 100 |
+
|
| 101 |
+
## The model: `gemma-cal` E4B — one calendar-native LLM, built for exactly this
|
| 102 |
+
|
| 103 |
+
What makes this platform different isn't a prompt wrapped around a generic chatbot — it's
|
| 104 |
+
**[`gemma-cal` E4B](https://huggingface.co/build-small-hackathon/gemma-4-cal-gguf), our own fine-tune of
|
| 105 |
+
Gemma-4 E4B purpose-built for one job: turning messy human conversation into calendar-ready
|
| 106 |
+
structure.** The model doesn't chat. It reads a thread (or a flyer photo) and emits a single
|
| 107 |
+
validated **ActionPlan** — events with exact ISO datetimes, conflicts, proposed alternatives, a
|
| 108 |
+
drafted reply, and a clarifying question when the plan is too vague to schedule. **It is the one
|
| 109 |
+
and only model the platform runs**, everywhere from the production Space to a laptop.
|
| 110 |
+
|
| 111 |
+
- **Edge-sized by design.** ~5 GB at Q4 — serves on a **~$0.40/hr 16 GB T4** (vs $4+/hr A100-class
|
| 112 |
+
for big models), a gaming GPU, or an Apple-silicon laptop, with full **vision**
|
| 113 |
+
(screenshots/flyers) via its mmproj. Local-first isn't a tagline; it's the parameter count.
|
| 114 |
+
- **Schema-bulletproof.** The fine-tune holds **100% schema validity even with no system prompt**,
|
| 115 |
+
with stronger no-event discipline (doesn't invent events from "thanks!") and a higher rate of
|
| 116 |
+
*asking* when a date is TBD — the failure modes that actually burn users of generic models.
|
| 117 |
+
- **Convention-trained.** It learns *this product's* date semantics ("next Tuesday" means next
|
| 118 |
+
week's Tuesday; weekday-anchored relative dates) instead of whatever a base model absorbed
|
| 119 |
+
from the internet.
|
| 120 |
+
- **Eval-gated, never vibes-shipped.** Every retrain runs a 60-example task eval (start-exact
|
| 121 |
+
datetime matching, F1, validity, clarification) and **cannot reach production unless it clears
|
| 122 |
+
the gate** — the pipeline has rejected eight regressed models to date. The full, honest scorecard
|
| 123 |
+
lives in [`docs/eval-roadmap.md`](./docs/eval-roadmap.md) and the
|
| 124 |
+
[post-mortem write-up](./docs/blog-eval-gated-finetuning.md).
|
| 125 |
+
|
| 126 |
+
**Hackathon size constraint (≤ 32B):** easily — E4B is ~4B effective parameters. See the in-app
|
| 127 |
+
**🏆 Submission** tab for the full compliance scorecard.
|
| 128 |
+
|
| 129 |
+
### Reads the logistics, not just the date
|
| 130 |
+
|
| 131 |
+
A confirmation like *"Time: 10:30 AM · Duration: approx. 30–45 min · (Please arrive 15 minutes
|
| 132 |
+
early to complete intake forms) · 📍 112A West 72nd Street…"* becomes one correct event:
|
| 133 |
+
|
| 134 |
+
- **Arrival-aware start** — the event starts at **10:15** (when you must show up), the official
|
| 135 |
+
10:30 is preserved in the notes, and the **end is anchored to the stated time + duration**
|
| 136 |
+
(11:00), so the calendar block covers the forms *and* the visit.
|
| 137 |
+
- **Type-based notifications** — an explicitly stated lead time always wins ("remind me 2 hours
|
| 138 |
+
before" → 120); otherwise doctor/medical visits get 60 minutes, parties 30, carpools and school
|
| 139 |
+
events 45.
|
| 140 |
+
- **Real-world addresses** — multi-line and 📍-emoji locations join into one string;
|
| 141 |
+
"(Upper West Side — 72nd & Columbus)" glosses and SMS footers ("Reply C to confirm… call us
|
| 142 |
+
at 212-223-0349") don't confuse it.
|
| 143 |
+
- **Calendar-ready titles** — an action+subject summary ("Pick up Priya — Terminal 4"), not a
|
| 144 |
+
quote of the message.
|
| 145 |
+
|
| 146 |
+
The model is *taught* these conventions (prompt + fine-tune data), but the load-bearing ones are
|
| 147 |
+
also **guaranteed by deterministic post-processing** (`apply_text_rules` in
|
| 148 |
+
[`server/agent.py`](./server/agent.py)) — same philosophy as the conflict engine: must-hold
|
| 149 |
+
logistics are never left to model temperament. Every behavior above shipped through a measured
|
| 150 |
+
A/B eval — regex baseline vs text-LLM vs **vision-LLM reading rendered chat screenshots only** —
|
| 151 |
+
with the full tables in [`training/data/ab_results.md`](./training/data/ab_results.md)
|
| 152 |
+
(headline: text-LLM event F1 0.96 structured / 0.89 unstructured vs regex 0.60/0.67; the
|
| 153 |
+
screenshot-only vision arm lands within a point of text).
|
| 154 |
+
|
| 155 |
+
## Try it in 30 seconds
|
| 156 |
+
|
| 157 |
+
Open the Space in your phone's browser → **Schedule** tab → tap **Try a sample** (or paste your own
|
| 158 |
+
group chat, and optionally a screenshot or your `.ics`) → review the detected events → **Download
|
| 159 |
+
.ics**. The **Activity → This week** panel then shows what you've captured and the time it saved.
|
| 160 |
+
|
| 161 |
+
## How it works
|
| 162 |
+
|
| 163 |
+
```
|
| 164 |
+
Paste a thread / screenshot ──▶ HF Space ──▶ llama.cpp ──▶ events + conflicts + reply
|
| 165 |
+
(phone browser) │ │
|
| 166 |
+
custom Gradio UI ◀── review ──┐ ┌────┘
|
| 167 |
+
▼ ▼
|
| 168 |
+
.ics download / optional Google Calendar
|
| 169 |
+
```
|
| 170 |
+
|
| 171 |
+
The **primary path needs nothing but a browser**: paste text and/or attach a screenshot in the
|
| 172 |
+
Schedule tab. (Power users can also auto-feed messages from a Mac — see *Optional: Mac collector*.)
|
| 173 |
+
|
| 174 |
+
For the full solution-architecture view — every workflow and which LLM (if any) it calls,
|
| 175 |
+
plus the eval-gated fine-tuning loop — see **[docs/architecture.md](./docs/architecture.md)**.
|
| 176 |
+
|
| 177 |
+
## Can it process multiple invites at once?
|
| 178 |
+
|
| 179 |
+
**Yes — multiple invites in one paste is the designed path** (on the live Space, where the real
|
| 180 |
+
model runs). `ActionPlan.events` is a *list*, and the extraction prompt explicitly tells the model
|
| 181 |
+
that one thread often holds several events — a drop-off AND a pickup, or two appointments, are
|
| 182 |
+
separate events (`server/agent.py`). Everything downstream is built for N events: the results card
|
| 183 |
+
shows "*N events found*" with one card per invite, the editable table gets one row each, the `.ics`
|
| 184 |
+
contains one `VEVENT` per event, each event carries its own Google/Outlook/Apple quick-add links,
|
| 185 |
+
and the conflict check runs across all of them. Screenshot input is multi-file too — attach several
|
| 186 |
+
flyers and they're all read in one run.
|
| 187 |
+
|
| 188 |
+
Two caveats:
|
| 189 |
+
|
| 190 |
+
- **Stub mode extracts only the first invite.** The local-dev heuristic (`_stub_plan` in
|
| 191 |
+
`server/agent.py`, enabled by `USE_STUB_EXTRACTOR=1`) works with no model and no GPU — and it's
|
| 192 |
+
now a decent parser in its own right (labeled times, explicit dates, multi-line/📍 locations,
|
| 193 |
+
durations, arrival-early shifts, type-based reminders) — but it still returns at most **one**
|
| 194 |
+
event. If you paste a multi-invite thread locally and get one event back, that's the stub, not
|
| 195 |
+
the product; the deployed Space uses the multi-event model path.
|
| 196 |
+
- **Simultaneous *runs* are serialized, not parallel.** If two users (or two tabs) hit *Run the
|
| 197 |
+
agents* at once, both complete, but inference executes one request at a time — `server/model.py`
|
| 198 |
+
holds the llama.cpp instance behind a `threading.Lock`, and Gradio queues the events. On a
|
| 199 |
+
single-GPU Space that's intentional (one model copy in memory); the second run simply waits its
|
| 200 |
+
turn, then streams its own pipeline progress.
|
| 201 |
+
|
| 202 |
+
## Repo layout
|
| 203 |
+
|
| 204 |
+
```
|
| 205 |
+
app.py # Gradio + FastAPI entrypoint (the Space)
|
| 206 |
+
server/
|
| 207 |
+
agent.py # thread (+images) -> validated ActionPlan
|
| 208 |
+
orchestrator.py # Run the agents: MiniCPM planner driving our own MCP tools
|
| 209 |
+
schema.py # Event / Conflict / ActionPlan pydantic models
|
| 210 |
+
model.py # llama.cpp load: GGUF + vision mmproj, constrained JSON
|
| 211 |
+
imageutil.py # image -> base64 data URI
|
| 212 |
+
ui/blocks.py # custom Gradio Blocks (reasoning, events, conflicts, reply)
|
| 213 |
+
static/app.css # custom CSS (Off-Brand)
|
| 214 |
+
calendar_out/
|
| 215 |
+
ics.py # .ics generation (off-grid default)
|
| 216 |
+
freebusy.py # parse existing .ics + deterministic conflict detection
|
| 217 |
+
gcal.py # optional Google Calendar push
|
| 218 |
+
collector/collector.py # Mac-side iMessage collector (text + image attachments)
|
| 219 |
+
training/ # dataset build + QLoRA fine-tune + GGUF/mmproj export
|
| 220 |
+
Dockerfile # dedicated-GPU Space: builds llama.cpp (0.3.28) WITH CUDA
|
| 221 |
+
requirements-docker.txt # runtime deps for the Docker image (llama.cpp built separately)
|
| 222 |
+
PLAN.md # full design + build plan
|
| 223 |
+
```
|
| 224 |
+
|
| 225 |
+
## Quick start (local dev) — no GPU needed
|
| 226 |
+
|
| 227 |
+
```bash
|
| 228 |
+
pip install -r requirements.txt
|
| 229 |
+
|
| 230 |
+
# Runs the whole app with the built-in heuristic agent — no model, no GPU:
|
| 231 |
+
export USE_STUB_EXTRACTOR=1 INGEST_TOKEN="dev-secret"
|
| 232 |
+
python app.py # http://localhost:7860
|
| 233 |
+
```
|
| 234 |
+
|
| 235 |
+
Open it, go to the **Schedule** tab, and tap **Try a sample** — or paste a thread, attach chat
|
| 236 |
+
**screenshots**, and optionally upload your current calendar **`.ics`** for conflict checks.
|
| 237 |
+
(Heads-up: the stub agent extracts only the **first** invite in a thread — multi-invite extraction
|
| 238 |
+
needs the real model; see *Can it process multiple invites at once?* above.) Tip for
|
| 239 |
+
self-hosted installs: set `CAL_ICS_PATH=/path/to/calendar.ics` and conflict checks use that file
|
| 240 |
+
automatically whenever no `.ics` is uploaded — step 4 completes itself, fully offline. Review
|
| 241 |
+
the detected events, conflicts, proposed times, and the suggested reply, then add any event with
|
| 242 |
+
its **Add to: Google · Outlook · iCal · .ics** links (iCal and .ics both download the event's
|
| 243 |
+
`.ics` file; with 2+ events an **iCal — all N events** link grabs everything at once).
|
| 244 |
+
The **Activity → This week** panel shows what you've captured.
|
| 245 |
+
|
| 246 |
+
## This week (impact)
|
| 247 |
+
|
| 248 |
+
The Activity tab has a **This week** panel that persists across restarts: **events captured**,
|
| 249 |
+
**conflicts caught**, and **estimated time saved**. A "capture" is counted when a run surfaces
|
| 250 |
+
events for review (adding to a calendar happens through the per-event links, which the server
|
| 251 |
+
can't observe).
|
| 252 |
+
|
| 253 |
+
`minutes_saved` is a deliberately conservative, **configurable estimate — not a measurement**:
|
| 254 |
+
`IMPACT_MIN_PER_EVENT` (default **8** min per captured event) + `IMPACT_MIN_PER_CONFLICT` (default
|
| 255 |
+
**15** min per conflict caught). Override either via env. State persists to `IMPACT_PATH`
|
| 256 |
+
(default `/tmp/impact_weeks.json`; point it at a persistent disk on a Space to survive rebuilds).
|
| 257 |
+
|
| 258 |
+
## Accuracy upgrade (optional) — serve the real `gemma-cal` LLM
|
| 259 |
+
|
| 260 |
+
The stub agent above makes the demo work with **no GPU**. The production Space serves our
|
| 261 |
+
fine-tuned **`gemma-cal` E4B** through `llama-server` — no cloud AI APIs either way. The same
|
| 262 |
+
config works anywhere llama.cpp runs:
|
| 263 |
+
|
| 264 |
+
```bash
|
| 265 |
+
export USE_STUB_EXTRACTOR=0
|
| 266 |
+
export MODEL_HF_REPO="build-small-hackathon/gemma-4-cal-gguf"
|
| 267 |
+
export MODEL_FILE="gemma-cal-e4b-Q4_K_M.gguf" # ~5 GB edge fine-tune (what the Space serves)
|
| 268 |
+
export MMPROJ_REPO="unsloth/gemma-4-E4B-it-GGUF" # the E4B's own vision projector
|
| 269 |
+
export MMPROJ_FILE="mmproj-F16.gguf" # enables screenshot/vision input
|
| 270 |
+
bash scripts/start_space.sh
|
| 271 |
+
```
|
| 272 |
+
|
| 273 |
+
This is the platform's **only** model — the same ~5 GB GGUF serves the production Space (16 GB
|
| 274 |
+
T4), a gaming GPU, or a laptop. (`MODEL_FILE` is explicit on purpose: the model repo also stores
|
| 275 |
+
legacy training artifacts, so the `-hf repo:Q4_K_M` shorthand is ambiguous.)
|
| 276 |
+
|
| 277 |
+
## Optional: Mac collector (power users)
|
| 278 |
+
|
| 279 |
+
The phone-paste path above needs nothing installed. If you'd rather have new iMessages fed in
|
| 280 |
+
automatically, run the collector on a Mac where iMessages sync (iOS exposes no API for message
|
| 281 |
+
content, so a Mac is the only auto-feed source):
|
| 282 |
+
|
| 283 |
+
```bash
|
| 284 |
+
cd collector && cp .env.example .env # edit SPACE_URL + INGEST_TOKEN
|
| 285 |
+
python collector.py
|
| 286 |
+
```
|
| 287 |
+
|
| 288 |
+
> ⚠️ The collector needs **Full Disk Access** (System Settings → Privacy & Security) to read `chat.db`.
|
| 289 |
+
|
| 290 |
+
## Autonomous & on a phone
|
| 291 |
+
|
| 292 |
+
There's a single backend endpoint — **`POST /agent`** (bearer `INGEST_TOKEN`) — that takes a thread
|
| 293 |
+
(or messages, + optional screenshot/`.ics`) and returns the extracted events, conflicts, and reply as
|
| 294 |
+
JSON (optionally an `.ics` or a Google Calendar push). Every front-end calls it:
|
| 295 |
+
|
| 296 |
+
- **Fully autonomous (Mac) — set-and-forget:** `INGEST_TOKEN=… MODEL_GGUF=~/models/hermes.gguf
|
| 297 |
+
scripts/setup_mac.sh` installs three launchd jobs (Hermes `llama-server` + autonomous backend +
|
| 298 |
+
collector). New iMessages **you send or accept** become calendar events automatically, deduped per
|
| 299 |
+
chat. Triggers on outgoing messages by default (`TRIGGER_ON=outgoing`; `any` to widen).
|
| 300 |
+
- **Hermes "grows-with-you" brain:** point `INFERENCE_BASE_URL` at a Hermes `llama-server`; its
|
| 301 |
+
personal **memory** (people→roles, "you decline Mondays") improves extraction over time and is shown
|
| 302 |
+
in the dashboard **Memory** tab. See **[docs/hermes.md](./docs/hermes.md)**.
|
| 303 |
+
- **iPhone, one tap:** an iOS **Shortcut** shares a thread/screenshot to `/agent` and adds the events
|
| 304 |
+
to Apple Calendar natively — no `.ics` import.
|
| 305 |
+
- **Android, hands-off:** a Tasker/MacroDroid rule on a notification/SMS calls `/agent` and inserts
|
| 306 |
+
events. See **[docs/android-tasker.md](./docs/android-tasker.md)**.
|
| 307 |
+
- **On-device model:** set `INFERENCE_BASE_URL` to a local `llama-server` (e.g. Gemma **E4B** or a
|
| 308 |
+
small Hermes in Termux) so inference runs *on the phone* — same agent, env-selected.
|
| 309 |
+
|
| 310 |
+
> **iOS can't read iMessage in the background** (no message API), so fully-autonomous iMessage needs
|
| 311 |
+
> the Mac collector; the iPhone path is one-gesture. See **[docs/automations.md](./docs/automations.md)**
|
| 312 |
+
> and **[docs/on-device.md](./docs/on-device.md)**.
|
| 313 |
+
|
| 314 |
+
## Build Small — prizes & quests
|
| 315 |
+
|
| 316 |
+
**Track: 🏡 Backyard AI** (`track:backyard`) — a practical app for a specific real person: a busy
|
| 317 |
+
parent whose family calendar is buried in a noisy class group chat.
|
| 318 |
+
|
| 319 |
+
### Sponsor awards we compete for
|
| 320 |
+
|
| 321 |
+
| Award | Why this submission qualifies |
|
| 322 |
+
|---|---|
|
| 323 |
+
| 🟢 **Modal Awards** (best Modal-powered apps) | **Modal powered the development of the platform's model end-to-end** — required note, gladly given: [`training/modal_train.py`](./training/modal_train.py) (QLoRA fine-tune on serverless A100/H100s, Volumes caching weights), [`training/modal_eval.py`](./training/modal_eval.py) + [`modal_quant_eval.py`](./training/modal_quant_eval.py) (the task eval served on llama.cpp inside Modal, incl. an f16/Q8_0/Q4_K_M quantization study and the regex/text/vision A/B harness), and [`training/gated_retrain.py`](./training/gated_retrain.py) (train → staging → eval → promote *only past the gate* — eight regressed models rejected, every run a Modal job). |
|
| 324 |
+
| 🌱 **OpenBMB Awards** (standout MiniCPM builds, per track) | The **agent is planned by OpenBMB MiniCPM** (`openbmb/MiniCPM4.1-8B-GGUF`, Q4; the 1B variant is a config switch) on a second local llama-server, driving this Space's own MCP tools (`extract_events → check_conflicts → make_ics`) as a visible multi-step agent ([`server/orchestrator.py`](./server/orchestrator.py)). MiniCPM is the agent's brain, not a garnish. |
|
| 325 |
+
|
| 326 |
+
*(Not claimed: the OpenAI Track — no Codex-attributed commits — and the NVIDIA Nemotron Quest —
|
| 327 |
+
different model family. We'd rather be honest than eligible.)*
|
| 328 |
+
|
| 329 |
+
### Special awards — our case
|
| 330 |
+
|
| 331 |
+
| Award | Our case |
|
| 332 |
+
|---|---|
|
| 333 |
+
| 🎖️ **Bonus Quest Champion** | All **six** collectable quests claimed with evidence — the full sash (table below). |
|
| 334 |
+
| 🎨 **Off-Brand Award** | Custom landing page, hero + carousel, grouped nav, bespoke results cards and Activity dashboard — [`ui/blocks.py`](./ui/blocks.py) + [`static/app.css`](./static/app.css), far past the stock Gradio look. |
|
| 335 |
+
| 🐜 **Tiny Titan** | The platform's one and only model is **Gemma E4B — ~4B *effective* parameters** (~5 GB at Q4, serves on a 16 GB T4 or a laptop), and a 1B MiniCPM planner variant is a config switch. Honest framing: E4B is a MatFormer "effective-4B" — judges' call whether that's tiny enough. |
|
| 336 |
+
| 🎬 **Best Demo** | App + demo video + social post as one package — storyboard with every quest named on-camera in [`docs/demo-script.md`](./docs/demo-script.md). |
|
| 337 |
+
| 🤖 **Best Agent** | The MiniCPM-planned, MCP-tool-driven agent above — real multi-step tool use, every model under the 32B cap. |
|
| 338 |
+
| 🃏 **Judges' Wildcard** | No entry needed — but if "eval-gated fine-tuning with a public failure post-mortem" fits no category, we know where to find you. |
|
| 339 |
+
|
| 340 |
+
### Collectable quests — all six claimed
|
| 341 |
+
|
| 342 |
+
| Quest | Evidence |
|
| 343 |
+
|---|---|
|
| 344 |
+
| 🔌 **Off the Grid** (local-first, no cloud APIs) | All inference is llama.cpp inside the Space; the only optional outbound call is the user's own Google Calendar push. |
|
| 345 |
+
| 🎯 **Well-Tuned** (published fine-tune) | [`gemma-cal` E4B](https://huggingface.co/build-small-hackathon/gemma-4-cal-gguf) — our QLoRA fine-tune **is the model production serves**, shipped through the eval gate with the [honest scorecard public](./docs/eval-roadmap.md). |
|
| 346 |
+
| 🎨 **Off-Brand** (custom UI) | See the Off-Brand Award case above. |
|
| 347 |
+
| 🦙 **Llama Champion** (llama.cpp runtime) | The official `ghcr.io/ggml-org/llama.cpp` server image runs the GGUF + vision mmproj ([`Dockerfile`](./Dockerfile), [`scripts/start_space.sh`](./scripts/start_space.sh)). |
|
| 348 |
+
| 📡 **Sharing is Caring** (open trace on the Hub) | Redacted agent traces published to [`ParetoOptimal/offgridschedula-traces`](https://huggingface.co/datasets/ParetoOptimal/offgridschedula-traces) — one click from the Activity tab. |
|
| 349 |
+
| 📓 **Field Notes** (write-up) | [`FIELD_NOTES.md`](./FIELD_NOTES.md) + the [eval-gated fine-tuning post-mortem](./docs/blog-eval-gated-finetuning.md) + [project blog](https://huggingface.co/blog/build-small-hackathon/offgridschedula). |
|
| 350 |
+
|
| 351 |
+
## Fine-tune on Modal (GPU)
|
| 352 |
+
|
| 353 |
+
`training/modal_train.py` runs the whole fine-tune on a serverless GPU and publishes the GGUF to
|
| 354 |
+
HF — no local GPU needed. It's a thin wrapper that ships this repo to Modal and runs the existing
|
| 355 |
+
pipeline (`make_dataset.py` → `train_qlora.py` → `export_gguf.sh`) on an A100/H100, then uploads the
|
| 356 |
+
quantized GGUF + `mmproj` to your HF repo. This is all *offline* prep, so **Off the Grid** is
|
| 357 |
+
untouched (the rule applies to the running app's inference, not dataset/training prep).
|
| 358 |
+
|
| 359 |
+
```bash
|
| 360 |
+
pip install modal
|
| 361 |
+
modal token new
|
| 362 |
+
modal secret create huggingface HF_TOKEN=hf_xxxxxxxx # your HF *write* token
|
| 363 |
+
|
| 364 |
+
# Validate the full pipeline cheaply first (cheap edge model, ~a couple $):
|
| 365 |
+
modal run training/modal_train.py --base-model google/gemma-4-E4B-it
|
| 366 |
+
|
| 367 |
+
# Then the real run (default A100-80GB; --gpu H100 for speed):
|
| 368 |
+
modal run training/modal_train.py
|
| 369 |
+
modal run training/modal_train.py --gpu H100 --num-epochs 3
|
| 370 |
+
```
|
| 371 |
+
|
| 372 |
+
On finish it prints the `MODEL_REPO` / `MODEL_FILE` / `MMPROJ_FILE` to set on the Space. Two
|
| 373 |
+
persistent Modal Volumes cache the base-model download and the outputs across runs, so iterating on
|
| 374 |
+
`training/data/dataset.jsonl` only re-pays for the training itself.
|
| 375 |
+
|
| 376 |
+
> Cost (A100-80GB ≈ $2.5/hr, per-second billing): a few-hundred-to-2000-example QLoRA run is
|
| 377 |
+
> ~1–3 hr ≈ $5–15, so ~$250 of credit ≈ 15–40 full iterations. Expand the dataset before the
|
| 378 |
+
> first real 31B run — the seeds in `make_dataset.py` are a smoke test, not a training set.
|
| 379 |
+
|
| 380 |
+
### Publish your fine-tune & point the Space at it
|
| 381 |
+
|
| 382 |
+
The training run is the one step that spends **your** GPU/Modal credits — it's not done for you.
|
| 383 |
+
Once you've run it, the path is turnkey:
|
| 384 |
+
|
| 385 |
+
1. **Recommended:** `python training/gated_retrain.py` — train → staging upload → 60-example eval →
|
| 386 |
+
**promote only if it beats the gate**. A regressed model cannot reach production. (Raw
|
| 387 |
+
`modal run training/modal_train.py` is the ungated equivalent for experiments.)
|
| 388 |
+
2. Point the Space at *your* model via **Space variables** (`scripts/start_space.sh` reads them at
|
| 389 |
+
launch; set in *Settings → Variables* or with `HfApi().add_space_variable`):
|
| 390 |
+
```
|
| 391 |
+
MODEL_HF_REPO = <you>/gemma-cal-gguf
|
| 392 |
+
MODEL_FILE = gemma-cal-e4b-Q4_K_M.gguf # explicit file — repo may hold several quants/tiers
|
| 393 |
+
MMPROJ_REPO = unsloth/gemma-4-E4B-it-GGUF # projector repo, if different from the LLM's
|
| 394 |
+
MMPROJ_FILE = mmproj-F16.gguf # enables screenshot/vision input
|
| 395 |
+
```
|
| 396 |
+
The deploy workflow stays a plain git mirror — the model is pulled at runtime, never committed.
|
| 397 |
+
3. Push to `main` → CI deploys → the Space now serves your fine-tune (**Well-Tuned**).
|
| 398 |
+
|
| 399 |
+
## Share a trace (Sharing is Caring)
|
| 400 |
+
|
| 401 |
+
Want others to learn from a run? In the **Activity** tab, click **⬇ Download trace (JSON)** — the
|
| 402 |
+
trace stays on your device, and the hosted Space holds **no Hub token**. Personal data is redacted by
|
| 403 |
+
default (the activity log only carries counts + status; the one chat-name field is stripped). Then
|
| 404 |
+
publish it from your own machine, with your own login:
|
| 405 |
+
|
| 406 |
+
```bash
|
| 407 |
+
huggingface-cli login # or export HF_TOKEN=...
|
| 408 |
+
python training/share_trace.py trace.json --public # -> a HF dataset repo of traces
|
| 409 |
+
```
|
| 410 |
+
|
| 411 |
+
## Field notes
|
| 412 |
+
|
| 413 |
+
[**FIELD_NOTES.md**](./FIELD_NOTES.md) is the build retrospective — the iOS→`chat.db` pivot, the
|
| 414 |
+
`attributedBody` trap, why conflict math is deterministic, stub-first architecture, the
|
| 415 |
+
reframe-around-one-person lesson, and the Off-the-Grid trade-offs.
|
| 416 |
+
|
| 417 |
+
## Remote automation (runs without an interactive session)
|
| 418 |
+
|
| 419 |
+
| Workflow | Trigger | What it does | Needs |
|
| 420 |
+
|---|---|---|---|
|
| 421 |
+
| `.github/workflows/ci.yml` → **test** | push / PR | compile + `pytest` (stub mode, no GPU) | nothing |
|
| 422 |
+
| `.github/workflows/ci.yml` → **deploy** | push to `main`, after tests pass | `huggingface-cli upload` the repo to the HF Space (Gradio SDK; model excluded, pulled at runtime) | secret `HF_TOKEN`, var `SPACE_ID` |
|
| 423 |
+
| `.github/workflows/maintenance.yml` | daily + manual | ping the Space `/health`, audit outdated deps → open/update a GitHub issue | var `SPACE_HEALTH_URL` |
|
| 424 |
+
|
| 425 |
+
One-time setup for deploy + monitoring:
|
| 426 |
+
|
| 427 |
+
```bash
|
| 428 |
+
gh secret set HF_TOKEN # HF write token
|
| 429 |
+
gh variable set SPACE_ID -b "<owner>/<space>"
|
| 430 |
+
gh variable set SPACE_HEALTH_URL -b "https://<owner>-<space>.hf.space/health"
|
| 431 |
+
```
|
| 432 |
+
|
| 433 |
+
CI installs `requirements-ci.txt` (excludes `llama-cpp-python` and the Google libs — both are
|
| 434 |
+
imported lazily and not needed for the stub-mode tests). A weekly Claude `/schedule` routine handles
|
| 435 |
+
the judgment work (grow `training/data/dataset.jsonl` → PR, triage CI failures).
|
app.py
ADDED
|
@@ -0,0 +1,298 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Space entrypoint: Gradio UI + FastAPI /ingest, served together on one port."""
|
| 2 |
+
from __future__ import annotations
|
| 3 |
+
|
| 4 |
+
import json
|
| 5 |
+
import os
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
|
| 8 |
+
import gradio as gr
|
| 9 |
+
import uvicorn
|
| 10 |
+
from fastapi import BackgroundTasks, FastAPI, Header, HTTPException, Request
|
| 11 |
+
from fastapi.responses import HTMLResponse, JSONResponse, RedirectResponse
|
| 12 |
+
from pydantic import BaseModel
|
| 13 |
+
|
| 14 |
+
from server import dedup, events, health, threads
|
| 15 |
+
from server.pipeline import AgentRequest, AgentResponse, run_pipeline
|
| 16 |
+
from ui.blocks import CAROUSEL_JS, CSS, THEME, build_demo
|
| 17 |
+
|
| 18 |
+
# Bearer secret expected on /agent and /ingest; when empty every request is rejected.
INGEST_TOKEN = os.getenv("INGEST_TOKEN", "")
# JSON file holding the rolling ingest feed.
FEED_PATH = Path(os.getenv("FEED_PATH", "/tmp/ingest_feed.json"))
# Only the newest MAX_FEED messages are kept in the feed file.
MAX_FEED = 200
# Opt-in: run the agent automatically on each new message (front-end A). Off by
# default, so /ingest keeps its store-only behavior unless explicitly enabled.
AUTONOMOUS = os.getenv("AUTONOMOUS") == "1"
# Which message direction triggers autonomous action: "outgoing" = only when YOU
# send/accept an invite (is_from_me), "any" = any new message in the chat.
TRIGGER_ON = os.getenv("TRIGGER_ON", "outgoing").lower()
|
| 27 |
+
|
| 28 |
+
app = FastAPI(title="iMessage Calendar Agent")
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
class IngestMessage(BaseModel):
    # One chat message as posted to /ingest by a collector.
    chat: str
    sender: str
    text: str
    timestamp: str
    # base64 data URIs of image attachments
    images: list[str] = []
    # True when YOU sent it (the send/accept trigger)
    is_from_me: bool = False
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
class IngestBatch(BaseModel):
    # Request body of POST /ingest: the messages delivered in one call.
    messages: list[IngestMessage]
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def _load_feed() -> list[dict]:
|
| 45 |
+
try:
|
| 46 |
+
return json.loads(FEED_PATH.read_text())
|
| 47 |
+
except Exception: # noqa: BLE001 missing/corrupt -> empty
|
| 48 |
+
return []
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
def _append_feed(items: list[dict]) -> None:
|
| 52 |
+
feed = (_load_feed() + items)[-MAX_FEED:]
|
| 53 |
+
FEED_PATH.write_text(json.dumps(feed, indent=2))
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
def _require_token(authorization: str) -> None:
    """Reject the request unless it carries ``Bearer <INGEST_TOKEN>``.

    Raises:
        HTTPException: 401 when ``INGEST_TOKEN`` is unset/empty or the header
            does not match.
    """
    import hmac  # local import: only needed here

    expected = f"Bearer {INGEST_TOKEN}".encode("utf-8")
    supplied = (authorization or "").encode("utf-8")
    # Constant-time comparison so response timing does not reveal how much of
    # the secret a guess got right. Bytes are used because compare_digest
    # rejects non-ASCII str arguments.
    if not INGEST_TOKEN or not hmac.compare_digest(supplied, expected):
        raise HTTPException(status_code=401, detail="bad token")
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
def _run_autonomous(chats: set[str]) -> None:
    """Run the agent over each affected chat's rolling thread and push only
    the events that have not been delivered yet. Used when AUTONOMOUS=1.

    The sequence is deliberate: extract WITHOUT pushing, dedup, then push the
    fresh events only. Pushing inside the pipeline would re-push
    already-captured events on every rolling-window re-run, which is exactly
    the duplication dedup exists to prevent.
    """
    snapshot = _load_feed()
    for chat in chats:
        window = threads.rolling_thread(snapshot, chat)
        if not window:
            continue
        result = run_pipeline(AgentRequest(thread=window, push_gcal=False))
        # Filter without recording: an event is marked seen only after its
        # push succeeded. Recording first would turn a transient push failure
        # into permanent loss (the event is filtered out on every retry).
        fresh = dedup.filter_new(result.plan.events, record=False)
        if not fresh:
            continue
        try:
            from calendar_out.gcal import push_events  # lazy: google libs optional

            push_events(fresh)
        except Exception as exc:  # noqa: BLE001  push failure must not kill the loop
            # Nothing is marked seen here, so the next trigger retries.
            events.emit(
                "calendar",
                f"autonomous push failed (will retry next run): {type(exc).__name__}: {exc}",
                level="error",
            )
        else:
            dedup.mark_seen(fresh)
            events.emit(
                "decision",
                f"autonomous: {len(fresh)} new event(s) in {chat}",
                events=len(fresh),
            )
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
@app.post("/agent", response_model=AgentResponse)
def agent(req: AgentRequest, authorization: str = Header(default="")):
    """Run the agent on a thread (or messages) and return an ActionPlan.

    The shared contract every front-end calls (iOS Shortcut, Android Tasker, the
    Mac collector). Stateless — see server/pipeline.run_pipeline.
    """
    # Bearer-token gate first; a bad token raises 401 before any work is done.
    _require_token(authorization)
    response = run_pipeline(req)
    return response
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
@app.post("/ingest")
def ingest(batch: IngestBatch, background_tasks: BackgroundTasks,
           authorization: str = Header(default="")):
    """Receive new messages from the Mac collector (bearer-token protected).

    Returns immediately — autonomous runs (full LLM inference, potentially
    minutes per chat) happen in a background task. Running them inline blew
    the collector's 30s POST timeout, which skipped _save_rowid and re-sent
    the same batch every poll (duplicate feed entries + duplicate runs)."""
    _require_token(authorization)
    incoming = batch.messages
    payload = [msg.model_dump() for msg in incoming]
    _append_feed(payload)
    image_total = sum(len(msg.images) for msg in incoming)
    chat_names = sorted({msg.chat for msg in incoming})
    events.emit(
        "ingest",
        f"{len(payload)} msg(s) from {', '.join(chat_names) or '—'}",
        images=image_total,
    )
    receipt = {"received": len(payload)}
    if not AUTONOMOUS:
        return receipt
    # Trigger on YOUR sent/accepted messages by default; "any" widens it.
    trigger_chats = (
        set(chat_names)
        if TRIGGER_ON == "any"
        else {msg.chat for msg in incoming if msg.is_from_me}
    )
    if trigger_chats:
        background_tasks.add_task(_run_autonomous, trigger_chats)
    return receipt
|
| 134 |
+
|
| 135 |
+
|
| 136 |
+
@app.get("/health")
def health_route():
    # Liveness + hardware-adequacy (device/model/degraded/reason). Read by both
    # the on-page status banner and the maintenance monitor.
    status = health.health_status()
    return status
|
| 141 |
+
|
| 142 |
+
|
| 143 |
+
# --- Per-user Google Calendar OAuth (web flow) ----------------------------- #
|
| 144 |
+
def _oauth_redirect_uri(request: Request) -> str:
|
| 145 |
+
"""Public redirect URI. On a Space, SPACE_HOST is the public host; locally,
|
| 146 |
+
fall back to the request's base URL. Must match the Google client config."""
|
| 147 |
+
host = os.environ.get("SPACE_HOST", "").strip()
|
| 148 |
+
base = f"https://{host}" if host else str(request.base_url).rstrip("/")
|
| 149 |
+
return base.rstrip("/") + "/oauth2callback"
|
| 150 |
+
|
| 151 |
+
|
| 152 |
+
@app.get("/oauth2/start")
def oauth2_start(request: Request):
    """Kick off the Google consent flow (opened as a popup from the UI).

    Redirects to the Google consent URL. When building that URL fails (e.g.
    the OAuth client is not configured) a small HTML page is returned with
    status 503 instead.
    """
    from html import escape  # local import: only needed for the error page

    from calendar_out import gcal

    try:
        url, _state = gcal.auth_url(_oauth_redirect_uri(request))
    except Exception as e:  # noqa: BLE001  not configured -> friendly page
        # The exception text is escaped before it is placed into markup.
        return HTMLResponse(
            f"<p style='font-family:sans-serif;padding:24px'>Google Calendar isn't "
            f"configured on this Space.<br><small>{escape(str(e))}</small></p>",
            status_code=503,
        )
    return RedirectResponse(url)
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
@app.get("/oauth2callback")
def oauth2_callback(request: Request):
    """Google redirects here after consent. Exchange the code for a per-user token,
    hand it to the opener window (and localStorage), then close. The token is NOT
    stored server-side.

    Returns a small HTML page in every case: a "cancelled" page when Google
    reports an error or no code is present, a failure page when the code
    exchange raises, otherwise the page whose inline script delivers the token.
    """
    from html import escape  # local import: only needed for the error page

    code = request.query_params.get("code")
    if request.query_params.get("error") or not code:
        return HTMLResponse(
            "<p style='font-family:sans-serif;padding:24px'>Google connection cancelled. "
            "You can close this window.</p><script>setTimeout(()=>window.close(),500)</script>"
        )
    from calendar_out import gcal

    try:
        token_json = gcal.exchange_code(
            _oauth_redirect_uri(request), code, request.query_params.get("state", "")
        )
    except Exception as e:  # noqa: BLE001
        # The exception text is escaped before it is placed into markup.
        return HTMLResponse(
            f"<p style='font-family:sans-serif;padding:24px'>Couldn't complete Google "
            f"sign-in.<br><small>{escape(str(e))}</small></p>"
        )
    # JS string literal of the token JSON. json.dumps does not escape "<", ">"
    # or "&", so a value containing "</script>" would end the inline script
    # early; \uXXXX escapes decode to the same characters inside the JS string.
    tok_js = (
        json.dumps(token_json)
        .replace("<", "\\u003c")
        .replace(">", "\\u003e")
        .replace("&", "\\u0026")
    )
    return HTMLResponse(
        "<!doctype html><meta charset=utf-8>"
        "<body style='font-family:sans-serif;padding:24px'>"
        "<p>✅ Google Calendar connected. You can close this window.</p>"
        "<script>try{var t=" + tok_js + ";localStorage.setItem('gcal_token',t);"
        "if(window.opener)window.opener.postMessage({gcal_token:t},location.origin);}"
        "catch(e){}setTimeout(function(){window.close();},800);</script></body>"
    )
|
| 199 |
+
|
| 200 |
+
|
| 201 |
+
class TokenCheckBody(BaseModel):
    # Request body of POST /oauth2/check: the browser-held token JSON string.
    token: str
|
| 203 |
+
|
| 204 |
+
|
| 205 |
+
@app.post("/oauth2/check")
def oauth2_check(body: TokenCheckBody):
    """Liveness-check a browser-held Google token with one real API call
    (same-origin fetch from wireGcal on page load). POST so the token never
    lands in access logs; it is checked and discarded, never stored.
    200 = definitive verdict; non-200 = indeterminate (client keeps its
    local shape-check state)."""
    from calendar_out import gcal

    try:
        gcal._client_config()  # mirror /oauth2/start: friendly 503 when env unset
    except Exception as e:  # noqa: BLE001
        unavailable = {"ok": False, "transient": True, "reason": str(e)}
        return JSONResponse(unavailable, status_code=503)

    verdict = gcal.check_token(body.token)
    reply: dict = {"ok": verdict["ok"]}
    if not verdict["ok"]:
        reply["reason"] = verdict.get("reason", "")
        reply["transient"] = bool(verdict.get("transient"))
        return reply
    # Pass a refreshed token back to the browser only when one was produced.
    if verdict.get("refreshed_token"):
        reply["token"] = verdict["refreshed_token"]
    return reply
|
| 229 |
+
|
| 230 |
+
|
| 231 |
+
# Register the @spaces.GPU functions at startup so ZeroGPU can schedule them.
|
| 232 |
+
import server.model # noqa: E402,F401
|
| 233 |
+
|
| 234 |
+
demo = build_demo()
|
| 235 |
+
|
| 236 |
+
# Serving mode, env-selected:
|
| 237 |
+
# - "gradio": the HF *Gradio-SDK* / ZeroGPU platform manages the launch (a self-run
|
| 238 |
+
# uvicorn gets SIGTERM'd there), so we call demo.launch(). /agent etc. aren't served.
|
| 239 |
+
# - "uvicorn": mount gradio under FastAPI and serve UI + /agent + /ingest on one port.
|
| 240 |
+
# Used locally and on the *Docker-SDK* GPU Space (Dockerfile sets SERVE=uvicorn).
|
| 241 |
+
# Default: gradio on a Space unless told otherwise, uvicorn locally.
|
| 242 |
+
_default_serve = "gradio" if (os.environ.get("SPACE_ID") or os.environ.get("SYSTEM") == "spaces") else "uvicorn"
|
| 243 |
+
SERVE = os.environ.get("SERVE", _default_serve)
|
| 244 |
+
|
| 245 |
+
# Gradio 6 applies theme/css at mount/launch time — the css set on gr.Blocks is
|
| 246 |
+
# IGNORED when mounted, so pass it here or the custom UI renders as default Gradio.
|
| 247 |
+
#
|
| 248 |
+
# The `js=` load-function does NOT reliably execute on a *mounted* (uvicorn) app in
|
| 249 |
+
# Gradio 6 — the carousel then sits on its first slide with dead arrows/dots. So we
|
| 250 |
+
# inject the carousel script as a real inline <script> before </body> via middleware;
|
| 251 |
+
# it self-bootstraps and its MutationObserver wires every .carousel once Gradio
|
| 252 |
+
# client-renders the page. (The launch() path below still passes js= for ZeroGPU.)
|
| 253 |
+
if SERVE == "uvicorn":
    from starlette.responses import Response as _Response

    # Carousel bootstrap, wrapped so it runs as soon as the script is parsed.
    _CAROUSEL_INLINE = f'<script id="cz-inline-js">({CAROUSEL_JS})();</script>'
    # Status banner: fetch /health on load and reveal #status-banner if degraded
    # (e.g. real model on CPU-only hardware). Same inline-script pattern as the
    # carousel, since js= is unreliable on a mounted app; it polls for the element
    # because Gradio renders it client-side after </body>.
    _BANNER_JS = (
        "(function(){fetch('/health').then(function(r){return r.json();})"
        ".then(function(h){if(!h||!h.degraded){return;}(function s(){"
        "var b=document.getElementById('status-banner');"
        "if(!b){return setTimeout(s,400);}"
        "b.textContent='\\u26a0\\ufe0f '+(h.reason||'This Space needs a GPU.')+' \\u26a0\\ufe0f';"
        "b.style.display='block';})();}).catch(function(){});})();"
    )
    _BANNER_INLINE = f'<script id="cz-banner-js">{_BANNER_JS}</script>'

    @app.middleware("http")
    async def _inject_carousel_js(request, call_next):  # noqa: ANN001
        # Only the HTML document served at "/" is rewritten; every other
        # response passes through untouched.
        resp = await call_next(request)
        is_root_page = (
            request.url.path == "/"
            and "text/html" in resp.headers.get("content-type", "")
        )
        if not is_root_page:
            return resp
        chunks = []
        async for chunk in resp.body_iterator:
            chunks.append(chunk)
        html = b"".join(chunks).decode("utf-8", "ignore")
        # Inject once, just before the first closing body tag.
        if "cz-inline-js" not in html and "</body>" in html:
            injected = _CAROUSEL_INLINE + _BANNER_INLINE + "</body>"
            html = html.replace("</body>", injected, 1)
        headers = dict(resp.headers)
        headers.pop("content-length", None)  # body length changed; let Starlette recompute
        return _Response(
            content=html,
            status_code=resp.status_code,
            headers=headers,
            media_type="text/html",
        )

    app = gr.mount_gradio_app(
        app,
        demo,
        path="/",
        ssr_mode=False,
        theme=THEME,
        css=CSS,
        js=CAROUSEL_JS,
        mcp_server=True,  # expose extract_events/make_ics/check_conflicts as MCP tools
    )
|
| 289 |
+
|
| 290 |
+
if __name__ == "__main__":
    if SERVE != "gradio":
        # FastAPI app with Gradio mounted on it; port comes from PORT (default 7860).
        uvicorn.run(app, host="0.0.0.0", port=int(os.environ.get("PORT", "7860")))
    else:
        demo.launch(
            server_name="0.0.0.0",
            server_port=7860,
            ssr_mode=False,
            theme=THEME,
            css=CSS,
            js=CAROUSEL_JS,
            mcp_server=True,  # expose extract_events/make_ics/check_conflicts as MCP tools
        )
|
calendar_out/__init__.py
ADDED
|
File without changes
|
calendar_out/freebusy.py
ADDED
|
@@ -0,0 +1,141 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Conflict detection against the user's existing calendar.
|
| 2 |
+
|
| 3 |
+
Off-grid by default: the user uploads a current-calendar .ics; we parse it into
|
| 4 |
+
busy intervals and detect clashes deterministically (time math is more reliable
|
| 5 |
+
in code than from the model). The model still writes the reasoning + reply.
|
| 6 |
+
"""
|
| 7 |
+
from __future__ import annotations
|
| 8 |
+
|
| 9 |
+
from datetime import datetime, timedelta
|
| 10 |
+
from typing import Optional
|
| 11 |
+
|
| 12 |
+
from dateutil import parser as dtparser
|
| 13 |
+
from icalendar import Calendar
|
| 14 |
+
from pydantic import BaseModel
|
| 15 |
+
|
| 16 |
+
from server import events as events_bus # aliased: 'events' is a common param name here
|
| 17 |
+
from server.schema import ActionPlan, Conflict, Event
|
| 18 |
+
|
| 19 |
+
TIGHT_GAP = timedelta(minutes=30)
|
| 20 |
+
DEFAULT_DURATION = timedelta(hours=1)
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
class Busy(BaseModel):
    # One occupied interval taken from the user's existing calendar.
    start: datetime
    end: datetime
    # Event summary; empty when the source event had none.
    title: str = ""
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def _naive_local(dt: datetime) -> datetime:
|
| 30 |
+
"""Aware datetimes are CONVERTED before dropping tzinfo — blindly stripping
|
| 31 |
+
shifted UTC-exported .ics (Google's default) by the whole UTC offset against
|
| 32 |
+
the model's local-time events. Conversion target = the SAME configured zone
|
| 33 |
+
gcal labels pushed events with (calendar_out/tzconfig), else process-local,
|
| 34 |
+
so conflict math and calendar pushes share one time basis."""
|
| 35 |
+
if dt.tzinfo is not None:
|
| 36 |
+
from calendar_out.tzconfig import zone
|
| 37 |
+
|
| 38 |
+
dt = dt.astimezone(zone()) # None -> process-local
|
| 39 |
+
return dt.replace(tzinfo=None)
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def _as_dt(value) -> Optional[datetime]:
    """Coerce ``value`` to a naive local datetime, or ``None`` if impossible.

    ``datetime`` instances are normalized directly; anything else is rendered
    with ``str()`` and parsed as ISO-8601. ``None`` and values that fail with
    ValueError/TypeError yield ``None``.
    """
    if value is None:
        return None
    if isinstance(value, datetime):
        return _naive_local(value)
    try:
        parsed = dtparser.isoparse(str(value))
        return _naive_local(parsed)
    except (ValueError, TypeError):
        return None
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
def load_ics_busy(data: bytes) -> list[Busy]:
    """Parse VEVENTs from an .ics into busy intervals (naive local datetimes).

    End-time resolution, in order:
      1. DTEND when present;
      2. DTSTART + DURATION when the event carries a DURATION instead;
      3. one day for an all-day event (DTSTART is a DATE, not a DATE-TIME);
      4. ``DEFAULT_DURATION`` otherwise.

    Events without a usable DTSTART are skipped. Each VEVENT yields at most
    one interval; RRULE recurrences are not expanded here.
    """
    busy: list[Busy] = []
    cal = Calendar.from_ical(data)
    for comp in cal.walk("VEVENT"):
        raw_start = getattr(comp.get("dtstart"), "dt", None)
        start = _as_dt(raw_start)
        if start is None:
            continue
        end = _as_dt(getattr(comp.get("dtend"), "dt", None))
        if end is None:
            duration_prop = comp.get("duration")
            # NOTE(review): icalendar exposes the timedelta as `.dt` on newer
            # releases and `.td` on older ones — both are tried; confirm
            # against the pinned icalendar version.
            span = getattr(duration_prop, "dt", None)
            if not isinstance(span, timedelta):
                span = getattr(duration_prop, "td", None)
            if not isinstance(span, timedelta):
                # A DATE-valued DTSTART (not a datetime) marks an all-day event.
                all_day = not isinstance(raw_start, datetime)
                span = timedelta(days=1) if all_day else DEFAULT_DURATION
            end = start + span
        busy.append(Busy(start=start, end=end, title=str(comp.get("summary", ""))))
    return busy
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
def _event_interval(ev: Event) -> Optional[tuple[datetime, datetime]]:
    """Return ``(start, end)`` for ``ev``, or ``None`` when its start cannot
    be parsed. A missing or unparsable end falls back to
    ``start + DEFAULT_DURATION``."""
    begin = _as_dt(ev.start)
    if begin is None:
        return None
    finish = _as_dt(ev.end)
    if not finish:
        finish = begin + DEFAULT_DURATION
    return begin, finish
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
def _overlaps(a0, a1, b0, b1) -> bool:
|
| 75 |
+
return a0 < b1 and b0 < a1
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
def _severity(a0, a1, b0, b1) -> Optional[str]:
|
| 79 |
+
if _overlaps(a0, a1, b0, b1):
|
| 80 |
+
return "overlap"
|
| 81 |
+
gap = b0 - a1 if b0 >= a1 else a0 - b1
|
| 82 |
+
if gap <= timedelta(0):
|
| 83 |
+
return "adjacent"
|
| 84 |
+
if gap < TIGHT_GAP:
|
| 85 |
+
return "tight"
|
| 86 |
+
return None
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
def check_conflicts(events: list[Event], busy: list[Busy]) -> list[Conflict]:
    """Compare every event with every busy interval and list the clashes.

    One ``Conflict`` is produced per (event, busy) pair for which
    ``_severity`` reports something; ``event_index`` is the event's position
    in ``events``. Events without a parsable start are skipped.
    """
    found: list[Conflict] = []
    for position, candidate in enumerate(events):
        span = _event_interval(candidate)
        if span is None:
            continue
        ev_start, ev_end = span
        for slot in busy:
            level = _severity(ev_start, ev_end, slot.start, slot.end)
            if not level:
                continue
            found.append(
                Conflict(
                    event_index=position,
                    clashes_with=slot.title or "existing event",
                    severity=level,
                )
            )
    return found
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
def propose_times(ev: Event, busy: list[Busy], n: int = 3) -> list[str]:
    """Suggest up to n nearby start times that don't overlap busy intervals.

    Candidates are tried in a fixed order: later the same day (+1h..+4h),
    then the same time on each of the next two days. Each candidate keeps the
    event's original duration.

    Returns:
        ISO-8601 start strings, at most ``n`` of them; ``[]`` when the event
        has no parsable start or when ``n`` is zero or negative.
    """
    iv = _event_interval(ev)
    # n <= 0 used to yield one suggestion: the limit was only checked after
    # the first append.
    if iv is None or n <= 0:
        return []
    start, end = iv
    duration = end - start
    offsets = [timedelta(hours=h) for h in (1, 2, 3, 4)]
    offsets += [timedelta(days=d) for d in (1, 2)]
    out: list[str] = []
    for offset in offsets:
        candidate = start + offset
        if any(_overlaps(candidate, candidate + duration, b.start, b.end) for b in busy):
            continue
        out.append(candidate.isoformat())
        if len(out) >= n:
            break
    return out
|
| 122 |
+
|
| 123 |
+
|
| 124 |
+
def annotate_conflicts(plan: ActionPlan, busy: list[Busy]) -> ActionPlan:
    """Replace model-guessed conflicts with deterministic ones + propose times.

    With no busy intervals the plan is returned untouched. Otherwise
    ``plan.conflicts`` is recomputed, a "conflict" event is emitted, and
    ``plan.proposed_times`` is filled with alternatives for each clashing
    event (duplicates dropped, first occurrence kept).
    """
    if not busy:
        return plan
    plan.conflicts = check_conflicts(plan.events, busy)
    total = len(plan.conflicts)
    events_bus.emit(
        "conflict",
        f"{total} conflict(s) vs {len(busy)} existing event(s)",
        conflicts=total,
    )
    suggestions: list[str] = []
    for position in sorted({c.event_index for c in plan.conflicts}):
        suggestions += propose_times(plan.events[position], busy)
    # dict keys keep insertion order, so this de-dupes without reordering
    plan.proposed_times = list(dict.fromkeys(suggestions))
    return plan
|
calendar_out/gcal.py
ADDED
|
@@ -0,0 +1,313 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""OPTIONAL Google Calendar push (the one optional cloud touchpoint).
|
| 2 |
+
|
| 3 |
+
Disabled unless the user opts in via the UI toggle and provides OAuth creds.
|
| 4 |
+
Keeps the default .ics path strictly off-grid.
|
| 5 |
+
"""
|
| 6 |
+
from __future__ import annotations
|
| 7 |
+
|
| 8 |
+
import json
|
| 9 |
+
import os
|
| 10 |
+
import threading
|
| 11 |
+
import time
|
| 12 |
+
from pathlib import Path
|
| 13 |
+
|
| 14 |
+
from dateutil import parser as dtparser
|
| 15 |
+
|
| 16 |
+
from server import events as events_bus
|
| 17 |
+
from server.schema import Event
|
| 18 |
+
|
| 19 |
+
SCOPES = ["https://www.googleapis.com/auth/calendar.events"]
|
| 20 |
+
GOOGLE_TOKEN_URI = "https://oauth2.googleapis.com/token"
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
# --------------------------------------------------------------------------- #
|
| 24 |
+
# Per-user OAuth (web flow): each visitor connects their OWN Google account.
|
| 25 |
+
# The OAuth *app* creds (client id/secret) are the owner's, set as Space secrets;
|
| 26 |
+
# the resulting per-user token is held client-side (never stored server-side) and
|
| 27 |
+
# passed back only to perform a push. See app.py /oauth2/start + /oauth2callback.
|
| 28 |
+
# --------------------------------------------------------------------------- #
|
| 29 |
+
def _client_config() -> dict:
|
| 30 |
+
"""OAuth client config from env (Space secrets). Raises if unconfigured."""
|
| 31 |
+
cid = os.environ.get("GOOGLE_OAUTH_CLIENT_ID", "").strip()
|
| 32 |
+
csecret = os.environ.get("GOOGLE_OAUTH_CLIENT_SECRET", "").strip()
|
| 33 |
+
if not (cid and csecret):
|
| 34 |
+
raise RuntimeError(
|
| 35 |
+
"Google Calendar isn't configured: set GOOGLE_OAUTH_CLIENT_ID and "
|
| 36 |
+
"GOOGLE_OAUTH_CLIENT_SECRET (a Google Cloud OAuth 'Web application' client)."
|
| 37 |
+
)
|
| 38 |
+
return {"web": {
|
| 39 |
+
"client_id": cid,
|
| 40 |
+
"client_secret": csecret,
|
| 41 |
+
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
|
| 42 |
+
"token_uri": "https://oauth2.googleapis.com/token",
|
| 43 |
+
}}
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
# PKCE: authorization_url() auto-generates a code_verifier (google-auth-oauthlib
|
| 47 |
+
# >= 1.0) and sends its challenge to Google; the token exchange must then send
|
| 48 |
+
# the SAME verifier or Google rejects it with "(invalid_grant) Missing code
|
| 49 |
+
# verifier". The start and the callback are different HTTP requests, so the
|
| 50 |
+
# verifier is held server-side for a few minutes, keyed by the flow's `state`
|
| 51 |
+
# — which doubles as the CSRF check. Single-use; nothing user-identifying.
|
| 52 |
+
_PENDING_TTL_S = 600
|
| 53 |
+
_PENDING_MAX = 500 # bound memory if /oauth2/start is hammered
|
| 54 |
+
_pending: dict[str, tuple[str, float]] = {}
|
| 55 |
+
_pending_lock = threading.Lock()
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def _remember_verifier(state: str, verifier: str) -> None:
|
| 59 |
+
now = time.time()
|
| 60 |
+
with _pending_lock:
|
| 61 |
+
for k in [k for k, (_, t) in _pending.items() if now - t > _PENDING_TTL_S]:
|
| 62 |
+
_pending.pop(k, None)
|
| 63 |
+
while len(_pending) >= _PENDING_MAX:
|
| 64 |
+
_pending.pop(next(iter(_pending)))
|
| 65 |
+
_pending[state] = (verifier, now)
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
def _pop_verifier(state: str) -> str | None:
|
| 69 |
+
with _pending_lock:
|
| 70 |
+
item = _pending.pop(state or "", None)
|
| 71 |
+
if item is None:
|
| 72 |
+
return None
|
| 73 |
+
verifier, t = item
|
| 74 |
+
return verifier if time.time() - t <= _PENDING_TTL_S else None
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
def auth_url(redirect_uri: str) -> tuple[str, str]:
    """Build the Google consent URL for the calendar-events scope.

    The flow's PKCE code verifier is remembered under the returned state so
    the callback can complete the exchange.

    Returns:
        ``(url, state)``.
    """
    from google_auth_oauthlib.flow import Flow

    flow = Flow.from_client_config(
        _client_config(), scopes=SCOPES, redirect_uri=redirect_uri
    )
    consent_url, state = flow.authorization_url(
        access_type="offline",
        include_granted_scopes="true",
        prompt="consent",
    )
    _remember_verifier(state, flow.code_verifier)
    return consent_url, state
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
def exchange_code(redirect_uri: str, code: str, state: str = "") -> str:
    """Exchange an auth code for a per-user token; returns the token as a JSON string.

    ``state`` must match a pending auth_url() call — it keys the PKCE verifier
    and doubles as the CSRF check.

    Raises:
        RuntimeError: when no unexpired verifier is pending for ``state``.
    """
    pkce_verifier = _pop_verifier(state)
    if pkce_verifier is None:
        raise RuntimeError(
            "sign-in session expired or unknown — close this window and click "
            "Connect Google Calendar again"
        )
    from google_auth_oauthlib.flow import Flow

    flow = Flow.from_client_config(
        _client_config(),
        scopes=SCOPES,
        redirect_uri=redirect_uri,
        code_verifier=pkce_verifier,
    )
    flow.fetch_token(code=code)
    # client_secret is stripped before the token goes to the browser
    raw_token = flow.credentials.to_json()
    return _sanitize_token_json(raw_token)
|
| 107 |
+
|
| 108 |
+
|
| 109 |
+
def _sanitize_token_json(token_json: str) -> str:
|
| 110 |
+
"""Token JSON as handed to the BROWSER (localStorage): the OAuth app's
|
| 111 |
+
client_secret has no business there — the server re-injects it from env
|
| 112 |
+
when it needs to refresh."""
|
| 113 |
+
info = json.loads(token_json)
|
| 114 |
+
info.pop("client_secret", None)
|
| 115 |
+
return json.dumps(info)
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
def _with_client_secret(info: dict) -> dict:
|
| 119 |
+
"""Restore the env client_secret into browser-held token info so
|
| 120 |
+
creds.refresh() works. Older stored tokens that still carry a secret are
|
| 121 |
+
left untouched. The refresh endpoint is PINNED: the token JSON comes from
|
| 122 |
+
the browser, and a crafted token_uri would otherwise receive the injected
|
| 123 |
+
secret on refresh."""
|
| 124 |
+
info = {**info, "token_uri": GOOGLE_TOKEN_URI}
|
| 125 |
+
if not info.get("client_secret"):
|
| 126 |
+
secret = os.environ.get("GOOGLE_OAUTH_CLIENT_SECRET", "").strip()
|
| 127 |
+
if secret:
|
| 128 |
+
info["client_secret"] = secret
|
| 129 |
+
return info
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
def _creds_from_token_json(token_json: str):
    """Build google ``Credentials`` from browser-held token JSON, after the
    client secret and pinned token URI have been applied to it."""
    from google.oauth2.credentials import Credentials

    token_info = _with_client_secret(json.loads(token_json))
    return Credentials.from_authorized_user_info(token_info, SCOPES)
|
| 138 |
+
|
| 139 |
+
|
| 140 |
+
def _refresh_if_needed(creds) -> str | None:
    """Refresh *creds* in place when they are expired and refreshable.

    Returns the sanitized token JSON (to be re-stored client-side) after a
    refresh, or ``None`` when no refresh was performed.
    """
    from google.auth.transport.requests import Request

    should_refresh = not creds.valid and creds.expired and creds.refresh_token
    if not should_refresh:
        return None
    creds.refresh(Request())
    return _sanitize_token_json(creds.to_json())
|
| 149 |
+
|
| 150 |
+
|
| 151 |
+
def _probe_events(creds) -> None:
    """Issue one minimal events.list call against the primary calendar.

    This is the cheapest real API call the calendar.events scope allows (it
    is the only scope requested, so calendarList/freeBusy would 403). The
    response is discarded; any failure propagates as an exception.
    """
    from googleapiclient.discovery import build

    calendar_api = build("calendar", "v3", credentials=creds)
    probe = calendar_api.events().list(
        calendarId="primary", maxResults=1, fields="items(id)"
    )
    probe.execute()
|
| 159 |
+
|
| 160 |
+
|
| 161 |
+
def _is_definitive_auth_failure(e: Exception) -> bool:
|
| 162 |
+
"""True when the token itself is dead (revoked/invalid), False for anything
|
| 163 |
+
that might heal on its own. Duck-typed by exception name / resp.status so
|
| 164 |
+
this module never has to import the google libs (absent in CI)."""
|
| 165 |
+
if type(e).__name__ == "RefreshError": # revoked / invalid_grant
|
| 166 |
+
return True
|
| 167 |
+
status = getattr(getattr(e, "resp", None), "status", None) # HttpError
|
| 168 |
+
return status in (401, 403)
|
| 169 |
+
|
| 170 |
+
|
| 171 |
+
def check_token(token_json: str) -> dict:
    """Check whether a browser-held token is still usable.

    Refreshes the credentials if needed, then makes one real
    (scope-compatible) API call. The result has three states so the client
    only discards a token on a DEFINITIVE failure:

        {"ok": True,  "refreshed_token": <sanitized json> | None}
        {"ok": False, "reason": str, "transient": bool}
    """

    def _failure(reason: str, transient: bool) -> dict:
        return {"ok": False, "reason": reason, "transient": transient}

    # Stage 1: the stored value must be a JSON object carrying some token.
    try:
        parsed = json.loads(token_json or "")
        usable = isinstance(parsed, dict) and (
            parsed.get("refresh_token") or parsed.get("token")
        )
        if not usable:
            raise ValueError("token JSON missing token/refresh_token")
    except Exception as exc:  # noqa: BLE001 garbage in localStorage -> definitive
        return _failure(f"unreadable token: {exc}", False)

    # Stage 2: refresh + live probe. Missing google libs is an environment
    # problem, not a token problem, so it is reported as transient.
    try:
        creds = _creds_from_token_json(token_json)
        refreshed_token = _refresh_if_needed(creds)
        _probe_events(creds)
    except ImportError as exc:
        return _failure(f"google libs unavailable: {exc}", True)
    except Exception as exc:  # noqa: BLE001
        return _failure(
            f"{type(exc).__name__}: {exc}", not _is_definitive_auth_failure(exc)
        )
    return {"ok": True, "refreshed_token": refreshed_token}
|
| 198 |
+
|
| 199 |
+
|
| 200 |
+
def _dt_field(value: str) -> dict:
    """Turn an ISO datetime string into a Calendar API start/end field.

    The model emits offset-less ISO datetimes (schema: 2026-06-10T13:00:00)
    and the API 400s on a naive dateTime without a timeZone. Resolution:

    * value already carries an offset -> sent as-is;
    * naive value + configured zone (calendar_out/tzconfig, the same basis
      freebusy compares conflicts in) -> naive dateTime plus ``timeZone``;
    * naive value, no configured zone -> the process-local offset is attached.
    """
    from calendar_out.tzconfig import configured_timezone

    parsed = dtparser.isoparse(value)
    if parsed.tzinfo is None:
        zone_name = configured_timezone()
        if zone_name:
            return {"dateTime": parsed.isoformat(), "timeZone": zone_name}
        # astimezone() on a naive datetime interprets it as process-local time.
        return {"dateTime": parsed.astimezone().isoformat()}
    return {"dateTime": parsed.isoformat()}
|
| 216 |
+
|
| 217 |
+
|
| 218 |
+
def _event_body(ev: Event) -> dict:
    """Build the events.insert request body for one extracted event.

    ``summary``, ``start`` and ``end`` are always present; an event without
    an end reuses its start. ``location``, ``description`` and a single
    popup reminder override are added only when the event provides them.
    """
    payload = {
        "summary": ev.title,
        "start": _dt_field(ev.start),
        "end": _dt_field(ev.end or ev.start),
    }
    for api_key, text in (("location", ev.location), ("description", ev.notes)):
        if text:
            payload[api_key] = text
    if ev.reminder_minutes is not None:
        popup = {"method": "popup", "minutes": ev.reminder_minutes}
        payload["reminders"] = {"useDefault": False, "overrides": [popup]}
    return payload
|
| 234 |
+
|
| 235 |
+
|
| 236 |
+
def push_events_with_token(token_json: str, events: list[Event], calendar_id: str = "primary") -> list[str]:
    """Push events to the *visitor's* calendar using their per-session OAuth token.

    Args:
        token_json: browser-held token JSON (the client secret is re-injected
            server-side by ``_creds_from_token_json``).
        events: events to insert, in order.
        calendar_id: target calendar; defaults to the visitor's primary one.

    Returns:
        The ``htmlLink`` of each created event ("" when the API omits it).

    Any API or auth error propagates to the caller; events inserted before
    the failure stay in the calendar.
    """
    from googleapiclient.discovery import build

    creds = _creds_from_token_json(token_json)
    # Same refresh rule as check_token, via the shared helper rather than a
    # second inline copy of the condition. The sanitized JSON it returns is
    # not needed here: this function's contract is the list of links.
    _refresh_if_needed(creds)
    svc = build("calendar", "v3", credentials=creds)
    links = []
    for ev in events:
        created = svc.events().insert(calendarId=calendar_id, body=_event_body(ev)).execute()
        links.append(created.get("htmlLink", ""))
    events_bus.emit("calendar", f"pushed {len(links)} event(s) to Google Calendar")
    return links
|
| 251 |
+
|
| 252 |
+
|
| 253 |
+
def _service():
    """Build an authorized Calendar service. Requires credentials.json + token.json.

    Run an OAuth flow once locally to mint token.json; do NOT commit either file.

    Cached credentials are loaded from token.json when it exists. If they are
    missing or invalid they are refreshed (when expired with a refresh
    token) or re-minted through the local-server OAuth flow, and the result
    is written back to token.json.
    """
    from google.auth.transport.requests import Request
    from google.oauth2.credentials import Credentials
    from google_auth_oauthlib.flow import InstalledAppFlow
    from googleapiclient.discovery import build

    creds = None
    if os.path.exists("token.json"):
        creds = Credentials.from_authorized_user_file("token.json", SCOPES)
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file("credentials.json", SCOPES)
            creds = flow.run_local_server(port=0)
        # token.json holds a long-lived refresh token: create it owner-only
        # instead of with the default umask, and tighten a pre-existing file
        # (os.open's mode only applies when the file is created).
        fd = os.open("token.json", os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
        with os.fdopen(fd, "w") as f:
            f.write(creds.to_json())
        os.chmod("token.json", 0o600)
    return build("calendar", "v3", credentials=creds)
|
| 275 |
+
|
| 276 |
+
|
| 277 |
+
def push_events(events: list[Event], calendar_id: str = "primary") -> list[str]:
    """Insert *events* into Google Calendar via the locally authorized service.

    Returns the ``htmlLink`` of every created event, in input order ("" when
    the API response has none), and emits one "calendar" bus event with the
    count.
    """
    calendar_api = _service()
    created_links = []
    for event in events:
        request = calendar_api.events().insert(
            calendarId=calendar_id, body=_event_body(event)
        )
        created_links.append(request.execute().get("htmlLink", ""))
    events_bus.emit("calendar", f"pushed {len(created_links)} event(s) to Google Calendar")
    return created_links
|
| 286 |
+
|
| 287 |
+
|
| 288 |
+
def read_recent_facts(
    calendar_id: str = "primary", max_results: int = 50, lookback_days: int = 90
) -> tuple[list[str], list[str]]:
    """OPT-IN read: scan recent/upcoming events for recurring attendees and
    locations to seed memory.

    Args:
        calendar_id: calendar to scan.
        max_results: maximum number of event instances requested.
        lookback_days: how far back the scan window starts. Without a lower
            bound, an ascending ``orderBy="startTime"`` listing returns the
            OLDEST events in the calendar, not recent ones.

    Returns:
        ``(contact_names, locations)``. Names seen at least twice are
        preferred; if none recur, the ten most common names are returned.
        Locations are only returned when seen at least twice.

    Raises if Google libs/creds aren't configured (the caller degrades
    gracefully).
    """
    from collections import Counter
    from datetime import datetime, timedelta, timezone

    # RFC 3339 timestamp with an explicit offset, as events.list expects.
    window_start = (datetime.now(timezone.utc) - timedelta(days=lookback_days)).isoformat()

    svc = _service()
    items = (
        svc.events()
        .list(
            calendarId=calendar_id,
            maxResults=max_results,
            singleEvents=True,
            orderBy="startTime",
            timeMin=window_start,
        )
        .execute()
        .get("items", [])
    )
    people, places = Counter(), Counter()
    for ev in items:
        for a in ev.get("attendees", []) or []:
            nm = (a.get("displayName") or "").strip()
            if nm and len(nm) <= 60:  # skip blank and implausibly long names
                people[nm] += 1
        loc = (ev.get("location") or "").strip()
        if loc and len(loc) <= 80:  # skip blank and overly long locations
            places[loc] += 1
    # keep recurring ones (seen >= 2) so memory stays meaningful
    names = [n for n, c in people.most_common(20) if c >= 2] or [n for n, _ in people.most_common(10)]
    locs = [p for p, c in places.most_common(10) if c >= 2]
    return names, locs
|
calendar_out/ics.py
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Generate .ics files locally (default, off-grid output)."""
|
| 2 |
+
from __future__ import annotations
|
| 3 |
+
|
| 4 |
+
import os
|
| 5 |
+
import tempfile
|
| 6 |
+
from datetime import datetime
|
| 7 |
+
|
| 8 |
+
from dateutil import parser as dtparser
|
| 9 |
+
from icalendar import Alarm, Calendar
|
| 10 |
+
from icalendar import Event as IcsEvent
|
| 11 |
+
|
| 12 |
+
from server import events as events_bus
|
| 13 |
+
from server.schema import Event
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def events_to_ics(events: list[Event]) -> bytes:
    """Serialize *events* into a single iCalendar (RFC 5545) document.

    Each event becomes one VEVENT with summary and start, plus end,
    location, description, attendees and a DISPLAY alarm when the event
    provides them. Every VEVENT also gets a ``UID`` and a ``DTSTAMP``: RFC
    5545 marks both as required, and strict importers may reject or
    mis-merge events that lack them.

    Returns the encoded calendar bytes.
    """
    import uuid
    from datetime import timezone

    cal = Calendar()
    cal.add("prodid", "-//iMessage Calendar Agent//EN")
    cal.add("version", "2.0")

    # One creation stamp for the whole export; DTSTAMP must be in UTC.
    stamp = datetime.now(timezone.utc)

    for ev in events:
        ie = IcsEvent()
        ie.add("uid", str(uuid.uuid4()))
        ie.add("dtstamp", stamp)
        ie.add("summary", ev.title)
        ie.add("dtstart", dtparser.isoparse(ev.start))
        if ev.end:
            ie.add("dtend", dtparser.isoparse(ev.end))
        if ev.location:
            ie.add("location", ev.location)
        if ev.notes:
            ie.add("description", ev.notes)
        if ev.attendees:
            for a in ev.attendees:
                ie.add("attendee", a)
        if ev.reminder_minutes is not None:
            alarm = Alarm()
            alarm.add("action", "DISPLAY")
            alarm.add("description", f"Reminder: {ev.title}")
            # negative offset relative to the event start
            alarm.add("trigger", _minutes_before(ev.reminder_minutes))
            ie.add_component(alarm)
        cal.add_component(ie)

    return cal.to_ical()
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
def _minutes_before(minutes: int):
|
| 46 |
+
from datetime import timedelta
|
| 47 |
+
|
| 48 |
+
return timedelta(minutes=-minutes)
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
def write_ics(events: list[Event], path: str | None = None) -> str:
    """Write *events* as an .ics file and return its path (for Gradio download).

    When *path* is omitted a fresh temp file named ``events_*.ics`` is
    created. One "calendar" bus event is emitted after the write.
    """
    payload = events_to_ics(events)
    target = path
    if target is None:
        handle, target = tempfile.mkstemp(suffix=".ics", prefix="events_")
        # mkstemp hands back an open descriptor; the file is reopened below.
        os.close(handle)
    with open(target, "wb") as out:
        out.write(payload)
    events_bus.emit("calendar", f"wrote .ics with {len(events)} event(s)")
    return target
|
calendar_out/tzconfig.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""One timezone basis for the whole calendar path.
|
| 2 |
+
|
| 3 |
+
The model emits offset-less local datetimes; gcal labels them with a zone and
|
| 4 |
+
freebusy compares them against .ics busy intervals. Both MUST resolve the zone
|
| 5 |
+
the same way, or conflicts are checked in one zone while events are pushed in
|
| 6 |
+
another (off by the whole UTC offset). Resolution order:
|
| 7 |
+
|
| 8 |
+
1. CAL_TIMEZONE / TZ env — any value that validates as an IANA zone, slash or
|
| 9 |
+
not (UTC, GMT, Japan, America/New_York all count; a leading ':' is stripped).
|
| 10 |
+
2. /etc/timezone (Debian-style containers — i.e. the Space image).
|
| 11 |
+
3. None -> the process-local zone.
|
| 12 |
+
"""
|
| 13 |
+
from __future__ import annotations
|
| 14 |
+
|
| 15 |
+
import os
|
| 16 |
+
from pathlib import Path
|
| 17 |
+
from typing import Optional
|
| 18 |
+
from zoneinfo import ZoneInfo
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def _valid(name: str) -> Optional[str]:
|
| 22 |
+
name = (name or "").strip().lstrip(":")
|
| 23 |
+
if not name:
|
| 24 |
+
return None
|
| 25 |
+
try:
|
| 26 |
+
ZoneInfo(name)
|
| 27 |
+
return name
|
| 28 |
+
except Exception: # noqa: BLE001 not an IANA name (e.g. TZ=EST5EDT rules)
|
| 29 |
+
return None
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def configured_timezone() -> Optional[str]:
    """The configured IANA zone name, or None meaning 'process-local'.

    CAL_TIMEZONE is consulted first, then TZ, then the contents of
    /etc/timezone; the first value that validates wins.
    """
    env_values = (os.environ.get(var, "") for var in ("CAL_TIMEZONE", "TZ"))
    from_env = next((z for z in map(_valid, env_values) if z), None)
    if from_env:
        return from_env
    try:
        file_value = Path("/etc/timezone").read_text()
    except Exception:  # noqa: BLE001 not Debian-style (macOS/Windows)
        return None
    return _valid(file_value)
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def zone() -> Optional[ZoneInfo]:
    """Return the configured zone as a ``ZoneInfo``, or None for process-local."""
    tz_name = configured_timezone()
    if not tz_name:
        return None
    return ZoneInfo(tz_name)
|
collector/.env.example
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copy to .env and fill in. NEVER commit the real .env.
|
| 2 |
+
SPACE_URL=https://your-space.hf.space
|
| 3 |
+
INGEST_TOKEN=change-me-to-match-the-space
|
| 4 |
+
POLL_SECONDS=20
|
| 5 |
+
# Optional: comma-separated chat names/handles to watch (blank = all)
|
| 6 |
+
WATCH_CHATS=
|
| 7 |
+
# Path to the iMessage DB (default is correct on macOS)
|
| 8 |
+
CHAT_DB=~/Library/Messages/chat.db
|
| 9 |
+
# AGENT_MODE=1 posts to /agent (client-side autonomous push) instead of /ingest.
|
| 10 |
+
# Prefer the server-side switch (run the backend with AUTONOMOUS=1) so logic lives
|
| 11 |
+
# in one place; the collector now also reports is_from_me so the backend can fire
|
| 12 |
+
# only on YOUR sent/accepted messages (backend env TRIGGER_ON=outgoing, the default).
|
| 13 |
+
AGENT_MODE=0
|
collector/collector.py
ADDED
|
@@ -0,0 +1,175 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Mac-side iMessage collector.
|
| 2 |
+
|
| 3 |
+
Polls ~/Library/Messages/chat.db for new messages and POSTs them to the Space
|
| 4 |
+
/ingest endpoint. Requires Full Disk Access for the running process.
|
| 5 |
+
|
| 6 |
+
This reads the `text` column directly for simplicity. Many modern messages store
|
| 7 |
+
their body in `attributedBody` (an NSAttributedString blob) instead — for robust
|
| 8 |
+
extraction, prefer the `imessage-exporter` CLI (ReagentX) or the `imessage_reader`
|
| 9 |
+
package rather than expanding the SQL here.
|
| 10 |
+
"""
|
| 11 |
+
from __future__ import annotations
|
| 12 |
+
|
| 13 |
+
import os
|
| 14 |
+
import sqlite3
|
| 15 |
+
import sys
|
| 16 |
+
import time
|
| 17 |
+
from pathlib import Path
|
| 18 |
+
|
| 19 |
+
import requests
|
| 20 |
+
from dotenv import load_dotenv
|
| 21 |
+
|
| 22 |
+
# Allow importing the shared image helper from the repo root.
|
| 23 |
+
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
|
| 24 |
+
from server.imageutil import to_data_uri # noqa: E402
|
| 25 |
+
|
| 26 |
+
# Load settings from a .env file before the os.environ lookups below.
load_dotenv()

# SPACE_URL and INGEST_TOKEN are required: the direct os.environ[...] lookup
# raises KeyError at import time when either is unset. A trailing "/" is
# stripped from SPACE_URL so endpoint paths can be appended as "/ingest".
SPACE_URL = os.environ["SPACE_URL"].rstrip("/")
INGEST_TOKEN = os.environ["INGEST_TOKEN"]
# Seconds to sleep between polls (default 20).
POLL_SECONDS = int(os.environ.get("POLL_SECONDS", "20"))
# AGENT_MODE=1: call /agent (run the agent + push to calendar) instead of /ingest
# (which only stores for review). The autonomous, hands-off path — see docs/automations.md.
AGENT_MODE = os.environ.get("AGENT_MODE") == "1"
# Location of the Messages database; "~" is expanded.
CHAT_DB = Path(os.path.expanduser(os.environ.get("CHAT_DB", "~/Library/Messages/chat.db")))
# Comma-separated chat keys to forward; an empty list means every chat.
WATCH = [c.strip() for c in os.environ.get("WATCH_CHATS", "").split(",") if c.strip()]

# Checkpoint file next to this script: the highest message ROWID already handled.
STATE = Path(__file__).with_name(".last_rowid")
|
| 38 |
+
|
| 39 |
+
# New messages after a given ROWID (the single "?" parameter), oldest first.
#
# Timestamps: Apple epoch = 2001-01-01; m.date is nanoseconds since then, so
# it is divided down to seconds and shifted by 978307200 to the Unix epoch.
#
# Chat key: display_name is EMPTY for 1:1 chats, and your own outgoing rows
# have handle_id NULL — falling back to sender filed incoming messages under
# the phone number and your replies under "unknown", so rolling_thread never
# assembled the conversation. COALESCE to chat_identifier gives both
# directions of a direct chat one stable key.
QUERY = """
SELECT m.ROWID, m.text, m.attributedBody, h.id AS sender,
COALESCE(NULLIF(c.display_name, ''), c.chat_identifier) AS chat,
m.is_from_me,
datetime(m.date/1000000000 + 978307200, 'unixepoch', 'localtime') AS ts
FROM message m
LEFT JOIN handle h ON m.handle_id = h.ROWID
LEFT JOIN chat_message_join cmj ON cmj.message_id = m.ROWID
LEFT JOIN chat c ON c.ROWID = cmj.chat_id
WHERE m.ROWID > ?
ORDER BY m.ROWID ASC
"""


# Image attachments for a given message ROWID (filenames live under Attachments/).
# Returns every attachment filename; image filtering happens in _attachments_for.
ATTACH_QUERY = """
SELECT a.filename
FROM attachment a
JOIN message_attachment_join maj ON maj.attachment_id = a.ROWID
WHERE maj.message_id = ?
"""
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
def _attachments_for(conn: sqlite3.Connection, message_rowid: int) -> list[str]:
    """Collect base64 data URIs for the image attachments of one message.

    Rows with an empty filename are skipped, "~" in stored paths is
    expanded, and anything ``to_data_uri`` rejects (None for non-images /
    too large) is dropped.
    """
    rows = conn.execute(ATTACH_QUERY, (message_rowid,)).fetchall()
    encoded = (
        to_data_uri(os.path.expanduser(stored_name))
        for (stored_name,) in rows
        if stored_name
    )
    return [uri for uri in encoded if uri]
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
def _last_rowid() -> int:
    """Read the checkpointed ROWID from STATE; 0 when it is missing or unreadable."""
    try:
        saved = STATE.read_text()
        return int(saved.strip())
    except Exception:  # noqa: BLE001
        return 0
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
def _save_rowid(rowid: int) -> None:
    """Persist *rowid* as the new checkpoint in STATE, atomically.

    The value is written to a sibling temp file and moved into place with
    ``os.replace``. A crash mid-write therefore cannot leave a truncated
    checkpoint, which ``_last_rowid`` would read as 0 and which would replay
    the entire message history on the next poll.
    """
    tmp_path = STATE.with_name(STATE.name + ".tmp")
    tmp_path.write_text(str(rowid))
    os.replace(tmp_path, STATE)
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
def poll_once(conn: sqlite3.Connection) -> int:
    """Forward messages newer than the checkpoint and advance the checkpoint.

    Reads every row past the saved ROWID, drops chats outside WATCH and rows
    with neither text nor image attachments, then POSTs the rest: one
    /agent call per chat in AGENT_MODE, otherwise a single /ingest call.

    Returns the number of messages in the batch. In /ingest mode a failed
    request raises before the checkpoint is saved, so the same rows are
    read again on the next poll; in AGENT_MODE per-chat failures are only
    logged and the checkpoint still advances.
    """
    last = _last_rowid()
    rows = conn.execute(QUERY, (last,)).fetchall()
    batch = []
    max_rowid = last
    for rowid, text, _attr, sender, chat, is_from_me, ts in rows:
        # Track the checkpoint over ALL rows, including ones filtered out below.
        max_rowid = max(max_rowid, rowid)
        if WATCH and (chat or "") not in WATCH:
            continue
        images = _attachments_for(conn, rowid)
        if not text and not images:
            continue  # nothing usable (see docstring re: attributedBody-only msgs)
        batch.append(
            {
                "chat": chat or (sender or "unknown"),
                "sender": "me" if is_from_me else (sender or "unknown"),
                "text": text or "",
                "timestamp": ts,
                "images": images,
                "is_from_me": bool(is_from_me),  # you sending/accepting = the trigger
            }
        )
    if batch:
        headers = {"Authorization": f"Bearer {INGEST_TOKEN}"}
        if AGENT_MODE:
            # One /agent call PER CHAT — a raw batch can span conversations,
            # and format_thread would interleave them into one bogus thread.
            # (/ingest doesn't need this: the server groups by chat itself.)
            # Per-chat failures are caught, not raised: the /agent path pushes
            # to the calendar with no dedup, so aborting mid-loop and replaying
            # the whole batch next poll would re-push the chats that already
            # succeeded. At-most-once: a failed chat's batch is logged and
            # dropped; its next message re-triggers the rolling window anyway.
            by_chat: dict[str, list[dict]] = {}
            for m in batch:
                by_chat.setdefault(m["chat"], []).append(m)
            for chat, msgs in by_chat.items():
                try:
                    resp = requests.post(
                        f"{SPACE_URL}/agent",
                        json={"messages": msgs, "push_gcal": True},
                        headers=headers,
                        timeout=120,
                    )
                    resp.raise_for_status()
                    plan = resp.json().get("plan", {})
                    print(f"[{chat}] sent {len(msgs)} msg(s) -> "
                          f"{len(plan.get('events', []))} event(s)")
                except Exception as e:  # noqa: BLE001
                    print(f"[{chat}] agent call failed ({e}) — skipping this "
                          "batch for the chat; next message re-triggers it")
        else:
            resp = requests.post(
                f"{SPACE_URL}/ingest",
                json={"messages": batch},
                headers=headers,
                timeout=30,
            )
            resp.raise_for_status()
            print(f"sent {len(batch)} message(s) -> {resp.json()}")
    if max_rowid > last:
        _save_rowid(max_rowid)
    return len(batch)
|
| 155 |
+
|
| 156 |
+
|
| 157 |
+
def main():
    """Poll the Messages database forever, forwarding new messages.

    Exits with a message when CHAT_DB does not exist. Errors from a single
    poll are printed and the loop continues; the connection is closed when
    the loop is left (e.g. on KeyboardInterrupt).
    """
    if not CHAT_DB.exists():
        raise SystemExit(f"chat.db not found at {CHAT_DB} (grant Full Disk Access?)")
    # Read-only connection so we never mutate the Messages DB. The URI is
    # built with as_uri() so characters such as "?", "#" or "%" in the path
    # are percent-encoded instead of being parsed as URI syntax.
    conn = sqlite3.connect(f"{CHAT_DB.resolve().as_uri()}?mode=ro", uri=True)
    # Report the endpoint actually used: AGENT_MODE posts to /agent, not /ingest.
    endpoint = "/agent" if AGENT_MODE else "/ingest"
    print(f"polling {CHAT_DB} every {POLL_SECONDS}s -> {SPACE_URL}{endpoint}")
    try:
        while True:
            try:
                poll_once(conn)
            except Exception as e:  # noqa: BLE001 - keep the loop alive
                print(f"poll error: {e}")
            time.sleep(POLL_SECONDS)
    finally:
        conn.close()


if __name__ == "__main__":
    main()
|
deploy/launchd/com.offgrid.backend.plist
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
| 2 |
+
<!-- Backend: serves the Gradio UI + /agent + /ingest, runs the agent autonomously,
|
| 3 |
+
and uses Hermes via INFERENCE_BASE_URL. Template — see scripts/setup_mac.sh. -->
|
| 4 |
+
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
| 5 |
+
<plist version="1.0">
|
| 6 |
+
<dict>
|
| 7 |
+
<key>Label</key><string>com.offgrid.backend</string>
|
| 8 |
+
<key>ProgramArguments</key>
|
| 9 |
+
<array>
|
| 10 |
+
<string>__PYTHON__</string>
|
| 11 |
+
<string>__REPO__/app.py</string>
|
| 12 |
+
</array>
|
| 13 |
+
<key>WorkingDirectory</key><string>__REPO__</string>
|
| 14 |
+
<key>EnvironmentVariables</key>
|
| 15 |
+
<dict>
|
| 16 |
+
<key>AUTONOMOUS</key><string>1</string>
|
| 17 |
+
<key>TRIGGER_ON</key><string>outgoing</string>
|
| 18 |
+
<key>USE_STUB_EXTRACTOR</key><string>0</string>
|
| 19 |
+
<key>INFERENCE_BASE_URL</key><string>http://127.0.0.1:8080/v1</string>
|
| 20 |
+
<key>INFERENCE_MODEL</key><string>hermes</string>
|
| 21 |
+
<key>INGEST_TOKEN</key><string>__INGEST_TOKEN__</string>
|
| 22 |
+
<key>MEMORY_PATH</key><string>__HOME__/.offgrid/agent_memory.json</string>
|
| 23 |
+
<key>FEED_PATH</key><string>__HOME__/.offgrid/ingest_feed.json</string>
|
| 24 |
+
<key>DEDUP_PATH</key><string>__HOME__/.offgrid/agent_seen.json</string>
|
| 25 |
+
<key>IMPACT_PATH</key><string>__HOME__/.offgrid/impact_weeks.json</string>
|
| 26 |
+
<key>PORT</key><string>7860</string>
|
| 27 |
+
</dict>
|
| 28 |
+
<key>RunAtLoad</key><true/>
|
| 29 |
+
<key>KeepAlive</key><true/>
|
| 30 |
+
<key>StandardOutPath</key><string>__HOME__/Library/Logs/offgrid-backend.log</string>
|
| 31 |
+
<key>StandardErrorPath</key><string>__HOME__/Library/Logs/offgrid-backend.log</string>
|
| 32 |
+
</dict>
|
| 33 |
+
</plist>
|
deploy/launchd/com.offgrid.collector.plist
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
| 2 |
+
<!-- Collector: polls ~/Library/Messages/chat.db and POSTs new messages to the
|
| 3 |
+
local backend's /ingest. NEEDS Full Disk Access for the program below
|
| 4 |
+
(System Settings > Privacy & Security > Full Disk Access -> add the python
|
| 5 |
+
binary __PYTHON__). Template — see scripts/setup_mac.sh. -->
|
| 6 |
+
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
| 7 |
+
<plist version="1.0">
|
| 8 |
+
<dict>
|
| 9 |
+
<key>Label</key><string>com.offgrid.collector</string>
|
| 10 |
+
<key>ProgramArguments</key>
|
| 11 |
+
<array>
|
| 12 |
+
<string>__PYTHON__</string>
|
| 13 |
+
<string>__REPO__/collector/collector.py</string>
|
| 14 |
+
</array>
|
| 15 |
+
<key>WorkingDirectory</key><string>__REPO__/collector</string>
|
| 16 |
+
<key>EnvironmentVariables</key>
|
| 17 |
+
<dict>
|
| 18 |
+
<key>SPACE_URL</key><string>http://127.0.0.1:7860</string>
|
| 19 |
+
<key>INGEST_TOKEN</key><string>__INGEST_TOKEN__</string>
|
| 20 |
+
<key>POLL_SECONDS</key><string>20</string>
|
| 21 |
+
<key>CHAT_DB</key><string>__HOME__/Library/Messages/chat.db</string>
|
| 22 |
+
</dict>
|
| 23 |
+
<key>RunAtLoad</key><true/>
|
| 24 |
+
<key>KeepAlive</key><true/>
|
| 25 |
+
<key>StandardOutPath</key><string>__HOME__/Library/Logs/offgrid-collector.log</string>
|
| 26 |
+
<key>StandardErrorPath</key><string>__HOME__/Library/Logs/offgrid-collector.log</string>
|
| 27 |
+
</dict>
|
| 28 |
+
</plist>
|
deploy/launchd/com.offgrid.hermes.plist
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
| 2 |
+
<!-- Hermes brain: an OpenAI-compatible llama.cpp server the backend points at via
|
| 3 |
+
INFERENCE_BASE_URL. Template — scripts/setup_mac.sh fills the __PLACEHOLDERS__
|
| 4 |
+
and installs into ~/Library/LaunchAgents. -->
|
| 5 |
+
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
| 6 |
+
<plist version="1.0">
|
| 7 |
+
<dict>
|
| 8 |
+
<key>Label</key><string>com.offgrid.hermes</string>
|
| 9 |
+
<key>ProgramArguments</key>
|
| 10 |
+
<array>
|
| 11 |
+
<string>__LLAMA_SERVER__</string>
|
| 12 |
+
<string>-m</string><string>__MODEL_GGUF__</string>
|
| 13 |
+
<string>--host</string><string>127.0.0.1</string>
|
| 14 |
+
<string>--port</string><string>8080</string>
|
| 15 |
+
<string>--ctx-size</string><string>8192</string>
|
| 16 |
+
<string>--jinja</string> <!-- enable the tool-calling chat template -->
|
| 17 |
+
</array>
|
| 18 |
+
<key>RunAtLoad</key><true/>
|
| 19 |
+
<key>KeepAlive</key><true/>
|
| 20 |
+
<key>StandardOutPath</key><string>__HOME__/Library/Logs/offgrid-hermes.log</string>
|
| 21 |
+
<key>StandardErrorPath</key><string>__HOME__/Library/Logs/offgrid-hermes.log</string>
|
| 22 |
+
</dict>
|
| 23 |
+
</plist>
|
docs/android-tasker.md
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Android background capture (Scenario 2)
|
| 2 |
+
|
| 3 |
+
Unlike iOS, Android **allows background message capture**, so you get real on-phone autonomy — just
|
| 4 |
+
not for iMessage. A no-build recipe (Tasker or MacroDroid) calls the same shared **`POST /agent`**
|
| 5 |
+
backend the Mac collector and iOS Shortcut use.
|
| 6 |
+
|
| 7 |
+
## What you need
|
| 8 |
+
- The backend reachable from the phone: the HF Space's dedicated-GPU path, a Mac/cloud box, or even
|
| 9 |
+
the phone itself (Termux). The free **ZeroGPU Space does not serve `/agent`** (Gradio-SDK only) — use
|
| 10 |
+
one of the others.
|
| 11 |
+
- The same `INGEST_TOKEN` the backend uses.
|
| 12 |
+
|
| 13 |
+
## Tasker recipe (Notification Access — works for RCS/WhatsApp/SMS notifications)
|
| 14 |
+
1. **Profile → Event → UI → Notification** (or **Phone → Received Text** for SMS). Restrict it to
|
| 15 |
+
your messaging app(s).
|
| 16 |
+
2. **Task → Net → HTTP Request:**
|
| 17 |
+
- Method: `POST`
|
| 18 |
+
- URL: `https://<your-backend>/agent`
|
| 19 |
+
- Headers: `Authorization: Bearer <INGEST_TOKEN>` and `Content-Type: application/json`
|
| 20 |
+
- Body:
|
| 21 |
+
```json
|
| 22 |
+
{ "thread": "%evtprm()", "now": "%TIMES", "push_gcal": true }
|
| 23 |
+
```
|
| 24 |
+
(Use the notification text variable your trigger provides for `thread`; `%TIMES` → current time.)
|
| 25 |
+
3. **Parse the response** (`Variable → JSON Read` on `plan.events`) if you want a confirmation
|
| 26 |
+
toast/notification; otherwise `push_gcal:true` already created the events in Google Calendar.
|
| 27 |
+
|
| 28 |
+
MacroDroid is equivalent: **Trigger:** Notification Received / SMS Received → **Action:** HTTP POST
|
| 29 |
+
with the same URL/headers/body.
|
| 30 |
+
|
| 31 |
+
## Notes
|
| 32 |
+
- This is genuinely hands-off: the OS delivers the trigger in the background.
|
| 33 |
+
- For a fully on-device variant, run the backend + a small model in **Termux** and point Tasker at
|
| 34 |
+
`http://127.0.0.1:7860/agent`, with `INFERENCE_BASE_URL` → a local `llama-server` (Gemma E4B / a
|
| 35 |
+
small Hermes). See [on-device.md](./on-device.md) and [hermes.md](./hermes.md).
|
| 36 |
+
- A native Kotlin `NotificationListenerService` app could replace Tasker for a polished install — a
|
| 37 |
+
separate effort; the Tasker recipe is the MVP.
|
docs/architecture.md
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Architecture — workflows and the LLMs behind them
|
| 2 |
+
|
| 3 |
+
An AI-solution-architect view of the agentic system: every workflow through the
|
| 4 |
+
platform, and exactly which model (if any) each one calls. The architectural
|
| 5 |
+
signature: the extraction core is **one grammar-constrained LLM call**, the
|
| 6 |
+
**MiniCPM planner** adds a visible multi-step loop over the platform's own
|
| 7 |
+
public MCP tool contract, everything verifiable — conflict math, dedup, time
|
| 8 |
+
proposals, eval gates — stays deterministic, and there are **zero cloud-AI API
|
| 9 |
+
calls anywhere**, training included.
|
| 10 |
+
|
| 11 |
+
## System workflow
|
| 12 |
+
|
| 13 |
+
```mermaid
|
| 14 |
+
flowchart TB
|
| 15 |
+
subgraph ENTRY["1 · Entry points — four front-ends, one contract"]
|
| 16 |
+
direction LR
|
| 17 |
+
UIIN["🖥️ Gradio UI<br/>Schedule flow + Agent tab<br/>(paste thread, screenshots, .ics)"]
|
| 18 |
+
SHORT["📱 iOS Shortcut /<br/>Android Tasker"]
|
| 19 |
+
MAC["🍎 Mac collector<br/>polls iMessage chat.db<br/>(collector/collector.py)"]
|
| 20 |
+
MCPC["🤖 MCP clients<br/>Claude Desktop, Cursor"]
|
| 21 |
+
end
|
| 22 |
+
|
| 23 |
+
subgraph API["2 · API & orchestration — app.py (FastAPI + Gradio, one port)"]
|
| 24 |
+
AGENTEP["POST /agent<br/>bearer-token, stateless"]
|
| 25 |
+
INGEST["POST /ingest → feed store<br/>AUTONOMOUS=1 triggers on<br/>your outgoing message (is_from_me)"]
|
| 26 |
+
ROLL["threads.rolling_thread<br/>per-chat window (20 msgs / 12 h)"]
|
| 27 |
+
MCPT["MCP tools — server/mcp_tools.py<br/>extract_events · make_ics · check_conflicts"]
|
| 28 |
+
end
|
| 29 |
+
|
| 30 |
+
subgraph ORCH["2a · Agentic orchestration — server/orchestrator.py"]
|
| 31 |
+
SMOL["smolagents ToolCallingAgent<br/>planned by MiniCPM, ≤6 steps<br/>playbook: extract → check → render<br/>final ActionPlan re-derived deterministically"]
|
| 32 |
+
SCRIPT["ScriptedPlanner — no LLM<br/>identical tool sequence + step events<br/>(stub mode, CI, planner failure)"]
|
| 33 |
+
end
|
| 34 |
+
|
| 35 |
+
subgraph CORE["3 · Agent core — server/pipeline.py → server/agent.py"]
|
| 36 |
+
PROMPT["Prompt assembly:<br/>SYSTEM + memory recall block<br/>+ existing calendar + thread + images"]
|
| 37 |
+
GEN["Grammar-constrained generation<br/>→ ActionPlan JSON (always parses)"]
|
| 38 |
+
PROMPT --> GEN
|
| 39 |
+
end
|
| 40 |
+
|
| 41 |
+
subgraph LLMT["4 · LLM tier — ALL inference is local llama.cpp, zero cloud AI APIs"]
|
| 42 |
+
GEMMA["⭐ gemma-cal E4B — fine-tuned Gemma 4<br/>ParetoOptimal/gemma-4-cal-gguf<br/>gemma-cal-e4b-Q4_K_M.gguf (~5 GB)<br/>+ mmproj-F16.gguf vision projector"]
|
| 43 |
+
MODES["served either:<br/>· in-process llama-cpp-python (ZeroGPU lease)<br/>· remote llama-server via INFERENCE_BASE_URL<br/>(Space sidecar / Mac launchd / phone)"]
|
| 44 |
+
MINICPM["🧭 MiniCPM planner — OpenBMB (sponsor)<br/>openbmb/MiniCPM4.1-8B-GGUF Q4 (~5 GB)<br/>≤4B option: openbmb/MiniCPM5-1B-GGUF (config switch)<br/>2nd llama-server :8081 — enabled via<br/>PLANNER_HF_REPO / PLANNER_FILE"]
|
| 45 |
+
HERMES["(optional) Hermes-3-Llama-3.1-8B Q4_K_M<br/>HERMES_TOOLS=1 — tool-calling loop:<br/>calls remember() to write memory mid-run"]
|
| 46 |
+
STUB["(no LLM) regex stub extractor<br/>USE_STUB_EXTRACTOR=1 — CI & free tier"]
|
| 47 |
+
GEMMA --- MODES
|
| 48 |
+
end
|
| 49 |
+
|
| 50 |
+
subgraph DET["5 · Deterministic post-processing — no LLM"]
|
| 51 |
+
CONF["freebusy.annotate_conflicts<br/>overlap / adjacent / tight<br/>+ propose_times free slots"]
|
| 52 |
+
DEDUP["dedup.filter_new<br/>idempotency for autonomous runs"]
|
| 53 |
+
MEMW["memory.observe_plan<br/>learns recurring contacts"]
|
| 54 |
+
end
|
| 55 |
+
|
| 56 |
+
subgraph OUT["6 · Outputs"]
|
| 57 |
+
CARDS["Event cards + reply draft<br/>+ clarification question"]
|
| 58 |
+
ICS["📥 .ics download<br/>(off-grid default)"]
|
| 59 |
+
GCAL["📆 Google Calendar push<br/>(per-user OAuth web flow, opt-in)"]
|
| 60 |
+
TRACE["Redacted trace export<br/>→ public HF dataset"]
|
| 61 |
+
end
|
| 62 |
+
|
| 63 |
+
UIIN -->|"run_orchestrator (step trace streams into the UI)"| SMOL
|
| 64 |
+
SHORT --> AGENTEP
|
| 65 |
+
MAC -->|"store-only"| INGEST
|
| 66 |
+
MAC -->|"AGENT_MODE=1"| AGENTEP
|
| 67 |
+
MCPC --> MCPT
|
| 68 |
+
AGENTEP --> CORE
|
| 69 |
+
INGEST --> ROLL --> CORE
|
| 70 |
+
SMOL ==>|"planning loop, ≤6 steps"| MINICPM
|
| 71 |
+
SMOL -->|"tool calls — the Space's OWN MCP<br/>endpoint (localhost SSE)"| MCPT
|
| 72 |
+
SMOL -.->|"planner down / stub mode"| SCRIPT
|
| 73 |
+
SCRIPT -->|"same tool sequence,<br/>deterministic"| MCPT
|
| 74 |
+
MCPT -->|"extract_events → 1 LLM call"| CORE
|
| 75 |
+
MCPT -.->|"make_ics / check_conflicts → 0 LLM calls"| DET
|
| 76 |
+
|
| 77 |
+
GEN ==>|"default"| GEMMA
|
| 78 |
+
GEN -.->|"opt-in autonomous brain"| HERMES
|
| 79 |
+
GEN -.->|"tests / free demo"| STUB
|
| 80 |
+
HERMES -->|"remember()"| MEMW
|
| 81 |
+
|
| 82 |
+
LLMT --> DET --> OUT
|
| 83 |
+
```
|
| 84 |
+
|
| 85 |
+
## Offline loop — eval-gated fine-tuning (produces the serving LLM)
|
| 86 |
+
|
| 87 |
+
```mermaid
|
| 88 |
+
flowchart LR
|
| 89 |
+
SEEDS["Seed data — NO LLM<br/>139 hand-authored template examples<br/>(gen_new_seeds.py / make_dataset.py)"]
|
| 90 |
+
SMC["SMCalFlow import — NO LLM<br/>deterministic LISP-program parse, ~2000 rows"]
|
| 91 |
+
TRAIN["QLoRA fine-tune — Unsloth on Modal A100-80GB<br/>base: google/gemma-4-31B-it or gemma-4-E4B-it<br/>r=16, lr 5e-5, 2 epochs, responses-only loss"]
|
| 92 |
+
GGUF["convert_hf_to_gguf + llama-quantize<br/>→ staging Q4_K_M GGUF"]
|
| 93 |
+
EVAL["Eval — NO LLM judge, deterministic metrics<br/>60-example held-out set:<br/>schema validity · event F1 · start-exact recall"]
|
| 94 |
+
GATE{"Gate<br/>validity ≥ 0.95<br/>F1 ≥ 0.81<br/>recall ≥ 0.773"}
|
| 95 |
+
PROD["Promote → ParetoOptimal/gemma-4-cal-gguf<br/>(the model the Space serves)"]
|
| 96 |
+
TRASH["Discard staging —<br/>production untouched"]
|
| 97 |
+
|
| 98 |
+
SEEDS --> TRAIN
|
| 99 |
+
SMC --> TRAIN
|
| 100 |
+
TRAIN --> GGUF --> EVAL --> GATE
|
| 101 |
+
GATE -->|pass| PROD
|
| 102 |
+
GATE -->|fail| TRASH
|
| 103 |
+
```
|
| 104 |
+
|
| 105 |
+
See [eval-roadmap.md](./eval-roadmap.md) and the
|
| 106 |
+
[eval-gated fine-tuning post-mortem](./blog-eval-gated-finetuning.md) for the
|
| 107 |
+
gate's history and rationale; [hermes.md](./hermes.md) for the optional
|
| 108 |
+
tool-calling backend; [build-small-submission.md](./build-small-submission.md)
|
| 109 |
+
for how the MiniCPM planner maps to the `sponsor:openbmb` track.
|
| 110 |
+
|
| 111 |
+
## Which LLM each workflow calls
|
| 112 |
+
|
| 113 |
+
| # | Workflow | Trigger | LLM call(s) | Where it runs |
|
| 114 |
+
|---|----------|---------|-------------|----------------|
|
| 115 |
+
| 1 | Agentic orchestration (Schedule flow + Agent tab) | User pastes thread / uploads screenshots, clicks Find the events / Run the agents | **1× MiniCPM planning loop** (`MiniCPM4.1-8B`, or `MiniCPM5-1B` ≤4B variant; ≤6 steps) driving the Space's own MCP tools, **+ 1× gemma-cal E4B** per `extract_events` tool call (vision via mmproj); `check_conflicts`/`make_ics` are zero-LLM. Planner unconfigured or down → ScriptedPlanner runs the identical sequence, **gemma-cal only** | Two local llama-servers — gemma-cal on :8080, MiniCPM on :8081 |
|
| 116 |
+
| 2 | API extraction (`POST /agent`) | iOS Shortcut, Android Tasker, or Mac collector in `AGENT_MODE=1` | **1× gemma-cal E4B** (same pipeline, same prompt) | Same |
|
| 117 |
+
| 3 | Autonomous ingest | Mac collector → `/ingest`; your outgoing message triggers a run over the chat's rolling thread | **1× gemma-cal E4B per affected chat**, then deterministic dedup + calendar delivery | Same |
|
| 118 |
+
| 4 | Memory-writing agent (optional) | `HERMES_TOOLS=1` on the remote path | **Hermes-3-Llama-3.1-8B** in a tool loop (≤3 rounds): may call `remember()` then returns the ActionPlan | Remote llama-server (e.g. Mac launchd) |
|
| 119 |
+
| 5 | MCP tools for external agents | MCP client calls the Space | `extract_events` → **1× gemma-cal E4B**; `make_ics` and `check_conflicts` → **zero LLM calls** | Same as #1 |
|
| 120 |
+
| 6 | CI / free-tier demo | `USE_STUB_EXTRACTOR=1` | **No LLM** — regex heuristic | CPU anywhere |
|
| 121 |
+
| 7 | Training & eval (offline) | `training/gated_retrain.py` | **No LLM at the inference-API level**: data gen is template-based, eval is metric-based (no judge). The LLM here is the *training target*: QLoRA on `google/gemma-4-31B-it` / `gemma-4-E4B-it` | Modal A100/H100 |
|
docs/automations.md
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Automations — make it autonomous without a custom app
|
| 2 |
+
|
| 3 |
+
Everything below drives one endpoint. **iOS cannot read iMessage in the background** (no API), so the
|
| 4 |
+
autonomy ceiling differs by platform:
|
| 5 |
+
|
| 6 |
+
| Front-end | Autonomy | Source | Notes |
|
| 7 |
+
|---|---|---|---|
|
| 8 |
+
| Mac collector (`AGENT_MODE`/`AUTONOMOUS`) | Fully hands-off | iMessage | Needs an always-on Mac |
|
| 9 |
+
| iOS Shortcut | One gesture (you trigger it) | anything you share | No background reading possible |
|
| 10 |
+
| Android Tasker/MacroDroid | Hands-off | SMS/RCS/notifications | Not iMessage |
|
| 11 |
+
|
| 12 |
+
## The `/agent` contract (what they all call)
|
| 13 |
+
|
| 14 |
+
`POST {SPACE_URL}/agent` with `Authorization: Bearer <INGEST_TOKEN>`:
|
| 15 |
+
|
| 16 |
+
```jsonc
|
| 17 |
+
// request — `thread` OR `messages` required; rest optional
|
| 18 |
+
{
|
| 19 |
+
"thread": "Room parent: picture day Thursday 9am\nMe: thanks",
|
| 20 |
+
"messages": [{"sender": "Room parent", "text": "picture day Thursday 9am"}],
|
| 21 |
+
"images": ["data:image/png;base64,..."], // a screenshot
|
| 22 |
+
"existing_ics": "<base64 .ics>", // optional, enables conflict checks
|
| 23 |
+
"now": "2026-06-05T10:00:00",
|
| 24 |
+
"push_gcal": false,
|
| 25 |
+
"return_ics": true
|
| 26 |
+
}
|
| 27 |
+
```
|
| 28 |
+
```jsonc
|
| 29 |
+
// response
|
| 30 |
+
{
|
| 31 |
+
"plan": { "events": [{"title":"Picture day","start":"2026-06-11T09:00:00", ...}],
|
| 32 |
+
"conflicts": [], "proposed_times": [], "reply_draft": "...", "needs_clarification": null },
|
| 33 |
+
"ics_base64": "<...>",
|
| 34 |
+
"gcal_links": []
|
| 35 |
+
}
|
| 36 |
+
```
|
| 37 |
+
|
| 38 |
+
## (A) Mac collector — fully autonomous (iMessage)
|
| 39 |
+
|
| 40 |
+
Two equivalent ways; prefer the server-side switch so logic lives in one place:
|
| 41 |
+
|
| 42 |
+
- **Server-side:** run the Space with `AUTONOMOUS=1`. `/ingest` then assembles a per-chat rolling
|
| 43 |
+
thread, runs the agent, dedupes, and (if Google Calendar is configured) pushes events automatically.
|
| 44 |
+
- **Collector-side:** run the collector with `AGENT_MODE=1` — it POSTs `/agent` (with `push_gcal`)
|
| 45 |
+
instead of `/ingest`. See `collector/collector.py`.
|
| 46 |
+
|
| 47 |
+
```bash
|
| 48 |
+
# collector-side
|
| 49 |
+
cd collector && AGENT_MODE=1 python collector.py
|
| 50 |
+
```
|
| 51 |
+
|
| 52 |
+
## (B) iOS Shortcut — one tap, no `.ics` import
|
| 53 |
+
|
| 54 |
+
1. New Shortcut → accept **Share Sheet** input (Text and Images).
|
| 55 |
+
2. **Text** → set variable `Thread`.
|
| 56 |
+
3. **Get Contents of URL** → `https://<your-space>/agent`, Method **POST**, Header
|
| 57 |
+
`Authorization: Bearer <INGEST_TOKEN>`, Request Body **JSON**:
|
| 58 |
+
`{ "thread": Thread, "now": <Current Date, ISO 8601> }`
|
| 59 |
+
(To send a screenshot instead: Base64-encode the shared image and add it to the `images` array as a `data:image/png;base64,…` string, as in the request example above.)
|
| 60 |
+
4. **Get Dictionary Value** `plan.events` from the response.
|
| 61 |
+
5. **Repeat with Each** → **Add New Event** (Calendar): Title = `title`, Start = `start`,
|
| 62 |
+
End = `end`, Location = `location`, Notes = `notes`.
|
| 63 |
+
|
| 64 |
+
Now sharing a thread/screenshot to the Shortcut adds the events to Apple Calendar in one tap — no
|
| 65 |
+
file download, no import. (Optional: read back `plan.conflicts` and show an alert.)
|
| 66 |
+
|
| 67 |
+
## (C) Android — Tasker / MacroDroid (SMS/RCS)
|
| 68 |
+
|
| 69 |
+
1. **Trigger:** Event → *Received Text* (SMS), or a Notification trigger for your messaging app.
|
| 70 |
+
2. **Action:** HTTP Request → POST `https://<your-space>/agent`, header
|
| 71 |
+
`Authorization: Bearer <INGEST_TOKEN>`, body `{ "thread": "%astext", "now": "%DATE..." }`.
|
| 72 |
+
3. Parse `plan.events` (JSON Read) → for each, **Insert Calendar Event** (Tasker writes via
|
| 73 |
+
`CalendarContract`).
|
| 74 |
+
|
| 75 |
+
Because Android can read SMS/RCS and run in the background, this path is genuinely autonomous.
|
| 76 |
+
|
| 77 |
+
## Roadmap — a native app
|
| 78 |
+
|
| 79 |
+
- **Android:** a real app using a Notification Listener / `READ_SMS`, on-device **Gemma E4B** via
|
| 80 |
+
llama.cpp/MLC (see [on-device.md](./on-device.md)), writing events through the Calendar provider —
|
| 81 |
+
the same `/agent` contract or fully local. Feasible and fully autonomous.
|
| 82 |
+
- **iOS:** apps get no background access to messages or to an LLM server — the Shortcut above is the ceiling. An
|
| 83 |
+
autonomous iOS iMessage app is **not possible**; we won't promise one.
|
docs/blog-eval-gated-finetuning.md
ADDED
|
@@ -0,0 +1,187 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# What Six Failed Fine-Tunes Taught Us About Evals, Templates, and Knowing When to Stop
|
| 2 |
+
|
| 3 |
+
*A post-mortem on fine-tuning Gemma-4 for structured calendar extraction — fifteen GPU runs,
|
| 4 |
+
one destroyed model, one exonerated quantizer, a chat-template landmine, and the eval harness
|
| 5 |
+
that caught every bad model before it shipped.*
|
| 6 |
+
|
| 7 |
+
---
|
| 8 |
+
|
| 9 |
+
## The setup
|
| 10 |
+
|
| 11 |
+
[OffGridSchedula](https://huggingface.co/spaces/ParetoOptimal/OffGridSchedula) is a local-first
|
| 12 |
+
scheduling agent: paste a group chat (or a flyer screenshot) and get back a constrained
|
| 13 |
+
**ActionPlan** JSON — events with exact ISO datetimes, a conflict check, a drafted reply, and a
|
| 14 |
+
`needs_clarification` question when the thread is too vague to schedule. Inference is
|
| 15 |
+
llama.cpp serving Gemma-4 GGUFs; no cloud AI APIs.
|
| 16 |
+
|
| 17 |
+
The project carried a hard requirement: ship a **fine-tuned model** that outperforms its base.
|
| 18 |
+
This is the story of trying to satisfy that requirement honestly — and what "honestly" ended up
|
| 19 |
+
costing and teaching. Everything below ran on Modal serverless A100s; total GPU spend for the
|
| 20 |
+
entire investigation was well under $100.
|
| 21 |
+
|
| 22 |
+
## Act I: The fine-tune that lost to its own base
|
| 23 |
+
|
| 24 |
+
The first QLoRA fine-tune of `google/gemma-4-31B-it` (Unsloth, r=16, 69 synthetic examples,
|
| 25 |
+
2 epochs) looked fine in a smoke test. So we built a real eval before trusting it: 28 held-out
|
| 26 |
+
examples scored on **start-exact recall** (did you produce the exact ISO start datetime),
|
| 27 |
+
event F1 with greedy datetime matching, schema validity, no-event accuracy (does chitchat
|
| 28 |
+
hallucinate events), and clarification recall (do you *ask* instead of inventing when a plan is
|
| 29 |
+
"TBD"). Temperature 0, the same `response_format: json_schema` call the production server uses.
|
| 30 |
+
|
| 31 |
+
First scores: fine-tune **F1 0.81**, base **0.977**. The fine-tune *lost to its own base*. The
|
| 32 |
+
mismatch dump showed why — three of its five misses were the same corruption: `"206-10-06"`
|
| 33 |
+
instead of `"2026-10-06"`. A dropped year digit.
|
| 34 |
+
|
| 35 |
+
Two suspects: quantization (classic low-bit digit corruption) or the training itself.
|
| 36 |
+
|
| 37 |
+
## Act II: Scaling data made it worse. Much worse.
|
| 38 |
+
|
| 39 |
+
The intuitive fix — more data — backfired in the most instructive way possible:
|
| 40 |
+
|
| 41 |
+
| training examples | schema validity | event F1 |
|
| 42 |
+
|---|---|---|
|
| 43 |
+
| 69 | 1.00 | 0.81 |
|
| 44 |
+
| 87 | 0.75 | 0.465 |
|
| 45 |
+
| 122 | 0.46 | 0.214 |
|
| 46 |
+
| 2,122 (incl. real SMCalFlow data) | 0.107 | 0.000 |
|
| 47 |
+
|
| 48 |
+
**Monotonic decay with training steps.** By the 2,122-example run the model emitted unparseable
|
| 49 |
+
output on ~90% of inputs. A raw-output probe (serve the staging GGUF, generate *without* the
|
| 50 |
+
JSON grammar) settled what "broken" meant: the model free-generated `Huddle — — — — — —…` to the
|
| 51 |
+
token limit. Not a formatting problem. Destroyed weights.
|
| 52 |
+
|
| 53 |
+
Two cheap experiments isolated the cause:
|
| 54 |
+
|
| 55 |
+
**Quantization was exonerated** by sweeping the *same* merged weights through f16 / Q8_0 /
|
| 56 |
+
Q4_K_M (one A100 lease; the fp16 weights were already on a Modal volume). At full fp16 the fine-tune
|
| 57 |
+
still scored validity 0.64 / F1 0.57 — nowhere near base. Higher weight precision bought only ~+0.1 F1. The damage
|
| 58 |
+
preceded the quantizer.
|
| 59 |
+
|
| 60 |
+
**The chat template was half the story.** Gemma-4 ships a brand-new template —
|
| 61 |
+
`<|turn>user\n…<turn|>`, with a dedicated `<|turn>system` block. There is no
|
| 62 |
+
`<start_of_turn>` anywhere in it. Our training code used Unsloth's legacy `"gemma"` template,
|
| 63 |
+
which is built entirely on `<start_of_turn>`. Every gradient step optimized a turn syntax that
|
| 64 |
+
`llama-server --jinja` (which reads the template *embedded in the GGUF*) never renders. We
|
| 65 |
+
verified the fix end-to-end by reading `tokenizer.chat_template` out of our exported GGUF's
|
| 66 |
+
metadata with `gguf.GGUFReader` — trust the artifact, not the code — and added a hard
|
| 67 |
+
`assert "<|turn>" in rendered` to the training script so the mismatch can never silently
|
| 68 |
+
recur.
|
| 69 |
+
|
| 70 |
+
And yet: with templates verifiably aligned, response-only loss masking, and LR dropped to 5e-5,
|
| 71 |
+
the 31B *still* collapsed to validity 0.0. With dataset, template, LR, and masking all varied,
|
| 72 |
+
the one remaining common factor was the training stack itself: Unsloth's QLoRA path for the
|
| 73 |
+
brand-new Gemma-4-31B architecture (its own logs warn it can't handle `Gemma4AudioModel`
|
| 74 |
+
internals). The same recipe on Gemma-4 **E4B** trained cleanly every single time. New
|
| 75 |
+
architectures make the training framework a first-class suspect.
|
| 76 |
+
|
| 77 |
+
## Act III: The benchmark nobody wants — prompt engineering hits 1.0
|
| 78 |
+
|
| 79 |
+
While the fine-tune investigation ran, error analysis kept improving the *system*:
|
| 80 |
+
|
| 81 |
+
- **State the weekday in the prompt.** `Current datetime: Monday, 2026-09-14T09:00:00` turns
|
| 82 |
+
day-of-week resolution from memorized calendar knowledge into deterministic arithmetic — for
|
| 83 |
+
every model.
|
| 84 |
+
- **Two surgical system-prompt lines** targeting the base's only two eval misses (multi-event
|
| 85 |
+
splitting; asking on "TBD") took **stock Gemma-4-31B to 1.0 on every metric**.
|
| 86 |
+
|
| 87 |
+
That's the uncomfortable benchmark for any SFT project: against a near-ceiling base, prompt
|
| 88 |
+
engineering had ~100× better ROI than fine-tuning. The requirement, however, was a fine-tune
|
| 89 |
+
that beats *its* base — so we re-aimed at the tier where headroom actually existed.
|
| 90 |
+
|
| 91 |
+
## Act IV: The E4B campaign — six gated runs to a tie
|
| 92 |
+
|
| 93 |
+
A ~5 GB Gemma-4 E4B that runs on modest hardware is the model this local-first project actually
|
| 94 |
+
wants at the edge, and stock E4B had real room: F1 0.93. Every retrain ran through an
|
| 95 |
+
**eval-gate**: train → upload to a *staging* filename → eval → promote to production **only if
|
| 96 |
+
it beats the bar**, else delete staging. The gate rejected eight models across this project
|
| 97 |
+
without production ever serving one of them.
|
| 98 |
+
|
| 99 |
+
Each iteration fixed a diagnosed failure, not a hunch:
|
| 100 |
+
|
| 101 |
+
| run | change | F1 (eval) |
|
| 102 |
+
|---|---|---|
|
| 103 |
+
| 1 | fixed recipe, 2,122 examples | 0.884 (n=28) |
|
| 104 |
+
| 2 | weekday-in-prompt, data regenerated to match | 0.955 |
|
| 105 |
+
| 3 | dropped 74 SMCalFlow rows teaching a conflicting "next DOW" convention; 4× hand-data upsample | **1.000** |
|
| 106 |
+
| 4 | + TBD-clarify seeds, 8× upsample | 0.93 (clarify → 1.0) |
|
| 107 |
+
| 5 | clarify seeds at 4× | 0.93 |
|
| 108 |
+
| — | **eval expanded 28 → 60 examples** | — |
|
| 109 |
+
| 6 | + targeted seeds for the two shapes stock fails | 0.97 |
|
| 110 |
+
|
| 111 |
+
Three findings here deserve their own bullets:
|
| 112 |
+
|
| 113 |
+
- **Label conventions are silent killers.** SMCalFlow annotates "next Tuesday" (said on a
|
| 114 |
+
Monday) as *tomorrow*; our app's convention is *Tuesday of next week*. 74 imported rows
|
| 115 |
+
trained the bug in. Filtering them fixed it — until other data shifts brought it back.
|
| 116 |
+
When you convert someone else's dataset, you inherit someone else's semantics.
|
| 117 |
+
- **Small evals lie.** At n=28 (22 gold events), one event = 4.5 recall points, and we watched
|
| 118 |
+
**4 added training rows flip 3 eval cases**. Run-to-run SFT jitter swamped the signal — runs
|
| 119 |
+
3–5 were a seesaw, not progress. Expanding to 60 examples / 50 events made the gate mean
|
| 120 |
+
something again.
|
| 121 |
+
- **Some priors resist data.** The "next Tuesday = tomorrow" prior survived *seven* explicit
|
| 122 |
+
counter-examples. Stock makes the same error. Genuinely ambiguous English stays ambiguous.
|
| 123 |
+
|
| 124 |
+
Run 6 vs stock E4B (with the same engineered prompt): **identical confusion counts** —
|
| 125 |
+
48/50 events, tp/fp/fn 48/1/2, F1 0.97 vs 0.97. A dead statistical tie.
|
| 126 |
+
|
| 127 |
+
## Act V: The bare-prompt tiebreaker
|
| 128 |
+
|
| 129 |
+
The classic argument for SFT at parity is internalization: the fine-tune shouldn't *need* the
|
| 130 |
+
prompt. So we measured it — same 60 examples, system prompt deleted for both models, identical
|
| 131 |
+
minimal user content, same JSON-schema constraint:
|
| 132 |
+
|
| 133 |
+
| bare, n=60 | stock E4B | fine-tuned E4B |
|
| 134 |
+
|---|---|---|
|
| 135 |
+
| schema validity | 0.967 | **1.0** |
|
| 136 |
+
| no-event accuracy | 0.70 | **0.80** |
|
| 137 |
+
| clarification recall | 0.50 | **0.625** |
|
| 138 |
+
| event F1 | **0.682** | 0.644 |
|
| 139 |
+
|
| 140 |
+
The fine-tune is more *disciplined* bare (never breaks schema, hallucinates less, asks more);
|
| 141 |
+
stock narrowly wins on bare extraction F1. No decisive gap. **Final verdict: at this data scale (139
|
| 142 |
+
hand-authored + 2,000 converted examples, QLoRA, 1 epoch), the fine-tune reaches parity with
|
| 143 |
+
its base — not superiority.** It shipped as the project's edge model with exactly that claim on
|
| 144 |
+
the model card, by explicit owner decision; the strict-dominance auto-gate, correctly, never
|
| 145 |
+
promoted it.
|
| 146 |
+
|
| 147 |
+
## What we'd tell you to do differently
|
| 148 |
+
|
| 149 |
+
1. **Build the eval before the fine-tune, and gate every publish on it.** Ours rejected eight
|
| 150 |
+
bad models, caught a regression that had already overwritten a good artifact (server-side
|
| 151 |
+
`CommitOperationCopy` restored it for free), and converted every failure into a diagnosis.
|
| 152 |
+
The eval harness was the single highest-value artifact of the project.
|
| 153 |
+
2. **Train format must equal serve format — and verify it in the artifact.** Read the chat
|
| 154 |
+
template out of the exported GGUF's metadata. Assert it in the training script. A template
|
| 155 |
+
mismatch doesn't error; it just quietly ruins everything at a rate proportional to your
|
| 156 |
+
training steps.
|
| 157 |
+
3. **Suspect the training stack on new architectures.** The same recipe destroyed Gemma-4-31B
|
| 158 |
+
and trained Gemma-4-E4B flawlessly, six times in a row. Framework warnings about unhandled
|
| 159 |
+
submodules (`Gemma4AudioModel`) are not noise.
|
| 160 |
+
4. **Exonerate quantization cheaply before blaming it.** Sweep the same weights across
|
| 161 |
+
f16/Q8/Q4 in one GPU lease. Ours cost a few dollars and killed the most plausible-sounding
|
| 162 |
+
hypothesis of the whole project.
|
| 163 |
+
5. **Put deterministic facts in the prompt instead of hoping the model memorized them.**
|
| 164 |
+
Weekday-in-the-prompt improved every model, including the ones we didn't train.
|
| 165 |
+
6. **Match your eval's resolution to your iteration size.** If one flipped case moves a gated
|
| 166 |
+
metric by 4+ points, your gate is a coin flip.
|
| 167 |
+
7. **Diff label conventions when importing datasets.** Resolution semantics ("next DOW"),
|
| 168 |
+
reference-time handling, and reply style all transfer — whether you want them to or not.
|
| 169 |
+
8. **Respect the parity outcome.** Against a strong instruction-tuned base on a narrow,
|
| 170 |
+
well-prompted task, SFT parity is a common honest result. The defensible claims left are
|
| 171 |
+
discipline-under-no-prompt, token savings, and convention control — claim those, not wins
|
| 172 |
+
you didn't measure.
|
| 173 |
+
|
| 174 |
+
### What might still beat the base
|
| 175 |
+
10–100× more *real* (non-template) data; full fine-tuning rather than QLoRA once the stack
|
| 176 |
+
supports the architecture; preference optimization (DPO) specifically on the
|
| 177 |
+
clarify-vs-extract boundary; and a harder eval where the ceiling isn't 0.97. The gate is
|
| 178 |
+
already in place to referee all of it.
|
| 179 |
+
|
| 180 |
+
---
|
| 181 |
+
|
| 182 |
+
*Artifacts: model repo
|
| 183 |
+
[`ParetoOptimal/gemma-4-cal-gguf`](https://huggingface.co/ParetoOptimal/gemma-4-cal-gguf)
|
| 184 |
+
(31B v1 + E4B edge + mmproj), eval harness `training/eval.py` + `training/data/eval.jsonl`,
|
| 185 |
+
gate `training/gated_retrain.py`, importer `training/import_smcalflow.py` (SMCalFlow,
|
| 186 |
+
CC BY-SA 4.0 — Semantic Machines et al., TACL 2020), full run-by-run log in
|
| 187 |
+
`docs/eval-roadmap.md`.*
|
docs/build-small-submission.md
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Build Small — submission mapping
|
| 2 |
+
|
| 3 |
+
How OffGridSchedula lines up with every requirement, track, sponsor prize, and badge of the
|
| 4 |
+
[Build Small hackathon](https://huggingface.co/build-small-hackathon)
|
| 5 |
+
([field guide](https://huggingface.co/spaces/build-small-hackathon/field-guide)).
|
| 6 |
+
Tags claimed in the README frontmatter use the field guide's namespaced taxonomy
|
| 7 |
+
(`track:*`, `sponsor:*`, `achievement:*`).
|
| 8 |
+
|
| 9 |
+
## Hard rules
|
| 10 |
+
|
| 11 |
+
| # | Rule | Status | Evidence |
|
| 12 |
+
|---|------|--------|----------|
|
| 13 |
+
| 1 | Every model under 32B parameters | ✅ | Two local models, both far under the cap: extraction is [`gemma-cal` E4B](https://huggingface.co/build-small-hackathon/gemma-4-cal-gguf) (~4B effective params, ~5 GB GGUF at Q4) and planning is [`openbmb/MiniCPM5-1B`](https://huggingface.co/openbmb/MiniCPM5-1B-GGUF) (1B). |
|
| 14 |
+
| 2 | Gradio app, hosted as an HF Space (Docker OK) | ✅ | [`app.py`](../app.py) is a Gradio Blocks app served from a Docker SDK Space running llama.cpp, now hosted in the hackathon org: [`build-small-hackathon/OffGridSchedula`](https://huggingface.co/spaces/build-small-hackathon/OffGridSchedula). |
|
| 15 |
+
| 3 | Demo video | ✅ | Recorded and linked from the README: [youtu.be/m-o0u9X3tI4](https://youtu.be/m-o0u9X3tI4) (storyboard in [`docs/demo-script.md`](./demo-script.md)). |
|
| 16 |
+
| 4 | Social media post, linked from the README | ✅ | Published and linked from the README: [X (1)](https://x.com/nate_mauer/status/2064920352845709419), [X (2)](https://x.com/nate_mauer/status/2065661878441750916), and [LinkedIn](https://www.linkedin.com/feed/update/urn:li:ugcPost:7471440639969132545) (drafts in [`docs/social-post.md`](./social-post.md)). |
|
| 17 |
+
| 5 | ≤ 10 ZeroGPU apps per user | ✅ n/a | Runs on cpu-basic (stub preview) or a dedicated T4 — no ZeroGPU dependency. |
|
| 18 |
+
| 6 | README frontmatter tags + short write-up of idea & tech | ✅ | Namespaced tags + the idea-and-tech write-up are in [`README.md`](../README.md). |
|
| 19 |
+
|
| 20 |
+
## Track — `track:backyard` (Backyard AI)
|
| 21 |
+
|
| 22 |
+
A specific real person: a busy parent whose kid's school and activity events are buried in a
|
| 23 |
+
noisy class group chat. They paste the chat (or a flyer screenshot) from their phone's browser
|
| 24 |
+
and get back events, a conflict check against their own calendar, and a ready-to-send reply —
|
| 25 |
+
reviewed before anything is saved, exported as a local `.ics` (Apple/Google Calendar one tap
|
| 26 |
+
away). Short pasted chats and screenshots are exactly the workload a small local model handles
|
| 27 |
+
well — an honest fit, not a stretch.
|
| 28 |
+
|
| 29 |
+
## Sponsor prize — `sponsor:modal` (Best Use of Modal)
|
| 30 |
+
|
| 31 |
+
Modal powered the **development** of the platform's model end-to-end:
|
| 32 |
+
|
| 33 |
+
- [`training/modal_train.py`](../training/modal_train.py) — full QLoRA fine-tune on serverless A100/H100s (dataset → train → GGUF export → HF publish), with persistent Volumes caching base weights and outputs across runs.
|
| 34 |
+
- [`training/modal_eval.py`](../training/modal_eval.py) / [`modal_quant_eval.py`](../training/modal_quant_eval.py) — the 60-example task eval served on llama.cpp inside Modal, including an on-volume quantization study (f16 / Q8_0 / Q4_K_M).
|
| 35 |
+
- [`training/gated_retrain.py`](../training/gated_retrain.py) — the eval-gated pipeline: train → staging upload → eval → promote **only if it beats the gate**. It rejected eight regressed models before the published one; every one of those runs was a Modal job.
|
| 36 |
+
|
| 37 |
+
## Sponsor prize — `sponsor:openbmb` (Best MiniCPM Build)
|
| 38 |
+
|
| 39 |
+
Clicking **Run the agents** invokes **OpenBMB MiniCPM** as the planner (`openbmb/MiniCPM5-1B-GGUF`;
|
| 40 |
+
the larger `MiniCPM4.1-8B` variant is a config switch) on a second local llama.cpp instance. It
|
| 41 |
+
drives this Space's own MCP tools (`extract_events` → `check_conflicts` → `make_ics`) as a visible
|
| 42 |
+
multi-step agent ([`server/orchestrator.py`](../server/orchestrator.py)) — MiniCPM is core to the
|
| 43 |
+
agent experience, not a garnish (a deterministic scripted plan is the fallback when the planner
|
| 44 |
+
isn't configured). Also the natural evidence for the judged **Best Agent** award.
|
| 45 |
+
|
| 46 |
+
## Achievement badges (self-declared, all claimed)
|
| 47 |
+
|
| 48 |
+
| Tag | Badge | Evidence |
|
| 49 |
+
|-----|-------|----------|
|
| 50 |
+
| `achievement:offgrid` | Off the Grid | All inference runs inside the Space via llama.cpp — no cloud AI APIs. The only optional outbound call is the user's own Google Calendar push. |
|
| 51 |
+
| `achievement:welltuned` | Well-Tuned | [`build-small-hackathon/gemma-4-cal-gguf`](https://huggingface.co/build-small-hackathon/gemma-4-cal-gguf) — our published QLoRA fine-tune of Gemma-4 E4B **is the model production serves**, shipped through the eval gate with the [honest scorecard public](./eval-roadmap.md). |
|
| 52 |
+
| `achievement:offbrand` | Off-Brand | Custom landing page, grouped nav, dark hero + carousel, elevated tool card, bespoke CSS/JS ([`ui/blocks.py`](../ui/blocks.py), [`static/app.css`](../static/app.css)) — far past the stock Gradio look. |
|
| 53 |
+
| `achievement:llama` | Llama Champion | The official `ghcr.io/ggml-org/llama.cpp` server image runs the GGUF + vision mmproj ([`Dockerfile`](../Dockerfile), [`scripts/start_space.sh`](../scripts/start_space.sh)). |
|
| 54 |
+
| `achievement:sharing` | Sharing is Caring | Redacted agent traces published to the public dataset [`ParetoOptimal/offgridschedula-traces`](https://huggingface.co/datasets/ParetoOptimal/offgridschedula-traces) — one-click from the Activity tab, or [`training/share_trace.py`](../training/share_trace.py). |
|
| 55 |
+
| `achievement:fieldnotes` | Field Notes | [`FIELD_NOTES.md`](../FIELD_NOTES.md) (build retrospective) + [`docs/blog-eval-gated-finetuning.md`](./blog-eval-gated-finetuning.md) (fine-tuning post-mortem) + the [published project blog](https://huggingface.co/blog/build-small-hackathon/offgridschedula) ([source](./blog-offgridschedula.md)). |
|
| 56 |
+
|
| 57 |
+
Sponsor prizes **not** claimed: OpenAI Codex (no Codex-attributed commits) and NVIDIA Nemotron
|
| 58 |
+
(different model family). The cash bonus badges (Off Brand, Tiny Titan, Best Demo, Best Agent,
|
| 59 |
+
Bonus Quest Champion, Judges' Wildcard) are judged across all submissions and take no tags.
|
| 60 |
+
|
| 61 |
+
## Status
|
| 62 |
+
|
| 63 |
+
All six hard rules are met — nothing outstanding:
|
| 64 |
+
|
| 65 |
+
- The Space is live in the hackathon org: [`build-small-hackathon/OffGridSchedula`](https://huggingface.co/spaces/build-small-hackathon/OffGridSchedula).
|
| 66 |
+
- The model is published at [`build-small-hackathon/gemma-4-cal-gguf`](https://huggingface.co/build-small-hackathon/gemma-4-cal-gguf) (the planner is [`openbmb/MiniCPM5-1B-GGUF`](https://huggingface.co/openbmb/MiniCPM5-1B-GGUF)).
|
| 67 |
+
- The [demo video](https://youtu.be/m-o0u9X3tI4) and the social posts (X + LinkedIn) are published and linked from the README.
|
| 68 |
+
- The write-up is live as a Hugging Face blog post: [build-small-hackathon/offgridschedula](https://huggingface.co/blog/build-small-hackathon/offgridschedula).
|
docs/eval-roadmap.md
ADDED
|
@@ -0,0 +1,337 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Eval roadmap — improving the scheduling fine-tune
|
| 2 |
+
|
| 3 |
+
How we measure and improve `ParetoOptimal/gemma-4-cal-gguf` (the fine-tuned
|
| 4 |
+
Gemma-4-31B that turns chat/images into a calendar `ActionPlan`). The eval is
|
| 5 |
+
**task-specific** — generic LLM benchmarks (MMLU etc.) don't apply.
|
| 6 |
+
|
| 7 |
+
Harness: `training/eval.py` (scores), `training/gen_eval.py` + `training/data/eval.jsonl`
|
| 8 |
+
(28 held-out examples, disjoint from `dataset.jsonl`), `training/modal_eval.py`
|
| 9 |
+
(serves the GGUF on the same `llama-server` the Space uses, then scores).
|
| 10 |
+
|
| 11 |
+
## Baseline scores (Q4_K_M, n=28, 2026-06-09)
|
| 12 |
+
|
| 13 |
+
| Metric | Score |
|
| 14 |
+
| --- | --- |
|
| 15 |
+
| schema validity | 1.00 |
|
| 16 |
+
| no-event accuracy | 1.00 |
|
| 17 |
+
| clarification recall | 1.00 |
|
| 18 |
+
| end-time exact | 1.00 |
|
| 19 |
+
| event precision | 0.85 |
|
| 20 |
+
| **event recall (start-exact)** | **0.77** |
|
| 21 |
+
| event F1 | 0.81 |
|
| 22 |
+
| title similarity | 0.87 |
|
| 23 |
+
|
| 24 |
+
Discipline (never invents events, always asks when ambiguous) is perfect; all 9
|
| 25 |
+
relative-date cases passed. The gap is **exact start datetime** on a few
|
| 26 |
+
explicit far-future dates (misses: `e02`, `e05`, `e06`, `e15`, one leg of `m02`).
|
| 27 |
+
|
| 28 |
+
## The 3 steps
|
| 29 |
+
|
| 30 |
+
### 1. Diagnose the 5 misses (cheap)
|
| 31 |
+
Enhance `eval.py` to dump the model's actual `start`/`title` for mismatched events,
|
| 32 |
+
then one re-run shows whether they're date-shift, time/AM-PM, or wrong-year errors —
|
| 33 |
+
which tells us exactly what training data to add. (~one A100 eval run; the GGUF is
|
| 34 |
+
cached in the Modal Volume, so it's fast.)
|
| 35 |
+
|
| 36 |
+
### 2. Baseline comparison (the "Well-Tuned" proof)
|
| 37 |
+
Run `modal run training/modal_eval.py --model-hf-repo unsloth/gemma-4-31B-it-GGUF`
|
| 38 |
+
to score **stock** Gemma-4-31B on the same set. If the fine-tune's discipline
|
| 39 |
+
(no-event 1.0, clarification 1.0) and datetime recall beat stock, that's concrete
|
| 40 |
+
evidence the fine-tune helps. (Separate ~18 GB model download + A100 time.)
|
| 41 |
+
|
| 42 |
+
### 3. Close the gap
|
| 43 |
+
Add ~15–20 explicit-date examples (especially next-month dates and times) to
|
| 44 |
+
`training/data/dataset.jsonl`, re-train on Modal (`training/modal_train.py`),
|
| 45 |
+
re-eval — and watch start-exact recall move.
|
| 46 |
+
|
| 47 |
+
## Results log
|
| 48 |
+
|
| 49 |
+
### Step 1 — diagnosis (2026-06-09)
|
| 50 |
+
The mismatch dump showed the misses are **not** a reasoning failure. 3 of 5 are the
|
| 51 |
+
same bug — a dropped year digit, **"206" instead of "2026"** — on next-month dates
|
| 52 |
+
(month/day/time all correct):
|
| 53 |
+
|
| 54 |
+
```
|
| 55 |
+
[e02] gold 2026-10-06T15:30 pred 206-10-06T15:30
|
| 56 |
+
[e05] gold 2026-10-01T08:15 pred 206-10-01T08:15
|
| 57 |
+
[e15] gold 2026-10-08T19:00 pred 206-10-08T19:00
|
| 58 |
+
[e06] gold 2026-09-28T09:00 pred [] (abstained)
|
| 59 |
+
[m02] Standup + Sprint demo pred Standup only (dropped 2nd leg)
|
| 60 |
+
```
|
| 61 |
+
|
| 62 |
+
Fix indicated: more far-future explicit-date examples reinforcing 4-digit years
|
| 63 |
+
(+ multi-event 2nd legs). → Step 3.
|
| 64 |
+
|
| 65 |
+
### Step 2 — baseline vs fine-tune (2026-06-09, n=28, Q4_K_M)
|
| 66 |
+
|
| 67 |
+
| Metric | Stock `gemma-4-31B-it-GGUF` | Fine-tune `gemma-4-cal-gguf` |
|
| 68 |
+
| --- | --- | --- |
|
| 69 |
+
| schema validity | 1.00 | 1.00 |
|
| 70 |
+
| event precision | **1.00** | 0.85 |
|
| 71 |
+
| start-exact recall | **0.955** | 0.773 |
|
| 72 |
+
| event F1 | **0.977** | 0.81 |
|
| 73 |
+
| end-exact | 1.00 | 1.00 |
|
| 74 |
+
| no-event accuracy | 1.00 | 1.00 |
|
| 75 |
+
| clarification recall | 0.75 | **1.00** |
|
| 76 |
+
|
| 77 |
+
**Honest read:** stock Gemma-4-31B is already strong at this extraction and *beats*
|
| 78 |
+
the current fine-tune on datetime recall — the "206" bug is a fine-tune regression.
|
| 79 |
+
The fine-tune's only clear win is **clarification discipline** (asks when a thread is
|
| 80 |
+
"date TBD"; stock missed `q04`). As-is, the fine-tune is **not** justified on
|
| 81 |
+
extraction. Step 3 must fix the year regression and clear baseline's 0.955 recall
|
| 82 |
+
while keeping clarification at 1.00 — otherwise the better play is stock + the
|
| 83 |
+
fine-tune's clarification behavior via prompting.
|
| 84 |
+
|
| 85 |
+
### Step 3 — after gap-closing retrain (2026-06-09) — REGRESSED
|
| 86 |
+
Dataset grown 69 → 87 (+18 Oct–Dec 2026 explicit-date examples, disjoint from eval),
|
| 87 |
+
same 2-epoch recipe, re-quantized to Q4_K_M and republished. Re-eval (n=28):
|
| 88 |
+
|
| 89 |
+
| Metric | Stock 31B | Fine-tune v1 (69) | **Fine-tune v2 (87, retrained)** |
|
| 90 |
+
| --- | --- | --- | --- |
|
| 91 |
+
| schema validity | 1.00 | 1.00 | **0.75** |
|
| 92 |
+
| event precision | 1.00 | 0.85 | **0.476** |
|
| 93 |
+
| start-exact recall | 0.955 | 0.773 | **0.455** |
|
| 94 |
+
| event F1 | 0.977 | 0.81 | **0.465** |
|
| 95 |
+
| end-exact | 1.00 | 1.00 | 1.00 |
|
| 96 |
+
| no-event accuracy | 1.00 | 1.00 | 1.00 |
|
| 97 |
+
| clarification recall | 0.75 | 1.00 | **0.75** |
|
| 98 |
+
|
| 99 |
+
**The naive retrain made it worse, not better.** New failure modes: unparseable/empty
|
| 100 |
+
JSON (validity 1.0→0.75), duplicate events, hallucinated "Drive to …" events,
|
| 101 |
+
transposed/garbage years (`2062`, `2062-15:00:00`), and previously-passing relative
|
| 102 |
+
dates now empty. Cause: overfitting — 18 of 87 examples were near-identical far-future
|
| 103 |
+
templates, biasing a tiny dataset and degrading general formatting/extraction.
|
| 104 |
+
|
| 105 |
+
## Conclusions & recommendation
|
| 106 |
+
|
| 107 |
+
1. **Stock Gemma-4-31B is already strong** at this extraction (F1 0.98). The only
|
| 108 |
+
thing fine-tuning reliably *added* was clarification discipline (v1: 1.00 vs stock
|
| 109 |
+
0.75) — and even that was lost in v2.
|
| 110 |
+
2. **Tiny-dataset SFT is fragile here.** v1 (69 ex) underperformed stock on dates;
|
| 111 |
+
v2 (87 ex) regressed hard. More data of the *same shape* hurt.
|
| 112 |
+
3. **Recommended path** (pick one):
|
| 113 |
+
- **Ship stock + prompt for clarification** — simplest; recover the one real win
|
| 114 |
+
without the regressions. (Lowest risk.)
|
| 115 |
+
- **If keeping a fine-tune:** rebuild the dataset much larger and *diverse* (not
|
| 116 |
+
template-heavy), drop to ~1 epoch with regularization, and **gate every retrain
|
| 117 |
+
on this eval** (only publish if it beats the current best). Consider a higher
|
| 118 |
+
quant (Q5/Q6) to rule out the `"206"`/`2062` digit corruption being quant-driven.
|
| 119 |
+
4. **Action — revert the live model.** v2 (worse) overwrote v1 in
|
| 120 |
+
`ParetoOptimal/gemma-4-cal-gguf`. Restore v1 (the better fine-tune) or point the
|
| 121 |
+
Space back at stock `unsloth/gemma-4-31B-it-GGUF` until a fine-tune *beats* the
|
| 122 |
+
eval baseline.
|
| 123 |
+
|
| 124 |
+
**Bottom line: the eval did its job — it caught a regression before it reached users,
|
| 125 |
+
and it says the current fine-tune is not yet worth shipping over stock.**
|
| 126 |
+
|
| 127 |
+
## Follow-up (2026-06-09)
|
| 128 |
+
|
| 129 |
+
### Live model restored to v1
|
| 130 |
+
v2 (regressed) was rolled back: `gemma-cal-Q4_K_M.gguf` in the repo was restored to the
|
| 131 |
+
v1 LFS object via a server-side `CommitOperationCopy` (no transfer, no GPU). Production
|
| 132 |
+
serves the better v1 again.
|
| 133 |
+
|
| 134 |
+
### Dataset rebuilt larger + more diverse (69 → 122)
|
| 135 |
+
Added a diversity batch (`gen_new_seeds.MORE_SEEDS3`): varied date/time formats
|
| 136 |
+
(`10/15`, "the 3rd", "half past 7", "0900", "noon", "midnight"), reschedules,
|
| 137 |
+
cancellations, recurring, all-day, deadlines (EOD/midnight), past & hypothetical
|
| 138 |
+
(must NOT schedule), richer no-event & clarify, and varied image sources (ticket,
|
| 139 |
+
invite screenshot, notice). Goal: counter the template-heavy skew that overfit v2.
|
| 140 |
+
Verified valid + disjoint from `eval.jsonl`.
|
| 141 |
+
|
| 142 |
+
### Eval-gating is now the publishing process
|
| 143 |
+
**No retrain publishes unless it beats the eval.** `training/gated_retrain.py`:
|
| 144 |
+
1. retrain on Modal → upload to a **staging** filename (`gemma-cal-staging-Q4_K_M.gguf`)
|
| 145 |
+
in the repo (production file untouched; mmproj skipped — `--skip-mmproj`);
|
| 146 |
+
2. eval the staging file (`modal_eval.py --model-file …`);
|
| 147 |
+
3. gate: `schema_validity ≥ 0.95`, `event_f1 ≥ 0.81`, `start-exact recall ≥ 0.773`
|
| 148 |
+
(defaults = the current best, v1) — tune via `--gate-f1/--gate-recall`;
|
| 149 |
+
4. **PASS** → promote staging → production via server-side `CommitOperationCopy` (free);
|
| 150 |
+
**FAIL** → delete staging, production unchanged.
|
| 151 |
+
|
| 152 |
+
Run: `python training/gated_retrain.py [--epochs 1 --gate-f1 … --gate-recall …]`.
|
| 153 |
+
|
| 154 |
+
### Step 4 — first eval-gated retrain (122 ex, 1 epoch) — GATE FAILED ✅ (protected prod)
|
| 155 |
+
The retrain scored **worse** than every prior version and the gate refused to publish:
|
| 156 |
+
|
| 157 |
+
| Metric | Stock | v1 (live) | v3 staging (122, 1ep) |
|
| 158 |
+
| --- | --- | --- | --- |
|
| 159 |
+
| schema validity | 1.00 | 1.00 | **0.46** |
|
| 160 |
+
| event F1 | 0.977 | 0.81 | **0.214** |
|
| 161 |
+
| start-exact recall | 0.955 | 0.773 | **0.136** |
|
| 162 |
+
| no-event accuracy | 1.00 | 1.00 | 1.00 |
|
| 163 |
+
| clarification recall | 0.75 | 1.00 | 1.00 |
|
| 164 |
+
|
| 165 |
+
More than half of outputs were unparseable; extraction collapsed. **Gate: FAIL → staging deleted,
|
| 166 |
+
production unchanged (still v1).** The gate worked exactly as intended.
|
| 167 |
+
|
| 168 |
+
## Verdict (after 3 fine-tune attempts)
|
| 169 |
+
All three fine-tunes — v1 (69 ex / 2 ep), v2 (87 / 2 ep), v3 (122 / 1 ep) — **underperform
|
| 170 |
+
stock Gemma-4-31B**, and the larger runs broke JSON validity. Only the safety behaviors
|
| 171 |
+
(no-event, clarification) survive fine-tuning; extraction degrades. **QLoRA-on-31B-Q4 here
|
| 172 |
+
is fragile and not worth shipping over stock.** Recommended: serve **stock
|
| 173 |
+
`unsloth/gemma-4-31B-it-GGUF`** and recover the one fine-tune win (clarification) via the
|
| 174 |
+
prompt. Keep v1 as the published fine-tune for the "Well-Tuned" artifact, but don't route
|
| 175 |
+
production extraction through it. Revisit fine-tuning only with a substantially larger, more
|
| 176 |
+
varied dataset and a recipe that holds schema validity at 1.0 — gated, as now, on this eval.
|
| 177 |
+
|
| 178 |
+
## Step 5 — quantization-penalty test (2026-06-09): quant EXONERATED
|
| 179 |
+
Hypothesis: maybe Q4 quantization (the `"206"`/`2062` digit bug) was tanking the fine-tune.
|
| 180 |
+
Tested the SAME fine-tuned weights (`gemma-cal-f16.gguf`, v2/87-ex — best fp16 still on the
|
| 181 |
+
volume) at three precisions on the 28-example eval (`training/modal_quant_eval.py`):
|
| 182 |
+
|
| 183 |
+
| precision | schema validity | event F1 | start-exact recall |
|
| 184 |
+
| --- | --- | --- | --- |
|
| 185 |
+
| f16 (full) | 0.643 | 0.571 | 0.545 |
|
| 186 |
+
| Q8_0 | 0.679 | 0.565 | 0.591 |
|
| 187 |
+
| Q4_K_M | 0.75 | 0.465 | 0.455 |
|
| 188 |
+
| base (stock) | 1.00 | 0.977 | 0.955 |
|
| 189 |
+
|
| 190 |
+
**Quantization is not the cause.** At full fp16 the fine-tune still scores validity 0.64 / F1
|
| 191 |
+
0.57 — nowhere near base; validity is actually *lower* at f16 than Q4, so quant isn't breaking
|
| 192 |
+
the JSON. Precision buys only ~+0.1 F1/recall (Q4→Q8/f16), a fraction of the gap to base. The
|
| 193 |
+
degradation is the **SFT itself**, not the GGUF conversion. The planned follow-up (retrain at Q8 to beat base)
|
| 194 |
+
is **not pursued** — the gate would fail. (Caveat: v1's fp16 was overwritten, so this used v2;
|
| 195 |
+
a definitive v1 test needs a retrain, but the small quant lift makes a base-beating result
|
| 196 |
+
improbable.)
|
| 197 |
+
|
| 198 |
+
### Final recommendation
|
| 199 |
+
A higher quant won't make the fine-tune beat base, and an automation agent (e.g. `ml-intern`)
|
| 200 |
+
doesn't change the binding constraints (near-ceiling base; small data; SFT degrades
|
| 201 |
+
instruction-following). **Serve stock `unsloth/gemma-4-31B-it-GGUF`** and recover the
|
| 202 |
+
clarification behavior via the system prompt; keep v1 as the "Well-Tuned" artifact. Only
|
| 203 |
+
revisit fine-tuning with a substantially larger, real, diverse dataset + a validity-preserving
|
| 204 |
+
recipe (low LR, few steps), always gated on this eval.
|
| 205 |
+
|
| 206 |
+
## Real training data: SMCalFlow importer
|
| 207 |
+
`training/import_smcalflow.py` converts **SMCalFlow** (Microsoft Semantic Machines, **CC BY-SA
|
| 208 |
+
4.0**) calendar dialogues into our `ActionPlan` format. SMCalFlow encodes events as LISP
|
| 209 |
+
"dataflow" programs; the importer parses `CreatePreflightEventWrapper` turns, extracts
|
| 210 |
+
subject/start/location/attendees, and **resolves** date/time constructs (`Tomorrow`, `NextDOW`,
|
| 211 |
+
`MD`, `NumberPM`, `HourMinuteMilitary`, …) against a per-example reference `now` spread across
|
| 212 |
+
2026 — so relative dates become concrete, self-consistent targets (directly trains the failing
|
| 213 |
+
date/time skill, with varied 4-digit years). Conservative: only emits a row when a title AND an
|
| 214 |
+
explicit start time resolve (~7.5k usable turns from train+valid).
|
| 215 |
+
|
| 216 |
+
- Run: `python training/import_smcalflow.py --limit 2000 --heldout 200` → writes
|
| 217 |
+
`training/data/smcalflow_train.jsonl` (+ `…_heldout.jsonl`). **Both are git-ignored** (CC BY-SA
|
| 218 |
+
share-alike vs this repo's Apache-2.0 → we don't commit/redistribute the derived data; the
|
| 219 |
+
importer code is ours) and **disjoint from `eval.jsonl`**.
|
| 220 |
+
- `train_qlora.py` now trains on `dataset.jsonl` **+** `smcalflow_train.jsonl` (when present).
|
| 221 |
+
`gated_retrain.py` therefore trains on real data, and still **only publishes if it beats the
|
| 222 |
+
gate** — so a bigger-but-worse model can't reach production.
|
| 223 |
+
- Attribution (required by CC BY-SA): *Semantic Machines et al., "Task-Oriented Dialogue as
|
| 224 |
+
Dataflow Synthesis," TACL 2020.*
|
| 225 |
+
|
| 226 |
+
## Step 6 — eval-gated retrain on REAL data (2026-06-09): FAILED gate (worst yet)
|
| 227 |
+
Trained the 31B on **2,122 examples** (122 hand-authored + 2,000 real SMCalFlow), 1 epoch,
|
| 228 |
+
through `gated_retrain.py` with a beat-base gate (F1≥0.95, recall≥0.90). Result on the 28-ex eval:
|
| 229 |
+
|
| 230 |
+
| Metric | base | v1 (live) | real-data (2,122 ex) |
|
| 231 |
+
| --- | --- | --- | --- |
|
| 232 |
+
| schema validity | 1.00 | 1.00 | **0.107** |
|
| 233 |
+
| event F1 | 0.977 | 0.81 | **0.000** |
|
| 234 |
+
| start-exact recall | 0.955 | 0.773 | **0.000** |
|
| 235 |
+
|
| 236 |
+
~90% unparseable output, zero events extracted. **Gate FAIL → not promoted; production stays v1.**
|
| 237 |
+
|
| 238 |
+
### Verdict across 4 fine-tunes (now incl. real data)
|
| 239 |
+
Scores **monotonically worsen with more training/data**: v1 (69 synth, F1 0.81) → v2 (87, 0.465)
|
| 240 |
+
→ v3 (122, 0.214) → real (2,122, 0.0). This is no longer a *data* problem — **the SFT recipe
|
| 241 |
+
itself degrades the model**, and more data makes it worse. Most likely root cause to investigate
|
| 242 |
+
*if* fine-tuning is ever revisited: a **train/inference chat-template mismatch** — `train_qlora.py`
|
| 243 |
+
formats with Unsloth's `get_chat_template("gemma")` while `llama-server` serves with the GGUF's
|
| 244 |
+
own `--jinja` template; if these differ for Gemma-4, training optimizes a format the server never
|
| 245 |
+
uses, and the divergence compounds with more steps (exactly the monotonic decay seen). Other
|
| 246 |
+
suspects: LR too high (2e-4) / catastrophic forgetting on a near-ceiling base.
|
| 247 |
+
|
| 248 |
+
**Final, evidence-backed recommendation: serve stock `unsloth/gemma-4-31B-it-GGUF`** (best by far)
|
| 249 |
+
and recover clarification via the system prompt. Do NOT route production through any current
|
| 250 |
+
fine-tune. The eval-gate has now correctly rejected 2 bad retrains — keep it as the publish gate.
|
| 251 |
+
|
| 252 |
+
## Step 7 — recipe fix + raw-output probe (2026-06-09): training stack implicated, fine-tuning HALTED
|
| 253 |
+
Fixed the suspected train/serve chat-template mismatch (PR #54): Gemma-4's native
|
| 254 |
+
`chat_template.jinja` uses a NEW `<|turn>role … <turn|>` format (no `<start_of_turn>` at all),
|
| 255 |
+
while training forced unsloth's legacy "gemma" template. `train_qlora.py` now formats with the
|
| 256 |
+
tokenizer's native template (hard `<|turn>` assert), masks loss to the assistant turn, LR 5e-5.
|
| 257 |
+
Retrained on the 2,122-example set through the gate: **validity 0.0 — gate FAIL** (production
|
| 258 |
+
stays v1, third bad retrain rejected).
|
| 259 |
+
|
| 260 |
+
Diagnostics that pinpointed the cause:
|
| 261 |
+
- **GGUF template check (CPU, ~free):** our exported staging GGUF embeds the correct native
|
| 262 |
+
`<|turn>` template (16,934 chars, no `<start_of_turn>`) → train and serve formats are now
|
| 263 |
+
verifiably aligned. Template is exonerated as the remaining cause.
|
| 264 |
+
- **Raw-output probe (`/outputs/gemma-cal-staging-Q4_K_M.gguf`):** free generation emits pure
|
| 265 |
+
degenerate looping — `'Huddle — — — — — …'` to the token limit; constrained generation emits
|
| 266 |
+
512 tokens of nothing. **The weights are destroyed, not misformatted.**
|
| 267 |
+
|
| 268 |
+
With dataset (69→2,122), template (legacy/native), LR (2e-4/5e-5), and masking (on/off) all
|
| 269 |
+
varied, degradation always tracks training steps and ends in token-loop collapse. The remaining
|
| 270 |
+
common factor is **Unsloth's QLoRA path for Gemma-4-31B** (new architecture; training logs warn
|
| 271 |
+
`get_input_embeddings not auto-handled for Gemma4AudioModel`). **Fine-tuning is halted** until
|
| 272 |
+
that stack demonstrably works for this arch (or is replaced with plain transformers+PEFT).
|
| 273 |
+
|
| 274 |
+
## Step 8 — improve served evals via prompt (stock + targeted SYSTEM additions)
|
| 275 |
+
Base's only eval misses are prompt-fixable: m03 dropped the 2nd event of a multi-event thread;
|
| 276 |
+
q04 didn't ask clarification on a "TBD" plan. Added two surgical SYSTEM lines (list every
|
| 277 |
+
distinct event separately; ask via needs_clarification when day/time is TBD).
|
| 278 |
+
|
| 279 |
+
**Result: PERFECT SCORE — 1.0 on every metric (n=28, tp/fp/fn = 22/0/0).**
|
| 280 |
+
|
| 281 |
+
| Metric | base (old prompt) | **base + new prompt** |
|
| 282 |
+
| --- | --- | --- |
|
| 283 |
+
| schema validity | 1.00 | **1.00** |
|
| 284 |
+
| event precision | 1.00 | **1.00** |
|
| 285 |
+
| start-exact recall | 0.955 | **1.00** |
|
| 286 |
+
| event F1 | 0.977 | **1.00** |
|
| 287 |
+
| no-event accuracy | 1.00 | **1.00** |
|
| 288 |
+
| clarification recall | 0.75 | **1.00** |
|
| 289 |
+
|
| 290 |
+
Both misses fixed, nothing regressed. **This is the production configuration: stock
|
| 291 |
+
`unsloth/gemma-4-31B-it-GGUF` + the updated SYSTEM prompt.** (Set Space var
|
| 292 |
+
`MODEL_HF_REPO=unsloth/gemma-4-31B-it-GGUF`; the prompt ships with the app.) The "Well-Tuned"
|
| 293 |
+
artifact remains `ParetoOptimal/gemma-4-cal-gguf` (v1); any future fine-tune must beat THIS
|
| 294 |
+
1.0 baseline through the gate — i.e., match it and win on a harder, expanded eval set.
|
| 295 |
+
|
| 296 |
+
## Step 9 — the E4B edge-model campaign (2026-06-10)
|
| 297 |
+
Re-aimed fine-tuning where it has headroom: a **Gemma-4 E4B (~8B)** edge model that runs without a
|
| 298 |
+
paid A100, gated against **stock E4B**. Six gated runs, each fixing a diagnosed failure (the fixed
|
| 299 |
+
recipe trained cleanly every time — validity 1.0 throughout, confirming the Step-7 breakage was
|
| 300 |
+
specific to the 31B path):
|
| 301 |
+
|
| 302 |
+
| run | change | F1 | recall | clarify | eval |
|
| 303 |
+
| --- | --- | --- | --- | --- | --- |
|
| 304 |
+
| #1 | fixed recipe, 2,122 ex | 0.884 | 0.864 | 1.0 | n=28 |
|
| 305 |
+
| #2 | + weekday-in-prompt (+data regen) | 0.955 | 0.955 | 0.75 | n=28 |
|
| 306 |
+
| #3 | + next-DOW conflict filter (74 rows), 4× hand | **1.0** | **1.0** | 0.75 | n=28 |
|
| 307 |
+
| #4 | + TBD-clarify seeds, 8× hand | 0.93 | 0.909 | 1.0 | n=28 |
|
| 308 |
+
| #5 | clarify seeds, 4× hand | 0.93 | 0.909 | 1.0 | n=28 |
|
| 309 |
+
| — | **eval expanded 28→60** (50 events; jitter-resistant) | | | | |
|
| 310 |
+
| #6 | + Batch-7 seeds (next-DOW, "opens") | 0.97 | 0.96 | 1.0 | n=60 |
|
| 311 |
+
| stock E4B (weekday prompt) | | 0.97 | 0.96 | 1.0 | n=60 |
|
| 312 |
+
|
| 313 |
+
Run #6 vs stock is an **exact tie** (identical tp/fp/fn 48/1/2; both miss `e09`
|
| 314 |
+
"next Tuesday" — which resisted 7 explicit training seeds — and one "opens" case each).
|
| 315 |
+
Campaign side effects that improved the PRODUCT for every model: weekday-in-prompt, the
|
| 316 |
+
next-DOW convention cleanup, and the 60-example eval.
|
| 317 |
+
|
| 318 |
+
## Step 10 — bare-prompt (internalization) test: no decisive gap
|
| 319 |
+
Dropped the system prompt for both models (identical minimal user content, same JSON-schema
|
| 320 |
+
constraint; `modal_eval.py --minimal-prompt`), measuring internalized task knowledge:
|
| 321 |
+
|
| 322 |
+
| bare, n=60 | stock E4B | fine-tuned E4B |
|
| 323 |
+
| --- | --- | --- |
|
| 324 |
+
| schema validity | 0.967 | **1.0** |
|
| 325 |
+
| event F1 | **0.682** | 0.644 |
|
| 326 |
+
| start-exact recall | **0.60** | 0.56 |
|
| 327 |
+
| no-event accuracy | 0.70 | **0.80** |
|
| 328 |
+
| clarification recall | 0.50 | **0.625** |
|
| 329 |
+
|
| 330 |
+
Small trade-offs both ways, within noise. **Verdict: at this data scale (139 hand + 2,000
|
| 331 |
+
SMCalFlow) with QLoRA/1-epoch, the E4B fine-tune reaches PARITY with stock, not superiority** —
|
| 332 |
+
non-degraded, perfect validity everywhere, better bare-prompt discipline, slightly weaker bare
|
| 333 |
+
extraction. The strict-dominance gate therefore never auto-promoted it; the candidate GGUF
|
| 334 |
+
remains on the Modal volume (`/outputs/gemma-cal-e4b-staging-Q4_K_M.gguf`). Publishing it as
|
| 335 |
+
the project's edge model at parity is a **product decision** (zero quality cost; production
|
| 336 |
+
would serve our own fine-tune, fulfilling "Well-Tuned") — deliberately left to the owner, not
|
| 337 |
+
the gate.
|
docs/gcal-verify.md
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Verifying the Google Calendar connection end-to-end
|
| 2 |
+
|
| 3 |
+
> **Private-Space gotcha (the OAuth popup 404):** while the Space is private,
|
| 4 |
+
> `*.hf.space` URLs answer with Hugging Face's 404 page for any request that lacks
|
| 5 |
+
> the signed access cookie. The app viewed EMBEDDED on huggingface.co
|
| 6 |
+
> authenticates its iframe with a short-lived signed URL, but the OAuth POPUP
|
| 7 |
+
> is a separate top-level window — when the subdomain cookie is missing or
|
| 8 |
+
> expired, **Connect opens a 404**. Fix: always open
|
| 9 |
+
> `https://paretooptimal-offgridschedula.hf.space` directly in its own tab
|
| 10 |
+
> (the redirect re-mints the cookie for the whole subdomain), then connect
|
| 11 |
+
> from there. Making the Space public would remove this entirely, at the cost
|
| 12 |
+
> of the deployed source tree becoming publicly browsable — deliberately NOT
|
| 13 |
+
> done (2026-06-12).
|
| 14 |
+
|
| 15 |
+
Two layers of verification exist:
|
| 16 |
+
|
| 17 |
+
1. **In-app, automatic** — on every page load, `wireGcal()` round-trips the stored
|
| 18 |
+
token to `POST /oauth2/check`, which makes one real (scope-compatible) Google
|
| 19 |
+
API call. The Step 2a row and the export-bar badge upgrade from
|
| 20 |
+
"✓ connected" to **"✓ connected · verified"** when Google answers; a
|
| 21 |
+
*definitive* rejection (revoked/invalid token) clears the stored token and
|
| 22 |
+
flips everything to "not connected". Transient problems (OAuth env unset,
|
| 23 |
+
network down, `SERVE=gradio` mode where FastAPI routes aren't served) never
|
| 24 |
+
destroy the token — the UI just stays at the local shape-check state.
|
| 25 |
+
2. **Scripted E2E** — `scripts/verify_gcal_e2e.py` proves the whole chain:
|
| 26 |
+
agent-extracted event → real push → API readback (title/location/start/
|
| 27 |
+
reminder) → cleanup.
|
| 28 |
+
|
| 29 |
+
## Browser loop (manual)
|
| 30 |
+
|
| 31 |
+
1. Run locally with OAuth configured:
|
| 32 |
+
```
|
| 33 |
+
rem Use a Google Cloud OAuth "Web application" client (cmd has no inline comments)
set GOOGLE_OAUTH_CLIENT_ID=...
|
| 34 |
+
set GOOGLE_OAUTH_CLIENT_SECRET=...
|
| 35 |
+
set SERVE=uvicorn
|
| 36 |
+
python app.py
|
| 37 |
+
```
|
| 38 |
+
2. Open http://localhost:7860, switch to **☁️ Online**, open Step 2a
|
| 39 |
+
**"🔗 Connect your calendar"** → click **Connect** on the Google row →
|
| 40 |
+
consent in the popup. The row flips to "✓ connected", then upgrades to
|
| 41 |
+
**"✓ connected · verified"** within ~1s (the `/oauth2/check` round-trip).
|
| 42 |
+
3. Paste the appointment text (the CANON sample in `tests/test_agent.py`) →
|
| 43 |
+
**Run the agents** → the export toolbar appears with the badge
|
| 44 |
+
**"Google: ✓ connected · verified"** next to the three buttons.
|
| 45 |
+
4. Click **Add to Google Calendar** → the status line shows the created event
|
| 46 |
+
link; open it: *Mon Jun 22 2026, 10:15–11:00, 112A West 72nd Street,
|
| 47 |
+
New York, NY 10023*, 60-minute reminder.
|
| 48 |
+
5. **Reload the page** → still verified, no re-prompt (the acceptance test for
|
| 49 |
+
"never asks again").
|
| 50 |
+
|
| 51 |
+
### Negative paths
|
| 52 |
+
|
| 53 |
+
- Revoke access at https://myaccount.google.com/permissions → reload → the
|
| 54 |
+
check is definitive: token is cleared, every surface shows "not connected".
|
| 55 |
+
- Unset the OAuth env vars (or kill the network) → reload → stays at plain
|
| 56 |
+
"✓ connected" — transient failures never log the user out.
|
| 57 |
+
- Click **disconnect** in Step 2a → flips everywhere instantly.
|
| 58 |
+
|
| 59 |
+
## Scripted loop
|
| 60 |
+
|
| 61 |
+
One-time: after connecting in the browser, copy the `gcal_token` value from
|
| 62 |
+
DevTools → Application → Local Storage into a file (e.g. `tok.json` — it's
|
| 63 |
+
gitignored territory; don't commit it).
|
| 64 |
+
|
| 65 |
+
```
|
| 66 |
+
python scripts/verify_gcal_e2e.py --token-file tok.json --check-only # liveness only
|
| 67 |
+
python scripts/verify_gcal_e2e.py --token-file tok.json # full E2E
|
| 68 |
+
```
|
| 69 |
+
|
| 70 |
+
The full run pushes the CANON event with a nonce in the title (`[e2e-xxxxxx]`),
|
| 71 |
+
reads it back through the API, asserts summary/location/start-instant/reminder,
|
| 72 |
+
and deletes it (use `--keep` to inspect it in the calendar first). Exit code 0
|
| 73 |
+
= all checks passed.
|
docs/hermes.md
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# The Hermes "grows-with-you" brain
|
| 2 |
+
|
| 3 |
+
The agent's reasoning is pluggable through `INFERENCE_BASE_URL` (see `server/model.py`). Point it at a
|
| 4 |
+
**NousResearch Hermes** model served OpenAI-compatible and the whole pipeline uses it — **no code
|
| 5 |
+
change**. Hermes is a tool-calling Llama/Qwen fine-tune, a good fit for the autonomous daemon.
|
| 6 |
+
|
| 7 |
+
## Serve Hermes locally (llama.cpp → "Llama Champion")
|
| 8 |
+
|
| 9 |
+
```bash
|
| 10 |
+
# Hermes 3 Llama 3.1 8B (Q4_K_M) runs comfortably on a Mac with Metal.
|
| 11 |
+
llama-server -m ~/models/Hermes-3-Llama-3.1-8B.Q4_K_M.gguf \
|
| 12 |
+
--host 127.0.0.1 --port 8080 --ctx-size 8192 --jinja # --jinja = tool-calling template
|
| 13 |
+
```
|
| 14 |
+
|
| 15 |
+
Point the backend at it:
|
| 16 |
+
|
| 17 |
+
```bash
|
| 18 |
+
export INFERENCE_BASE_URL="http://127.0.0.1:8080/v1"
|
| 19 |
+
export INFERENCE_MODEL="hermes"
|
| 20 |
+
export USE_STUB_EXTRACTOR=0
|
| 21 |
+
python app.py
|
| 22 |
+
```
|
| 23 |
+
|
| 24 |
+
`server/model.py` routes `complete_json` / `stream_complete_json` to the remote server when
|
| 25 |
+
`INFERENCE_BASE_URL` is set (`_remote_*`), still grammar-constraining the output to the ActionPlan
|
| 26 |
+
schema. (Ollama or vLLM also work — any OpenAI-compatible endpoint.)
|
| 27 |
+
|
| 28 |
+
## "Grows with you" — the memory (`server/memory.py`)
|
| 29 |
+
|
| 30 |
+
Durable facts/preferences personalize every extraction; they're injected into the prompt via
|
| 31 |
+
`recall()` (`server/agent.py::build_messages`) and shown/edited in the dashboard **Memory** tab.
|
| 32 |
+
|
| 33 |
+
- **Learns automatically:** recurring event attendees become `contact` facts (`observe_plan`).
|
| 34 |
+
- **You can teach it:** add facts in the Memory tab — `"Dana is the soccer coach"`,
|
| 35 |
+
`"you decline Mondays"`, `"default location is Lincoln Elementary"`.
|
| 36 |
+
- **Hermes can update it itself:** set `HERMES_TOOLS=1` and the remote path advertises a `remember`
|
| 37 |
+
tool (`server/tools.py`); the model calls it mid-run to save durable facts, then returns the
|
| 38 |
+
ActionPlan. The tool-call loop is in `server/model.py::_remote_complete_json` (round-trip logic +
|
| 39 |
+
tests in `server/tools.py` / `tests/test_tools.py`). Requires a tool-calling server (`--jinja`).
|
| 40 |
+
- Stored at `MEMORY_PATH` (set it to a real path like `~/.offgrid/agent_memory.json`, not `/tmp`).
|
| 41 |
+
|
| 42 |
+
Over time the model resolves nicknames, applies your preferences to conflicts, and needs fewer
|
| 43 |
+
clarifications — the "grows with you" behavior.
|
| 44 |
+
|
| 45 |
+
## Where it runs
|
| 46 |
+
The Hermes brain + memory live wherever the autonomous backend runs — the **Mac daemon**
|
| 47 |
+
(`scripts/setup_mac.sh`), an Android phone via Termux (`INFERENCE_BASE_URL` → a local `llama-server`),
|
| 48 |
+
or a cloud box (if you capture from email/Slack/Telegram instead of iMessage).
|
docs/on-device.md
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Running on a cell phone (on-device or thin-client)
|
| 2 |
+
|
| 3 |
+
"Runs on a cell phone" can mean two things; the app supports both via one env switch.
|
| 4 |
+
|
| 5 |
+
## The inference switch
|
| 6 |
+
|
| 7 |
+
`server/model.py` reads `INFERENCE_BASE_URL`:
|
| 8 |
+
|
| 9 |
+
- **Unset (default):** the GGUF is loaded in-process via `llama-cpp-python` (the Space / a laptop).
|
| 10 |
+
- **Set:** generation is delegated to a remote **OpenAI-compatible / llama.cpp server** at that URL.
|
| 11 |
+
Same agent code, different inference location.
|
| 12 |
+
|
| 13 |
+
```bash
|
| 14 |
+
export INFERENCE_BASE_URL="http://127.0.0.1:8080/v1" # a llama-server on the phone
|
| 15 |
+
export INFERENCE_API_KEY="..." # optional
|
| 16 |
+
export INFERENCE_MODEL="gemma-e4b" # optional label
|
| 17 |
+
```
|
| 18 |
+
|
| 19 |
+
So "on the phone" = run a `llama-server` **on the device** and point the agent at `127.0.0.1`.
|
| 20 |
+
|
| 21 |
+
## On-device model profile (Gemma E4B edge)
|
| 22 |
+
|
| 23 |
+
A 31B Q4 GGUF (~18–20 GB) needs a GPU and will not run on a phone. Use the lightweight **Gemma E4B**
|
| 24 |
+
edge variant (see [PLAN.md](../PLAN.md) and the README *Accuracy upgrade* section), with a small
|
| 25 |
+
context window:
|
| 26 |
+
|
| 27 |
+
```bash
|
| 28 |
+
export MODEL_REPO="<your-or-community gemma E4B GGUF repo>"
|
| 29 |
+
export MODEL_FILE="<gemma-e4b-*-Q4_K_M.gguf>"
|
| 30 |
+
export N_CTX=4096 # keep the KV cache small on a phone
|
| 31 |
+
export N_GPU_LAYERS=0 # CPU; on a Mac use Metal layers instead
|
| 32 |
+
```
|
| 33 |
+
|
| 34 |
+
## Android (Termux) — genuinely on-device
|
| 35 |
+
|
| 36 |
+
```bash
|
| 37 |
+
pkg install python git cmake clang
|
| 38 |
+
git clone <this repo> && cd imessage-calendar-agent
|
| 39 |
+
pip install -r requirements-ci.txt llama-cpp-python # CPU build
|
| 40 |
+
# Option 1: run the whole app (UI + /agent) on the phone
|
| 41 |
+
USE_STUB_EXTRACTOR=0 python app.py # http://127.0.0.1:7860
|
| 42 |
+
# Option 2: run only a llama-server and point a client/app at it
|
| 43 |
+
# llama-server -m <gemma-e4b.gguf> --port 8080
|
| 44 |
+
# then set INFERENCE_BASE_URL=http://127.0.0.1:8080/v1
|
| 45 |
+
```
|
| 46 |
+
|
| 47 |
+
Expect multi-second latency per request on a phone CPU — keep `N_CTX` small and the conversation threads you send short.
|
| 48 |
+
|
| 49 |
+
## iOS — the honest limit
|
| 50 |
+
|
| 51 |
+
iOS does **not** allow background message access or a persistent background LLM server. You cannot
|
| 52 |
+
run an autonomous on-device agent for iMessage on an iPhone. The supported iOS path is the
|
| 53 |
+
foreground **Shortcut** in [automations.md](./automations.md), optionally pointing at a remote
|
| 54 |
+
`INFERENCE_BASE_URL` for the model.
|
requirements-ci.txt
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Minimal deps for CI / local testing. The app never imports llama_cpp or the
|
| 2 |
+
# Google libs at module load (both are lazy), and tests run in stub mode
|
| 3 |
+
# (USE_STUB_EXTRACTOR=1), so we deliberately exclude:
|
| 4 |
+
# - llama-cpp-python (slow source build on CI; real inference is tested on the Space)
|
| 5 |
+
# - google-api-python-client / google-auth-* (only used by the optional GCal push)
|
| 6 |
+
gradio>=6.0
|
| 7 |
+
pandas>=2.0
|
| 8 |
+
fastapi>=0.115
|
| 9 |
+
uvicorn[standard]>=0.30
|
| 10 |
+
pydantic>=2.7
|
| 11 |
+
python-dotenv>=1.0
|
| 12 |
+
requests>=2.32
|
| 13 |
+
huggingface_hub>=0.24
|
| 14 |
+
icalendar>=5.0
|
| 15 |
+
python-dateutil>=2.9
|
| 16 |
+
pytest>=8.0
|
requirements-docker.txt
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Runtime deps for the dedicated-GPU Docker Space.
|
| 2 |
+
# Excludes: llama-cpp-python (compiled WITH CUDA in the Dockerfile), the cu124
|
| 3 |
+
# prebuilt index + nvidia-*-cu12 libs (the CUDA devel base provides the toolkit),
|
| 4 |
+
# and `spaces` (ZeroGPU only — its absence makes server/model.py's gpu decorator a
|
| 5 |
+
# no-op so llama.cpp runs directly on the always-attached dedicated GPU).
|
| 6 |
+
# [mcp] extra exposes named Gradio API endpoints as Model Context Protocol tools
|
| 7 |
+
# — same MCP surface as the Gradio-SDK Space (server/mcp_tools.py, app.py).
|
| 8 |
+
gradio[mcp]>=6.0
|
| 9 |
+
pandas>=2.0
|
| 10 |
+
fastapi>=0.115
|
| 11 |
+
uvicorn[standard]>=0.30
|
| 12 |
+
pydantic>=2.7
|
| 13 |
+
python-dotenv>=1.0
|
| 14 |
+
huggingface_hub>=0.24
|
| 15 |
+
requests>=2.32
|
| 16 |
+
icalendar>=5.0
|
| 17 |
+
python-dateutil>=2.9
|
| 18 |
+
pillow-heif>=0.16
|
| 19 |
+
google-api-python-client>=2.130
|
| 20 |
+
google-auth-oauthlib>=1.2
|
| 21 |
+
google-auth-httplib2>=0.2
|
| 22 |
+
# Agent tab: smolagents drives this Space's own MCP tools with a MiniCPM
|
| 23 |
+
# planner (lazy-imported; the stub/scripted path never touches it).
|
| 24 |
+
smolagents[mcp,openai]==1.26.0
|
requirements.txt
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# --- Space runtime (Gradio SDK + ZeroGPU) ---
|
| 2 |
+
# [mcp] extra exposes named Gradio API endpoints as Model Context Protocol tools
|
| 3 |
+
# so any MCP-aware agent (Claude Desktop, Cursor, etc.) can call this Space's
|
| 4 |
+
# extract_events / make_ics / check_conflicts — see server/mcp_tools.py + app.py.
|
| 5 |
+
gradio[mcp]>=6.0
|
| 6 |
+
pandas>=2.0 # used directly by the Activity dashboard chart
|
| 7 |
+
fastapi>=0.115
|
| 8 |
+
uvicorn[standard]>=0.30
|
| 9 |
+
pydantic>=2.7
|
| 10 |
+
python-dotenv>=1.0
|
| 11 |
+
huggingface_hub>=0.24
|
| 12 |
+
requests>=2.32
|
| 13 |
+
spaces>=0.30 # ZeroGPU: @spaces.GPU dynamic GPU allocation
|
| 14 |
+
|
| 15 |
+
# --- llama.cpp inference (Llama Champion), GPU build ---
|
| 16 |
+
# CUDA prebuilt wheel so layers offload to the ZeroGPU GPU (n_gpu_layers=-1).
|
| 17 |
+
# NOTE: the RTX Pro 6000 Blackwell is sm_120 — if the cu124 wheel lacks Blackwell
|
| 18 |
+
# kernels, build from source against CUDA 12.8:
|
| 19 |
+
# CMAKE_ARGS="-DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=120" pip install llama-cpp-python
|
| 20 |
+
--extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu124
|
| 21 |
+
# Pin to the newest version with a prebuilt cu124 cp310 wheel. With >= pip grabs a
|
| 22 |
+
# newer PyPI *sdist* and compiles from source (slow, and mismatches the base's CUDA).
|
| 23 |
+
llama-cpp-python==0.3.19
|
| 24 |
+
# CUDA userspace libs the prebuilt wheel dlopens (ZeroGPU env lacks libcudart.so.12).
|
| 25 |
+
# server/model.py::_preload_cuda_libs loads these RTLD_GLOBAL before importing llama_cpp.
|
| 26 |
+
nvidia-cuda-runtime-cu12; platform_system == "Linux"
|
| 27 |
+
nvidia-cublas-cu12; platform_system == "Linux"
|
| 28 |
+
nvidia-cuda-nvrtc-cu12; platform_system == "Linux"
|
| 29 |
+
|
| 30 |
+
# --- calendar output ---
|
| 31 |
+
icalendar>=5.0
|
| 32 |
+
python-dateutil>=2.9
|
| 33 |
+
|
| 34 |
+
# --- vision input: transcode iPhone HEIC attachments to JPEG ---
|
| 35 |
+
pillow-heif>=0.16
|
| 36 |
+
|
| 37 |
+
# --- optional Google Calendar bonus ---
|
| 38 |
+
google-api-python-client>=2.130
|
| 39 |
+
google-auth-oauthlib>=1.2
|
| 40 |
+
google-auth-httplib2>=0.2
|
| 41 |
+
|
| 42 |
+
# --- Agent tab: smolagents drives the Space's own MCP tools with a MiniCPM
|
| 43 |
+
# planner (lazy-imported; stub/scripted path and CI never touch it). ---
|
| 44 |
+
smolagents[mcp,openai]==1.26.0
|
| 45 |
+
|
| 46 |
+
# NOTE: training deps (unsloth, trl, transformers, bitsandbytes) live in
|
| 47 |
+
# training/requirements-train.txt — they are NOT installed in the Space.
|
scripts/setup_mac.sh
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
|
| 2 |
+
# One-time setup for the Mac always-on daemon (Scenario 1):
|
| 3 |
+
# Hermes (llama-server) + backend (autonomous) + collector, as launchd jobs.
|
| 4 |
+
#
|
| 5 |
+
# Prereqs you provide:
|
| 6 |
+
# - llama.cpp built (llama-server on PATH, or pass LLAMA_SERVER=/path/to/llama-server)
|
| 7 |
+
# - a Hermes GGUF (e.g. Hermes-3-Llama-3.1-8B Q4_K_M) -> pass MODEL_GGUF=/path
|
| 8 |
+
# - Google OAuth: credentials.json (+ token.json after first auth) in the repo dir
|
| 9 |
+
# - Full Disk Access for the python binary (the script prints how)
|
| 10 |
+
#
|
| 11 |
+
# Usage:
|
| 12 |
+
# INGEST_TOKEN=... MODEL_GGUF=~/models/hermes-3-8b-q4.gguf ./scripts/setup_mac.sh
|
| 13 |
+
set -euo pipefail
|
| 14 |
+
|
| 15 |
+
REPO="$(cd "$(dirname "$0")/.." && pwd)"
|
| 16 |
+
HOME_DIR="$HOME"
|
| 17 |
+
PYTHON="${PYTHON:-$(command -v python3)}"
|
| 18 |
+
LLAMA_SERVER="${LLAMA_SERVER:-$(command -v llama-server || true)}"
|
| 19 |
+
MODEL_GGUF="${MODEL_GGUF:?set MODEL_GGUF=/path/to/hermes.gguf}"
|
| 20 |
+
INGEST_TOKEN="${INGEST_TOKEN:?set INGEST_TOKEN=... (same value you use elsewhere)}"
|
| 21 |
+
LA="$HOME_DIR/Library/LaunchAgents"
|
| 22 |
+
|
| 23 |
+
[ -n "$LLAMA_SERVER" ] || { echo "llama-server not found; set LLAMA_SERVER=/path"; exit 1; }
|
| 24 |
+
mkdir -p "$LA" "$HOME_DIR/.offgrid" "$HOME_DIR/Library/Logs"
|
| 25 |
+
|
| 26 |
+
# Escape one value for substitution into a launchd plist template via sed.
# Step 1 XML-escapes &, <, > (assumes the templates are XML plists — confirm).
# Step 2 backslash-escapes the characters that are special in a sed replacement
# that uses "|" as its delimiter: backslash, "&" (whole match) and "|".
_plist_value() {
  printf '%s' "$1" \
    | sed -e 's/&/\&amp;/g' -e 's/</\&lt;/g' -e 's/>/\&gt;/g' \
          -e 's/[\\&|]/\\&/g'
}

# Render deploy/launchd/<name> into ~/Library/LaunchAgents by filling in the
# __PLACEHOLDER__ tokens, then (re)load the job with launchctl.
#   $1  plist file name, e.g. com.offgrid.backend.plist
# Reads PYTHON, REPO, HOME_DIR, LLAMA_SERVER, MODEL_GGUF, INGEST_TOKEN and LA
# from the enclosing script.
install_plist() {
  local name="$1"
  # Every value goes through _plist_value so a token or path containing
  # "&", "|" or "\" cannot break the sed expression or the generated plist.
  sed -e "s|__PYTHON__|$(_plist_value "$PYTHON")|g" \
      -e "s|__REPO__|$(_plist_value "$REPO")|g" \
      -e "s|__HOME__|$(_plist_value "$HOME_DIR")|g" \
      -e "s|__LLAMA_SERVER__|$(_plist_value "$LLAMA_SERVER")|g" \
      -e "s|__MODEL_GGUF__|$(_plist_value "$MODEL_GGUF")|g" \
      -e "s|__INGEST_TOKEN__|$(_plist_value "$INGEST_TOKEN")|g" \
      "$REPO/deploy/launchd/$name" > "$LA/$name"
  # Unload first so re-running setup picks up the new plist; a job that is not
  # loaded yet is not an error.
  launchctl unload "$LA/$name" 2>/dev/null || true
  launchctl load "$LA/$name"
  echo "loaded $name"
}
|
| 39 |
+
|
| 40 |
+
# Runtime deps (no GPU model needed on Mac). requirements-ci.txt deliberately
# leaves out the Google client libraries, so install them explicitly here.
# NOTE(review): this assumes the daemon pushes to Google Calendar, as the OAuth
# credentials listed in the prereqs suggest — confirm. Version floors match
# requirements.txt.
"$PYTHON" -m pip install -q -r "$REPO/requirements-ci.txt" \
  "google-api-python-client>=2.130" \
  "google-auth-oauthlib>=1.2" \
  "google-auth-httplib2>=0.2"
|
| 41 |
+
|
| 42 |
+
install_plist com.offgrid.hermes.plist
|
| 43 |
+
install_plist com.offgrid.backend.plist
|
| 44 |
+
install_plist com.offgrid.collector.plist
|
| 45 |
+
|
| 46 |
+
cat <<EOF
|
| 47 |
+
|
| 48 |
+
Done. Three launchd jobs are running (and restart on reboot):
|
| 49 |
+
com.offgrid.hermes -> llama-server (Hermes) on :8080
|
| 50 |
+
com.offgrid.backend -> Gradio UI + /agent + /ingest on :7860 (AUTONOMOUS, Hermes brain)
|
| 51 |
+
com.offgrid.collector -> reads chat.db -> /ingest
|
| 52 |
+
|
| 53 |
+
ONE MANUAL STEP: grant Full Disk Access to the python binary so the collector can read chat.db:
|
| 54 |
+
System Settings > Privacy & Security > Full Disk Access > + -> $PYTHON
|
| 55 |
+
Then: launchctl kickstart -k gui/\$(id -u)/com.offgrid.collector
|
| 56 |
+
|
| 57 |
+
Dashboard: http://127.0.0.1:7860 (Activity = live runs, Memory = what it learned)
|
| 58 |
+
Logs: ~/Library/Logs/offgrid-*.log
|
| 59 |
+
Triggers on YOUR sent/accepted iMessages (TRIGGER_ON=outgoing). Set TRIGGER_ON=any to widen.
|
| 60 |
+
EOF
|
scripts/start_space.sh
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
|
| 2 |
+
# Launch the official llama.cpp server + the agent app (Docker GPU Space).
|
| 3 |
+
# llama-server downloads the GGUF from HF on first run and serves it on :8080;
|
| 4 |
+
# the app calls it via INFERENCE_BASE_URL=http://127.0.0.1:8080/v1.
|
| 5 |
+
set -u
|
| 6 |
+
|
| 7 |
+
# UI-only / preview mode: in stub mode there's no model, so skip llama-server
|
| 8 |
+
# entirely (otherwise it would download the ~20GB GGUF and fail on a CPU box).
|
| 9 |
+
# Lets the Space run the full UI for free on cpu-basic. See PLAN / docs.
|
| 10 |
+
if [ "${USE_STUB_EXTRACTOR:-0}" = "1" ]; then
|
| 11 |
+
echo "[start] UI-only (USE_STUB_EXTRACTOR=1) — skipping llama-server"
|
| 12 |
+
exec python3 app.py
|
| 13 |
+
fi
|
| 14 |
+
|
| 15 |
+
LS="$(command -v llama-server || echo /app/llama-server)"
|
| 16 |
+
# The official binary's sibling .so (libllama-server-impl.so) lives next to it in
|
| 17 |
+
# /app; we run from /srv, so add its dir to the loader path.
|
| 18 |
+
export LD_LIBRARY_PATH="$(dirname "$LS"):/app:${LD_LIBRARY_PATH:-}"
|
| 19 |
+
echo "[start] using llama-server at: $LS (LD_LIBRARY_PATH=$LD_LIBRARY_PATH)"
|
| 20 |
+
|
| 21 |
+
# Model selection: MODEL_FILE (explicit filename in MODEL_HF_REPO) is preferred —
|
| 22 |
+
# the repo holds multiple Q4_K_M GGUFs (31B + E4B edge), so the `-hf repo:quant`
|
| 23 |
+
# shorthand is ambiguous there. Falls back to -hf REPO:QUANT when MODEL_FILE unset.
|
| 24 |
+
if [ -n "${MODEL_FILE:-}" ]; then
|
| 25 |
+
echo "[start] model: ${MODEL_HF_REPO}/${MODEL_FILE} (explicit file; downloads on first run)"
|
| 26 |
+
MODEL_PATH="$(python3 -c "from huggingface_hub import hf_hub_download; print(hf_hub_download('${MODEL_HF_REPO}', '${MODEL_FILE}'))")"
|
| 27 |
+
MODEL_ARGS="-m $MODEL_PATH"
|
| 28 |
+
else
|
| 29 |
+
echo "[start] model: ${MODEL_HF_REPO}:${MODEL_QUANT:-Q4_K_M} (downloads on first run)"
|
| 30 |
+
MODEL_ARGS="-hf ${MODEL_HF_REPO}:${MODEL_QUANT:-Q4_K_M}"
|
| 31 |
+
fi
|
| 32 |
+
|
| 33 |
+
# Vision: download the mmproj projector and pass --mmproj so llama-server accepts
|
| 34 |
+
# image_url inputs (screenshots/flyers). MMPROJ_REPO lets the projector come from a
|
| 35 |
+
# different repo than the LLM (the E4B edge model uses the base E4B's projector,
|
| 36 |
+
# not the 31B mmproj stored alongside it). Falls back to text-only if unavailable.
|
| 37 |
+
MMPROJ_ARG=""
|
| 38 |
+
if [ -n "${MMPROJ_FILE:-}" ]; then
|
| 39 |
+
MMPROJ_REPO="${MMPROJ_REPO:-$MODEL_HF_REPO}"
|
| 40 |
+
echo "[start] fetching mmproj ${MMPROJ_REPO}/${MMPROJ_FILE} for vision..."
|
| 41 |
+
MMPROJ_PATH="$(python3 -c "from huggingface_hub import hf_hub_download; print(hf_hub_download('${MMPROJ_REPO}', '${MMPROJ_FILE}'))" 2>/dev/null || true)"
|
| 42 |
+
if [ -n "$MMPROJ_PATH" ]; then
|
| 43 |
+
MMPROJ_ARG="--mmproj $MMPROJ_PATH"
|
| 44 |
+
echo "[start] mmproj ready: $MMPROJ_PATH"
|
| 45 |
+
else
|
| 46 |
+
echo "[start] mmproj download failed -> text-only"
|
| 47 |
+
fi
|
| 48 |
+
fi
|
| 49 |
+
|
| 50 |
+
# -ngl 999 offloads all layers to the GPU; --jinja enables the chat/tool template.
|
| 51 |
+
"$LS" $MODEL_ARGS \
|
| 52 |
+
--host 127.0.0.1 --port 8080 \
|
| 53 |
+
-ngl 999 -c 8192 --jinja $MMPROJ_ARG &
|
| 54 |
+
LLAMA_PID=$!
|
| 55 |
+
|
| 56 |
+
# Optional second llama-server: the Agent tab's MiniCPM planner. OFF unless
|
| 57 |
+
# PLANNER_HF_REPO+PLANNER_FILE are set. VRAM note: E4B Q4 (~5GB) + MiniCPM-8B
|
| 58 |
+
# Q4 (~5GB) + KV is tight on a 16GB T4 — tune PLANNER_NGL (default 999; lower
|
| 59 |
+
# it for partial offload, planner outputs are short) or use the 1B variant
|
| 60 |
+
# (openbmb/MiniCPM5-1B-GGUF / MiniCPM5-1B-Q4_K_M.gguf).
|
| 61 |
+
# PLANNER_CTX (default 8192, matching the main model): a multi-step agent run
|
| 62 |
+
# accumulates the tool schemas + task + thread + each step's observations, so
|
| 63 |
+
# 4096 overflows on real threads ("request (4142 tokens) exceeds context").
|
| 64 |
+
if [ -n "${PLANNER_HF_REPO:-}" ] && [ -n "${PLANNER_FILE:-}" ]; then
|
| 65 |
+
echo "[start] planner: ${PLANNER_HF_REPO}/${PLANNER_FILE} on :${PLANNER_PORT:-8081}"
|
| 66 |
+
PLANNER_PATH="$(python3 -c "from huggingface_hub import hf_hub_download; print(hf_hub_download('${PLANNER_HF_REPO}', '${PLANNER_FILE}'))")"
|
| 67 |
+
"$LS" -m "$PLANNER_PATH" \
|
| 68 |
+
--host 127.0.0.1 --port "${PLANNER_PORT:-8081}" \
|
| 69 |
+
-ngl "${PLANNER_NGL:-999}" -c "${PLANNER_CTX:-8192}" --jinja &
|
| 70 |
+
echo "[start] planner launching (PLANNER_BASE_URL should be http://127.0.0.1:${PLANNER_PORT:-8081}/v1)"
|
| 71 |
+
fi
|
| 72 |
+
|
| 73 |
+
echo "[start] waiting for llama-server health (model download can take minutes)..."
|
| 74 |
+
for i in $(seq 1 900); do
|
| 75 |
+
if ! kill -0 "$LLAMA_PID" 2>/dev/null; then
|
| 76 |
+
echo "[start] ERROR: llama-server exited early"; break
|
| 77 |
+
fi
|
| 78 |
+
if curl -sf http://127.0.0.1:8080/health >/dev/null 2>&1; then
|
| 79 |
+
echo "[start] llama-server ready after ~$((i*2))s"; break
|
| 80 |
+
fi
|
| 81 |
+
sleep 2
|
| 82 |
+
done
|
| 83 |
+
|
| 84 |
+
# `set -u` is active, so expanding an unset INFERENCE_BASE_URL in the log line
# would abort the script before the app is exec'd. Default it to the
# llama-server this script started on 127.0.0.1:8080 (the endpoint named in the
# header comment); an explicitly provided value is left untouched.
export INFERENCE_BASE_URL="${INFERENCE_BASE_URL:-http://127.0.0.1:8080/v1}"
echo "[start] launching app (UI + /agent) -> INFERENCE_BASE_URL=$INFERENCE_BASE_URL"
exec python3 app.py
|
scripts/verify_gcal_e2e.py
ADDED
|
@@ -0,0 +1,159 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""End-to-end Google Calendar verification: agent-extracted event -> real push
|
| 2 |
+
-> API readback -> cleanup. Manual-run only (needs a real per-user token and
|
| 3 |
+
the google libs); never imported by CI.
|
| 4 |
+
|
| 5 |
+
One-time token bootstrap: connect in the app (Step 2a), then DevTools ->
|
| 6 |
+
Application -> Local Storage -> copy the `gcal_token` value into a file.
|
| 7 |
+
|
| 8 |
+
python scripts/verify_gcal_e2e.py --token-file tok.json [--check-only] [--keep]
|
| 9 |
+
"""
|
| 10 |
+
from __future__ import annotations
|
| 11 |
+
|
| 12 |
+
import argparse
|
| 13 |
+
import json
|
| 14 |
+
import os
|
| 15 |
+
import sys
|
| 16 |
+
import uuid
|
| 17 |
+
from datetime import datetime
|
| 18 |
+
from pathlib import Path
|
| 19 |
+
|
| 20 |
+
os.environ.setdefault("USE_STUB_EXTRACTOR", "1")
|
| 21 |
+
sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
|
| 22 |
+
|
| 23 |
+
from dateutil import parser as dtparser # noqa: E402
|
| 24 |
+
|
| 25 |
+
from calendar_out import gcal # noqa: E402
|
| 26 |
+
|
| 27 |
+
# The canonical appointment-confirmation sample (kept in sync with
|
| 28 |
+
# tests/test_agent.py::CANON — copied because CI test modules aren't a package).
|
| 29 |
+
CANON = (
|
| 30 |
+
"Thank you for scheduling your appointment with Primary Care of Manhattan. "
|
| 31 |
+
"We look forward to seeing you!\n"
|
| 32 |
+
"\n"
|
| 33 |
+
"Date: Monday, June 22, 2026\n"
|
| 34 |
+
"Time: 10:30 AM\n"
|
| 35 |
+
"Duration: Approx. 30–45 min\n"
|
| 36 |
+
"(Please arrive 15 minutes early to complete intake forms)\n"
|
| 37 |
+
"\n"
|
| 38 |
+
"\U0001f4cd 112A West 72nd Street\n"
|
| 39 |
+
"New York, NY 10023\n"
|
| 40 |
+
"(Upper West Side — 72nd & Columbus)\n"
|
| 41 |
+
)
|
| 42 |
+
EXPECT_START = "2026-06-22T10:15:00"
|
| 43 |
+
EXPECT_LOCATION = "112A West 72nd Street, New York, NY 10023"
|
| 44 |
+
EXPECT_REMINDER = 60
|
| 45 |
+
|
| 46 |
+
_results: list[tuple[bool, str]] = []
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def _check(ok: bool, label: str) -> bool:
    """Record one check outcome, print a PASS/FAIL line, and return ``ok``.

    The (ok, label) pair is appended to the module-level ``_results`` list so
    the final summary can count failures.
    """
    prefix = "PASS " if ok else "FAIL "
    _results.append((ok, label))
    print(prefix + label)
    return ok
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
def _bare_creds(token_json: str):
    """Build Credentials that carry only the access token from ``token_json``.

    Such credentials work for API calls while the access token is still
    fresh, but they have nothing to refresh with once it expires.
    """
    # Imported lazily: the google libs are only needed for this manual script.
    from google.oauth2.credentials import Credentials

    payload = json.loads(token_json)
    access_token = payload.get("token")
    return Credentials(token=access_token)
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
def _enable_bare_token_fallback(token: str) -> None:
    """Swap in access-token-only credentials when the token lacks a client_secret.

    Tokens minted by the Space may not include client_secret (it lives only in
    the Space's env, and GOOGLE_OAUTH_CLIENT_SECRET is not set locally), in
    which case google-auth refuses to build refresh-capable credentials. When
    that specific failure is detected, ``gcal._creds_from_token_json`` is
    replaced with ``_bare_creds`` so the bare access token is used directly
    (valid roughly an hour after minting). Any other ValueError propagates.
    """
    try:
        gcal._creds_from_token_json(token)
    except ValueError as err:
        secret_missing = "client_secret" in str(err)
        if not secret_missing:
            raise
        print("note: token has no client_secret and none in env -> using the "
              "access token directly (no refresh; re-mint if it expires)")
        # Monkeypatch for the rest of this process only.
        gcal._creds_from_token_json = _bare_creds
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
def main() -> int:
    """Run the end-to-end Google Calendar verification and return an exit code.

    Steps: liveness-check the token, extract the canonical event with the
    agent, push it with a nonce in the title, read it back through the
    Calendar API, compare the fields, and delete it unless ``--keep`` is given.

    Returns:
        0 when every recorded check passed (or ``--check-only`` succeeded),
        1 on a missing token file, a dead token, a wrong event count from the
        agent, or any failed check.
    """
    ap = argparse.ArgumentParser(description=__doc__)
    ap.add_argument("--token-file", default=os.environ.get("GCAL_TOKEN_FILE", ""))
    ap.add_argument("--check-only", action="store_true",
                    help="only liveness-check the token; no event is created")
    ap.add_argument("--keep", action="store_true",
                    help="leave the test event in the calendar (default: delete)")
    ap.add_argument("--calendar-id", default="primary")
    args = ap.parse_args()

    if not args.token_file or not Path(args.token_file).exists():
        print("ERROR: pass --token-file (or set GCAL_TOKEN_FILE) pointing at the "
              "localStorage gcal_token JSON")
        return 1
    token = Path(args.token_file).read_text(encoding="utf-8").strip()
    # May replace gcal._creds_from_token_json for the rest of the run.
    _enable_bare_token_fallback(token)

    # 1. liveness check (same call the /oauth2/check endpoint makes)
    res = gcal.check_token(token)
    if not _check(res["ok"], f"check_token: {res if not res['ok'] else 'token is live'}"):
        return 1
    if res.get("refreshed_token"):
        # Persist the refreshed token so the next run starts from it.
        token = res["refreshed_token"]
        Path(args.token_file).write_text(token, encoding="utf-8")
        print(" (token was refreshed; token file updated)")
    if args.check_only:
        return 0

    # 2. agent extraction (stub mode = deterministic) + invariants
    from server.agent import run_agent

    # Fixed "now" so the extraction does not depend on the wall clock.
    plan = run_agent(CANON, now=datetime(2026, 6, 12, 9, 0))
    if not _check(len(plan.events) == 1, f"agent extracted 1 event (got {len(plan.events)})"):
        return 1
    ev = plan.events[0]
    # These three checks are recorded but do not stop the run.
    _check(ev.start == EXPECT_START, f"start == {EXPECT_START} (got {ev.start})")
    _check(ev.location == EXPECT_LOCATION, f"location == {EXPECT_LOCATION!r} (got {ev.location!r})")
    _check(ev.reminder_minutes == EXPECT_REMINDER,
           f"reminder == {EXPECT_REMINDER} (got {ev.reminder_minutes})")

    # 3. push with a nonce title so readback/cleanup can never touch a real event
    nonce = f"e2e-{uuid.uuid4().hex[:6]}"
    ev.title = f"{ev.title} [{nonce}]"
    links = gcal.push_events_with_token(token, [ev], calendar_id=args.calendar_id)
    _check(bool(links and links[0]), f"push returned an event link: {links[0] if links else '-'}")

    # 4. read it back through the API and compare what actually landed
    from googleapiclient.discovery import build

    creds = gcal._creds_from_token_json(token)
    svc = build("calendar", "v3", credentials=creds)
    # Search by nonce inside a window around the canonical event's date.
    found = svc.events().list(
        calendarId=args.calendar_id, q=nonce, singleEvents=True,
        timeMin="2026-06-21T00:00:00Z", timeMax="2026-06-23T00:00:00Z",
    ).execute().get("items", [])
    if _check(len(found) == 1, f"readback found exactly 1 event (got {len(found)})"):
        got = found[0]
        _check(nonce in got.get("summary", ""), f"summary carries nonce (got {got.get('summary')!r})")
        _check(got.get("location") == EXPECT_LOCATION,
               f"location landed (got {got.get('location')!r})")
        want = dtparser.isoparse(gcal._dt_field(ev.start)["dateTime"])
        have = dtparser.isoparse(got["start"]["dateTime"])
        # compare instants — the API echoes in the calendar's zone
        # (the second clause accepts equal wall-clock values with tzinfo dropped)
        _check(want == have or want.replace(tzinfo=None) == have.replace(tzinfo=None),
               f"start instant matches (sent {want.isoformat()}, got {have.isoformat()})")
        overrides = (got.get("reminders") or {}).get("overrides") or []
        _check(any(o.get("minutes") == EXPECT_REMINDER for o in overrides),
               f"reminder override {EXPECT_REMINDER} min landed (got {overrides})")
        # Cleanup only happens when readback found exactly one event.
        if not args.keep:
            svc.events().delete(calendarId=args.calendar_id, eventId=got["id"]).execute()
            print(f" (test event {got['id']} deleted)")
        else:
            print(f" (kept: {got.get('htmlLink')})")

    # Summarize every check recorded through _check during this run.
    failures = [label for ok, label in _results if not ok]
    print(f"\n{'PASS' if not failures else 'FAIL'}: "
          f"{len(_results) - len(failures)}/{len(_results)} checks passed")
    return 0 if not failures else 1
|
| 156 |
+
|
| 157 |
+
|
| 158 |
+
if __name__ == "__main__":
|
| 159 |
+
raise SystemExit(main())
|
server/__init__.py
ADDED
|
File without changes
|
server/agent.py
ADDED
|
@@ -0,0 +1,475 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""The scheduling agent: thread (+images) -> validated ActionPlan.
|
| 2 |
+
|
| 3 |
+
Replaces the old one-shot extractor. The model reasons over a whole conversation
|
| 4 |
+
and emits a single constrained ActionPlan: events, conflicts (vs the user's
|
| 5 |
+
existing calendar), proposed alternative times, a reply draft, and an optional
|
| 6 |
+
clarification question. Output is grammar-constrained so it always parses.
|
| 7 |
+
"""
|
| 8 |
+
from __future__ import annotations
|
| 9 |
+
|
| 10 |
+
import json
|
| 11 |
+
import os
|
| 12 |
+
import re
|
| 13 |
+
from datetime import datetime, timedelta
|
| 14 |
+
from typing import Optional
|
| 15 |
+
|
| 16 |
+
from dateutil import parser as dtparser
|
| 17 |
+
from pydantic import ValidationError
|
| 18 |
+
|
| 19 |
+
from . import events, memory
|
| 20 |
+
from .schema import ActionPlan, Event
|
| 21 |
+
|
| 22 |
+
SYSTEM = (
|
| 23 |
+
"You are a scheduling assistant reading a chat conversation (text, and sometimes images "
|
| 24 |
+
"such as screenshots, invites, or flyers). Decide what calendar action is warranted and "
|
| 25 |
+
"return ONLY a JSON object matching the ActionPlan schema:\n"
|
| 26 |
+
"- reasoning: one or two sentences of why.\n"
|
| 27 |
+
"- events: concrete events with ISO 8601 datetimes; resolve relative dates from the current "
|
| 28 |
+
"datetime. Empty if there is no real plan. List EVERY distinct event separately — one thread "
|
| 29 |
+
"often holds several (e.g. a drop-off AND a pickup, or two appointments, are separate events).\n"
|
| 30 |
+
"- title: a short, self-contained calendar title summarizing the action and subject "
|
| 31 |
+
"(e.g. \"Pick up Priya — Terminal 4\", \"Mia — dental cleaning\"), not a quote of the "
|
| 32 |
+
"message.\n"
|
| 33 |
+
"- location: the venue or address when one is mentioned (join multi-line addresses into one "
|
| 34 |
+
"string); null otherwise.\n"
|
| 35 |
+
"- end: when a duration is stated (\"Duration: 30–45 min\", \"for 2 hours\", \"runs 90 "
|
| 36 |
+
"minutes\"), set end = start + duration, using the LOWER bound of a range; when an end time "
|
| 37 |
+
"is stated (\"7-9pm\"), use it; otherwise null. Never guess a duration that was not given.\n"
|
| 38 |
+
"- early arrival: if told to arrive N minutes early (\"please arrive 15 minutes early\"), "
|
| 39 |
+
"start = the arrival time (stated time minus N); end still counts from the STATED time; put "
|
| 40 |
+
"the stated time and the reason in notes.\n"
|
| 41 |
+
"- reminder_minutes: a stated lead time always wins (\"remind me 2 hours before\" -> 120); "
|
| 42 |
+
"otherwise 60 for doctor/medical visits, 30 for parties, 45 for carpools or school events; "
|
| 43 |
+
"for anything else use your judgment.\n"
|
| 44 |
+
"- conflicts: for any event that clashes with the provided existing calendar, the event_index, "
|
| 45 |
+
"what it clashes with, and severity (overlap|adjacent|tight).\n"
|
| 46 |
+
"- proposed_times: ISO 8601 alternatives when there is a conflict.\n"
|
| 47 |
+
"- reply_draft: a short, natural reply the user could send back.\n"
|
| 48 |
+
"- needs_clarification: a question if the plan is ambiguous, else null. If something should "
|
| 49 |
+
"be scheduled but its day or time is not yet known (\"TBD\", \"I'll confirm\", \"sometime "
|
| 50 |
+
"soon\"), leave events empty and ASK via needs_clarification instead of guessing.\n"
|
| 51 |
+
"Do not invent events that were not discussed."
|
| 52 |
+
)
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
def _existing_block(existing: list[Event]) -> str:
    """Render the caller's existing calendar as a bullet list for the prompt.

    Each entry becomes ``- <title>: <start>..<end>``; an event with no end is
    shown as a zero-length span (its start repeated). An empty calendar yields
    a fixed "(none provided)" line.
    """
    if not existing:
        return "Existing calendar: (none provided)"
    rendered = ["Existing calendar:"]
    for entry in existing:
        finish = entry.end or entry.start
        rendered.append(f"- {entry.title}: {entry.start}..{finish}")
    return "\n".join(rendered)
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
def build_messages(
    thread: str,
    now: datetime,
    existing: list[Event],
    images: Optional[list[str]] = None,
    memory_block: Optional[str] = None,
) -> list[dict]:
    """Assemble the system + user chat messages for the extraction pass.

    ``images`` are base64 data URIs; when present the user message uses the
    multimodal content-list format, otherwise it is a plain string.
    ``memory_block`` is the caller's own recall block (per-user/localStorage
    memory); when it is None the server-side global ``memory.recall()`` is
    used instead. An empty recall block is omitted from the prompt entirely.
    """
    recalled = memory_block if memory_block is not None else memory.recall()

    sections = [
        f"Current datetime: {now.strftime('%A')}, {now.isoformat()}\n",
        f"{_existing_block(existing)}\n\n",
    ]
    if recalled:
        sections.append(f"{recalled}\n\n")
    sections.append(f"Conversation:\n{thread}\n\n")
    sections.append("Return the ActionPlan JSON now.")
    text = "".join(sections)

    if images:
        # Multimodal content format understood by llama.cpp vision chat handlers.
        user_content = [{"type": "text", "text": text}]
        user_content.extend(
            {"type": "image_url", "image_url": {"url": uri}} for uri in images
        )
    else:
        user_content = text

    return [
        {"role": "system", "content": SYSTEM},
        {"role": "user", "content": user_content},
    ]
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
def run_agent(
    thread: str,
    now: Optional[datetime] = None,
    existing: Optional[list[Event]] = None,
    images: Optional[list[str]] = None,
    memory_block: Optional[str] = None,
) -> ActionPlan:
    """Run one blocking extraction over ``thread`` and return the ActionPlan.

    With ``USE_STUB_EXTRACTOR=1`` the heuristic stub is used; otherwise the
    model is called, then the optional title-polish pass and the deterministic
    text rules are applied. All activity events are emitted under one
    "analyze" run scope.
    """
    if not now:
        now = datetime.now()
    if not existing:
        existing = []

    with events.run_scope("analyze"):
        if images:
            events.emit("vision", f"reading {len(images)} image(s)", images=len(images))

        stub_mode = os.environ.get("USE_STUB_EXTRACTOR") == "1"
        if stub_mode:
            plan = _stub_plan(thread, now)
        else:
            from .model import complete_json  # lazy: avoids llama.cpp in stub mode

            messages = build_messages(thread, now, existing, images, memory_block)
            raw = complete_json(
                messages,
                json_schema=ActionPlan.model_json_schema(),
            )
            parsed = _parse_plan(raw)
            polished = _polish_titles(thread, parsed)
            plan = apply_text_rules(thread, polished)

        # Global path only: with client-owned (per-user) memory, the UI merges
        # learned contacts itself (memory.learn_from_plan) so we don't pollute
        # the shared server file.
        if memory_block is None:
            memory.observe_plan(plan)  # grows-with-you: learn recurring contacts

        detected = len(plan.events)
        events.emit("decision", f"{detected} event(s) detected", events=detected)
        return plan
|
| 127 |
+
|
| 128 |
+
|
| 129 |
+
def _parse_plan(raw: str) -> ActionPlan:
    """Parse the model's raw JSON text into an ActionPlan.

    Never raises for bad model output: malformed JSON, JSON that fails schema
    validation, and well-formed JSON whose top-level value is not an object
    (``null``, a list, a number, ...) all degrade to an empty plan whose
    ``reasoning`` explains the failure.
    """
    try:
        payload = json.loads(raw)
        if not isinstance(payload, dict):
            # ``ActionPlan(**payload)`` would raise TypeError for non-mappings;
            # route it through the same fallback as other parse failures.
            raise TypeError("top-level JSON value is not an object")
        return ActionPlan(**payload)
    except (json.JSONDecodeError, ValidationError, TypeError):
        # Grammar should prevent this; degrade to an empty plan rather than 500.
        return ActionPlan(reasoning="Could not parse model output.")
|
| 135 |
+
|
| 136 |
+
|
| 137 |
+
# --------------------------------------------------------------------------- #
|
| 138 |
+
# Title polish (optional second pass, TITLE_POLISH=1): rewrite each extracted
|
| 139 |
+
# event's title into a calendar-ready action+subject summary. The extraction
|
| 140 |
+
# pass already gets a title style instruction; this pass gives the model one
|
| 141 |
+
# focused job, which helps on echo-prone inputs (flyers, forwarded notices).
|
| 142 |
+
# --------------------------------------------------------------------------- #
|
| 143 |
+
# System prompt for the title-polish pass: one rewritten title per extracted
# event, same order, no invented facts.
TITLE_SYSTEM = (
    "You rewrite calendar event titles. Given a conversation and the events extracted from "
    "it, return ONLY a JSON object {\"titles\": [...]} with exactly one title per event, in "
    "the same order. Each title is a short, self-contained calendar entry summarizing the "
    "action and subject (e.g. \"Pick up Priya — Terminal 4\", \"Mia — dental cleaning\"). "
    "Keep names and places; drop filler, hype and sender wording. Never add facts that are "
    "not in the conversation."
)

# JSON schema constraining the polish-pass output to {"titles": [str, ...]}.
TITLES_SCHEMA = {
    "type": "object",
    "properties": {
        "titles": {
            "type": "array",
            "items": {"type": "string"},
        },
    },
    "required": ["titles"],
}
|
| 157 |
+
|
| 158 |
+
|
| 159 |
+
def build_title_messages(thread: str, events: list[dict]) -> list[dict]:
    """Build the chat messages for the title-polish pass.

    ``events`` are Event-shaped dicts. Each is listed as
    ``<n>. <title> @ <start>`` with the location appended in parentheses when
    present; a missing or empty title is shown as "(untitled)".
    """
    summary = []
    for number, item in enumerate(events, start=1):
        label = item.get("title") or "(untitled)"
        entry = f"{number}. {label} @ {item.get('start')}"
        if item.get("location"):
            entry += f" ({item['location']})"
        summary.append(entry)
    listing = "\n".join(summary)

    text = (
        f"Conversation:\n{thread}\n\n"
        f"Extracted events:\n{listing}\n\n"
        "Return the titles JSON now."
    )
    system_msg = {"role": "system", "content": TITLE_SYSTEM}
    user_msg = {"role": "user", "content": text}
    return [system_msg, user_msg]
|
| 175 |
+
|
| 176 |
+
|
| 177 |
+
def merge_titles(plan: ActionPlan, raw: str) -> ActionPlan:
    """Copy polish-pass titles onto ``plan`` in place and return it.

    The response is applied only when it parses to an object whose "titles"
    value is a list with exactly one entry per event. Individual entries that
    are not non-blank strings are skipped, so the polish pass can change
    titles but can never drop an event. Titles are stripped and capped at 80
    characters.
    """
    try:
        decoded = json.loads(raw)
        titles = decoded.get("titles")
    except (json.JSONDecodeError, AttributeError):
        # Not JSON, or JSON that is not an object: keep the original titles.
        return plan

    usable = isinstance(titles, list) and len(titles) == len(plan.events)
    if not usable:
        return plan

    for position, candidate in enumerate(titles):
        if not isinstance(candidate, str):
            continue
        cleaned = candidate.strip()
        if cleaned:
            plan.events[position].title = cleaned[:80]
    return plan
|
| 190 |
+
|
| 191 |
+
|
| 192 |
+
def apply_text_rules(thread: str, plan: ActionPlan) -> ActionPlan:
    """Enforce explicitly-stated logistics on a single-event plan, in place.

    Multi-event (or empty) plans are returned untouched — per-event model
    judgment is kept there. For a single event:

    - "arrive N minutes early": if the model's start equals the stated time,
      shift start back by N minutes.
    - Whenever the event starts at the arrival time (shifted here or by the
      model), set end = STATED time + stated duration (when a duration is
      given) and make sure the stated time appears in the notes.
    - reminder: an explicit lead time wins; otherwise type defaults
      (medical 60 / party 30 / carpool-school 45); otherwise the model's value
      is left as is.
    """
    if len(plan.events) != 1:
        return plan
    event = plan.events[0]

    early_hit = _EARLY_RE.search(thread)
    stated = _find_time(thread)
    model_start = None
    if early_hit and stated:
        try:
            model_start = datetime.fromisoformat(event.start)
        except ValueError:
            model_start = None  # unparseable start: leave the times alone

    if model_start is not None:
        mins = int(early_hit.group(1))
        stated_hour, stated_minute = stated[0], stated[1]
        appt_dt = model_start.replace(hour=stated_hour, minute=stated_minute)
        arrival = appt_dt - timedelta(minutes=mins)

        if model_start == appt_dt:  # model did not shift -> start at arrival
            model_start = arrival
            event.start = model_start.isoformat()

        if model_start == arrival:
            # The event covers arrival (we or the model shifted it): anchor the
            # END to the stated time + stated duration, and keep the official
            # time visible in the notes.
            duration = _find_duration_minutes(thread)
            if duration:
                event.end = (appt_dt + timedelta(minutes=duration)).isoformat()
            hhmm = appt_dt.strftime("%H:%M")
            if hhmm not in (event.notes or ""):
                note = f"Appointment at {hhmm}; arrive {mins} min early"
                event.notes = f"{event.notes} — {note}" if event.notes else note

    explicit = _REMIND_EXPLICIT_RE.search(thread)
    if explicit:
        amount = int(explicit.group(1))
        in_hours = explicit.group(2).lower().startswith("h")
        event.reminder_minutes = amount * 60 if in_hours else amount
    else:
        # First matching event type wins; no match keeps the model's value.
        type_defaults = (
            (_MEDICAL_RE, 60),
            (_PARTY_RE, 30),
            (_CARPOOL_SCHOOL_RE, 45),
        )
        for pattern, minutes in type_defaults:
            if pattern.search(thread):
                event.reminder_minutes = minutes
                break
    return plan
|
| 242 |
+
|
| 243 |
+
|
| 244 |
+
def _polish_titles(thread: str, plan: ActionPlan) -> ActionPlan:
    """Optionally rewrite event titles with a second, focused model call.

    Runs only when the plan has events and ``TITLE_POLISH=1``. Any failure of
    the model call leaves the plan unchanged — polishing is best-effort.
    """
    if not plan.events:
        return plan
    if os.environ.get("TITLE_POLISH") != "1":
        return plan

    from .model import complete_json  # lazy: avoids llama.cpp in stub mode

    try:
        event_dicts = [item.model_dump() for item in plan.events]
        raw = complete_json(
            build_title_messages(thread, event_dicts),
            json_schema=TITLES_SCHEMA,
            max_tokens=256,
        )
    except Exception:  # noqa: BLE001  polish is best-effort, never fatal
        return plan
    else:
        return merge_titles(plan, raw)
|
| 258 |
+
|
| 259 |
+
|
| 260 |
+
def run_agent_stream(
    thread: str,
    now: Optional[datetime] = None,
    existing: Optional[list[Event]] = None,
    images: Optional[list[str]] = None,
    busy=None,
    memory_block: Optional[str] = None,
):
    """Generator for the UI: yields ``(partial_text, plan_or_None)``.

    While the model (or the stub) is producing output, yields the accumulated
    text with ``None`` as the plan so a live 'thinking' panel can render it.
    The last item is ``(pretty_json, plan)`` carrying the final ActionPlan,
    with deterministic conflicts annotated when ``busy`` intervals are given.
    ``memory_block`` carries the caller's per-user (localStorage) memory.
    """
    if not now:
        now = datetime.now()
    if not existing:
        existing = []

    with events.run_scope("analyze"):
        if images:
            events.emit("vision", f"reading {len(images)} image(s)", images=len(images))

        if os.environ.get("USE_STUB_EXTRACTOR") == "1":
            plan = _stub_plan(thread, now)
            text = json.dumps(plan.model_dump(), indent=2)
            events.emit("model", "stub inference", latency_ms=0)
            # Simulate token streaming: reveal the JSON 24 characters at a time.
            for shown in range(24, len(text) + 24, 24):
                yield text[:shown], None
        else:
            from .model import stream_complete_json

            acc = ""
            deltas = stream_complete_json(
                build_messages(thread, now, existing, images, memory_block),
                ActionPlan.model_json_schema(),
            )
            for delta in deltas:
                acc += delta
                yield acc, None
            parsed = _parse_plan(acc)
            plan = apply_text_rules(thread, _polish_titles(thread, parsed))

        # Global path only (see run_agent): client memory is merged by the UI.
        if memory_block is None:
            memory.observe_plan(plan)  # grows-with-you: learn recurring contacts

        detected = len(plan.events)
        events.emit("decision", f"{detected} event(s) detected", events=detected)

        if busy:
            from calendar_out.freebusy import annotate_conflicts  # lazy: avoid cycle

            plan = annotate_conflicts(plan, busy)

        final_text = json.dumps(plan.model_dump(), indent=2)
        yield (final_text, plan)
|
| 308 |
+
|
| 309 |
+
|
| 310 |
+
# --- clock times and dates ------------------------------------------------- #
# Hour, optional ":MM", optional am/pm. Callers must reject bare integers.
_TIME_RE = re.compile(r"\b(\d{1,2})(?::(\d{2}))?\s*(am|pm)?\b", re.IGNORECASE)
# A "Time: ..." / "Time - ..." line; group 1 is the rest of that line.
_TIME_LABEL_RE = re.compile(r"(?im)^\s*time\s*[:\-]\s*(.+)$")
# Month name (full or abbreviated) + day, with optional ordinal suffix and year.
_MONTH_DATE_RE = re.compile(
    r"\b(?:jan(?:uary)?|feb(?:ruary)?|mar(?:ch)?|apr(?:il)?|may|jun(?:e)?|jul(?:y)?|"
    r"aug(?:ust)?|sep(?:t(?:ember)?)?|oct(?:ober)?|nov(?:ember)?|dec(?:ember)?)\.?\s+"
    r"\d{1,2}(?:st|nd|rd|th)?(?:,?\s*\d{4})?\b",
    re.IGNORECASE,
)
_WEEKDAY_RE = re.compile(
    r"\b(monday|tuesday|wednesday|thursday|friday|saturday|sunday)\b",
    re.IGNORECASE,
)

# --- labelled lines --------------------------------------------------------- #
# "Location:", "Where:", "Address:" or a pin emoji; group 1 is the remainder.
_LOCATION_RE = re.compile(
    r"(?i)^\s*(?:(?:location|where|address)\s*[:\-]|\U0001F4CD)\s*(.*)$"
)
# Any short "Label: " prefix, e.g. "Time: ...", "Notes: ...".
_LABEL_LINE_RE = re.compile(r"^\s*[A-Za-z][A-Za-z ]{0,20}:\s")
_DURATION_RE = re.compile(r"(?im)^\s*duration\s*[:\-]\s*(.*)$")

# --- logistics phrases ------------------------------------------------------ #
# "arrive N minutes early"; group 1 is N.
_EARLY_RE = re.compile(r"(?i)arrive\s+(\d{1,3})\s*min(?:ute)?s?\s+early")
# "remind me 2 hours before" etc.; group 1 = amount, group 2 = unit.
_REMIND_EXPLICIT_RE = re.compile(
    r"(?i)\b(?:remind(?:er)?|notify|alert)\s*(?:me\s+)?(?:for\s+)?"
    r"(\d{1,3})\s*(min(?:ute)?s?|h(?:ou)?rs?)\s*(?:before|ahead|prior|early)"
)

# --- event-type cues used for default reminder lead times ------------------- #
_MEDICAL_RE = re.compile(
    r"(?i)\b(?:doctor|dr\b\.?|clinic|dentist|dental|pediatric\w*|physician|"
    r"medical|check-?up|primary\s+care|intake\s+forms?)"
)
# "party of 4" is a group size, not a party.
_PARTY_RE = re.compile(
    r"(?i)\b(?:birthday|bday)\b|\bparty\b(?!\s+of\s+\d)"
)
_CARPOOL_SCHOOL_RE = re.compile(r"(?i)\bcarpool\w*\b|\bschool\b|drive\s+the\s+kids")
|
| 332 |
+
|
| 333 |
+
|
| 334 |
+
def _find_time(thread: str) -> Optional[tuple[int, int]]:
    """Return the first plausible clock time as ``(hour, minute)``, or None.

    If the text has a "Time:" line, only that line is searched. A bare integer
    ("June 22", "112A") is not a time — a match must carry a minute component
    or an am/pm marker. Hours are returned on the 24-hour clock.
    """
    labelled = _TIME_LABEL_RE.search(thread)
    haystack = thread if labelled is None else labelled.group(1)

    for hit in _TIME_RE.finditer(haystack):
        hour_txt, minute_txt, meridiem = hit.groups()
        if not minute_txt and not meridiem:
            continue  # bare number, not a time
        hour = int(hour_txt)
        minute = int(minute_txt or 0)
        if hour > 23 or minute > 59:
            continue
        marker = (meridiem or "").lower()
        if marker == "pm":
            if hour < 12:
                hour += 12
        elif marker == "am":
            if hour == 12:
                hour = 0  # 12am is midnight
        return hour, minute
    return None
|
| 352 |
+
|
| 353 |
+
|
| 354 |
+
def _find_date(thread: str, now: datetime):
    """Resolve the event's calendar day.

    Precedence: explicit month+day > "tomorrow" > "today"/"tonight" > a
    weekday name > tomorrow as the final fallback. Missing fields of a parsed
    date are filled from ``now``.
    """
    tomorrow = (now + timedelta(days=1)).date()

    explicit = _MONTH_DATE_RE.search(thread)
    if explicit:
        try:
            return dtparser.parse(explicit.group(0), default=now).date()
        except (ValueError, OverflowError):
            pass  # unparseable despite the regex hit: fall through

    if re.search(r"\btomorrow\b", thread, re.IGNORECASE):
        return tomorrow
    if re.search(r"\btoday\b|\btonight\b", thread, re.IGNORECASE):
        return now.date()

    weekday = _WEEKDAY_RE.search(thread)
    if weekday:
        try:
            # next-or-same day
            return dtparser.parse(weekday.group(1), default=now).date()
        except (ValueError, OverflowError):
            pass

    return tomorrow
|
| 373 |
+
|
| 374 |
+
|
| 375 |
+
def _find_location(lines: list[str]) -> tuple[Optional[str], set[int]]:
    """Extract the first labelled location and the line indexes it occupies.

    Takes the first "Location:"-style line plus its continuation lines (a
    wrapped address), stopping at a blank line, a line starting with "(", or
    the next "Label:" line. Non-empty parts are joined with ", ". Returns
    ``(location_or_None, used_line_indexes)``; ``(None, set())`` when no
    location line exists.
    """
    for idx, raw in enumerate(lines):
        head = _LOCATION_RE.match(raw)
        if head is None:
            continue

        pieces = [head.group(1).strip()]
        consumed = {idx}
        follow = idx + 1
        while follow < len(lines):
            text = lines[follow].strip()
            ends_address = (
                text == ""
                or text.startswith("(")
                or _LABEL_LINE_RE.match(lines[follow]) is not None
            )
            if ends_address:
                break
            pieces.append(text)
            consumed.add(follow)
            follow += 1

        joined = ", ".join(filter(None, pieces))
        return (joined or None), consumed
    return None, set()
|
| 392 |
+
|
| 393 |
+
|
| 394 |
+
def _find_duration_minutes(thread: str) -> Optional[int]:
    """Return the stated duration in minutes from a "Duration:" line, or None.

    The first quantity on the line is used; for a range ("30–45 min",
    "1-2 hours") that is the LOWER bound. Units are honoured: a quantity
    followed by h/hr/hrs/hour/hours is converted to minutes, optionally with
    trailing minutes ("1 hr 30 min" -> 90). A quantity with no unit, or any
    other unit, is taken as minutes. Returns None when there is no
    "Duration:" line or it contains no number.
    """
    label = _DURATION_RE.search(thread)
    if not label:
        return None
    quantity = re.search(
        r"(\d+(?:\.\d+)?)"                               # first / lower-bound amount
        r"(?:\s*(?:-|\u2013|\u2014|to)\s*\d+(?:\.\d+)?)?"  # optional upper bound of a range
        r"[\s-]*(h(?:(?:ou)?rs?)?\b)?"                   # optional hour unit
        r"(?:\s*(?:and\s+)?(\d+)\s*m(?:in(?:ute)?s?)?\b)?",  # "... 30 min" after hours
        label.group(1),
        re.IGNORECASE,
    )
    if not quantity:
        return None
    amount, hour_unit, extra_minutes = quantity.groups()
    if hour_unit:
        # Hours may be fractional ("1.5 hours"); trailing minutes only count
        # when they follow an hour quantity.
        return int(round(float(amount) * 60)) + int(extra_minutes or 0)
    return int(float(amount))
|
| 401 |
+
|
| 402 |
+
|
| 403 |
+
def _reminder_minutes(thread: str) -> int:
    """Pick the notification lead time in minutes.

    An explicit request ("remind me 2 hours before") wins. Otherwise the first
    matching event type decides — medical 60, party 30, carpool/school 45,
    checked in that order — and anything else defaults to 30.
    """
    explicit = _REMIND_EXPLICIT_RE.search(thread)
    if explicit:
        amount = int(explicit.group(1))
        unit = explicit.group(2).lower()
        if unit.startswith("h"):
            return amount * 60
        return amount

    type_defaults = (
        (_MEDICAL_RE, 60),
        (_PARTY_RE, 30),
        (_CARPOOL_SCHOOL_RE, 45),
    )
    for pattern, minutes in type_defaults:
        if pattern.search(thread):
            return minutes
    return 30
|
| 417 |
+
|
| 418 |
+
|
| 419 |
+
def _is_date_line(line: str, now: datetime) -> bool:
    """Report whether ``line`` parses, as a whole, as a date/time.

    Parsing is non-fuzzy, so a line with ordinary chatter fails to parse and
    is reported as not-a-date.
    """
    try:
        dtparser.parse(line, default=now)
    except (ValueError, OverflowError):
        return False
    else:
        return True
|
| 425 |
+
|
| 426 |
+
|
| 427 |
+
def _pick_title(lines: list[str], now: datetime, location_idx: set[int]) -> str:
    """Choose a title line for the stub event, capped at 60 characters.

    The first non-blank line is used unless it is just a date; in that case
    the next line that is not part of the location, not a "Label:" line, not
    parenthetical and not itself a date is used. Falls back to "Event" for
    empty input and "Appointment" when nothing descriptive is found.
    """
    candidates = [(idx, text.strip()) for idx, text in enumerate(lines) if text.strip()]
    if not candidates:
        return "Event"

    headline = candidates[0][1]
    if not _is_date_line(headline, now):
        return headline[:60]

    # First line is just the date — look for a more descriptive line instead.
    for idx, text in candidates[1:]:
        structural = (
            idx in location_idx
            or _LABEL_LINE_RE.match(text)
            or text.startswith("(")
        )
        if structural or _is_date_line(text, now):
            continue
        return text[:60]
    return "Appointment"
|
| 442 |
+
|
| 443 |
+
|
| 444 |
+
def _stub_plan(thread: str, now: datetime) -> ActionPlan:
    """Build a heuristic ActionPlan so model-less phases still demo end to end.

    Without a recognizable clock time the plan has no events. Otherwise one
    event is produced from the regex-derived time, date, location, duration
    (default 60 minutes) and reminder.
    """
    clock = _find_time(thread)
    if not clock:
        return ActionPlan(reasoning="No time found.", reply_draft="Got it, thanks!")

    hour, minute = clock
    lines = thread.strip().splitlines()
    location, loc_idx = _find_location(lines)
    day = _find_date(thread, now)
    appt = now.replace(
        year=day.year, month=day.month, day=day.day,
        hour=hour, minute=minute, second=0, microsecond=0,
    )
    duration = _find_duration_minutes(thread) or 60

    # "Arrive N minutes early" -> start at the ARRIVAL time; the end (and the
    # notes) stay anchored to the stated appointment time.
    early = _EARLY_RE.search(thread)
    if early:
        start = appt - timedelta(minutes=int(early.group(1)))
        notes = f"Appointment at {appt.strftime('%H:%M')}; arrive {early.group(1)} min early"
    else:
        start = appt
        notes = "(stub agent — wire the model to replace this)"

    stub_event = Event(
        title=_pick_title(lines, now, loc_idx),
        start=start.isoformat(),
        end=(appt + timedelta(minutes=duration)).isoformat(),
        location=location,
        reminder_minutes=_reminder_minutes(thread),
        notes=notes,
    )
    return ActionPlan(
        reasoning="(stub) parsed time/date/location heuristically.",
        events=[stub_event],
        reply_draft="Sounds good, see you then!",
    )
|
server/dedup.py
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Idempotency for autonomous mode: don't create the same event twice.
|
| 2 |
+
|
| 3 |
+
As more messages stream into a chat, re-running the agent over a rolling window
|
| 4 |
+
re-surfaces events already captured. ``filter_new`` returns only events not seen
|
| 5 |
+
before, keyed by normalized title + minute-rounded start. Durable JSON store
|
| 6 |
+
mirrors ``server/impact.py`` (env path + lock; no DB — local-first).
|
| 7 |
+
"""
|
| 8 |
+
from __future__ import annotations
|
| 9 |
+
|
| 10 |
+
import json
|
| 11 |
+
import os
|
| 12 |
+
import threading
|
| 13 |
+
from pathlib import Path
|
| 14 |
+
|
| 15 |
+
from dateutil import parser as dtparser
|
| 16 |
+
|
| 17 |
+
from .schema import Event
|
| 18 |
+
|
| 19 |
+
_lock = threading.Lock()
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def _path() -> Path:
    """Location of the seen-keys JSON file (``DEDUP_PATH`` env override)."""
    configured = os.environ.get("DEDUP_PATH", "/tmp/agent_seen.json")
    return Path(configured)
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def event_key(ev: Event) -> str:
    """Build the dedup identity ``"<title>|<start>"`` for an event.

    The title is stripped and lowercased; the start is parsed as ISO 8601 and
    truncated to the minute. A start that cannot be parsed is used as-is
    (stripped).

    Conservative by design — if the model rewords a title between messages the
    keys differ and a duplicate may slip through, which is safer than dropping
    a real event.
    """
    normalized_title = (ev.title or "").strip().lower()
    try:
        moment = dtparser.isoparse(ev.start)
    except (ValueError, TypeError):
        when = (ev.start or "").strip()
    else:
        when = moment.replace(second=0, microsecond=0).isoformat()
    return f"{normalized_title}|{when}"
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def _load() -> list[str]:
    """Read the persisted seen-keys, returning ``[]`` when unusable.

    A missing, unreadable or corrupt file starts fresh. The content must be a
    JSON list; anything else (a number, object, string, ...) is also treated
    as empty, and non-string entries are dropped, so callers can always pass
    the result to ``set()``.
    """
    try:
        data = json.loads(_path().read_text(encoding="utf-8"))
    except (OSError, ValueError):  # missing/unreadable/corrupt -> start fresh
        return []
    if not isinstance(data, list):
        return []
    return [key for key in data if isinstance(key, str)]
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
def filter_new(events: list[Event], record: bool = True) -> list[Event]:
    """Return the events whose key has not been recorded yet, in input order.

    Repeats of the same key within ``events`` are dropped too. With
    ``record=True`` the new keys are persisted immediately.

    ``record=False`` filters WITHOUT persisting — callers delivering the
    events somewhere fallible (e.g. a calendar push) should filter first and
    call ``mark_seen`` only after delivery succeeds; otherwise a transient
    failure permanently swallows the events ("seen" but never delivered).
    """
    with _lock:
        known = set(_load())
        fresh = []
        for candidate in events:
            key = event_key(candidate)
            if key not in known:
                known.add(key)
                fresh.append(candidate)
        if record and fresh:
            _path().write_text(json.dumps(sorted(known), indent=2))
    return fresh
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
def mark_seen(events: list[Event]) -> None:
    """Persist the keys of successfully delivered events.

    A no-op for an empty list; otherwise the keys are merged into the stored
    set and the file is rewritten.
    """
    if not events:
        return
    with _lock:
        known = set(_load())
        for delivered in events:
            known.add(event_key(delivered))
        serialized = json.dumps(sorted(known), indent=2)
        _path().write_text(serialized)
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
def reset() -> None:
    """Delete the seen-set file if it exists (test helper)."""
    with _lock:
        # missing_ok: an absent file already means "nothing seen".
        _path().unlink(missing_ok=True)
|
server/events.py
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""In-process activity bus: every pipeline stage emits structured events here so
|
| 2 |
+
the Activity dashboard can show what the LLM and agent are doing in real time.
|
| 3 |
+
|
| 4 |
+
A thread-safe ring buffer holds recent events. A contextvar (run_scope) tags all
|
| 5 |
+
events emitted during one agent run with the same run id, so the dashboard can
|
| 6 |
+
group them into per-run traces.
|
| 7 |
+
"""
|
| 8 |
+
from __future__ import annotations
|
| 9 |
+
|
| 10 |
+
import threading
|
| 11 |
+
from collections import deque
|
| 12 |
+
from contextlib import contextmanager
|
| 13 |
+
from contextvars import ContextVar
|
| 14 |
+
from datetime import datetime
|
| 15 |
+
from itertools import count
|
| 16 |
+
|
| 17 |
+
# Ring-buffer capacity: only the most recent MAXLEN events are retained.
MAXLEN = 800

# Stages of the pipeline, in display order (used by the stepper + chart).
STAGES = ["ingest", "vision", "model", "decision", "conflict", "calendar"]

_BUF: deque[dict] = deque(maxlen=MAXLEN)  # recent events, oldest first
_lock = threading.Lock()  # guards _BUF
_run_var: ContextVar = ContextVar("agent_run", default=None)  # id of the active run, if any
_seq = count(1)  # shared counter for event ids and run ids
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def _now() -> str:
    """Current local time as an ISO 8601 string with whole-second precision."""
    stamp = datetime.now()
    return stamp.isoformat(timespec="seconds")
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def emit(stage: str, message: str, level: str = "info", **payload) -> dict:
    """Record one activity event in the ring buffer and return it.

    The event carries a sequence id, a timestamp, the stage, level, message
    and the id of the enclosing run scope (None outside one). ``payload`` is
    merged in last and may carry latency_ms, events, conflicts, images,
    tokens, etc.
    """
    record = {
        "id": next(_seq),
        "ts": _now(),
        "stage": stage,
        "level": level,
        "message": message,
        "run": _run_var.get(),
    }
    record.update(payload)
    with _lock:
        _BUF.append(record)
    return record
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
@contextmanager
def run_scope(label: str = ""):
    """Tag every event emitted inside the block with a shared run id.

    The run id is the next sequence number, suffixed with ``:<label>`` when a
    label is given; it is yielded to the caller and cleared on exit.
    """
    serial = next(_seq)
    run_id = f"{serial}:{label}" if label else str(serial)
    token = _run_var.set(run_id)
    try:
        yield run_id
    finally:
        # Best-effort: when used inside a streaming generator that the server
        # drives across different contexts (e.g. Gradio's queue), reset(token)
        # raises "Token was created in a different Context". Clearing the
        # variable is enough either way.
        try:
            _run_var.reset(token)
        except ValueError:
            _run_var.set(None)
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
def recent(n: int = 120) -> list[dict]:
    """Return up to ``n`` of the most recent events, newest first.

    ``n <= 0`` returns an empty list. (A plain ``[-n:]`` slice would return
    the entire buffer for ``n == 0`` and drop the *oldest* items for a
    negative ``n``.)
    """
    if n <= 0:
        return []
    with _lock:
        newest = list(_BUF)[-n:]
    newest.reverse()  # newest first
    return newest
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
def current_stage() -> str | None:
    """Stage of the most recently emitted event, or None when nothing is buffered."""
    with _lock:
        if not _BUF:
            return None
        latest = _BUF[-1]
        return latest["stage"]
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
def metrics() -> dict:
    """Aggregate headline counters over the buffered events.

    Counts ingest messages, sums the ``events`` / ``conflicts`` / ``images``
    payloads, counts error-level events, and derives ``model_calls`` and
    ``avg_latency_ms`` from events with a non-zero ``latency_ms``.
    """
    with _lock:
        snapshot = list(_BUF)

    messages = events_created = conflicts = images_read = errors = 0
    latencies = []
    for item in snapshot:
        stage = item["stage"]
        if stage == "ingest":
            messages += 1
        elif stage == "decision":
            events_created += item.get("events", 0)
        elif stage == "conflict":
            conflicts += item.get("conflicts", 0)
        images_read += item.get("images", 0)
        if item.get("latency_ms"):
            latencies.append(item["latency_ms"])
        if item["level"] == "error":
            errors += 1

    average = round(sum(latencies) / len(latencies)) if latencies else 0
    return {
        "messages": messages,
        "events_created": events_created,
        "conflicts": conflicts,
        "images_read": images_read,
        "model_calls": len(latencies),
        "avg_latency_ms": average,
        "errors": errors,
    }
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
def stage_counts() -> list[dict]:
    """Count buffered events per known stage, ready for gr.BarPlot.

    Returns one ``{"stage", "count"}`` row per entry of ``STAGES`` in display
    order; events with any other stage are ignored.
    """
    with _lock:
        snapshot = list(_BUF)
    tally = dict.fromkeys(STAGES, 0)
    for item in snapshot:
        stage = item["stage"]
        if stage in tally:
            tally[stage] += 1
    rows = []
    for stage in STAGES:
        rows.append({"stage": stage, "count": tally[stage]})
    return rows
|
| 100 |
+
|
| 101 |
+
|
| 102 |
+
def recent_runs(n: int = 8) -> list[tuple[str, list[dict]]]:
    """Group buffered events by run id and return the newest ``n`` runs.

    Runs are ordered newest first, by the position of each run's first
    buffered event; events inside a run stay in emission order. Events
    emitted outside any run scope are skipped. ``n <= 0`` returns an empty
    list (a plain ``[-n:]`` slice would return every run for ``n == 0``).
    """
    if n <= 0:
        return []
    with _lock:
        snapshot = list(_BUF)
    groups: dict[str, list[dict]] = {}
    for item in snapshot:
        rid = item.get("run")
        if not rid:
            continue
        # dicts preserve insertion order, so keys are in first-seen order.
        groups.setdefault(rid, []).append(item)
    order = list(groups)
    return [(rid, groups[rid]) for rid in reversed(order[-n:])]
|
server/health.py
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Liveness + a hardware-adequacy signal for the UI banner and external monitors.
|
| 2 |
+
|
| 3 |
+
`health_status()` powers both `GET /health` and the on-page status banner. It
|
| 4 |
+
reports `degraded: true` when the *real* model would run on CPU-only hardware
|
| 5 |
+
(where extraction can be slow or time out). `FORCE_DEGRADED=1` forces it on so
|
| 6 |
+
the banner can be exercised without actually changing the Space's hardware.
|
| 7 |
+
"""
|
| 8 |
+
from __future__ import annotations
|
| 9 |
+
|
| 10 |
+
import os
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def gpu_available() -> bool:
    """Best-effort probe for an NVIDIA GPU on this host.

    This app serves via llama.cpp (no torch), so torch.cuda is not an option.
    A ``/dev/nvidia<N>`` device node counts as a GPU; failing that, a
    successful ``nvidia-smi`` run does. Any probe failure means "no GPU".
    """
    import glob
    import shutil
    import subprocess

    if glob.glob("/dev/nvidia[0-9]*"):
        return True
    if not shutil.which("nvidia-smi"):
        return False
    try:
        probe = subprocess.run(["nvidia-smi"], capture_output=True, timeout=5)
    except Exception:  # noqa: BLE001  any probe failure -> treat as no GPU
        return False
    return probe.returncode == 0
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def _is_local_inference_url(base: str) -> bool:
    """True when ``base`` is empty or its host is this machine (localhost/loopback)."""
    import ipaddress
    from urllib.parse import urlsplit

    if not base:
        return True
    try:
        # Without a scheme urlsplit would read "localhost:8080" as scheme:path.
        host = urlsplit(base if "://" in base else f"//{base}").hostname
    except ValueError:
        host = None
    if not host:
        # Unparseable value: fall back to the plain substring heuristic.
        return ("127.0.0.1" in base) or ("localhost" in base)
    if host == "localhost" or host.endswith(".localhost"):
        return True
    try:
        return ipaddress.ip_address(host).is_loopback  # 127.0.0.0/8, ::1
    except ValueError:
        return False  # an ordinary remote hostname


def health_status() -> dict:
    """Return liveness + the degraded/device/model signal (read at call time).

    Keys: ``ok``, ``device`` ("cuda" | "cpu" | "remote"), ``model``
    ("real" | "stub"), ``degraded`` and a human-readable ``reason``.
    ``FORCE_DEGRADED=1`` forces the degraded report; stub mode is never
    degraded. For the real model, the host counts as degraded only when
    inference is local (no ``INFERENCE_BASE_URL``, or one whose *host* is
    localhost/loopback) and no GPU is found.
    """
    cpu_reason = (
        "Running the model on CPU-only hardware. Extraction may be slow or time out. "
        "Upgrade to a GPU."
    )
    if os.environ.get("FORCE_DEGRADED") == "1":
        return {
            "ok": True, "device": "cpu", "model": "real", "degraded": True,
            "reason": cpu_reason,
        }
    # Stub mode is a deliberate, fast, free preview — not a degraded state.
    if os.environ.get("USE_STUB_EXTRACTOR") == "1":
        return {"ok": True, "device": "cpu", "model": "stub", "degraded": False, "reason": ""}

    # Real model. On the Space, INFERENCE_BASE_URL points at the *local*
    # llama-server, so local inference still depends on this host having a GPU.
    # A non-local host means inference runs elsewhere, so this host's hardware
    # is irrelevant — and the GPU probe is skipped entirely.
    base = os.environ.get("INFERENCE_BASE_URL", "")
    local = _is_local_inference_url(base)
    gpu = gpu_available() if local else False
    degraded = local and not gpu
    device = "cuda" if (local and gpu) else ("cpu" if local else "remote")
    reason = cpu_reason if degraded else ""
    return {"ok": True, "device": device, "model": "real", "degraded": degraded, "reason": reason}
|
server/imageutil.py
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Encode images as base64 data URIs for llama.cpp vision chat handlers.
|
| 2 |
+
|
| 3 |
+
Shared by the Mac collector (attachments) and the UI (manual upload).
|
| 4 |
+
"""
|
| 5 |
+
from __future__ import annotations
|
| 6 |
+
|
| 7 |
+
import base64
|
| 8 |
+
import mimetypes
|
| 9 |
+
from pathlib import Path
|
| 10 |
+
|
| 11 |
+
# Skip anything bigger than this to keep payloads/context sane.
|
| 12 |
+
MAX_BYTES = 4 * 1024 * 1024 # 4 MB
|
| 13 |
+
|
| 14 |
+
IMAGE_MIMES = {"image/png", "image/jpeg", "image/gif", "image/webp", "image/heic"}
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def is_image(path: str) -> bool:
    """Return True when the MIME type guessed from ``path`` is in IMAGE_MIMES."""
    guessed_type = mimetypes.guess_type(path)[0]
    return guessed_type in IMAGE_MIMES
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def _heic_to_jpeg(p: Path) -> bytes | None:
|
| 23 |
+
"""Transcode HEIC/HEIF to JPEG bytes (pillow-heif), or None if unavailable.
|
| 24 |
+
llama.cpp's clip handler can't decode HEIC, so raw pass-through would fail
|
| 25 |
+
or waste context — and iPhone attachments are predominantly HEIC."""
|
| 26 |
+
try:
|
| 27 |
+
import io
|
| 28 |
+
|
| 29 |
+
import pillow_heif
|
| 30 |
+
from PIL import Image
|
| 31 |
+
|
| 32 |
+
pillow_heif.register_heif_opener()
|
| 33 |
+
img = Image.open(p).convert("RGB")
|
| 34 |
+
buf = io.BytesIO()
|
| 35 |
+
img.save(buf, format="JPEG", quality=88)
|
| 36 |
+
return buf.getvalue()
|
| 37 |
+
except Exception: # noqa: BLE001 no pillow-heif / corrupt file -> skip
|
| 38 |
+
return None
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def to_data_uri(path: str) -> str | None:
    """Return a `data:<mime>;base64,...` URI, or None if not a usable image.

    HEIC is transcoded to JPEG (the vision stack can't decode HEIC); when
    transcoding isn't available the file is skipped, never sent undecodable.

    Args:
        path: Filesystem path of the candidate image.

    Returns:
        The data URI, or None when the path is not a regular file, is larger
        than MAX_BYTES, is not a recognised image type, or cannot be read.
    """
    p = Path(path)
    try:
        # is_file() (not exists()) so a directory named like an image is skipped.
        if not p.is_file() or p.stat().st_size > MAX_BYTES:
            return None
    except OSError:
        return None
    mime, _ = mimetypes.guess_type(str(p))
    # Decide HEIC/HEIF *before* the IMAGE_MIMES gate: a ".heif" file (or a
    # ".heic" one where mimetypes has no mapping) has a MIME outside
    # IMAGE_MIMES and would otherwise never reach the transcoder.
    if mime in ("image/heic", "image/heif") or p.suffix.lower() in (".heic", ".heif"):
        jpeg = _heic_to_jpeg(p)
        if jpeg is None:
            return None
        return "data:image/jpeg;base64," + base64.b64encode(jpeg).decode("ascii")
    if mime not in IMAGE_MIMES:
        return None
    try:
        raw = p.read_bytes()
    except OSError:  # vanished or unreadable between the stat and the read
        return None
    b64 = base64.b64encode(raw).decode("ascii")
    return f"data:{mime};base64,{b64}"
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
def paths_to_data_uris(paths: list[str]) -> list[str]:
    """Convert each path with to_data_uri(), dropping the ones it rejects.

    A falsy ``paths`` (None or empty) yields an empty list.
    """
    uris = []
    for candidate in paths or []:
        uri = to_data_uri(candidate)
        if uri:
            uris.append(uri)
    return uris
|
server/impact.py
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Durable weekly impact metrics: how much the agent actually saved the user.
|
| 2 |
+
|
| 3 |
+
Unlike the in-memory activity bus (``server/events.py``), which is an 800-entry
|
| 4 |
+
ring buffer lost on restart, this records a small set of counters **per ISO week**
|
| 5 |
+
to a JSON file, so the "This week" panel accumulates over real use and survives
|
| 6 |
+
restarts.
|
| 7 |
+
|
| 8 |
+
Counters per week: ``events_captured``, ``conflicts_caught``, ``minutes_saved``.
|
| 9 |
+
A "capture" is the user *accepting* events by exporting them (``.ics`` download or
|
| 10 |
+
Google Calendar push) — see ``ui/blocks.py``. ``minutes_saved`` is a deliberately
|
| 11 |
+
conservative, fully configurable **estimate** (not a measurement): a fixed number
|
| 12 |
+
of minutes per event captured plus per conflict caught.
|
| 13 |
+
|
| 14 |
+
Persistence mirrors ``app.py``'s ``_append_feed``: an env-overridable JSON file
|
| 15 |
+
under ``/tmp`` by default. No database — local-first by design.
|
| 16 |
+
"""
|
| 17 |
+
from __future__ import annotations
|
| 18 |
+
|
| 19 |
+
import json
|
| 20 |
+
import os
|
| 21 |
+
import threading
|
| 22 |
+
from datetime import datetime
|
| 23 |
+
from pathlib import Path
|
| 24 |
+
|
| 25 |
+
_lock = threading.Lock()
|
| 26 |
+
|
| 27 |
+
_ZERO = {"events_captured": 0, "conflicts_caught": 0, "minutes_saved": 0}
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def _path() -> Path:
|
| 31 |
+
return Path(os.environ.get("IMPACT_PATH", "/tmp/impact_weeks.json"))
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def _min_per_event() -> int:
|
| 35 |
+
return int(os.environ.get("IMPACT_MIN_PER_EVENT", "8"))
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
def _min_per_conflict() -> int:
|
| 39 |
+
return int(os.environ.get("IMPACT_MIN_PER_CONFLICT", "15"))
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def _week_key(when: datetime | None = None) -> str:
|
| 43 |
+
iso = (when or datetime.now()).isocalendar()
|
| 44 |
+
return f"{iso.year}-W{iso.week:02d}"
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
def _load() -> dict:
    """Read the per-week counters from disk.

    Returns:
        The decoded JSON object, or ``{}`` when the file is missing, unreadable,
        not valid JSON, or valid JSON that is not an object (callers use
        ``.get`` on the result, so anything else would break them).
    """
    try:
        data = json.loads(_path().read_text())
    except Exception:  # noqa: BLE001  missing/corrupt file -> start fresh
        return {}
    return data if isinstance(data, dict) else {}
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
def record_capture(events_captured: int, conflicts_caught: int = 0) -> dict:
    """Durably add to the current week's counters; return that week's record.

    Re-reads the file before incrementing so earlier counts and restarts are
    preserved (aggregate, never overwrite-from-memory). The module lock
    serialises writers within this process.

    Args:
        events_captured: Number of events the user accepted in this capture.
        conflicts_caught: Number of conflicts surfaced for this capture.

    Returns:
        A copy of the updated record for the current ISO week, with keys
        ``events_captured``, ``conflicts_caught`` and ``minutes_saved``.
    """
    minutes = events_captured * _min_per_event() + conflicts_caught * _min_per_conflict()
    key = _week_key()
    with _lock:
        data = _load()
        if not isinstance(data, dict):  # tolerate a non-object JSON document
            data = {}
        stored = data.get(key)
        # A malformed (non-object) week entry is treated as "nothing recorded".
        wk = {**_ZERO, **(stored if isinstance(stored, dict) else {})}
        wk["events_captured"] += events_captured
        wk["conflicts_caught"] += conflicts_caught
        wk["minutes_saved"] += minutes
        data[key] = wk
        path = _path()
        path.parent.mkdir(parents=True, exist_ok=True)
        # Write to a sibling temp file and swap it in: an interrupted write must
        # not leave a truncated file, which _load() would read as "no history".
        tmp = path.with_name(path.name + ".tmp")
        tmp.write_text(json.dumps(data, indent=2))
        os.replace(tmp, path)
    return dict(wk)
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
def this_week() -> dict:
    """Return the current ISO week's counters without modifying anything.

    Missing counters default to zero, so an unrecorded week is all zeros.
    The durable, weekly analogue of ``events.metrics()``.
    """
    recorded = _load().get(_week_key(), {})
    return {**_ZERO, **recorded}
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
def reset() -> None:
    """Delete the impact file under the lock; a missing file is fine (test helper)."""
    with _lock:
        target = _path()
        try:
            target.unlink()
        except FileNotFoundError:
            return
|
server/mcp_tools.py
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Agent-facing tool wrappers exposed via Gradio's MCP server.
|
| 2 |
+
|
| 3 |
+
Each function below has a clean signature + docstring on purpose — Gradio's MCP
|
| 4 |
+
layer (`mcp_server=True` in app.py) reads the type hints and docstring to build
|
| 5 |
+
the JSON-Schema a remote MCP client sees. Keep them stateless and JSON-friendly:
|
| 6 |
+
inputs are str / list[dict] / etc., outputs are dict / str / list[dict] (never
|
| 7 |
+
pydantic objects, which don't serialise through the MCP boundary).
|
| 8 |
+
|
| 9 |
+
These wrap the existing pipeline (server/pipeline.run_pipeline) and free/busy
|
| 10 |
+
math (calendar_out/freebusy) — no new business logic lives here, just the
|
| 11 |
+
shape adaptation an external agent expects.
|
| 12 |
+
"""
|
| 13 |
+
from __future__ import annotations
|
| 14 |
+
|
| 15 |
+
import base64
|
| 16 |
+
import time
|
| 17 |
+
from collections import OrderedDict
|
| 18 |
+
from typing import Optional
|
| 19 |
+
|
| 20 |
+
from calendar_out.freebusy import Busy, check_conflicts as _freebusy_check_conflicts, load_ics_busy
|
| 21 |
+
from calendar_out.ics import events_to_ics
|
| 22 |
+
from server.pipeline import AgentRequest, run_pipeline
|
| 23 |
+
from server.schema import Event
|
| 24 |
+
|
| 25 |
+
# Short-lived extraction cache. The Agent-tab orchestrator extracts TWICE per
|
| 26 |
+
# run — once when the MiniCPM planner calls this tool over MCP, then again when
|
| 27 |
+
# the scripted path finalizes — and each call runs the full gemma-cal E4B. With
|
| 28 |
+
# identical inputs the second call is pure waste, so memoize on the EXACT inputs
|
| 29 |
+
# (thread + images + memory). Different memory/images -> different key -> a fresh
|
| 30 |
+
# (correct) extraction; the win is the common no-memory case. TTL is generous so
|
| 31 |
+
# the scripted call still hits after a ~2-min planner run; small maxsize bounds
|
| 32 |
+
# cross-request staleness (same input -> same output anyway).
|
| 33 |
+
_EXTRACT_CACHE: "OrderedDict[tuple, tuple[float, dict]]" = OrderedDict()
|
| 34 |
+
_EXTRACT_TTL = 600.0
|
| 35 |
+
_EXTRACT_MAX = 8
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
def extract_events(thread: str, images: Optional[list[str]] = None,
                   memory: Optional[str] = None) -> dict:
    """Extract calendar events from a pasted iMessage thread (and optional screenshots).

    The headline tool. Reads a chat or screenshot, returns an ActionPlan with the
    events found, any conflicts against the user's calendar, and a suggested reply.
    Runs 100% locally inside the Space via llama.cpp — no cloud AI APIs.

    Args:
        thread: Plain-text iMessage conversation, e.g. "Alice: pickup 5pm Thursday".
            Either ``thread`` or ``images`` must be non-empty.
        images: Optional list of base64-encoded screenshots (raw base64 or data URIs).
            Useful when the schedule lives in a screenshot rather than text.
        memory: Optional plain-text recall block about the user (people and their
            roles, preferences like default reminders or days they decline) — used
            to personalize extraction. e.g. "Dana is the soccer coach".

    Returns:
        ActionPlan as a JSON-serialisable dict with keys: ``reasoning``,
        ``events`` (list of {title, start, end, location, attendees, ...}),
        ``conflicts``, ``proposed_times``, ``reply_draft``, ``needs_clarification``.
    """
    import copy  # local import: only this function needs it

    # Memoize on the exact inputs; see the cache notes above _EXTRACT_CACHE.
    key = (thread or "", tuple(images or []), memory or "")
    hit = _EXTRACT_CACHE.get(key)
    if hit is not None:
        if time.monotonic() - hit[0] < _EXTRACT_TTL:
            _EXTRACT_CACHE.move_to_end(key)
            # Hand out a copy so a caller mutating its plan cannot alter what
            # later cache hits receive.
            return copy.deepcopy(hit[1])
        # Expired: drop it now rather than letting it occupy a slot.
        _EXTRACT_CACHE.pop(key, None)
    req = AgentRequest(thread=thread or "", images=images or [], memory=memory,
                       return_ics=False)
    resp = run_pipeline(req)
    plan = resp.plan.model_dump()
    # Stamp after the extraction finishes so its run time does not eat into the
    # TTL, and store a private copy so the returned plan is independent.
    _EXTRACT_CACHE[key] = (time.monotonic(), copy.deepcopy(plan))
    _EXTRACT_CACHE.move_to_end(key)
    while len(_EXTRACT_CACHE) > _EXTRACT_MAX:
        _EXTRACT_CACHE.popitem(last=False)  # evict least-recently-used first
    return plan
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
def make_ics(events: list[dict]) -> str:
    """Render a list of event dicts as an .ics file (base64-encoded).

    Args:
        events: List of event dicts in the shape returned by ``extract_events``
            — each needs at least ``title`` and ``start`` (ISO 8601). Optional:
            ``end``, ``location``, ``attendees``, ``reminder_minutes``, ``notes``.

    Returns:
        Base64-encoded VCALENDAR bytes. Decode and write to ``something.ics`` to
        import into any calendar app.
    """
    # Build Event objects from the plain dicts, render them, then base64-encode
    # the rendered bytes into an ASCII string for the JSON boundary.
    calendar_bytes = events_to_ics([Event(**fields) for fields in events])
    encoded = base64.b64encode(calendar_bytes)
    return encoded.decode("ascii")
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
def check_conflicts(events: list[dict], ics_base64: str) -> list[dict]:
    """Find clashes between proposed events and busy intervals from an .ics calendar.

    Deterministic free/busy math — runs without the LLM, so it's safe for agents
    to call as a fast verification step after ``extract_events``.

    Args:
        events: List of proposed event dicts (same shape as ``extract_events``
            output). Each event needs at least ``title`` and ``start``.
        ics_base64: Base64-encoded .ics calendar to check against. Typically the
            user's current calendar exported from Google/Apple/Outlook.

    Returns:
        List of conflict dicts: ``{event_index, clashes_with, severity}`` where
        severity is one of ``"overlap"``, ``"adjacent"``, ``"tight"``. Empty list
        if nothing clashes.
    """
    # No calendar supplied -> nothing to clash with.
    if not ics_base64:
        return []
    try:
        raw_calendar = base64.b64decode(ics_base64)
        busy = load_ics_busy(raw_calendar)
    except Exception:  # noqa: BLE001  malformed .ics -> no conflict context
        return []
    proposed = [Event(**fields) for fields in events]
    clashes = _freebusy_check_conflicts(proposed, busy)
    return [clash.model_dump() for clash in clashes]
|
server/memory.py
ADDED
|
@@ -0,0 +1,174 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Persistent 'grows-with-you' agent memory.
|
| 2 |
+
|
| 3 |
+
Durable facts and preferences that personalize extraction over time:
|
| 4 |
+
people->roles ("Dana is the soccer coach"), rules ("you decline Mondays"),
|
| 5 |
+
default locations. Stored as JSON at MEMORY_PATH.
|
| 6 |
+
|
| 7 |
+
- recall() -> a compact block injected into the agent prompt (server/agent.py)
|
| 8 |
+
- remember() -> add/strengthen a fact (Memory tab, or a Hermes `remember` tool-call)
|
| 9 |
+
- forget() -> drop a fact (Memory tab)
|
| 10 |
+
- observe_plan()-> conservatively learn recurring contacts from extracted events
|
| 11 |
+
|
| 12 |
+
This is the "memory" half of a Hermes-style grows-with-you agent; the model
|
| 13 |
+
(served via INFERENCE_BASE_URL) is the reasoning half.
|
| 14 |
+
"""
|
| 15 |
+
from __future__ import annotations
|
| 16 |
+
|
| 17 |
+
import json
|
| 18 |
+
import os
|
| 19 |
+
import threading
|
| 20 |
+
from pathlib import Path
|
| 21 |
+
|
| 22 |
+
MEMORY_PATH = Path(os.environ.get("MEMORY_PATH", "/tmp/agent_memory.json"))
|
| 23 |
+
MAX_FACTS = 200
|
| 24 |
+
KINDS = ("contact", "preference", "location", "note")
|
| 25 |
+
|
| 26 |
+
_lock = threading.Lock()
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def _norm(text: str) -> str:
|
| 30 |
+
return " ".join(text.lower().split())
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def _load() -> dict:
    """Read the memory state from MEMORY_PATH.

    Returns:
        A dict with a ``facts`` list and an integer ``seq`` id counter. A
        missing, unreadable or malformed file yields the empty state.
    """
    try:
        data = json.loads(MEMORY_PATH.read_text())
    except Exception:  # noqa: BLE001  missing/corrupt -> empty
        data = None
    if not (isinstance(data, dict) and isinstance(data.get("facts"), list)):
        return {"facts": [], "seq": 0}
    if not isinstance(data.get("seq"), int):
        # remember() does ``state["seq"] += 1``; a file lacking a usable counter
        # would raise there. Resume from the highest id present so new ids stay
        # unique.
        known_ids = [
            f["id"] for f in data["facts"]
            if isinstance(f, dict) and isinstance(f.get("id"), int)
        ]
        data["seq"] = max(known_ids, default=0)
    return data
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def _save(state: dict) -> None:
    """Persist ``state`` as JSON at MEMORY_PATH, creating parent directories.

    The data is written to a sibling temp file and then swapped into place, so
    an interrupted write cannot leave a truncated file (which _load() would
    read as empty memory).
    """
    MEMORY_PATH.parent.mkdir(parents=True, exist_ok=True)
    tmp = MEMORY_PATH.with_name(MEMORY_PATH.name + ".tmp")
    tmp.write_text(json.dumps(state, indent=2))
    os.replace(tmp, MEMORY_PATH)
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def remember(text: str, kind: str = "note") -> dict | None:
    """Store a fact, or bump the weight of an existing fact with the same text.

    Text is matched after normalisation (case and whitespace insensitive). An
    unknown ``kind`` falls back to "note". Returns the stored fact dict, or
    None when ``text`` is empty.
    """
    cleaned = (text or "").strip()
    if not cleaned:
        return None
    fact_kind = kind if kind in KINDS else "note"
    wanted = _norm(cleaned)
    with _lock:
        state = _load()
        existing = next((f for f in state["facts"] if _norm(f["text"]) == wanted), None)
        if existing is not None:
            # Already known: reinforce it instead of adding a duplicate.
            existing["weight"] = existing.get("weight", 1) + 1
            _save(state)
            return existing
        state["seq"] += 1
        fact = {"id": state["seq"], "kind": fact_kind, "text": cleaned, "weight": 1}
        # Keep only the newest MAX_FACTS entries.
        state["facts"] = (state["facts"] + [fact])[-MAX_FACTS:]
        _save(state)
        return fact
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
def forget(fact_id: int) -> bool:
    """Remove the fact whose id equals ``fact_id``; return True if one was removed.

    The file is rewritten only when something was actually dropped.
    """
    with _lock:
        state = _load()
        kept = [f for f in state["facts"] if f["id"] != int(fact_id)]
        if len(kept) == len(state["facts"]):
            return False
        state["facts"] = kept
        _save(state)
        return True
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
def list_facts() -> list[dict]:
    """Return the stored facts list as loaded from disk."""
    state = _load()
    return state["facts"]
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
def recall(limit: int = 20) -> str:
    """Build the compact 'what I know about you' prompt block; '' when empty.

    Facts are ordered by weight, highest first, and capped at ``limit`` so the
    prompt stays small but useful.
    """
    ranked = sorted(list_facts(), key=lambda f: f.get("weight", 1), reverse=True)
    top = ranked[:limit]
    if not top:
        return ""
    bullets = [f"- {fact['text']}" for fact in top]
    return "What I know about you (memory):\n" + "\n".join(bullets)
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
def observe_plan(plan) -> None:
    """Learn contacts from an extracted ActionPlan without an LLM round-trip.

    Every non-empty attendee name of at most 40 characters is passed to
    remember() as a contact, so repeated attendees are reinforced over time.
    Explicit facts still come via remember()/the Memory tab/tool-calls. Any
    error is swallowed.
    """
    try:
        for event in getattr(plan, "events", []) or []:
            for raw_name in getattr(event, "attendees", []) or []:
                contact = (raw_name or "").strip()
                if not contact or len(contact) > 40:
                    continue
                remember(f"{contact} is a contact you make plans with", kind="contact")
    except Exception:  # noqa: BLE001  memory must never break extraction
        pass
|
| 110 |
+
|
| 111 |
+
|
| 112 |
+
def reset() -> None:
    """Overwrite the stored memory with the empty state (used by tests)."""
    empty_state = {"facts": [], "seq": 0}
    with _lock:
        _save(empty_state)
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
# --------------------------------------------------------------------------- #
|
| 119 |
+
# Client-owned memory (per-user, browser localStorage). These are PURE helpers
|
| 120 |
+
# that operate on a passed-in facts list — no global file — so each visitor's
|
| 121 |
+
# memory can live on their device and be threaded through the agent per request.
|
| 122 |
+
# --------------------------------------------------------------------------- #
|
| 123 |
+
def facts_to_recall(facts: list[dict], limit: int = 20) -> str:
    """Same compact 'what I know about you' block as recall(), but for a passed
    facts list (client/localStorage memory). '' if empty.

    The list comes from the client, so it is sanitised first: entries that are
    not dicts or have no text are dropped *before* ranking (so they neither
    crash the sort nor use up the ``limit``), and a missing or non-numeric
    weight counts as 1.

    Args:
        facts: Fact dicts with ``text`` and optional ``weight``; may be None.
        limit: Maximum number of facts to include.

    Returns:
        The header line followed by one "- text" bullet per fact, highest
        weight first, or '' when no usable fact remains.
    """
    def _weight(fact: dict) -> float:
        value = fact.get("weight", 1)
        return value if isinstance(value, (int, float)) else 1

    usable = [f for f in (facts or []) if isinstance(f, dict) and f.get("text")]
    top = sorted(usable, key=_weight, reverse=True)[:limit]
    if not top:
        return ""
    lines = "\n".join(f"- {f['text']}" for f in top)
    return "What I know about you (memory):\n" + lines
|
| 131 |
+
|
| 132 |
+
|
| 133 |
+
def merge_facts(facts: list[dict], texts, kind: str = "note") -> list[dict]:
    """Add texts to a facts list (dedup by normalized text → bump weight), keeping
    ids stable. Returns a NEW list (caller persists it).

    The inputs come from client storage, so malformed pieces are skipped rather
    than raising: non-dict facts, non-string texts and empty pairs are ignored,
    and an unparsable id counts as 0 when choosing the next id.

    Args:
        facts: Existing fact dicts; the originals are not mutated. May be None.
        texts: An iterable of strings, or of (text, kind) pairs. A single bare
            string is treated as one text, not iterated per character.
        kind: Kind for plain-string items; unknown kinds fall back to "note".

    Returns:
        The merged facts, truncated to the newest MAX_FACTS entries.
    """
    def _id_of(fact: dict) -> int:
        try:
            return int(fact.get("id", 0))
        except (TypeError, ValueError):
            return 0

    merged = [dict(f) for f in (facts or []) if isinstance(f, dict)]
    by_key = {
        _norm(f["text"]): f
        for f in merged
        if isinstance(f.get("text"), str) and f["text"]
    }
    next_id = max((_id_of(f) for f in merged), default=0) + 1
    if isinstance(texts, str):
        # Iterating a str yields characters; wrap it so it is one fact.
        texts = [texts]
    for item in texts or []:
        if isinstance(item, (tuple, list)):
            if not item:
                continue
            text, k = item[0], (item[1] if len(item) > 1 else kind)
        else:
            text, k = item, kind
        if not isinstance(text, str):
            continue
        text = text.strip()
        if not text:
            continue
        if k not in KINDS:
            k = "note"
        key = _norm(text)
        if key in by_key:
            known = by_key[key]
            known["weight"] = known.get("weight", 1) + 1
        else:
            fresh = {"id": next_id, "kind": k, "text": text, "weight": 1}
            next_id += 1
            merged.append(fresh)
            by_key[key] = fresh
    return merged[-MAX_FACTS:]
|
| 160 |
+
|
| 161 |
+
|
| 162 |
+
def learn_from_plan(plan) -> list[str]:
    """Collect the contact texts an ActionPlan teaches, without storing them.

    Applies the same attendee rule as observe_plan() (non-empty, at most 40
    characters) but RETURNS the texts for a client-side merge instead of
    writing to the global file. Errors are swallowed; whatever was collected
    up to that point is returned.
    """
    learned: list[str] = []
    try:
        for event in getattr(plan, "events", []) or []:
            for raw_name in getattr(event, "attendees", []) or []:
                contact = (raw_name or "").strip()
                if not contact or len(contact) > 40:
                    continue
                learned.append(f"{contact} is a contact you make plans with")
    except Exception:  # noqa: BLE001  memory must never break extraction
        pass
    return learned
|
server/model.py
ADDED
|
@@ -0,0 +1,317 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Load the fine-tuned Gemma 4 GGUF and run inference via llama.cpp.
|
| 2 |
+
|
| 3 |
+
Llama Champion: all generation goes through llama-cpp-python — no cloud AI API.
|
| 4 |
+
The GGUF is downloaded from HF at startup so the Space image stays small.
|
| 5 |
+
|
| 6 |
+
Two inference locations, selected by env:
|
| 7 |
+
- in-process llama.cpp, GPU-offloaded inside an @spaces.GPU lease (ZeroGPU), or
|
| 8 |
+
- a remote OpenAI-compatible / llama.cpp server via INFERENCE_BASE_URL
|
| 9 |
+
(e.g. a llama-server on the phone itself, or a backend).
|
| 10 |
+
"""
|
| 11 |
+
from __future__ import annotations
|
| 12 |
+
|
| 13 |
+
import os
|
| 14 |
+
import threading
|
| 15 |
+
import time
|
| 16 |
+
|
| 17 |
+
from huggingface_hub import hf_hub_download
|
| 18 |
+
|
| 19 |
+
from . import events
|
| 20 |
+
|
| 21 |
+
# The platform runs the gemma-cal EDGE fine-tune (Gemma-4 E4B, ~5GB Q4) — our own
|
| 22 |
+
# calendar-native model, eval-gated before every publish (docs/eval-roadmap.md).
|
| 23 |
+
# MODEL SIZE (hackathon hard constraint, <= 32B): E4B = ~4B effective params.
|
| 24 |
+
# All inference is local via llama.cpp (no cloud AI).
|
| 25 |
+
MODEL_REPO = os.environ.get("MODEL_REPO", "ParetoOptimal/gemma-4-cal-gguf")
|
| 26 |
+
MODEL_FILE = os.environ.get("MODEL_FILE", "gemma-cal-e4b-Q4_K_M.gguf")
|
| 27 |
+
# Vision projector (mmproj). Set to enable image input; leave empty for text-only.
|
| 28 |
+
# MMPROJ_REPO lets the projector come from a different repo than the LLM — the E4B
|
| 29 |
+
# edge model pairs with the base E4B's projector, not a projector in our repo.
|
| 30 |
+
MMPROJ_REPO = os.environ.get("MMPROJ_REPO", "") or os.environ.get("MODEL_REPO", "ParetoOptimal/gemma-4-cal-gguf")
|
| 31 |
+
MMPROJ_FILE = os.environ.get("MMPROJ_FILE", "")
|
| 32 |
+
# llama-cpp-python vision handler class (in llama_cpp.llama_chat_format). Gemma 4
|
| 33 |
+
# vision may ship a dedicated handler; the generic clip/Llava handler is the default.
|
| 34 |
+
CHAT_HANDLER = os.environ.get("CHAT_HANDLER", "Llava15ChatHandler")
|
| 35 |
+
|
| 36 |
+
N_CTX = int(os.environ.get("N_CTX", "8192"))
|
| 37 |
+
N_GPU_LAYERS = int(os.environ.get("N_GPU_LAYERS", "-1")) # -1 = offload all (GPU)
|
| 38 |
+
GPU_DURATION = int(os.environ.get("GPU_DURATION", "120")) # ZeroGPU lease seconds
|
| 39 |
+
|
| 40 |
+
# Configurable inference location. If INFERENCE_BASE_URL is set, generation is
|
| 41 |
+
# delegated to a remote OpenAI-compatible / llama.cpp server (e.g. a llama-server
|
| 42 |
+
# running on the phone itself, or a backend) instead of loading the GGUF in-process.
|
| 43 |
+
# This is how the same agent runs on-device OR thin-client — selected by env.
|
| 44 |
+
INFERENCE_BASE_URL = os.environ.get("INFERENCE_BASE_URL", "")
|
| 45 |
+
INFERENCE_API_KEY = os.environ.get("INFERENCE_API_KEY", "")
|
| 46 |
+
INFERENCE_MODEL = os.environ.get("INFERENCE_MODEL", "local")
|
| 47 |
+
# Let a tool-calling model (Hermes) write its own long-term memory mid-run.
|
| 48 |
+
# Only applies to the remote path (server/tools.py); off by default.
|
| 49 |
+
HERMES_TOOLS = os.environ.get("HERMES_TOOLS") == "1"
|
| 50 |
+
|
| 51 |
+
_llm = None
|
| 52 |
+
_lock = threading.Lock()
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
# ZeroGPU: GPU-bound work must run inside an @spaces.GPU function (the GPU is
|
| 56 |
+
# attached only for that call). Locally / in CI the `spaces` package is absent,
|
| 57 |
+
# so `gpu` degrades to a no-op decorator and stub mode never touches this path.
|
| 58 |
+
try:
    from spaces import GPU as _spaces_gpu
except Exception:  # noqa: BLE001 - spaces not installed (local/CI)
    _spaces_gpu = None


def gpu(fn):
    """Wrap ``fn`` in a ZeroGPU lease of GPU_DURATION seconds when available.

    When the ``spaces`` package could not be imported this is a no-op and
    returns ``fn`` itself.
    """
    if _spaces_gpu is None:
        return fn
    return _spaces_gpu(duration=GPU_DURATION)(fn)
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
def _preload_cuda_libs():
|
| 70 |
+
"""Preload CUDA userspace libs so the prebuilt CUDA llama-cpp-python wheel can
|
| 71 |
+
dlopen. The ZeroGPU/Gradio-SDK env lacks libcudart.so.12 on the default loader
|
| 72 |
+
path; the nvidia-*-cu12 pip packages provide them. We CDLL them RTLD_GLOBAL so
|
| 73 |
+
the llama .so's NEEDED deps resolve. Path-independent (no LD_LIBRARY_PATH guess);
|
| 74 |
+
a no-op off-Linux / when the packages aren't installed."""
|
| 75 |
+
import ctypes
|
| 76 |
+
import glob
|
| 77 |
+
import os
|
| 78 |
+
|
| 79 |
+
try:
|
| 80 |
+
import nvidia # namespace package from nvidia-*-cu12 wheels
|
| 81 |
+
except Exception: # noqa: BLE001
|
| 82 |
+
return
|
| 83 |
+
# nvidia is a PEP 420 namespace package: __file__ is None, use __path__.
|
| 84 |
+
bases = list(getattr(nvidia, "__path__", []) or [])
|
| 85 |
+
# cublas before its dependents is unnecessary ($ORIGIN RPATH resolves siblings).
|
| 86 |
+
for base in bases:
|
| 87 |
+
for sub in ("cuda_runtime", "cuda_nvrtc", "cublas"):
|
| 88 |
+
for so in sorted(glob.glob(os.path.join(base, sub, "lib", "*.so*"))):
|
| 89 |
+
try:
|
| 90 |
+
ctypes.CDLL(so, mode=ctypes.RTLD_GLOBAL)
|
| 91 |
+
except OSError:
|
| 92 |
+
pass
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
def _build_chat_handler():
    """Build the vision chat handler, or return None for text-only operation.

    With MMPROJ_FILE set, the projector is downloaded from MMPROJ_REPO and
    handed to the handler class named by CHAT_HANDLER (looked up in
    ``llama_cpp.llama_chat_format``).
    """
    if MMPROJ_FILE:
        import llama_cpp.llama_chat_format as fmt

        projector_path = hf_hub_download(repo_id=MMPROJ_REPO, filename=MMPROJ_FILE)
        return getattr(fmt, CHAT_HANDLER)(clip_model_path=projector_path, verbose=False)
    return None
|
| 104 |
+
|
| 105 |
+
|
| 106 |
+
def get_llm():
    """Return the shared Llama instance, downloading and loading it on first use.

    The instance is cached in the module-level ``_llm``. It is checked once
    without the lock (fast path) and again under the lock, so only one caller
    performs the load.
    """
    global _llm
    if _llm is not None:
        return _llm
    with _lock:
        if _llm is None:
            _preload_cuda_libs()  # satisfy libcudart.so.12 etc. before loading
            from llama_cpp import Llama  # imported lazily so tests can stub

            gguf_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE)
            _llm = Llama(
                model_path=gguf_path,
                n_ctx=N_CTX,
                n_gpu_layers=N_GPU_LAYERS,
                chat_handler=_build_chat_handler(),  # enables image_url inputs
                verbose=False,
            )
        return _llm
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
# --- GPU-scoped inner functions (run inside the ZeroGPU lease) ---
|
| 127 |
+
# These do the actual in-process llama.cpp work; emits stay in the main-process
|
| 128 |
+
# wrappers below because in-memory state (the events bus) isn't shared back from
|
| 129 |
+
# the ZeroGPU subprocess.
|
| 130 |
+
@gpu
def _infer_text(messages: list[dict], temperature: float, max_tokens: int) -> str:
    """Run one in-process chat completion and return the first choice's text."""
    llm = get_llm()
    completion = llm.create_chat_completion(
        messages=messages, temperature=temperature, max_tokens=max_tokens
    )
    first_choice = completion["choices"][0]
    return first_choice["message"]["content"]
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
@gpu
def _infer_json(messages: list[dict], json_schema: dict, temperature: float, max_tokens: int):
    """Run one schema-constrained chat completion in-process.

    Returns:
        A ``(content, completion_tokens)`` pair; the token count is None when
        the completion carries no usage information.
    """
    llm = get_llm()
    completion = llm.create_chat_completion(
        messages=messages,
        temperature=temperature,
        max_tokens=max_tokens,
        response_format={"type": "json_object", "schema": json_schema},
    )
    usage = completion.get("usage") or {}
    content = completion["choices"][0]["message"]["content"]
    return content, usage.get("completion_tokens")
|
| 148 |
+
|
| 149 |
+
|
| 150 |
+
@gpu
def _infer_stream(messages: list[dict], json_schema: dict, temperature: float, max_tokens: int):
    """Stream a schema-constrained chat completion in-process.

    Yields each non-empty ``delta.content`` fragment of the first choice, in the
    order llama.cpp produces them; chunks without content are skipped.
    """
    chunks = get_llm().create_chat_completion(
        messages=messages,
        temperature=temperature,
        max_tokens=max_tokens,
        response_format={"type": "json_object", "schema": json_schema},
        stream=True,
    )
    for chunk in chunks:
        piece = chunk["choices"][0].get("delta", {}).get("content")
        if not piece:
            continue
        yield piece
|
| 163 |
+
|
| 164 |
+
|
| 165 |
+
# --- remote inference seam (on-device / thin-client via INFERENCE_BASE_URL) ---
|
| 166 |
+
def _remote_payload(messages, json_schema, temperature, max_tokens, stream):
    """Build the JSON body for a remote ``/chat/completions`` request.

    The body names ``INFERENCE_MODEL``, forwards the sampling settings, and
    constrains the output to ``json_schema`` under the schema name "ActionPlan".
    """
    # llama-server accepts json_schema (OpenAI-style); the in-process path uses
    # the json_object+schema form. Both grammar-constrain the output.
    response_format = {
        "type": "json_schema",
        "json_schema": {"name": "ActionPlan", "schema": json_schema, "strict": True},
    }
    payload = {
        "model": INFERENCE_MODEL,
        "messages": messages,
        "temperature": temperature,
        "max_tokens": max_tokens,
    }
    payload["response_format"] = response_format
    payload["stream"] = stream
    return payload
|
| 180 |
+
|
| 181 |
+
|
| 182 |
+
def _remote_headers() -> dict:
    """Return the HTTP headers for remote inference requests.

    Always sets a JSON content type; adds a bearer ``Authorization`` header only
    when ``INFERENCE_API_KEY`` is set.
    """
    headers = {"Content-Type": "application/json"}
    if not INFERENCE_API_KEY:
        return headers
    headers["Authorization"] = f"Bearer {INFERENCE_API_KEY}"
    return headers
|
| 187 |
+
|
| 188 |
+
|
| 189 |
+
def _remote_complete_json(messages, json_schema, temperature, max_tokens) -> str:
    """Run a schema-constrained completion against the remote inference server.

    With ``HERMES_TOOLS`` enabled the request advertises ``TOOL_SPECS`` and is
    driven through ``run_with_tools`` so the model may call tools before its
    final answer; otherwise a single POST is made. Either way a "model" event
    with latency and completion-token count is emitted, and the final message
    content is returned. HTTP error statuses raise via ``raise_for_status``.
    """
    import requests  # already a dependency; imported here to keep import light

    started = time.perf_counter()
    endpoint = f"{INFERENCE_BASE_URL.rstrip('/')}/chat/completions"

    def _elapsed_ms():
        return round((time.perf_counter() - started) * 1000)

    if HERMES_TOOLS:
        # Tool-calling loop: the model may call `remember` to update memory before
        # returning the final ActionPlan JSON. See server/tools.py.
        from .tools import TOOL_SPECS, run_with_tools

        def _post(msgs):
            body = _remote_payload(msgs, json_schema, temperature, max_tokens, False)
            body["tools"] = TOOL_SPECS
            reply = requests.post(
                endpoint,
                json=body,
                headers=_remote_headers(),
                timeout=120,
            )
            reply.raise_for_status()
            return reply.json()

        content, last = run_with_tools(list(messages), _post)
        tool_usage = last.get("usage") or {}
        events.emit(
            "model",
            "remote inference complete (tools)",
            latency_ms=_elapsed_ms(),
            tokens=tool_usage.get("completion_tokens"),
        )
        return content

    reply = requests.post(
        endpoint,
        json=_remote_payload(messages, json_schema, temperature, max_tokens, False),
        headers=_remote_headers(),
        timeout=120,
    )
    reply.raise_for_status()
    data = reply.json()
    token_usage = data.get("usage") or {}
    events.emit(
        "model",
        "remote inference complete",
        latency_ms=_elapsed_ms(),
        tokens=token_usage.get("completion_tokens"),
    )
    return data["choices"][0]["message"]["content"]
|
| 237 |
+
|
| 238 |
+
|
| 239 |
+
def _remote_stream_json(messages, json_schema, temperature, max_tokens):
    """Stream a schema-constrained completion from the remote server.

    POSTs a streaming ``/chat/completions`` request and yields every non-empty
    ``delta.content`` fragment of the first choice as it arrives. A "model"
    event is emitted before the request and again, with the elapsed latency,
    once the stream is exhausted. HTTP error statuses raise via
    ``raise_for_status``.

    Frames that are not usable chat-completion chunks (blank lines, ``[DONE]``,
    SSE comments/keep-alives, chunks with no choices or a null delta) are
    skipped rather than aborting the stream.
    """
    import json as _json

    import requests

    t0 = time.perf_counter()
    events.emit("model", "remote inference started")
    with requests.post(
        f"{INFERENCE_BASE_URL.rstrip('/')}/chat/completions",
        json=_remote_payload(messages, json_schema, temperature, max_tokens, True),
        headers=_remote_headers(),
        timeout=120,
        stream=True,
    ) as resp:
        resp.raise_for_status()
        for raw in resp.iter_lines():
            if not raw:
                continue
            line = raw.decode("utf-8").strip()
            # The space after "data:" is optional in an event stream, so accept
            # both "data: {...}" and "data:{...}".
            if line.startswith("data:"):
                line = line[len("data:"):].strip()
            if not line or line == "[DONE]":
                continue
            try:
                delta = _json.loads(line)["choices"][0].get("delta", {}).get("content")
            except (ValueError, KeyError, IndexError, TypeError, AttributeError):
                # TypeError/AttributeError cover frames whose JSON is not the
                # expected object shape (e.g. "delta": null).
                continue
            if delta:
                yield delta
    events.emit(
        "model", "remote stream complete", latency_ms=round((time.perf_counter() - t0) * 1000)
    )
|
| 269 |
+
|
| 270 |
+
|
| 271 |
+
# --- main-process wrappers (own the activity-bus emits; pick local vs remote) ---
|
| 272 |
+
def complete(messages: list[dict], temperature: float = 0.2, max_tokens: int = 1024) -> str:
    """Return the assistant text for an unconstrained chat completion.

    Delegates to the in-process ``_infer_text`` path.

    NOTE(review): unlike ``complete_json`` this never consults
    ``INFERENCE_BASE_URL``, so it loads the local model even in a remote
    deployment — confirm that is intended.
    """
    reply = _infer_text(messages, temperature, max_tokens)
    return reply
|
| 275 |
+
|
| 276 |
+
|
| 277 |
+
def complete_json(
    messages: list[dict],
    json_schema: dict,
    temperature: float = 0.2,
    max_tokens: int = 2048,
) -> str:
    """Return a completion constrained to ``json_schema``.

    When ``INFERENCE_BASE_URL`` is set the request goes to the remote server
    (which emits its own model event). Otherwise the in-process llama.cpp path
    runs and an "inference complete" event with latency and token count is
    emitted here.
    """
    if not INFERENCE_BASE_URL:
        started = time.perf_counter()
        text, tokens = _infer_json(messages, json_schema, temperature, max_tokens)
        elapsed_ms = round((time.perf_counter() - started) * 1000)
        events.emit(
            "model",
            "inference complete",
            latency_ms=elapsed_ms,
            tokens=tokens,
        )
        return text
    return _remote_complete_json(messages, json_schema, temperature, max_tokens)
|
| 297 |
+
|
| 298 |
+
|
| 299 |
+
def stream_complete_json(
    messages: list[dict],
    json_schema: dict,
    temperature: float = 0.2,
    max_tokens: int = 2048,
):
    """Yield text deltas of a completion constrained to ``json_schema``.

    Uses the remote server when ``INFERENCE_BASE_URL`` is set; otherwise streams
    from the in-process llama.cpp path, emitting "inference started" before the
    first delta and "stream complete" (with latency) after the last one.
    """
    if INFERENCE_BASE_URL:
        yield from _remote_stream_json(messages, json_schema, temperature, max_tokens)
    else:
        started = time.perf_counter()
        events.emit("model", "inference started")
        for piece in _infer_stream(messages, json_schema, temperature, max_tokens):
            yield piece
        elapsed_ms = round((time.perf_counter() - started) * 1000)
        events.emit("model", "stream complete", latency_ms=elapsed_ms)
|
server/orchestrator.py
ADDED
|
@@ -0,0 +1,191 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""MiniCPM-planned agent orchestrator over the Space's own MCP tools.
|
| 2 |
+
|
| 3 |
+
The Agent tab's engine: a small planner LLM (OpenBMB MiniCPM via a second
|
| 4 |
+
llama-server, OpenAI-compatible) drives smolagents' ToolCallingAgent against
|
| 5 |
+
the SAME tools this Space already exposes over MCP (extract_events /
|
| 6 |
+
check_conflicts / make_ics) — consumed via the localhost MCP endpoint, so the
|
| 7 |
+
agent demonstrably works through the public tool contract, not private
|
| 8 |
+
imports. Everything stays local llama.cpp: no cloud AI APIs, every model
|
| 9 |
+
under the 32B cap (gemma-cal E4B ~4B + MiniCPM 8B or 1B).
|
| 10 |
+
|
| 11 |
+
Stub mode (USE_STUB_EXTRACTOR=1, used by the free preview and CI) — or any
|
| 12 |
+
planner failure — falls back to ScriptedPlanner: the same tool sequence run
|
| 13 |
+
deterministically, emitting identical step events, so the tab always works
|
| 14 |
+
and tests never need a model.
|
| 15 |
+
|
| 16 |
+
Steps are plain JSON-serialisable dicts:
|
| 17 |
+
{"kind": "plan"|"tool_call"|"tool_result"|"final"|"error", ...}
|
| 18 |
+
"""
|
| 19 |
+
from __future__ import annotations
|
| 20 |
+
|
| 21 |
+
import json
|
| 22 |
+
import os
|
| 23 |
+
from typing import Iterator, Optional
|
| 24 |
+
|
| 25 |
+
from server import events as bus
|
| 26 |
+
|
| 27 |
+
# Planner serving (second llama-server) — env-selected, OFF by default.
# 8B default for planning quality; MiniCPM5-1B is the <=4B tiny variant.
# The values below are connection defaults only: whether the planner runs at all
# is decided by _use_llm_planner(), which reads the environment directly and so
# ignores the fallback URL given here.
PLANNER_BASE_URL = os.environ.get("PLANNER_BASE_URL", "http://127.0.0.1:8081/v1")
PLANNER_MODEL_ID = os.environ.get("PLANNER_MODEL_ID", "minicpm-planner")
# Self MCP endpoint (localhost — no HF edge/auth between us and ourselves).
# Defaults to this process's own port (PORT, else 7860).
MCP_SSE_URL = os.environ.get(
    "MCP_SSE_URL", f"http://127.0.0.1:{os.environ.get('PORT', '7860')}/gradio_api/mcp/sse"
)
|
| 35 |
+
|
| 36 |
+
ORCH_TASK = """You are a scheduling agent for a busy parent. Read the thread below.
|
| 37 |
+
|
| 38 |
+
Call exactly ONE tool — extract_events on the thread — then STOP. It returns the
|
| 39 |
+
events (the fine-tuned calendar model does the real work), a reply draft, and any
|
| 40 |
+
clarification. After that one call, return a short JSON summary: {{"events": <int>}}.
|
| 41 |
+
Do NOT call any other tool: conflict-checking and the .ics file are handled for you.
|
| 42 |
+
|
| 43 |
+
{memory}
|
| 44 |
+
|
| 45 |
+
Thread:
|
| 46 |
+
{thread}
|
| 47 |
+
"""
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
def _planner_configured() -> bool:
|
| 51 |
+
return bool(os.environ.get("PLANNER_HF_REPO") or os.environ.get("PLANNER_BASE_URL"))
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
def _use_llm_planner() -> bool:
    """Decide whether this run should go through the LLM planner.

    Stub mode (USE_STUB_EXTRACTOR=1) always disables it; otherwise the answer is
    whether a planner is configured.
    """
    stub_mode = os.environ.get("USE_STUB_EXTRACTOR") == "1"
    if stub_mode:
        return False
    return _planner_configured()
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def _short(obj, limit: int = 1200) -> str:
|
| 59 |
+
try:
|
| 60 |
+
s = obj if isinstance(obj, str) else json.dumps(obj, default=str)
|
| 61 |
+
except Exception: # noqa: BLE001
|
| 62 |
+
s = str(obj)
|
| 63 |
+
return s if len(s) <= limit else s[:limit] + " …"
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
# --------------------------------------------------------------------------- #
|
| 67 |
+
# ScriptedPlanner — deterministic fallback / stub-mode path
|
| 68 |
+
# --------------------------------------------------------------------------- #
|
| 69 |
+
def _scripted_steps(thread: str, ics_b64: Optional[str],
                    memory_block: Optional[str],
                    images: Optional[list[str]] = None) -> Iterator[dict]:
    """Run the fixed extract -> check-conflicts -> make-ics playbook as step events.

    Calls the ``server.mcp_tools`` functions directly and yields a ``tool_call``
    step before and a ``tool_result`` step after each one, bracketed by an
    opening ``plan`` step and a closing ``final`` step.

    Args:
        thread: conversation text to extract events from.
        ics_b64: optional existing calendar; conflict checking only runs when
            this is given and at least one event was extracted.
        memory_block: optional user recall block forwarded to extraction.
        images: optional screenshots forwarded to extraction.

    Yields:
        Step dicts; the last one is ``{"kind": "final", "plan", "ics_base64",
        "summary"}``.
    """
    from server import mcp_tools

    # Describe only the stages that will actually run for these inputs.
    yield {"kind": "plan",
           "text": "Playbook: extract events from the thread"
                   + (f" + {len(images)} screenshot(s)" if images else "")
                   + (", check conflicts against the provided calendar" if ics_b64 else "")
                   + ", then render an .ics."}

    # The emitted args are display summaries (truncated thread, counts,
    # placeholders), not the raw payloads passed to the tool.
    yield {"kind": "tool_call", "tool": "extract_events",
           "args": {"thread": _short(thread, 300),
                    **({"images": f"{len(images)} image(s)"} if images else {}),
                    **({"memory": "<user recall block>"} if memory_block else {})}}
    plan = mcp_tools.extract_events(thread, images or None, memory_block)
    yield {"kind": "tool_result", "tool": "extract_events",
           "result": {"events": len(plan.get("events", [])),
                      "reply_draft": _short(plan.get("reply_draft") or "", 200)}}

    # Start from whatever conflicts extraction reported; replaced below when a
    # calendar is available to check against.
    conflicts: list = list(plan.get("conflicts") or [])
    if ics_b64 and plan.get("events"):
        yield {"kind": "tool_call", "tool": "check_conflicts",
               "args": {"events": f"{len(plan['events'])} event(s)", "ics_base64": "<calendar>"}}
        conflicts = mcp_tools.check_conflicts(plan["events"], ics_b64)
        plan["conflicts"] = conflicts
        yield {"kind": "tool_result", "tool": "check_conflicts",
               "result": {"conflicts": len(conflicts)}}

    # No events -> no .ics; the final step then carries ics_base64=None.
    ics_out = None
    if plan.get("events"):
        yield {"kind": "tool_call", "tool": "make_ics",
               "args": {"events": f"{len(plan['events'])} event(s)"}}
        ics_out = mcp_tools.make_ics(plan["events"])
        yield {"kind": "tool_result", "tool": "make_ics",
               "result": {"ics_bytes": len(ics_out or "")}}

    yield {"kind": "final", "plan": plan, "ics_base64": ics_out,
           "summary": {"events": len(plan.get("events", [])), "conflicts": len(conflicts)}}
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
# --------------------------------------------------------------------------- #
|
| 111 |
+
# smolagents path — MiniCPM planner over the self MCP endpoint
|
| 112 |
+
# --------------------------------------------------------------------------- #
|
| 113 |
+
def _smol_steps(thread: str, ics_b64: Optional[str],
                memory_block: Optional[str], max_steps: int,
                images: Optional[list[str]] = None) -> Iterator[dict]:
    """Run the LLM planner over the self MCP endpoint, then the scripted playbook.

    The planner (a smolagents ``ToolCallingAgent`` talking to ``PLANNER_BASE_URL``)
    is given only the ``extract_events`` tool and at most ``min(max_steps, 3)``
    steps. Its tool calls, observations and final answer are yielded as trace
    steps; the structured result itself always comes from ``_scripted_steps``,
    which is run afterwards with the same inputs.

    Any exception (missing smolagents, planner or MCP endpoint unreachable, ...)
    propagates to the caller.
    """
    # Lazy imports: smolagents is only needed on the real path, keeping CI and
    # the stub preview dependency-free.
    from smolagents import OpenAIServerModel, ToolCallingAgent  # noqa: PLC0415
    from smolagents.mcp_client import MCPClient  # noqa: PLC0415

    model = OpenAIServerModel(
        model_id=PLANNER_MODEL_ID, api_base=PLANNER_BASE_URL,
        api_key=os.environ.get("PLANNER_API_KEY", "local"), temperature=0.0,
    )
    task = ORCH_TASK.format(
        memory=(f"What you know about this user:\n{memory_block}" if memory_block else ""),
        thread=thread,
    )

    yield {"kind": "plan", "text": f"MiniCPM planner ({PLANNER_MODEL_ID}) engaged — "
                                   f"tools via MCP at {MCP_SSE_URL}"}
    with MCPClient({"url": MCP_SSE_URL, "transport": "sse"}) as tools:
        # Minimal-footprint planner: expose ONLY extract_events and cap the loop
        # at a couple of steps. The fine-tuned E4B (inside extract_events) does
        # the real work; conflict-checking and the .ics are finalized
        # deterministically by _scripted_steps below. This keeps the planner to a
        # single tool call so it stays fast and never accumulates enough context
        # to overflow (multi-step runs hit ~207s and 'request exceeds context').
        # Restricting tools also avoids the File-input callbacks whose schemas
        # $ref #/$defs/FileData (which the planner's jinja rendering can't resolve).
        _WANTED = {"extract_events"}
        tools = [t for t in tools if getattr(t, "name", "") in _WANTED]
        agent = ToolCallingAgent(tools=tools, model=model, max_steps=min(max_steps, 3))
        result = None
        for step in agent.run(task, stream=True):
            # Dispatch on the class name rather than importing the step types;
            # any other step kind is ignored.
            kind = type(step).__name__
            if kind == "ActionStep":
                for call in (getattr(step, "tool_calls", None) or []):
                    yield {"kind": "tool_call",
                           "tool": getattr(call, "name", "?"),
                           "args": _short(getattr(call, "arguments", ""))}
                obs = getattr(step, "observations", None)
                if obs:
                    yield {"kind": "tool_result", "tool": "(observation)",
                           "result": _short(obs)}
            elif kind == "FinalAnswerStep":
                # Attribute name is probed both ways — presumably it differs
                # between smolagents versions; verify against the pinned release.
                result = getattr(step, "final_answer", None) or getattr(step, "output", None)
                yield {"kind": "plan", "text": f"Planner finished: {_short(result, 300)}"}

    # The planner's free-text answer isn't the product — re-derive the
    # structured plan through the deterministic path so the UI always gets a
    # valid ActionPlan + ics, with the planner trace above as the evidence.
    yield from _scripted_steps(thread, ics_b64, memory_block, images)
|
| 164 |
+
|
| 165 |
+
|
| 166 |
+
# --------------------------------------------------------------------------- #
|
| 167 |
+
# Entry point
|
| 168 |
+
# --------------------------------------------------------------------------- #
|
| 169 |
+
def run_orchestrator(thread: str, ics_b64: Optional[str] = None,
                     memory_block: Optional[str] = None,
                     max_steps: int = 6,
                     images: Optional[list[str]] = None) -> Iterator[dict]:
    """Yield orchestration steps for a thread (+ optional screenshot data URIs);
    always ends with a 'final' step (or an 'error' followed by the scripted
    fallback's steps).

    The whole run executes inside the "agent" run scope of the activity bus and
    emits a "decision" event at start and finish. The LLM planner path is tried
    when ``_use_llm_planner()`` allows it; any exception from it is reported as
    an ``error`` step and the scripted playbook runs instead.

    Args:
        thread: conversation text.
        ics_b64: optional existing calendar for conflict checking.
        memory_block: optional user recall block.
        max_steps: upper bound handed to the planner path.
        images: optional screenshots.
    """
    with bus.run_scope("agent"):
        bus.emit("decision", "agent orchestrator run started")
        if _use_llm_planner():
            try:
                yield from _smol_steps(thread, ics_b64, memory_block, max_steps, images)
                bus.emit("decision", "agent orchestrator run finished (MiniCPM planner)")
                return
            except Exception as e:  # noqa: BLE001 planner down -> scripted fallback
                # NOTE(review): _smol_steps ends with its own _scripted_steps pass,
                # so a failure there is also caught here and the scripted steps
                # below run a second time after steps were already yielded —
                # confirm consumers tolerate the repeated steps.
                # Surface the actual message (e.g. which module is missing), not
                # just the type — a bare "ModuleNotFoundError" hides the cause.
                detail = f"{type(e).__name__}: {e}".strip().rstrip(":")
                yield {"kind": "error",
                       "text": f"Planner unavailable ({_short(detail, 160)}) — "
                               "falling back to the scripted playbook."}
        yield from _scripted_steps(thread, ics_b64, memory_block, images)
        bus.emit("decision", "agent orchestrator run finished (scripted)")
|
server/pipeline.py
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""The shared 'thread in -> ActionPlan out' pipeline.
|
| 2 |
+
|
| 3 |
+
One implementation behind both the synchronous ``POST /agent`` endpoint and the
|
| 4 |
+
autonomous ingest path, so the two can never drift. Stateless: it does not touch
|
| 5 |
+
the feed or the dedup store (callers own statefulness).
|
| 6 |
+
|
| 7 |
+
Importable without Gradio. Google Calendar is imported lazily so CI / stub mode
|
| 8 |
+
(which exclude the google libs) stay clean.
|
| 9 |
+
"""
|
| 10 |
+
from __future__ import annotations
|
| 11 |
+
|
| 12 |
+
import base64
|
| 13 |
+
from typing import Optional
|
| 14 |
+
|
| 15 |
+
from dateutil import parser as dtparser
|
| 16 |
+
from pydantic import BaseModel
|
| 17 |
+
|
| 18 |
+
from calendar_out.freebusy import DEFAULT_DURATION, Busy, _as_dt, annotate_conflicts, load_ics_busy
|
| 19 |
+
from calendar_out.ics import events_to_ics
|
| 20 |
+
from server import events as bus
|
| 21 |
+
from server.agent import run_agent
|
| 22 |
+
from server.schema import ActionPlan, Event
|
| 23 |
+
from server.threads import format_thread
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
class AgentMessage(BaseModel):
    # One message of a posted ``messages[]`` list; _thread_text dumps these to
    # dicts and joins them with format_thread.
    sender: str = "?"  # placeholder when the sender is unknown
    text: str = ""
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
class AgentRequest(BaseModel):
    # Input to run_pipeline. ``thread`` wins when set; otherwise the thread is
    # assembled from ``messages`` (see _thread_text).
    thread: Optional[str] = None
    messages: Optional[list[AgentMessage]] = None
    images: list[str] = []  # base64 data URIs
    # Conflict context: ``existing_ics`` takes precedence; ``existing_events`` is
    # only turned into busy intervals when no .ics is supplied.
    existing_ics: Optional[str] = None  # base64-encoded .ics bytes
    existing_events: list[Event] = []
    now: Optional[str] = None  # ISO 8601; defaults to datetime.now()
    push_gcal: bool = False  # push extracted events to Google Calendar
    return_ics: bool = False  # include a base64 .ics of the events in the response
    memory: Optional[str] = None  # per-user recall block (else server global memory)
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
class AgentResponse(BaseModel):
    # Output of run_pipeline.
    plan: ActionPlan
    ics_base64: Optional[str] = None  # set only when the request asked for return_ics
    gcal_links: list[str] = []  # result of push_events; stays empty if the push is skipped or fails
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def _busy_from_request(req: AgentRequest) -> list[Busy]:
    """Build busy intervals from an uploaded .ics or structured existing events.

    An uploaded ``existing_ics`` takes precedence. If it cannot be decoded or
    parsed the request still succeeds with no conflict context (``[]``), and
    the failure is reported on the activity bus. Without an .ics, each entry of
    ``existing_events`` with a parseable start becomes one interval; a missing
    or unparseable end falls back to ``start + DEFAULT_DURATION``.
    """
    if req.existing_ics:
        try:
            return load_ics_busy(base64.b64decode(req.existing_ics))
        except Exception as e:  # noqa: BLE001 malformed .ics -> no conflict context
            # Keep degrading gracefully, but say so instead of silently dropping
            # the calendar. Only the exception type is reported so calendar
            # contents never end up on the bus.
            bus.emit(
                "calendar",
                f"existing .ics ignored (unreadable: {type(e).__name__})",
                level="error",
            )
            return []
    busy: list[Busy] = []
    for ev in req.existing_events:
        start = _as_dt(ev.start)
        if start is None:
            continue  # cannot place an event without a start time
        end = _as_dt(ev.end) or (start + DEFAULT_DURATION)
        busy.append(Busy(start=start, end=end, title=ev.title))
    return busy
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
def _thread_text(req: AgentRequest) -> str:
|
| 67 |
+
if req.thread:
|
| 68 |
+
return req.thread
|
| 69 |
+
if req.messages:
|
| 70 |
+
return format_thread([m.model_dump() for m in req.messages])
|
| 71 |
+
return ""
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
def run_pipeline(req: AgentRequest) -> AgentResponse:
    """Turn a request into an ActionPlan plus optional calendar outputs.

    Steps: resolve the thread text, run the agent, annotate conflicts against
    any busy intervals derived from the request, then optionally attach a
    base64 .ics and push the events to Google Calendar. A failed push is
    reported on the activity bus and does not fail the request.
    """
    thread = _thread_text(req)
    if req.now:
        now = dtparser.isoparse(req.now)
    else:
        now = None
    busy = _busy_from_request(req)

    plan = run_agent(
        thread,
        now=now,
        existing=req.existing_events,
        images=req.images,
        memory_block=req.memory,
    )
    if busy:
        plan = annotate_conflicts(plan, busy)

    resp = AgentResponse(plan=plan)

    if req.return_ics:
        ics_bytes = events_to_ics(plan.events)
        resp.ics_base64 = base64.b64encode(ics_bytes).decode("ascii")

    if not (req.push_gcal and plan.events):
        return resp

    try:
        from calendar_out.gcal import push_events  # lazy: google libs optional

        resp.gcal_links = push_events(plan.events)
    except Exception as e:  # noqa: BLE001 no token.json / offline -> degrade
        bus.emit("calendar", f"Google Calendar push skipped: {e}", level="error")
    return resp
|
server/schema.py
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Shared pydantic schemas for the scheduling agent.
|
| 2 |
+
|
| 3 |
+
The model is constrained to emit an ActionPlan (see server/agent.py); these types
|
| 4 |
+
are also the contract used by the UI and the calendar outputs.
|
| 5 |
+
"""
|
| 6 |
+
from __future__ import annotations
|
| 7 |
+
|
| 8 |
+
from typing import Optional
|
| 9 |
+
|
| 10 |
+
from pydantic import BaseModel, Field
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class Event(BaseModel):
    # One calendar event. Times are carried as strings, not datetimes.
    title: str
    start: str  # ISO 8601, e.g. 2026-06-10T13:00:00
    end: Optional[str] = None  # same format as start; None when no end is known
    location: Optional[str] = None
    attendees: list[str] = Field(default_factory=list)
    reminder_minutes: Optional[int] = None
    notes: Optional[str] = None
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
class Conflict(BaseModel):
    # Links one planned event (by position) to the existing entry it clashes with.
    event_index: int = Field(description="index into ActionPlan.events")
    clashes_with: str = Field(description="summary of the existing event it clashes with")
    # Typed as a plain str: the allowed values are documented in the description
    # but not enforced by the schema.
    severity: str = Field(description='one of: "overlap", "adjacent", "tight"')
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
class ActionPlan(BaseModel):
    """Everything the agent decides for one thread, in one constrained object."""

    # NOTE(review): the Field descriptions below presumably flow into the JSON
    # schema used to constrain the model, so treat them as prompt text — confirm
    # before rewording.
    reasoning: Optional[str] = Field(
        default=None, description="brief chain of thought shown to the user"
    )
    events: list[Event] = Field(default_factory=list)
    # Each Conflict.event_index points into ``events`` above.
    conflicts: list[Conflict] = Field(default_factory=list)
    proposed_times: list[str] = Field(
        default_factory=list, description="ISO 8601 alternatives when there is a conflict"
    )
    reply_draft: str = Field(default="", description="suggested reply to send back")
    needs_clarification: Optional[str] = Field(
        default=None, description="a question to ask if the plan is ambiguous"
    )
|
server/threads.py
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Assemble a conversation thread from individual messages.
|
| 2 |
+
|
| 3 |
+
Used by both the ``/agent`` endpoint (join a posted ``messages[]`` into a thread)
|
| 4 |
+
and autonomous mode (build a per-chat rolling window from the ingest feed). Pure —
|
| 5 |
+
no Gradio / llama / network — so it's trivially unit-testable in stub mode.
|
| 6 |
+
"""
|
| 7 |
+
from __future__ import annotations
|
| 8 |
+
|
| 9 |
+
import os
|
| 10 |
+
|
| 11 |
+
from dateutil import parser as dtparser
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def format_thread(messages: list[dict]) -> str:
    """Join messages into newline-separated ``"sender: text"`` lines.

    Messages whose text is missing or whitespace-only are dropped; a missing
    sender is rendered as "?". Sender and text are both stripped.
    """

    def _rendered():
        for message in messages:
            body = (message.get("text") or "").strip()
            if body:
                who = (message.get("sender") or "?").strip()
                yield f"{who}: {body}"

    return "\n".join(_rendered())
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def _ts(value) -> float | None:
    """Parse ``value`` as a date/time and return its POSIX timestamp.

    The value is stringified before parsing; anything that cannot be parsed or
    converted yields ``None``.
    """
    try:
        parsed = dtparser.parse(str(value))
        return parsed.timestamp()
    except (ValueError, TypeError, OverflowError):
        return None
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def rolling_thread(
    feed: list[dict],
    chat: str,
    window: int | None = None,
    minutes: int | None = None,
) -> str:
    """Render the recent tail of one chat from the feed as a thread.

    Takes the last ``window`` messages whose ``chat`` equals ``chat``, then
    drops those more than ``minutes`` older than the newest parseable timestamp.
    Messages without a parseable timestamp are always kept. Unset (or zero)
    ``window`` / ``minutes`` fall back to AUTO_THREAD_WINDOW (20) and
    AUTO_THREAD_MINUTES (720) from the environment.
    """
    if not window:
        window = int(os.environ.get("AUTO_THREAD_WINDOW", "20"))
    if not minutes:
        minutes = int(os.environ.get("AUTO_THREAD_MINUTES", "720"))

    recent = [entry for entry in feed if (entry.get("chat") or "") == chat]
    if not recent:
        return ""
    recent = recent[-window:]

    # Drop messages older than `minutes` before the newest (when timestamps parse).
    stamped = [(_ts(entry.get("timestamp")), entry) for entry in recent]
    known = [stamp for stamp, _ in stamped if stamp is not None]
    if known:
        cutoff = max(known) - minutes * 60
        recent = [entry for stamp, entry in stamped if stamp is None or stamp >= cutoff]

    return format_thread(recent)
|
server/tools.py
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Hermes tool-calling: let the model write its own long-term memory.
|
| 2 |
+
|
| 3 |
+
Hermes is a tool-calling fine-tune. When `HERMES_TOOLS=1`, the remote inference
|
| 4 |
+
path (server/model.py) advertises these tools so the model can call `remember`
|
| 5 |
+
mid-run to save durable facts ("Dana is the soccer coach", "you decline Mondays")
|
| 6 |
+
— the active half of "grows with you". Kept separate + small so the round-trip
|
| 7 |
+
logic is unit-testable without a live server.
|
| 8 |
+
"""
|
| 9 |
+
from __future__ import annotations
|
| 10 |
+
|
| 11 |
+
import json
|
| 12 |
+
|
| 13 |
+
from . import memory
|
| 14 |
+
|
| 15 |
+
# OpenAI-compatible tool specs (llama-server understands these with --jinja).
# Only one tool is advertised; dispatch() below must handle every name listed here.
TOOL_SPECS = [
    {
        "type": "function",
        "function": {
            "name": "remember",
            "description": (
                "Save a durable fact or preference about the user to long-term memory "
                "so future scheduling is more personal. Use for stable facts only "
                "(roles, recurring preferences, default locations), not one-off details."
            ),
            "parameters": {
                "type": "object",
                "properties": {
                    "text": {
                        "type": "string",
                        "description": "the fact, e.g. 'Dana is the soccer coach'",
                    },
                    # Optional; dispatch() falls back to "note" when it is omitted.
                    "kind": {
                        "type": "string",
                        "enum": ["contact", "preference", "location", "note"],
                    },
                },
                "required": ["text"],
            },
        },
    }
]
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
def dispatch(name: str, arguments) -> str:
    """Execute one tool call; returns a short result string for the tool message.

    Args:
        name: tool name requested by the model; only "remember" is supported.
        arguments: the call's arguments, either a JSON string or an already
            decoded mapping. The payload is model-generated, so anything that
            is not a JSON object with a non-empty string ``text`` is treated
            as "no text provided" rather than raising.

    Returns:
        "unknown tool: <name>", "no text provided", or "remembered: <text>".
    """
    if name != "remember":
        return f"unknown tool: {name}"

    if isinstance(arguments, str):
        try:
            args = json.loads(arguments)
        except (ValueError, TypeError):
            args = {}
    else:
        args = arguments or {}
    # Valid JSON is not necessarily an object: a list, string or number would
    # otherwise fail on .get() below.
    if not isinstance(args, dict):
        args = {}

    raw_text = args.get("text")
    text = raw_text.strip() if isinstance(raw_text, str) else ""
    if not text:
        return "no text provided"

    # Missing, null or non-string kinds all fall back to the default category.
    kind = args.get("kind")
    if not isinstance(kind, str) or not kind:
        kind = "note"

    memory.remember(text, kind)
    return f"remembered: {text}"
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
def run_with_tools(messages: list[dict], post_fn, max_rounds: int = 3):
    """Drive a tool-calling loop. ``post_fn(messages) -> openai_response_dict`` does
    the actual HTTP POST (tools already configured by the caller); injectable so the
    loop is testable. Returns (final_content, last_response).

    Each round posts the conversation; if the reply carries tool calls they are
    executed with ``dispatch`` and their results appended as ``tool`` messages
    before the next round. After ``max_rounds`` rounds that all requested tools,
    one last POST is made and its content returned as-is. ``final_content`` is
    always a string: a null or missing ``content`` is returned as "". The
    caller's ``messages`` list is not mutated.
    """
    msgs = list(messages)
    for _ in range(max_rounds):
        resp = post_fn(msgs)
        msg = resp["choices"][0]["message"]
        tool_calls = msg.get("tool_calls") or []
        if not tool_calls:
            # `content` may be present but null, so .get()'s default is not enough.
            return msg.get("content") or "", resp
        msgs.append(msg)  # assistant turn carrying the tool_calls
        for tc in tool_calls:
            fn = tc.get("function") or {}
            result = dispatch(fn.get("name", ""), fn.get("arguments", "{}"))
            msgs.append(
                {"role": "tool", "tool_call_id": tc.get("id", ""), "content": result}
            )
    # ran out of rounds — one final call to get content
    resp = post_fn(msgs)
    return resp["choices"][0]["message"].get("content") or "", resp
|
server/trace.py
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Export an agent run as a portable, shareable trace (Sharing is Caring).
|
| 2 |
+
|
| 3 |
+
The activity bus (``server/events.py``) groups every event from one agent run
|
| 4 |
+
under a ``run_scope`` id. This module serializes such a run into a small,
|
| 5 |
+
self-contained JSON envelope that a user can download and (optionally) publish to
|
| 6 |
+
the Hugging Face Hub with ``training/share_trace.py``.
|
| 7 |
+
|
| 8 |
+
Privacy: the bus is structural by design — every ``emit(...)`` carries counts +
|
| 9 |
+
short status strings, never event titles or raw thread text. The *only* free-text
|
| 10 |
+
that can carry personal data is the chat-name suffix in the ingest message
|
| 11 |
+
(``app.py``: ``"N msg(s) from {chats}"``). With ``redact=True`` (the default) that
|
| 12 |
+
tail is dropped. Steps use a fixed key allowlist, so a future payload key can't
|
| 13 |
+
silently leak into a shared trace.
|
| 14 |
+
"""
|
| 15 |
+
from __future__ import annotations
|
| 16 |
+
|
| 17 |
+
import json
|
| 18 |
+
import os
|
| 19 |
+
import re
|
| 20 |
+
import tempfile
|
| 21 |
+
from datetime import datetime
|
| 22 |
+
|
| 23 |
+
from . import events as bus
|
| 24 |
+
|
| 25 |
+
TRACE_SCHEMA = "imessage-cal-trace"
|
| 26 |
+
TRACE_SCHEMA_VERSION = 1
|
| 27 |
+
|
| 28 |
+
# Only these keys ever appear in an exported step (allowlist, not denylist).
|
| 29 |
+
_STEP_KEYS = ("stage", "level", "ts", "latency_ms", "events", "conflicts", "images", "tokens")
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def _scrub_message(stage: str, message: str, redact: bool) -> str:
|
| 33 |
+
"""All bus messages are structural except the ingest one, which appends
|
| 34 |
+
``" from {chats}"`` (chat names — PII). Drop that tail when redacting."""
|
| 35 |
+
if redact and stage == "ingest":
|
| 36 |
+
# "3 msg(s) from 3rd grade chat" -> "3 msg(s)"
|
| 37 |
+
return re.sub(r"\s+from\s+.*$", "", message)
|
| 38 |
+
return message
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def _step(ev: dict, redact: bool) -> dict:
    """Build one exported step from a bus event.

    Copies only the allowlisted ``_STEP_KEYS`` that are present in ``ev`` (in
    allowlist order), then adds ``"message"`` after passing it through
    ``_scrub_message``.
    """
    exported = {}
    for key in _STEP_KEYS:
        if key in ev:
            exported[key] = ev[key]
    stage = ev.get("stage", "")
    raw_message = ev.get("message", "")
    exported["message"] = _scrub_message(stage, raw_message, redact)
    return exported
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
def export_run(run_id: str | None = None, redact: bool = True) -> dict:
    """Serialize one agent run (newest by default) into a shareable envelope.

    Args:
        run_id: Id of the run to export; ``None`` selects the first run
            returned by ``bus.recent_runs`` (newest first).
        redact: Forwarded to ``_step`` for message scrubbing and recorded in
            the envelope under ``"redacted"``.

    Returns:
        A dict with ``schema``, ``version``, ``exported_at``, ``run_id``,
        ``run_label``, ``redacted``, ``steps`` and ``summary``. When there is
        no matching run the envelope is still valid, with ``steps == []`` and
        zeroed summary counters, so callers don't need to handle exceptions.
    """
    runs = bus.recent_runs(n=50)  # newest first
    evs: list[dict] = []
    rid = run_id
    if run_id is None:
        if runs:
            rid, evs = runs[0]
    else:
        evs = next((e for r, e in runs if r == run_id), [])

    steps = [_step(e, redact) for e in evs]

    def total(key: str) -> int:
        # `or 0` rather than a .get default: a step can carry the key with a
        # None value (the model_calls count below allows for exactly that),
        # and summing None would raise TypeError.
        return sum(s.get(key) or 0 for s in steps)

    summary = {
        "steps": len(steps),
        "events": total("events"),
        "conflicts": total("conflicts"),
        "images": total("images"),
        "model_calls": sum(1 for s in steps if s.get("latency_ms") is not None),
        "total_latency_ms": total("latency_ms"),
    }
    return {
        "schema": TRACE_SCHEMA,
        "version": TRACE_SCHEMA_VERSION,
        # NOTE(review): naive local time with no UTC offset; kept as-is so the
        # exported format does not change.
        "exported_at": datetime.now().isoformat(timespec="seconds"),
        "run_id": rid,
        # run ids look like "12:analyze" — the label is the part after ":".
        "run_label": (rid.split(":", 1)[1] if rid and ":" in rid else None),
        "redacted": redact,
        "steps": steps,
        "summary": summary,
    }
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
def write_trace(trace: dict, path: str | None = None) -> str:
    """Write a trace envelope to a JSON file and return the path (Gradio download).

    Mirrors ``calendar_out.ics.write_ics``. Deliberately does NOT emit a bus event
    — that would mutate the very run being exported.

    Args:
        trace: JSON-serializable envelope to write.
        path: Destination file. When ``None`` a new ``trace_*.json`` temp file
            is created.

    Returns:
        The path that was written.

    Raises:
        TypeError: ``trace`` is not JSON-serializable (raised by ``json.dumps``).
            In that case no file is created or modified.
    """
    # Serialize before touching the filesystem, so a failure neither leaves a
    # stray temp file behind nor truncates an existing target to partial JSON.
    payload = json.dumps(trace, indent=2, ensure_ascii=False)
    if path is None:
        fd, path = tempfile.mkstemp(suffix=".json", prefix="trace_")
        os.close(fd)
    with open(path, "w", encoding="utf-8") as f:
        f.write(payload)
    return path
|
static/app.css
ADDED
|
@@ -0,0 +1,961 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/* OffGridSchedula — "daylight planner" theme.
|
| 2 |
+
Soft lavender-paper canvas, deep-ink text (high contrast), violet→cyan identity
|
| 3 |
+
used on the primary action + key accents. Fraunces (display) + Hanken Grotesk. */
|
| 4 |
+
@import url('https://fonts.googleapis.com/css2?family=Fraunces:opsz,wght@9..144,500;9..144,600;9..144,700&family=Hanken+Grotesk:wght@400;500;600;700&display=swap');
|
| 5 |
+
|
| 6 |
+
:root {
|
| 7 |
+
--bg: #f4f2fb; /* soft lavender paper */
|
| 8 |
+
--bg2: #ffffff; /* inputs */
|
| 9 |
+
--surface: #ffffff; /* cards */
|
| 10 |
+
--surface2: #efecf9;
|
| 11 |
+
--line: rgba(31,25,60,0.12);
|
| 12 |
+
--text: #1e1934; /* deep ink */
|
| 13 |
+
--muted: #645c84;
|
| 14 |
+
--violet: #6d4be0;
|
| 15 |
+
--cyan: #0e8ea0;
|
| 16 |
+
--coral: #d83a60;
|
| 17 |
+
--mint: #15894f;
|
| 18 |
+
--amber: #b3700a;
|
| 19 |
+
--accent: linear-gradient(100deg, #6d4be0 0%, #0b8294 100%);
|
| 20 |
+
--radius: 16px;
|
| 21 |
+
--shadow: 0 12px 30px rgba(45,32,90,0.12);
|
| 22 |
+
}
|
| 23 |
+
|
| 24 |
+
/* ---- canvas + base type ---- */
|
| 25 |
+
.gradio-container, .gradio-container * {
|
| 26 |
+
font-family: "Hanken Grotesk", ui-sans-serif, system-ui, sans-serif !important;
|
| 27 |
+
}
|
| 28 |
+
.gradio-container {
|
| 29 |
+
background:
|
| 30 |
+
radial-gradient(1100px 520px at 12% -8%, #ece5ff 0%, transparent 55%),
|
| 31 |
+
radial-gradient(900px 500px at 100% 0%, #ddf2f4 0%, transparent 50%),
|
| 32 |
+
var(--bg) !important;
|
| 33 |
+
color: var(--text) !important;
|
| 34 |
+
|
| 35 |
+
/* Map Gradio's own theme tokens to our light palette so every component is
|
| 36 |
+
light-surface / dark-text and stays readable. */
|
| 37 |
+
--body-background-fill: var(--bg);
|
| 38 |
+
--body-text-color: var(--text);
|
| 39 |
+
--body-text-color-subdued: var(--muted);
|
| 40 |
+
--background-fill-primary: var(--surface);
|
| 41 |
+
--background-fill-secondary: var(--surface2);
|
| 42 |
+
--block-background-fill: var(--surface);
|
| 43 |
+
--block-label-background-fill: var(--surface);
|
| 44 |
+
--block-label-text-color: var(--muted);
|
| 45 |
+
--block-title-text-color: var(--text);
|
| 46 |
+
--block-info-text-color: var(--muted);
|
| 47 |
+
--block-border-color: var(--line);
|
| 48 |
+
--border-color-primary: var(--line);
|
| 49 |
+
--border-color-accent: rgba(109,75,224,.5);
|
| 50 |
+
--input-background-fill: var(--bg2);
|
| 51 |
+
--input-border-color: var(--line);
|
| 52 |
+
--input-placeholder-color: var(--muted);
|
| 53 |
+
--button-secondary-background-fill: var(--surface2);
|
| 54 |
+
--button-secondary-text-color: var(--text);
|
| 55 |
+
--button-secondary-border-color: var(--line);
|
| 56 |
+
--link-text-color: var(--cyan);
|
| 57 |
+
--link-text-color-hover: var(--cyan);
|
| 58 |
+
--color-accent: var(--violet);
|
| 59 |
+
--color-accent-soft: rgba(109,75,224,.14);
|
| 60 |
+
--table-text-color: var(--text);
|
| 61 |
+
--table-even-background-fill: var(--surface);
|
| 62 |
+
--table-odd-background-fill: var(--surface2);
|
| 63 |
+
}
|
| 64 |
+
/* Belt-and-suspenders for the common readable bits (markdown, labels, inputs). */
|
| 65 |
+
.gradio-container .prose,
|
| 66 |
+
.gradio-container .prose p, .gradio-container .prose li,
|
| 67 |
+
.gradio-container .prose h1, .gradio-container .prose h2, .gradio-container .prose h3,
|
| 68 |
+
.gradio-container .prose strong, .gradio-container .prose em,
|
| 69 |
+
.gradio-container label, .gradio-container .gr-box label,
|
| 70 |
+
.gradio-container input, .gradio-container textarea {
|
| 71 |
+
color: var(--text) !important;
|
| 72 |
+
}
|
| 73 |
+
/* NOTE: checkboxes/radios are excluded — the `background` shorthand would wipe
|
| 74 |
+
Gradio's checked-state background-image (the checkmark), so they never look
|
| 75 |
+
checked. Style their accent instead and leave the rest to Gradio. */
|
| 76 |
+
.gradio-container input:not([type="checkbox"]):not([type="radio"]),
|
| 77 |
+
.gradio-container textarea { background: var(--bg2) !important; }
|
| 78 |
+
.gradio-container input[type="checkbox"], .gradio-container input[type="radio"] {
|
| 79 |
+
accent-color: var(--violet); cursor: pointer; }
|
| 80 |
+
.gradio-container .tab-nav button { color: var(--muted); }
|
| 81 |
+
.gradio-container .tab-nav button.selected { color: var(--text); border-bottom-color: var(--violet); }
|
| 82 |
+
.gradio-container h1, .gradio-container h2, .gradio-container h3,
|
| 83 |
+
#app-header, .evx-title, .evx-head {
|
| 84 |
+
font-family: "Fraunces", Georgia, serif !important;
|
| 85 |
+
letter-spacing: -0.01em;
|
| 86 |
+
}
|
| 87 |
+
|
| 88 |
+
#app-header {
|
| 89 |
+
display: flex; align-items: center; gap: .5rem;
|
| 90 |
+
font-size: 1.9rem; font-weight: 700; line-height: 1.1;
|
| 91 |
+
margin-bottom: .15rem;
|
| 92 |
+
background: var(--accent);
|
| 93 |
+
-webkit-background-clip: text; background-clip: text;
|
| 94 |
+
-webkit-text-fill-color: transparent;
|
| 95 |
+
}
|
| 96 |
+
|
| 97 |
+
/* ---- Review: input card ---- */
|
| 98 |
+
.rv-input {
|
| 99 |
+
background: var(--surface) !important;
|
| 100 |
+
border: 1px solid var(--line) !important;
|
| 101 |
+
border-radius: var(--radius) !important;
|
| 102 |
+
padding: 14px !important;
|
| 103 |
+
box-shadow: var(--shadow);
|
| 104 |
+
}
|
| 105 |
+
#rv-textbox textarea {
|
| 106 |
+
font-size: 1rem !important;
|
| 107 |
+
line-height: 1.5 !important;
|
| 108 |
+
background: var(--bg2) !important;
|
| 109 |
+
border-radius: 12px !important;
|
| 110 |
+
}
|
| 111 |
+
#rv-actions { gap: 10px; margin-top: 10px; }
|
| 112 |
+
|
| 113 |
+
/* primary action — the one place the full accent gradient lives */
|
| 114 |
+
#rv-analyze button {
|
| 115 |
+
background: var(--accent) !important;
|
| 116 |
+
color: #fff !important;
|
| 117 |
+
font-weight: 700 !important;
|
| 118 |
+
font-size: 1.02rem !important;
|
| 119 |
+
border: none !important;
|
| 120 |
+
min-height: 50px !important;
|
| 121 |
+
border-radius: 12px !important;
|
| 122 |
+
box-shadow: 0 8px 20px rgba(109,75,224,0.28);
|
| 123 |
+
transition: transform .12s ease, box-shadow .12s ease, filter .12s ease;
|
| 124 |
+
}
|
| 125 |
+
#rv-analyze button:hover { transform: translateY(-1px); filter: brightness(1.06); box-shadow: 0 12px 28px rgba(11,130,148,0.3); }
|
| 126 |
+
#rv-analyze button:active { transform: translateY(0); }
|
| 127 |
+
|
| 128 |
+
.rv-status { color: var(--muted); font-size: .9rem; min-height: 1.2em; padding: 2px 2px 0; }
|
| 129 |
+
|
| 130 |
+
/* ---- plan summary (reasoning + conflict badges + free-slot chips) ---- */
|
| 131 |
+
.pl-summary {
|
| 132 |
+
background: var(--surface) !important;
|
| 133 |
+
border: 1px solid var(--line);
|
| 134 |
+
border-left: 3px solid var(--violet);
|
| 135 |
+
border-radius: 12px;
|
| 136 |
+
padding: 14px 16px;
|
| 137 |
+
margin: 10px 0;
|
| 138 |
+
box-shadow: var(--shadow);
|
| 139 |
+
animation: rise .35s ease both;
|
| 140 |
+
}
|
| 141 |
+
.pl-reason { margin: 0 0 8px; color: var(--text); font-size: .96rem; line-height: 1.5; }
|
| 142 |
+
.pl-row { display: flex; flex-wrap: wrap; gap: 8px; align-items: center; margin-top: 8px; }
|
| 143 |
+
.pl-label { font-size: .72rem; text-transform: uppercase; letter-spacing: .06em; color: var(--muted); }
|
| 144 |
+
.pl-badge {
|
| 145 |
+
display: inline-flex; align-items: center; gap: 6px;
|
| 146 |
+
padding: 5px 11px; border-radius: 999px; font-size: .82rem; font-weight: 600;
|
| 147 |
+
}
|
| 148 |
+
.pl-conflict { background: rgba(216,58,96,.12); color: #b21d44; border: 1px solid rgba(216,58,96,.3); }
|
| 149 |
+
.pl-chip {
|
| 150 |
+
padding: 5px 11px; border-radius: 999px; font-size: .82rem; font-weight: 600;
|
| 151 |
+
background: rgba(14,142,160,.12); color: #0a6f7d; border: 1px solid rgba(14,142,160,.3);
|
| 152 |
+
}
|
| 153 |
+
.pl-clarify { margin: 10px 0 0; color: var(--amber); font-size: .9rem; }
|
| 154 |
+
.pl-clear { margin: 0; color: var(--mint); font-weight: 600; }
|
| 155 |
+
|
| 156 |
+
/* ---- events: billboard (featured) + cards ---- */
|
| 157 |
+
.evx-head { font-size: 1.05rem; font-weight: 600; color: var(--text); margin: 6px 2px 10px; }
|
| 158 |
+
|
| 159 |
+
/* Billboard / carousel slide: a soft tinted card with dark text */
|
| 160 |
+
.bb {
|
| 161 |
+
position: relative; overflow: hidden; border-radius: 18px; margin: 0 0 18px;
|
| 162 |
+
min-height: 196px; display: flex; align-items: flex-end;
|
| 163 |
+
background: linear-gradient(120deg, #ece4ff 0%, #d7f1f4 100%);
|
| 164 |
+
border: 1px solid var(--line); box-shadow: var(--shadow);
|
| 165 |
+
}
|
| 166 |
+
.bb-scrim { position: absolute; inset: 0;
|
| 167 |
+
background: linear-gradient(to top, rgba(255,255,255,.45) 0%, rgba(255,255,255,.1) 45%, transparent 100%); }
|
| 168 |
+
.bb-body { position: relative; z-index: 2; padding: 22px 24px; width: 100%; animation: rise .45s ease both; }
|
| 169 |
+
.bb-kicker { color: var(--cyan); font-weight: 700; font-size: .74rem; letter-spacing: .14em; text-transform: uppercase; margin-bottom: 8px; }
|
| 170 |
+
.bb-title { font-family: "Fraunces", serif; font-size: 2rem; line-height: 1.05; margin: 0 0 10px; color: var(--text); }
|
| 171 |
+
.bb-when { font-size: 1rem; font-weight: 600; color: #3a3357; }
|
| 172 |
+
.bb-note { margin-top: 8px; color: var(--muted); font-style: italic; }
|
| 173 |
+
|
| 174 |
+
.evx-sec { font-size: .76rem; text-transform: uppercase; letter-spacing: .08em; color: var(--muted); margin: 2px 2px 10px; }
|
| 175 |
+
.evx-cards { display: flex; flex-direction: column; gap: 12px; }
|
| 176 |
+
.evx-card {
|
| 177 |
+
position: relative; display: flex; gap: 0;
|
| 178 |
+
background: var(--surface); border: 1px solid var(--line);
|
| 179 |
+
border-radius: 14px; overflow: hidden;
|
| 180 |
+
box-shadow: 0 4px 14px rgba(45,32,90,.10);
|
| 181 |
+
transition: transform .18s ease, box-shadow .18s ease, border-color .18s ease;
|
| 182 |
+
animation: rise .4s ease both; animation-delay: calc(var(--i, 0) * 70ms);
|
| 183 |
+
}
|
| 184 |
+
.evx-card:hover { /* subtle tactile lift */
|
| 185 |
+
transform: translateY(-3px) scale(1.012);
|
| 186 |
+
box-shadow: 0 14px 32px rgba(45,32,90,.18); border-color: rgba(109,75,224,.4);
|
| 187 |
+
}
|
| 188 |
+
.evx-bar { width: 5px; flex: 0 0 5px; background: var(--accent); }
|
| 189 |
+
.evx-body { padding: 13px 16px; min-width: 0; }
|
| 190 |
+
.evx-title { margin: 0 0 8px; font-size: 1.1rem; font-weight: 600; color: var(--text); }
|
| 191 |
+
.evx-chip { display: inline-block; padding: 4px 10px; border: 1px solid var(--line);
|
| 192 |
+
border-radius: 8px; font-size: .82rem; font-weight: 600; color: var(--text); background: var(--surface2); }
|
| 193 |
+
.evx-when { font-size: .92rem; color: var(--muted); font-weight: 600; }
|
| 194 |
+
.evx-meta { font-size: .85rem; color: var(--muted); margin-top: 8px; }
|
| 195 |
+
.evx-note { font-size: .82rem; color: var(--muted); margin-top: 6px; font-style: italic; }
|
| 196 |
+
/* per-event one-click quick-add links (Online mode) */
|
| 197 |
+
.evx-add { font-size: .8rem; color: var(--muted); margin-top: 8px; }
|
| 198 |
+
.evx-add a { color: var(--cyan); font-weight: 700; text-decoration: none; }
|
| 199 |
+
.evx-add a:hover { text-decoration: underline; }
|
| 200 |
+
/* Agent tab: the orchestrator's step trace */
|
| 201 |
+
.ag-trace { display: flex; flex-direction: column; gap: 6px; }
|
| 202 |
+
.ag-step { display: flex; gap: 10px; align-items: flex-start; background: var(--surface);
|
| 203 |
+
border: 1px solid var(--line); border-left: 3px solid var(--violet);
|
| 204 |
+
border-radius: 10px; padding: 9px 12px; font-size: .9rem; animation: rise .3s ease both; }
|
| 205 |
+
.ag-step code { background: var(--surface2); padding: 1px 6px; border-radius: 6px;
|
| 206 |
+
font-size: .82em; word-break: break-all; }
|
| 207 |
+
.ag-tool_call { border-left-color: var(--cyan); }
|
| 208 |
+
.ag-tool_result { border-left-color: var(--mint); }
|
| 209 |
+
.ag-final { border-left-color: var(--mint); background: rgba(21,137,79,.07); font-weight: 600; }
|
| 210 |
+
.ag-error { border-left-color: var(--coral); }
|
| 211 |
+
.ag-ico { flex: none; }
|
| 212 |
+
|
| 213 |
+
/* iPhone share-sheet Shortcut callout (export bar) */
|
| 214 |
+
.ship-note { color: var(--muted); font-size: .82rem; margin-top: 8px; }
|
| 215 |
+
.ship-note a { color: var(--cyan); font-weight: 600; text-decoration: none; }
|
| 216 |
+
.ship-note a:hover { text-decoration: underline; }
|
| 217 |
+
.evx-empty { color: var(--muted); padding: 18px; text-align: center; border: 1px dashed var(--line); border-radius: 12px; }
|
| 218 |
+
|
| 219 |
+
/* Horizontal swipe rail (kept for any list use) */
|
| 220 |
+
.evx-rail { display: flex; gap: 12px; overflow-x: auto; padding-bottom: 6px;
|
| 221 |
+
scroll-snap-type: x mandatory; scrollbar-width: none; }
|
| 222 |
+
.evx-rail::-webkit-scrollbar { display: none; }
|
| 223 |
+
.evx-rail .evx-card { flex: 0 0 78%; max-width: 320px; scroll-snap-align: start; }
|
| 224 |
+
|
| 225 |
+
/* ---- rotating hero carousel (auto-advance + arrows + dots) ---- */
|
| 226 |
+
.carousel { position: relative; border-radius: 18px; overflow: hidden; margin: 0 0 16px;
|
| 227 |
+
box-shadow: var(--shadow); border: 1px solid var(--line); }
|
| 228 |
+
.cz-track { position: relative; min-height: 200px; }
|
| 229 |
+
.cz-slide { position: absolute; inset: 0; opacity: 0; transform: translateX(16px);
|
| 230 |
+
transition: opacity .5s ease, transform .5s ease; pointer-events: none;
|
| 231 |
+
border: 0 !important; border-radius: 0 !important; box-shadow: none !important; margin: 0 !important; }
|
| 232 |
+
.cz-slide.is-active { opacity: 1; transform: none; pointer-events: auto; }
|
| 233 |
+
.carousel .bb-body { padding: 24px 125px 42px 150px; } /* clear the prev arrow (left) and keep text 125px off the next button (right) */
|
| 234 |
+
.cz-arrow { position: absolute; top: 50%; transform: translateY(-50%); z-index: 5;
|
| 235 |
+
width: 38px; height: 38px; border-radius: 50%; border: 1px solid var(--line);
|
| 236 |
+
background: rgba(31,25,60,.55); color: #fff; font-size: 1.3rem; line-height: 1; cursor: pointer;
|
| 237 |
+
display: flex; align-items: center; justify-content: center; transition: background .15s, transform .15s; }
|
| 238 |
+
.cz-arrow:hover { background: rgba(31,25,60,.8); transform: translateY(-50%) scale(1.08); }
|
| 239 |
+
.cz-prev { left: 12px; } .cz-next { right: 12px; }
|
| 240 |
+
.cz-dots { position: absolute; bottom: 12px; left: 0; right: 0; z-index: 5;
|
| 241 |
+
display: flex; gap: 8px; justify-content: center; }
|
| 242 |
+
.cz-dot { width: 8px; height: 8px; border-radius: 50%; border: 0; cursor: pointer; padding: 0;
|
| 243 |
+
background: rgba(31,25,60,.28); transition: width .2s ease, background .2s ease; }
|
| 244 |
+
.cz-dot.is-active { width: 22px; border-radius: 4px; background: var(--accent); }
|
| 245 |
+
@media (prefers-reduced-motion: reduce) { .cz-slide { transition: opacity .01s linear; } }
|
| 246 |
+
@media (max-width: 640px) {
|
| 247 |
+
.cz-track { min-height: 172px; }
|
| 248 |
+
.carousel .bb-body { padding: 18px 18px 34px 56px; } /* clear the (smaller) mobile prev arrow */
|
| 249 |
+
.cz-arrow { width: 32px; height: 32px; font-size: 1.1rem; }
|
| 250 |
+
}
|
| 251 |
+
|
| 252 |
+
/* ---- reply + export ---- */
|
| 253 |
+
.rv-reply { margin-top: 14px; }
|
| 254 |
+
.rv-copy button { background: var(--surface2) !important; border: 1px solid var(--line) !important; color: var(--text) !important; }
|
| 255 |
+
#rv-export, .ag-export {
|
| 256 |
+
margin-top: 14px; padding: 12px !important;
|
| 257 |
+
background: var(--surface) !important; border: 1px solid var(--line) !important;
|
| 258 |
+
border-radius: 14px !important; box-shadow: var(--shadow);
|
| 259 |
+
}
|
| 260 |
+
#rv-export button, .ag-export button { min-height: 46px !important; font-weight: 600 !important; border-radius: 11px !important; }
|
| 261 |
+
#rv-export .gr-button-primary, #rv-export button.primary,
|
| 262 |
+
.ag-export .gr-button-primary, .ag-export button.primary {
|
| 263 |
+
background: var(--accent) !important; color: #fff !important; border: none !important;
|
| 264 |
+
}
|
| 265 |
+
|
| 266 |
+
/* ---- shared (Activity / Memory) ---- */
|
| 267 |
+
.muted { color: var(--muted); font-style: italic; padding: 8px 2px; }
|
| 268 |
+
.stepper { display: flex; gap: 8px; flex-wrap: wrap; margin: 4px 0 10px; }
|
| 269 |
+
.step { position: relative; padding: 6px 14px; border-radius: 999px; font-size: .8rem; font-weight: 600;
|
| 270 |
+
color: var(--muted); background: var(--surface); border: 1px solid var(--line); }
|
| 271 |
+
.step:not(:last-child)::after { content: "→"; position: absolute; right: -14px; top: 50%; transform: translateY(-50%); color: var(--muted); }
|
| 272 |
+
.step.active { color: #fff; background: var(--c); border-color: var(--c); animation: pulse 1.4s infinite; }
|
| 273 |
+
@keyframes pulse {
|
| 274 |
+
0% { box-shadow: 0 0 0 0 color-mix(in srgb, var(--c) 70%, transparent); }
|
| 275 |
+
70% { box-shadow: 0 0 0 10px transparent; }
|
| 276 |
+
100% { box-shadow: 0 0 0 0 transparent; }
|
| 277 |
+
}
|
| 278 |
+
.tiles { display: flex; gap: 10px; flex-wrap: wrap; margin: 8px 0 14px; }
|
| 279 |
+
.tile { flex: 1 1 110px; background: var(--surface); border: 1px solid rgba(109,75,224,.18);
|
| 280 |
+
border-radius: 12px; padding: 12px 14px; text-align: center; box-shadow: 0 3px 12px rgba(45,32,90,.07); }
|
| 281 |
+
.tile-v { font-size: 1.5rem; font-weight: 700; font-family: "Fraunces", serif;
|
| 282 |
+
background: var(--accent); -webkit-background-clip: text; background-clip: text; -webkit-text-fill-color: transparent; }
|
| 283 |
+
.tile-k { font-size: .72rem; color: var(--muted); text-transform: uppercase; letter-spacing: .04em; }
|
| 284 |
+
.timeline { display: flex; flex-direction: column; gap: 6px; max-height: 360px; overflow-y: auto; flex: 1; }
|
| 285 |
+
.evt { display: grid; grid-template-columns: 84px 1fr auto auto; gap: 10px; align-items: center;
|
| 286 |
+
padding: 7px 12px; background: var(--surface); border: 1px solid var(--line); border-left: 3px solid var(--c); border-radius: 8px; font-size: .82rem; }
|
| 287 |
+
.evt.err { border-left-color: var(--coral); background: #fdecef; }
|
| 288 |
+
.evt-stage { color: var(--c); font-weight: 700; text-transform: uppercase; font-size: .7rem; filter: brightness(.78); }
|
| 289 |
+
.evt-msg { color: var(--text); }
|
| 290 |
+
.evt-meta, .evt-ts { color: var(--muted); font-family: ui-monospace, monospace; font-size: .72rem; }
|
| 291 |
+
.trace { background: var(--surface); border: 1px solid var(--line); border-radius: 10px; padding: 8px 12px; margin-bottom: 6px; }
|
| 292 |
+
.trace summary { cursor: pointer; font-weight: 600; color: var(--text); }
|
| 293 |
+
.trace-line { font-family: ui-monospace, monospace; font-size: .78rem; color: var(--muted); padding: 2px 0 2px 14px; }
|
| 294 |
+
.trace-stage { font-weight: 700; text-transform: uppercase; font-size: .68rem; margin-right: 6px; filter: brightness(.78); }
|
| 295 |
+
.event-card { background: var(--surface); border: 1px solid var(--line); border-radius: 12px; padding: 12px 14px; }
|
| 296 |
+
|
| 297 |
+
@keyframes rise { from { opacity: 0; transform: translateY(8px); } to { opacity: 1; transform: none; } }
|
| 298 |
+
|
| 299 |
+
/* ---- mobile-first ---- */
|
| 300 |
+
@media (max-width: 640px) {
|
| 301 |
+
#app-header { font-size: 1.5rem; }
|
| 302 |
+
#rv-actions { flex-direction: column; }
|
| 303 |
+
#rv-actions button { width: 100% !important; }
|
| 304 |
+
.evx-title { font-size: 1.05rem; }
|
| 305 |
+
.bb { min-height: 168px; }
|
| 306 |
+
.bb-body { padding: 18px; }
|
| 307 |
+
.bb-title { font-size: 1.55rem; }
|
| 308 |
+
.evx-rail .evx-card { flex: 0 0 86%; }
|
| 309 |
+
.tiles { gap: 8px; }
|
| 310 |
+
.tile { flex: 1 1 calc(50% - 8px); }
|
| 311 |
+
.evt { grid-template-columns: 1fr; gap: 2px; }
|
| 312 |
+
.evt-meta, .evt-ts { display: none; }
|
| 313 |
+
/* keep the export actions reachable on a phone */
|
| 314 |
+
#rv-export {
|
| 315 |
+
position: sticky; bottom: 0; z-index: 20;
|
| 316 |
+
background: rgba(255,255,255,.94) !important;
|
| 317 |
+
-webkit-backdrop-filter: blur(8px); backdrop-filter: blur(8px); /* prefixed copy first: Safari before 18 ignores the unprefixed property, and this bar only exists on phones */
|
| 318 |
+
box-shadow: 0 -8px 24px rgba(45,32,90,.18);
|
| 319 |
+
}
|
| 320 |
+
}
|
| 321 |
+
|
| 322 |
+
/* ---- showcase carousel: image-background slides (data-URI SVG illustrations) ---- */
|
| 323 |
+
/* !important so Gradio's own h2/theme heading color can't override the slide text */
|
| 324 |
+
.carousel .bb-img .bb-title, .bb-img .bb-title { color: #ffffff !important; }
|
| 325 |
+
.carousel .bb-img .bb-when, .bb-img .bb-when { color: #ffffff !important; }
|
| 326 |
+
.bb-img .bb-note { color: #e7e2fb !important; }
|
| 327 |
+
.bb-img .bb-kicker { color: #bff2f8 !important; }
|
| 328 |
+
.bb-scrim-dark { background: linear-gradient(to top, rgba(12,8,28,.86) 0%, rgba(12,8,28,.42) 55%, rgba(12,8,28,.15) 100%); }
|
| 329 |
+
.cz-bg-chat { background-image: url("data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCA4MDAgNDAwIiBwcmVzZXJ2ZUFzcGVjdFJhdGlvPSJ4TWlkWU1pZCBzbGljZSI+PGRlZnM+PGxpbmVhckdyYWRpZW50IGlkPSJnMSIgeDE9IjAiIHkxPSIwIiB4Mj0iMSIgeTI9IjEiPjxzdG9wIG9mZnNldD0iMCIgc3RvcC1jb2xvcj0iIzNhMmE3MiIvPjxzdG9wIG9mZnNldD0iMSIgc3RvcC1jb2xvcj0iIzBlNWY2ZSIvPjwvbGluZWFyR3JhZGllbnQ+PC9kZWZzPjxyZWN0IHdpZHRoPSI4MDAiIGhlaWdodD0iNDAwIiBmaWxsPSJ1cmwoI2cxKSIvPjxnIGZpbGw9IiNmZmZmZmYiIG9wYWNpdHk9IjAuMTYiPjxyZWN0IHg9IjkwIiB5PSI5MCIgcng9IjIyIiByeT0iMjIiIHdpZHRoPSIyNDAiIGhlaWdodD0iMTIwIi8+PHJlY3QgeD0iMzYwIiB5PSIxNzAiIHJ4PSIyMiIgcnk9IjIyIiB3aWR0aD0iMzAwIiBoZWlnaHQ9IjEyMCIvPjwvZz48ZyBmaWxsPSIjZmZmZmZmIiBvcGFjaXR5PSIwLjUiPjxyZWN0IHg9IjEyMCIgeT0iMTIwIiB3aWR0aD0iMTUwIiBoZWlnaHQ9IjE0IiByeD0iNyIvPjxyZWN0IHg9IjEyMCIgeT0iMTUwIiB3aWR0aD0iMTEwIiBoZWlnaHQ9IjE0IiByeD0iNyIvPjxyZWN0IHg9IjM5MCIgeT0iMjAwIiB3aWR0aD0iMjAwIiBoZWlnaHQ9IjE0IiByeD0iNyIvPjxyZWN0IHg9IjM5MCIgeT0iMjMwIiB3aWR0aD0iMTUwIiBoZWlnaHQ9IjE0IiByeD0iNyIvPjwvZz48ZyB0cmFuc2Zvcm09InRyYW5zbGF0ZSg2MDAsNzApIiBvcGFjaXR5PSIwLjg1Ij48cmVjdCB3aWR0aD0iMTEwIiBoZWlnaHQ9IjEwMCIgcng9IjE0IiBmaWxsPSIjNTRkOGUyIi8+PHJlY3Qgd2lkdGg9IjExMCIgaGVpZ2h0PSIyNiIgcng9IjE0IiBmaWxsPSIjMGI4Mjk0Ii8+PGcgZmlsbD0iIzBlMjIzMCI+PHJlY3QgeD0iMTgiIHk9IjQ2IiB3aWR0aD0iMTgiIGhlaWdodD0iMTgiIHJ4PSIzIi8+PHJlY3QgeD0iNDYiIHk9IjQ2IiB3aWR0aD0iMTgiIGhlaWdodD0iMTgiIHJ4PSIzIi8+PHJlY3QgeD0iNzQiIHk9IjQ2IiB3aWR0aD0iMTgiIGhlaWdodD0iMTgiIHJ4PSIzIi8+PHJlY3QgeD0iMTgiIHk9IjcyIiB3aWR0aD0iMTgiIGhlaWdodD0iMTgiIHJ4PSIzIi8+PC9nPjwvZz48L3N2Zz4="); background-size: cover; background-position: center; }
|
| 330 |
+
.cz-bg-flyer { background-image: url("data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCA4MDAgNDAwIiBwcmVzZXJ2ZUFzcGVjdFJhdGlvPSJ4TWlkWU1pZCBzbGljZSI+PGRlZnM+PGxpbmVhckdyYWRpZW50IGlkPSJnMiIgeDE9IjAiIHkxPSIwIiB4Mj0iMSIgeTI9IjEiPjxzdG9wIG9mZnNldD0iMCIgc3RvcC1jb2xvcj0iIzViMmE4NiIvPjxzdG9wIG9mZnNldD0iMSIgc3RvcC1jb2xvcj0iIzBlNmY3ZSIvPjwvbGluZWFyR3JhZGllbnQ+PC9kZWZzPjxyZWN0IHdpZHRoPSI4MDAiIGhlaWdodD0iNDAwIiBmaWxsPSJ1cmwoI2cyKSIvPjxnIHRyYW5zZm9ybT0icm90YXRlKC04IDMwMCAyMDApIj48cmVjdCB4PSIxNTAiIHk9IjcwIiB3aWR0aD0iMjYwIiBoZWlnaHQ9IjI2MCIgcng9IjE2IiBmaWxsPSIjZmZmZmZmIiBvcGFjaXR5PSIwLjkyIi8+PHJlY3QgeD0iMTgwIiB5PSIxMDAiIHdpZHRoPSIyMDAiIGhlaWdodD0iNzAiIHJ4PSI4IiBmaWxsPSIjNmQ0YmUwIiBvcGFjaXR5PSIwLjg1Ii8+PGcgZmlsbD0iIzlhOTNiOCI+PHJlY3QgeD0iMTgwIiB5PSIxOTAiIHdpZHRoPSIyMDAiIGhlaWdodD0iMTIiIHJ4PSI2Ii8+PHJlY3QgeD0iMTgwIiB5PSIyMTQiIHdpZHRoPSIxNjAiIGhlaWdodD0iMTIiIHJ4PSI2Ii8+PHJlY3QgeD0iMTgwIiB5PSIyMzgiIHdpZHRoPSIxODAiIGhlaWdodD0iMTIiIHJ4PSI2Ii8+PC9nPjxyZWN0IHg9IjE4MCIgeT0iMjc4IiB3aWR0aD0iMTIwIiBoZWlnaHQ9IjI2IiByeD0iMTMiIGZpbGw9IiMwYjgyOTQiLz48L2c+PGcgdHJhbnNmb3JtPSJ0cmFuc2xhdGUoNTIwLDE1MCkiPjxyZWN0IHdpZHRoPSIxNzAiIGhlaWdodD0iMTMwIiByeD0iMTgiIGZpbGw9IiMwZTIyMzAiIG9wYWNpdHk9IjAuOSIvPjxjaXJjbGUgY3g9Ijg1IiBjeT0iNzAiIHI9IjQyIiBmaWxsPSJub25lIiBzdHJva2U9IiM1NGQ4ZTIiIHN0cm9rZS13aWR0aD0iOCIvPjxjaXJjbGUgY3g9Ijg1IiBjeT0iNzAiIHI9IjE4IiBmaWxsPSIjNTRkOGUyIi8+PHJlY3QgeD0iMTIwIiB5PSIyMCIgd2lkdGg9IjM0IiBoZWlnaHQ9IjE0IiByeD0iNyIgZmlsbD0iIzU0ZDhlMiIvPjwvZz48L3N2Zz4="); background-size: cover; background-position: center; }
|
| 331 |
+
.cz-bg-cal { background-image: url("data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCA4MDAgNDAwIiBwcmVzZXJ2ZUFzcGVjdFJhdGlvPSJ4TWlkWU1pZCBzbGljZSI+PGRlZnM+PGxpbmVhckdyYWRpZW50IGlkPSJnMyIgeDE9IjAiIHkxPSIwIiB4Mj0iMSIgeTI9IjEiPjxzdG9wIG9mZnNldD0iMCIgc3RvcC1jb2xvcj0iIzJmMmE3OCIvPjxzdG9wIG9mZnNldD0iMSIgc3RvcC1jb2xvcj0iIzBlNjQ3MCIvPjwvbGluZWFyR3JhZGllbnQ+PC9kZWZzPjxyZWN0IHdpZHRoPSI4MDAiIGhlaWdodD0iNDAwIiBmaWxsPSJ1cmwoI2czKSIvPjxnIHRyYW5zZm9ybT0idHJhbnNsYXRlKDI1MCw3MCkiPjxyZWN0IHdpZHRoPSIzMDAiIGhlaWdodD0iMjYwIiByeD0iMTgiIGZpbGw9IiNmZmZmZmYiIG9wYWNpdHk9IjAuOTUiLz48cmVjdCB3aWR0aD0iMzAwIiBoZWlnaHQ9IjU0IiByeD0iMTgiIGZpbGw9IiM2ZDRiZTAiLz48cmVjdCB4PSIyMCIgeT0iODAiIHdpZHRoPSIzNCIgaGVpZ2h0PSIzNCIgcng9IjYiIGZpbGw9IiNlN2UzZjUiLz48cmVjdCB4PSI2NiIgeT0iODAiIHdpZHRoPSIzNCIgaGVpZ2h0PSIzNCIgcng9IjYiIGZpbGw9IiNlN2UzZjUiLz48cmVjdCB4PSIxMTIiIHk9IjgwIiB3aWR0aD0iMzQiIGhlaWdodD0iMzQiIHJ4PSI2IiBmaWxsPSIjZTdlM2Y1Ii8+PHJlY3QgeD0iMTU4IiB5PSI4MCIgd2lkdGg9IjM0IiBoZWlnaHQ9IjM0IiByeD0iNiIgZmlsbD0iI2U3ZTNmNSIvPjxyZWN0IHg9IjIwNCIgeT0iODAiIHdpZHRoPSIzNCIgaGVpZ2h0PSIzNCIgcng9IjYiIGZpbGw9IiNlN2UzZjUiLz48cmVjdCB4PSIyNTAiIHk9IjgwIiB3aWR0aD0iMzQiIGhlaWdodD0iMzQiIHJ4PSI2IiBmaWxsPSIjZTdlM2Y1Ii8+PHJlY3QgeD0iMjAiIHk9IjEyNiIgd2lkdGg9IjM0IiBoZWlnaHQ9IjM0IiByeD0iNiIgZmlsbD0iI2U3ZTNmNSIvPjxyZWN0IHg9IjY2IiB5PSIxMjYiIHdpZHRoPSIzNCIgaGVpZ2h0PSIzNCIgcng9IjYiIGZpbGw9IiNlN2UzZjUiLz48cmVjdCB4PSIxMTIiIHk9IjEyNiIgd2lkdGg9IjM0IiBoZWlnaHQ9IjM0IiByeD0iNiIgZmlsbD0iI2U3ZTNmNSIvPjxyZWN0IHg9IjE1OCIgeT0iMTI2IiB3aWR0aD0iMzQiIGhlaWdodD0iMzQiIHJ4PSI2IiBmaWxsPSIjZTdlM2Y1Ii8+PHJlY3QgeD0iMjA0IiB5PSIxMjYiIHdpZHRoPSIzNCIgaGVpZ2h0PSIzNCIgcng9IjYiIGZpbGw9IiNlN2UzZjUiLz48cmVjdCB4PSIyNTAiIHk9IjEyNiIgd2lkdGg9IjM0IiBoZWlnaHQ9IjM0IiByeD0iNiIgZmlsbD0iI2U3ZTNmNSIvPjxyZWN0IHg9IjIwIiB5PSIxNzIiIHdpZHRoPSIzNCIgaGVpZ2h0PSIzNCIgcng9IjYiIGZpbGw9IiNlN2UzZjUiLz48cmVjdCB4PSI2NiIgeT0iMTcyIiB3aWR0aD0iMzQiIGhlaWdodD0iMzQiIHJ4PSI2IiBmaWxsPSIjZTdlM2Y1Ii8+PHJlY3QgeD0iMTEyIiB5PSIxNzIiIHdpZHRoPSIzNCIgaGVpZ2h0PSIzNCIgcng9Ij
YiIGZpbGw9IiNlN2UzZjUiLz48cmVjdCB4PSIxNTgiIHk9IjE3MiIgd2lkdGg9IjM0IiBoZWlnaHQ9IjM0IiByeD0iNiIgZmlsbD0iI2U3ZTNmNSIvPjxyZWN0IHg9IjIwNCIgeT0iMTcyIiB3aWR0aD0iMzQiIGhlaWdodD0iMzQiIHJ4PSI2IiBmaWxsPSIjZTdlM2Y1Ii8+PHJlY3QgeD0iMjUwIiB5PSIxNzIiIHdpZHRoPSIzNCIgaGVpZ2h0PSIzNCIgcng9IjYiIGZpbGw9IiNlN2UzZjUiLz48cmVjdCB4PSIyMCIgeT0iMjE4IiB3aWR0aD0iMzQiIGhlaWdodD0iMzQiIHJ4PSI2IiBmaWxsPSIjZTdlM2Y1Ii8+PHJlY3QgeD0iNjYiIHk9IjIxOCIgd2lkdGg9IjM0IiBoZWlnaHQ9IjM0IiByeD0iNiIgZmlsbD0iI2U3ZTNmNSIvPjxyZWN0IHg9IjExMiIgeT0iMjE4IiB3aWR0aD0iMzQiIGhlaWdodD0iMzQiIHJ4PSI2IiBmaWxsPSIjZTdlM2Y1Ii8+PHJlY3QgeD0iMTU4IiB5PSIyMTgiIHdpZHRoPSIzNCIgaGVpZ2h0PSIzNCIgcng9IjYiIGZpbGw9IiNlN2UzZjUiLz48cmVjdCB4PSIyMDQiIHk9IjIxOCIgd2lkdGg9IjM0IiBoZWlnaHQ9IjM0IiByeD0iNiIgZmlsbD0iI2U3ZTNmNSIvPjxyZWN0IHg9IjI1MCIgeT0iMjE4IiB3aWR0aD0iMzQiIGhlaWdodD0iMzQiIHJ4PSI2IiBmaWxsPSIjZTdlM2Y1Ii8+PHJlY3QgeD0iMTU4IiB5PSIxNzIiIHdpZHRoPSIzNCIgaGVpZ2h0PSIzNCIgcng9IjYiIGZpbGw9IiMwYjgyOTQiLz48Y2lyY2xlIGN4PSIxNzUiIGN5PSIxODkiIHI9IjgiIGZpbGw9IiNmZmZmZmYiLz48L2c+PC9zdmc+"); background-size: cover; background-position: center; }
|
| 332 |
+
.cz-bg-reply { background-image: url("data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCA4MDAgNDAwIiBwcmVzZXJ2ZUFzcGVjdFJhdGlvPSJ4TWlkWU1pZCBzbGljZSI+PGRlZnM+PGxpbmVhckdyYWRpZW50IGlkPSJnNCIgeDE9IjAiIHkxPSIwIiB4Mj0iMSIgeTI9IjEiPjxzdG9wIG9mZnNldD0iMCIgc3RvcC1jb2xvcj0iIzRhMmE4MCIvPjxzdG9wIG9mZnNldD0iMSIgc3RvcC1jb2xvcj0iIzBlNWY2ZSIvPjwvbGluZWFyR3JhZGllbnQ+PC9kZWZzPjxyZWN0IHdpZHRoPSI4MDAiIGhlaWdodD0iNDAwIiBmaWxsPSJ1cmwoI2c0KSIvPjxnIHRyYW5zZm9ybT0idHJhbnNsYXRlKDE1MCwxMjApIj48cmVjdCB3aWR0aD0iMzIwIiBoZWlnaHQ9IjE1MCIgcng9IjI2IiBmaWxsPSIjZmZmZmZmIiBvcGFjaXR5PSIwLjk1Ii8+PHBhdGggZD0iTTYwIDE1MCBsMCA1MCBsNTAgLTUwIHoiIGZpbGw9IiNmZmZmZmYiIG9wYWNpdHk9IjAuOTUiLz48ZyBmaWxsPSIjOWE5M2I4Ij48cmVjdCB4PSI0MCIgeT0iNDQiIHdpZHRoPSIyNDAiIGhlaWdodD0iMTQiIHJ4PSI3Ii8+PHJlY3QgeD0iNDAiIHk9Ijc2IiB3aWR0aD0iMTgwIiBoZWlnaHQ9IjE0IiByeD0iNyIvPjwvZz48L2c+PGcgdHJhbnNmb3JtPSJ0cmFuc2xhdGUoNTQwLDE1MCkiPjxjaXJjbGUgY3g9IjYwIiBjeT0iNjAiIHI9IjYwIiBmaWxsPSIjMTViMDcwIi8+PHBhdGggZD0iTTMyIDYyIGwyMCAyMCBsNDAgLTQ0IiBmaWxsPSJub25lIiBzdHJva2U9IiNmZmZmZmYiIHN0cm9rZS13aWR0aD0iMTIiIHN0cm9rZS1saW5lY2FwPSJyb3VuZCIgc3Ryb2tlLWxpbmVqb2luPSJyb3VuZCIvPjwvZz48L3N2Zz4="); background-size: cover; background-position: center; }
|
| 333 |
+
.cz-bg-carpool { background-image: url("data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCA4MDAgNDAwIiBwcmVzZXJ2ZUFzcGVjdFJhdGlvPSJ4TWlkWU1pZCBzbGljZSI+PGRlZnM+PGxpbmVhckdyYWRpZW50IGlkPSJnYyIgeDE9IjAiIHkxPSIwIiB4Mj0iMSIgeTI9IjEiPjxzdG9wIG9mZnNldD0iMCIgc3RvcC1jb2xvcj0iIzNhMmE3MiIvPjxzdG9wIG9mZnNldD0iMSIgc3RvcC1jb2xvcj0iIzBlNjQ3MCIvPjwvbGluZWFyR3JhZGllbnQ+PC9kZWZzPjxyZWN0IHdpZHRoPSI4MDAiIGhlaWdodD0iNDAwIiBmaWxsPSJ1cmwoI2djKSIvPjxyZWN0IHg9IjAiIHk9IjMwMCIgd2lkdGg9IjgwMCIgaGVpZ2h0PSIxMCIgZmlsbD0iI2ZmZmZmZiIgb3BhY2l0eT0iMC4zIi8+PGcgZmlsbD0iI2ZmZmZmZiIgb3BhY2l0eT0iMC41Ij48cmVjdCB4PSI2MCIgeT0iMzAxIiB3aWR0aD0iNjAiIGhlaWdodD0iNiIvPjxyZWN0IHg9IjE4MCIgeT0iMzAxIiB3aWR0aD0iNjAiIGhlaWdodD0iNiIvPjxyZWN0IHg9IjMwMCIgeT0iMzAxIiB3aWR0aD0iNjAiIGhlaWdodD0iNiIvPjxyZWN0IHg9IjQyMCIgeT0iMzAxIiB3aWR0aD0iNjAiIGhlaWdodD0iNiIvPjwvZz48ZyB0cmFuc2Zvcm09InRyYW5zbGF0ZSgxMjAsMTUwKSI+PHJlY3QgeD0iMCIgeT0iNzAiIHdpZHRoPSIyNjAiIGhlaWdodD0iNzIiIHJ4PSIyNCIgZmlsbD0iIzU0ZDhlMiIvPjxwYXRoIGQ9Ik00NCA3MiBxMjggLTU2IDkyIC01NiBsNjQgMCBxNDQgMCA2NCA1NiB6IiBmaWxsPSIjZmZmZmZmIiBvcGFjaXR5PSIwLjkyIi8+PHJlY3QgeD0iNzAiIHk9IjMwIiB3aWR0aD0iNjAiIGhlaWdodD0iNDAiIHJ4PSI2IiBmaWxsPSIjNmQ0YmUwIiBvcGFjaXR5PSIwLjg1Ii8+PHJlY3QgeD0iMTUwIiB5PSIzMCIgd2lkdGg9IjYwIiBoZWlnaHQ9IjQwIiByeD0iNiIgZmlsbD0iIzZkNGJlMCIgb3BhY2l0eT0iMC44NSIvPjxjaXJjbGUgY3g9Ijc0IiBjeT0iMTQ4IiByPSIyOCIgZmlsbD0iIzBlMjIzMCIvPjxjaXJjbGUgY3g9Ijc0IiBjeT0iMTQ4IiByPSIxMyIgZmlsbD0iIzU0ZDhlMiIvPjxjaXJjbGUgY3g9IjIwNiIgY3k9IjE0OCIgcj0iMjgiIGZpbGw9IiMwZTIyMzAiLz48Y2lyY2xlIGN4PSIyMDYiIGN5PSIxNDgiIHI9IjEzIiBmaWxsPSIjNTRkOGUyIi8+PC9nPjxnIHRyYW5zZm9ybT0idHJhbnNsYXRlKDU2MCw5MCkiIGZpbGw9Im5vbmUiIHN0cm9rZT0iIzU0ZDhlMiIgc3Ryb2tlLXdpZHRoPSIxMCIgc3Ryb2tlLWxpbmVjYXA9InJvdW5kIj48Y2lyY2xlIGN4PSI2MCIgY3k9IjYwIiByPSI0MCIgc3Ryb2tlPSIjZmZmZmZmIiBvcGFjaXR5PSIwLjg1Ii8+PHBhdGggZD0iTTYwIDM2IGwwIDI0IGwxNiAxMCIgc3Ryb2tlPSIjNTRkOGUyIi8+PC9nPjwvc3ZnPg=="); background-size: cover; background-position: center; }
|
| 334 |
+
.cz-bg-appt { background-image: url("data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCA4MDAgNDAwIiBwcmVzZXJ2ZUFzcGVjdFJhdGlvPSJ4TWlkWU1pZCBzbGljZSI+PGRlZnM+PGxpbmVhckdyYWRpZW50IGlkPSJnYSIgeDE9IjAiIHkxPSIwIiB4Mj0iMSIgeTI9IjEiPjxzdG9wIG9mZnNldD0iMCIgc3RvcC1jb2xvcj0iIzJmMmE3OCIvPjxzdG9wIG9mZnNldD0iMSIgc3RvcC1jb2xvcj0iIzBlNjQ3MCIvPjwvbGluZWFyR3JhZGllbnQ+PC9kZWZzPjxyZWN0IHdpZHRoPSI4MDAiIGhlaWdodD0iNDAwIiBmaWxsPSJ1cmwoI2dhKSIvPjxnIHRyYW5zZm9ybT0idHJhbnNsYXRlKDQ0MCw5MCkiPjxyZWN0IHdpZHRoPSIyNjAiIGhlaWdodD0iMjIwIiByeD0iMTYiIGZpbGw9IiNmZmZmZmYiIG9wYWNpdHk9IjAuOTUiLz48cmVjdCB3aWR0aD0iMjYwIiBoZWlnaHQ9IjUwIiByeD0iMTYiIGZpbGw9IiM2ZDRiZTAiLz48ZyBmaWxsPSIjZTdlM2Y1Ij48cmVjdCB4PSIyMiIgeT0iNzgiIHdpZHRoPSI0MCIgaGVpZ2h0PSI0MCIgcng9IjYiLz48cmVjdCB4PSI3OCIgeT0iNzgiIHdpZHRoPSI0MCIgaGVpZ2h0PSI0MCIgcng9IjYiLz48cmVjdCB4PSIxMzQiIHk9Ijc4IiB3aWR0aD0iNDAiIGhlaWdodD0iNDAiIHJ4PSI2Ii8+PHJlY3QgeD0iMTkwIiB5PSI3OCIgd2lkdGg9IjQwIiBoZWlnaHQ9IjQwIiByeD0iNiIvPjxyZWN0IHg9IjIyIiB5PSIxMzAiIHdpZHRoPSI0MCIgaGVpZ2h0PSI0MCIgcng9IjYiLz48cmVjdCB4PSI3OCIgeT0iMTMwIiB3aWR0aD0iNDAiIGhlaWdodD0iNDAiIHJ4PSI2Ii8+PHJlY3QgeD0iMTkwIiB5PSIxMzAiIHdpZHRoPSI0MCIgaGVpZ2h0PSI0MCIgcng9IjYiLz48L2c+PHJlY3QgeD0iMTM0IiB5PSIxMzAiIHdpZHRoPSI0MCIgaGVpZ2h0PSI0MCIgcng9IjYiIGZpbGw9IiMwYjgyOTQiLz48Y2lyY2xlIGN4PSIxNTQiIGN5PSIxNTAiIHI9IjkiIGZpbGw9IiNmZmZmZmYiLz48L2c+PGcgdHJhbnNmb3JtPSJ0cmFuc2xhdGUoMTIwLDExMCkiPjxjaXJjbGUgY3g9Ijk1IiBjeT0iOTUiIHI9Ijk1IiBmaWxsPSIjMGUyMjMwIiBvcGFjaXR5PSIwLjkyIi8+PGNpcmNsZSBjeD0iOTUiIGN5PSI5NSIgcj0iNzgiIGZpbGw9Im5vbmUiIHN0cm9rZT0iIzU0ZDhlMiIgc3Ryb2tlLXdpZHRoPSI5Ii8+PHBhdGggZD0iTTk1IDk1IEw5NSA0NiIgc3Ryb2tlPSIjNTRkOGUyIiBzdHJva2Utd2lkdGg9IjExIiBzdHJva2UtbGluZWNhcD0icm91bmQiLz48cGF0aCBkPSJNOTUgOTUgTDEzNiAxMTIiIHN0cm9rZT0iI2ZmZmZmZiIgc3Ryb2tlLXdpZHRoPSI4IiBzdHJva2UtbGluZWNhcD0icm91bmQiLz48L2c+PC9zdmc+"); background-size: cover; background-position: center; }
|
| 335 |
+
.cz-bg-party { background-image: url("data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCA4MDAgNDAwIiBwcmVzZXJ2ZUFzcGVjdFJhdGlvPSJ4TWlkWU1pZCBzbGljZSI+PGRlZnM+PGxpbmVhckdyYWRpZW50IGlkPSJncCIgeDE9IjAiIHkxPSIwIiB4Mj0iMSIgeTI9IjEiPjxzdG9wIG9mZnNldD0iMCIgc3RvcC1jb2xvcj0iIzViMmE4NiIvPjxzdG9wIG9mZnNldD0iMSIgc3RvcC1jb2xvcj0iIzBlNmY3ZSIvPjwvbGluZWFyR3JhZGllbnQ+PC9kZWZzPjxyZWN0IHdpZHRoPSI4MDAiIGhlaWdodD0iNDAwIiBmaWxsPSJ1cmwoI2dwKSIvPjxnIG9wYWNpdHk9IjAuNzUiPjxyZWN0IHg9IjEyMCIgeT0iNTAiIHdpZHRoPSIxNiIgaGVpZ2h0PSIxNiIgcng9IjMiIGZpbGw9IiM1NGQ4ZTIiIHRyYW5zZm9ybT0icm90YXRlKDIwIDEyOCA1OCkiLz48cmVjdCB4PSIzMDAiIHk9IjQwIiB3aWR0aD0iMTQiIGhlaWdodD0iMTQiIHJ4PSIzIiBmaWxsPSIjZmZmZmZmIiB0cmFuc2Zvcm09InJvdGF0ZSgtMTUgMzA3IDQ3KSIvPjxyZWN0IHg9IjY0MCIgeT0iNjAiIHdpZHRoPSIxNiIgaGVpZ2h0PSIxNiIgcng9IjMiIGZpbGw9IiM1NGQ4ZTIiIHRyYW5zZm9ybT0icm90YXRlKDMwIDY0OCA2OCkiLz48Y2lyY2xlIGN4PSI1MjAiIGN5PSI1MCIgcj0iOCIgZmlsbD0iI2ZmZmZmZiIvPjxjaXJjbGUgY3g9IjIyMCIgY3k9IjM0MCIgcj0iOCIgZmlsbD0iIzU0ZDhlMiIvPjxyZWN0IHg9IjcwMCIgeT0iMzIwIiB3aWR0aD0iMTQiIGhlaWdodD0iMTQiIHJ4PSIzIiBmaWxsPSIjZmZmZmZmIiB0cmFuc2Zvcm09InJvdGF0ZSgyNSA3MDcgMzI3KSIvPjwvZz48ZyB0cmFuc2Zvcm09InRyYW5zbGF0ZSgxNTAsODApIj48ZWxsaXBzZSBjeD0iNzAiIGN5PSI5MCIgcng9IjY0IiByeT0iNzgiIGZpbGw9IiM1NGQ4ZTIiLz48cGF0aCBkPSJNNzAgMTY4IGwtOSAyMCBsMTggMCB6IiBmaWxsPSIjNTRkOGUyIi8+PHBhdGggZD0iTTcwIDE4OCBxMjQgMzQgLTEyIDY2IiBzdHJva2U9IiNmZmZmZmYiIHN0cm9rZS13aWR0aD0iNCIgZmlsbD0ibm9uZSIgb3BhY2l0eT0iMC43Ii8+PC9nPjxnIHRyYW5zZm9ybT0idHJhbnNsYXRlKDQ3MCwxNzApIj48cmVjdCB3aWR0aD0iMTkwIiBoZWlnaHQ9IjE1MCIgcng9IjE0IiBmaWxsPSIjZmZmZmZmIiBvcGFjaXR5PSIwLjk1Ii8+PHJlY3QgeD0iODIiIHdpZHRoPSIyNiIgaGVpZ2h0PSIxNTAiIGZpbGw9IiM2ZDRiZTAiLz48cmVjdCB5PSI1OCIgd2lkdGg9IjE5MCIgaGVpZ2h0PSIyNiIgZmlsbD0iIzZkNGJlMCIvPjxwYXRoIGQ9Ik05NSA1OCBxLTQ2IC01NCAtMTQgLTU0IHEzNCAwIDE0IDU0IHEyMCAtNTQgNTQgLTU0IHEzNCAwIC0xNCA1NCB6IiBmaWxsPSIjMGI4Mjk0Ii8+PC9nPjwvc3ZnPg=="); background-size: cover; background-position: center; }
|
| 336 |
+
|
| 337 |
+
/* ===================================================================== */
|
| 338 |
+
/* Landing-page remaster — bold dark hero + light body */
|
| 339 |
+
/* ===================================================================== */
|
| 340 |
+
html { overflow-x: hidden; scroll-behavior: smooth; }
|
| 341 |
+
.gradio-container { scroll-behavior: smooth; }
/* Honour the OS "reduce motion" setting: anchor jumps become instant instead of animated. */
@media (prefers-reduced-motion: reduce) { html, .gradio-container { scroll-behavior: auto; } }
|
| 342 |
+
|
| 343 |
+
/* ---- sticky top nav (full-bleed) ---- */
|
| 344 |
+
#site-nav { width: 100vw; margin-left: calc(50% - 50vw); position: sticky; top: 0; z-index: 60;
|
| 345 |
+
background: rgba(255,255,255,.82); -webkit-backdrop-filter: blur(12px) saturate(1.2); backdrop-filter: blur(12px) saturate(1.2); /* prefixed copy for Safari before 18 */
|
| 346 |
+
border-bottom: 1px solid var(--line); }
|
| 347 |
+
#site-nav .nav-inner { max-width: 1100px; margin: 0 auto; padding: 12px 22px;
|
| 348 |
+
display: flex; align-items: center; justify-content: space-between; gap: 16px; }
|
| 349 |
+
.nav-brand { display: inline-flex; align-items: center; gap: 9px;
|
| 350 |
+
font-family: "Fraunces", serif; font-weight: 700; font-size: 1.16rem; text-decoration: none; }
|
| 351 |
+
/* plain ink in the site display typeface — the gradient text-clip trick left
|
| 352 |
+
the title invisible whenever the clip didn't apply */
|
| 353 |
+
.nav-brand span { color: var(--text) !important; -webkit-text-fill-color: currentColor; }
|
| 354 |
+
.nav-logo { width: 30px; height: 30px; object-fit: contain; flex: none; }
|
| 355 |
+
/* calendar-option notes inside the step-2 dropdown */
|
| 356 |
+
.cal-note { color: var(--muted); font-size: .86rem; margin-bottom: 6px; }
|
| 357 |
+
.cal-note code { background: var(--surface2); padding: 1px 5px; border-radius: 5px; }
|
| 358 |
+
.nav-links { display: flex; align-items: center; gap: 18px; }
|
| 359 |
+
/* grouped dropdowns */
|
| 360 |
+
.nav-group { position: relative; }
|
| 361 |
+
.nav-top { background: none; border: 0; cursor: pointer; color: var(--muted); font-weight: 600;
|
| 362 |
+
font-size: .92rem; padding: 6px 4px; display: inline-flex; align-items: center; gap: 6px;
|
| 363 |
+
font-family: inherit; transition: color .15s; }
|
| 364 |
+
.nav-group:hover .nav-top, .nav-group:focus-within .nav-top, .nav-top:hover { color: var(--text); }
|
| 365 |
+
.nav-caret { font-size: .7rem; opacity: .7; transition: transform .15s; }
|
| 366 |
+
.nav-group:hover .nav-caret, .nav-group:focus-within .nav-caret { transform: rotate(180deg); }
|
| 367 |
+
.nav-menu { position: absolute; top: calc(100% + 8px); left: 0; min-width: 190px;
|
| 368 |
+
background: #fff; border: 1px solid var(--line); border-radius: 12px; box-shadow: var(--shadow);
|
| 369 |
+
padding: 8px; display: none; flex-direction: column; gap: 2px; z-index: 70; }
|
| 370 |
+
.nav-group:hover .nav-menu, .nav-group:focus-within .nav-menu { display: flex; }
/* Invisible bridge over the gap between the trigger and the menu (8px offset + 1px menu border).
   Without it the pointer leaves .nav-group while crossing the gap, :hover drops and the menu
   closes before it can be reached. It only exists while the menu itself is displayed. */
.nav-menu::before { content: ""; position: absolute; left: 0; right: 0; top: -9px; height: 9px; }
|
| 371 |
+
.nav-item { display: block; padding: 8px 12px; border-radius: 8px; color: var(--text) !important;
|
| 372 |
+
text-decoration: none; font-weight: 600; font-size: .92rem; cursor: pointer; white-space: nowrap; }
|
| 373 |
+
.nav-item:hover { background: var(--surface2); }
|
| 374 |
+
.nav-cta { background: var(--accent); color: #fff !important; padding: 8px 18px; border-radius: 999px;
|
| 375 |
+
text-decoration: none; font-weight: 700; font-size: .92rem; box-shadow: 0 6px 18px rgba(109,75,224,.35);
|
| 376 |
+
transition: filter .15s; }
|
| 377 |
+
.nav-cta:hover { filter: brightness(1.06); }
|
| 378 |
+
|
| 379 |
+
/* Hide the default Gradio tab strip — the banner is the only navigation now.
|
| 380 |
+
IMPORTANT: don't use display:none / width:0 — Gradio 6's tab bar is responsive
|
| 381 |
+
(measures its width and overflows tabs that don't fit into a "more" menu as a
|
| 382 |
+
non-interactive clone the nav JS can't click). Collapsing it while keeping its
|
| 383 |
+
NATURAL width broke on mobile: a ~390px phone viewport can't fit 7 tabs, so the
|
| 384 |
+
later ones (Memory/Feed/Submission, index >= 3) overflowed into that dead clone
|
| 385 |
+
and their banner links did nothing — while desktop's ~1100px fit them all.
|
| 386 |
+
Fix: collapse the strip to zero height but force a FIXED LARGE width so the responsive
|
| 387 |
+
measurement always sees room for every tab on one row, no overflow menu is
|
| 388 |
+
ever built, and all tab buttons stay real and programmatically clickable. */
|
| 389 |
+
.gradio-container .tab-wrapper,
|
| 390 |
+
.gradio-container .tab-nav {
|
| 391 |
+
height: 0 !important; min-height: 0 !important; overflow: hidden !important;
|
| 392 |
+
opacity: 0 !important; margin: 0 !important; padding: 0 !important; border: 0 !important;
|
| 393 |
+
pointer-events: none;
|
| 394 |
+
/* KEEP the strip from ever overflowing tabs into the dead "more" clone:
|
| 395 |
+
force its measured width far past the tab count and forbid wrapping, so on
|
| 396 |
+
a phone all 7 tabs still "fit" and stay real, clickable buttons. height:0 +
|
| 397 |
+
overflow:hidden keeps it invisible; html{overflow-x:hidden} clips the
|
| 398 |
+
oversized width so it can't add a horizontal scrollbar. */
|
| 399 |
+
min-width: 1600px !important; max-width: none !important; flex-wrap: nowrap !important;
|
| 400 |
+
}
|
| 401 |
+
|
| 402 |
+
/* ---- hero (full-bleed DARK band, split: copy + example-card grid) ---- */
|
| 403 |
+
#hero { width: 100vw; margin-left: calc(50% - 50vw);
|
| 404 |
+
padding: 64px max(22px, calc(50vw - 560px)) 104px; /* extra bottom so the tool card overlaps */
|
| 405 |
+
display: flex !important; flex-wrap: nowrap; align-items: center; gap: 40px !important;
|
| 406 |
+
background:
|
| 407 |
+
radial-gradient(900px 520px at 82% -12%, rgba(109,75,224,.42) 0%, transparent 60%),
|
| 408 |
+
radial-gradient(760px 520px at -5% 115%, rgba(11,130,148,.34) 0%, transparent 55%),
|
| 409 |
+
linear-gradient(160deg, #0e0b1c 0%, #1a1230 58%, #0e1622 100%) !important; }
|
| 410 |
+
/* inner Gradio wrappers transparent so the dark band shows (NOT #hero itself) */
|
| 411 |
+
#hero-left, #hero-right,
|
| 412 |
+
#hero .block, #hero .form, #hero .gr-group, #hero .gradio-html, #hero > * {
|
| 413 |
+
background: transparent !important; border: 0 !important; box-shadow: none !important; }
|
| 414 |
+
#hero-left { flex: 1 1 520px; min-width: 0; }
|
| 415 |
+
#hero-right { flex: 0 0 380px; max-width: 400px; }
|
| 416 |
+
.hero-copy { max-width: 620px; animation: rise .5s ease both; }
|
| 417 |
+
.hero-eyebrow { color: #bff2f8 !important; font-weight: 700; letter-spacing: .14em; text-transform: uppercase;
|
| 418 |
+
font-size: .8rem; margin-bottom: 14px; }
|
| 419 |
+
.hero-title { font-family: "Fraunces", serif; color: #fff !important; font-size: clamp(2.2rem, 5vw, 4rem);
|
| 420 |
+
line-height: 1.03; letter-spacing: -.02em; margin: 0 0 18px; }
|
| 421 |
+
.hero-accent { background: linear-gradient(100deg, #b39bff, #5fd5e6);
|
| 422 |
+
-webkit-background-clip: text; background-clip: text; -webkit-text-fill-color: transparent; }
|
| 423 |
+
.hero-sub { color: #ded8f0 !important; font-size: clamp(1rem, 1.5vw, 1.18rem); line-height: 1.55; max-width: 560px; margin: 0 0 12px; }
|
| 424 |
+
.hero-trust { color: #b3add0 !important; font-size: .9rem; margin: 0; }
|
| 425 |
+
#hero-cta { gap: 14px !important; align-items: center; margin-top: 20px; flex-wrap: wrap; }
|
| 426 |
+
#hero-cta #hero-try { flex: 0 0 auto; }
|
| 427 |
+
#hero-try button { background: var(--accent) !important; color: #fff !important; border: none !important;
|
| 428 |
+
font-weight: 700 !important; border-radius: 999px !important; padding: 13px 28px !important; min-height: 0 !important;
|
| 429 |
+
box-shadow: 0 12px 32px rgba(109,75,224,.46) !important; transition: transform .15s, filter .15s !important; }
|
| 430 |
+
#hero-try button:hover { transform: translateY(-2px); filter: brightness(1.07); }
|
| 431 |
+
.hero-ghost { color: #fff !important; /* !important like the other nav/hero links, so a host theme link colour can't repaint it on the dark band */ text-decoration: none; font-weight: 600; border: 1px solid rgba(255,255,255,.42);
|
| 432 |
+
padding: 12px 22px; border-radius: 999px; transition: background .15s; white-space: nowrap; }
|
| 433 |
+
.hero-ghost:hover { background: rgba(255,255,255,.12); }
|
| 434 |
+
|
| 435 |
+
/* hero example-card grid (chat -> event), echoes the reference's message cards */
|
| 436 |
+
.hx-grid { display: grid; grid-template-columns: 1fr 1fr; gap: 12px; }
|
| 437 |
+
.hx-card { background: rgba(255,255,255,.06); border: 1px solid rgba(255,255,255,.12);
|
| 438 |
+
border-radius: 12px; padding: 12px; box-shadow: 0 10px 24px rgba(0,0,0,.25); animation: rise .5s ease both; }
|
| 439 |
+
.hx-from { color: #8f88b5; font-size: .68rem; font-weight: 700; text-transform: uppercase; letter-spacing: .07em; margin-bottom: 5px; }
|
| 440 |
+
.hx-chat { color: #ece9f8; font-size: .84rem; line-height: 1.35; margin-bottom: 8px; }
|
| 441 |
+
.hx-event { background: #fff; color: #1e1934; border-radius: 8px; padding: 6px 9px; font-size: .78rem;
|
| 442 |
+
font-weight: 700; border-left: 3px solid #6d4be0; }
|
| 443 |
+
.hx-event + .hx-event { margin-top: 6px; } /* rule 5: deadline + event entries */
|
| 444 |
+
.hx-assumed { color: #8f88b5; font-size: .85em; font-weight: 600; } /* rule 2: inference flag */
|
| 445 |
+
|
| 446 |
+
/* ---- light marketing sections ---- */
|
| 447 |
+
.lp-section { max-width: 1080px; margin: 0 auto; padding: 78px 22px 6px; text-align: center; scroll-margin-top: 80px; }
|
| 448 |
+
.lp-eyebrow { color: var(--cyan); font-weight: 700; letter-spacing: .14em; text-transform: uppercase; font-size: .8rem; margin-bottom: 10px; }
|
| 449 |
+
.lp-title { font-family: "Fraunces", serif; color: var(--text); font-size: clamp(1.7rem, 3.4vw, 2.6rem);
|
| 450 |
+
line-height: 1.1; letter-spacing: -.01em; margin: 0 auto 34px; max-width: 780px; }
|
| 451 |
+
.lp-grid { display: grid; gap: 18px; text-align: left; }
|
| 452 |
+
.lp-grid-3 { grid-template-columns: repeat(3, 1fr); }
|
| 453 |
+
.lp-card, .lp-step, .lp-priv { background: var(--surface); border: 1px solid var(--line); border-radius: 16px;
|
| 454 |
+
padding: 24px; box-shadow: 0 6px 20px rgba(45,32,90,.07); transition: transform .18s, box-shadow .18s; }
|
| 455 |
+
.lp-card:hover, .lp-step:hover, .lp-priv:hover { transform: translateY(-4px); box-shadow: 0 16px 34px rgba(45,32,90,.16); }
|
| 456 |
+
.lp-ico { font-size: 1.9rem; margin-bottom: 10px; line-height: 1; }
|
| 457 |
+
.lp-step-n { font-family: "Fraunces", serif; font-size: 1.7rem; font-weight: 700; margin-bottom: 8px;
|
| 458 |
+
background: var(--accent); -webkit-background-clip: text; background-clip: text; -webkit-text-fill-color: transparent; }
|
| 459 |
+
.lp-card-t { font-size: 1.12rem; font-weight: 700; color: var(--text); margin: 0 0 8px; }
|
| 460 |
+
.lp-card-d { color: var(--muted); font-size: .96rem; line-height: 1.55; margin: 0; }
|
| 461 |
+
.lp-card-d code { background: var(--surface2); padding: 1px 6px; border-radius: 6px; font-size: .86em; }
|
| 462 |
+
.lp-tool-head { padding-bottom: 2px; }
|
| 463 |
+
|
| 464 |
+
/* tool anchor offset for the sticky nav */
|
| 465 |
+
.tool-anchor { scroll-margin-top: 84px; }
|
| 466 |
+
#rv-results { scroll-margin-top: 84px; }
|
| 467 |
+
|
| 468 |
+
/* ---- footer (full-bleed dark band) ---- */
|
| 469 |
+
#site-footer { width: 100vw; margin-left: calc(50% - 50vw); margin-top: 66px;
|
| 470 |
+
background: linear-gradient(160deg, #0e0b1c 0%, #181030 100%); color: #fff; }
|
| 471 |
+
.footer-inner { max-width: 1080px; margin: 0 auto; padding: 58px 22px; text-align: center; }
|
| 472 |
+
/* !important: the global .prose h2 readability rule (top of file) would
|
| 473 |
+
otherwise repaint this ink-dark on the dark band. */
|
| 474 |
+
.footer-cta-t, #site-footer .footer-cta-t { font-family: "Fraunces", serif; color: #fff !important;
|
| 475 |
+
font-size: clamp(1.6rem, 3vw, 2.4rem); margin: 0 0 18px; }
|
| 476 |
+
a.footer-cta { display: inline-block; background: var(--accent); color: #fff !important; /* !important, matching .nav-cta, so a host theme link colour can't override the label */ text-decoration: none; font-weight: 700;
|
| 477 |
+
padding: 13px 28px; border-radius: 999px; box-shadow: 0 12px 32px rgba(109,75,224,.42); transition: transform .15s, filter .15s; }
|
| 478 |
+
a.footer-cta:hover { transform: translateY(-2px); filter: brightness(1.07); }
|
| 479 |
+
.footer-meta { color: #9b93c2; font-size: .86rem; margin-top: 18px; }
|
| 480 |
+
.footer-meta a { color: #bff2f8; text-decoration: none; }
|
| 481 |
+
|
| 482 |
+
/* Gradio's own footer (Use via API or MCP · Built with Gradio · Settings),
|
| 483 |
+
relocated into the banner by wireFooter() in the injected JS — bare white
|
| 484 |
+
hyperlinked text only: no pills, boxes, borders, or backgrounds. */
|
| 485 |
+
#site-footer footer, #site-footer footer * { background: transparent !important;
|
| 486 |
+
border: 0 !important; box-shadow: none !important; border-radius: 0 !important; }
|
| 487 |
+
#site-footer footer { justify-content: center; margin-top: 10px; padding: 0 !important; }
|
| 488 |
+
#site-footer footer, #site-footer footer a, #site-footer footer button,
|
| 489 |
+
#site-footer footer span { color: #fff !important; font-size: .86rem; }
|
| 490 |
+
#site-footer footer a, #site-footer footer button { cursor: pointer;
|
| 491 |
+
text-decoration: none; padding: 0 !important; margin: 0 4px; min-width: 0 !important; }
|
| 492 |
+
#site-footer footer a:hover, #site-footer footer button:hover {
|
| 493 |
+
color: #fff !important; text-decoration: underline; }
|
| 494 |
+
|
| 495 |
+
/* ---- mobile ---- */
|
| 496 |
+
@media (max-width: 760px) {
|
| 497 |
+
#site-nav .nav-inner { gap: 10px; padding: 10px 16px; }
|
| 498 |
+
.nav-links { gap: 10px; }
|
| 499 |
+
.nav-brand { font-size: 1rem; }
|
| 500 |
+
.nav-top { font-size: .86rem; padding: 6px 2px; }
|
| 501 |
+
.nav-menu { right: 0; left: auto; } /* keep menus on-screen near the edge */
|
| 502 |
+
.lp-grid-3 { grid-template-columns: 1fr; }
|
| 503 |
+
#hero { padding: 46px 20px 40px; flex-direction: column; align-items: stretch; gap: 26px !important; }
|
| 504 |
+
#hero-left, #hero-right { flex: 1 1 auto; max-width: 100%; }
|
| 505 |
+
#hero-cta { justify-content: flex-start; }
|
| 506 |
+
.lp-section { padding-top: 56px; }
|
| 507 |
+
}
|
| 508 |
+
|
| 509 |
+
/* ===================================================================== */
|
| 510 |
+
/* FAQ — left-aligned title + tabs + search + row-divider accordion */
|
| 511 |
+
/* ===================================================================== */
|
| 512 |
+
.lp-faq-section { max-width: 1080px; margin: 0 auto; padding: 78px 22px 12px;
|
| 513 |
+
text-align: left; scroll-margin-top: 80px; }
|
| 514 |
+
.lp-faq-head { display: flex; align-items: flex-end; justify-content: space-between;
|
| 515 |
+
gap: 24px; margin-bottom: 22px; flex-wrap: wrap; }
|
| 516 |
+
.lp-faq-h { font-family: "Fraunces", serif; color: var(--text);
|
| 517 |
+
font-size: clamp(1.7rem, 3.4vw, 2.4rem); line-height: 1.1; letter-spacing: -.01em;
|
| 518 |
+
margin: 0; font-weight: 700; }
|
| 519 |
+
|
| 520 |
+
/* Search input — bottom-border-only with svg icon on the right */
|
| 521 |
+
.lp-faq-search { position: relative; display: flex; align-items: center;
|
| 522 |
+
min-width: 260px; border-bottom: 1px solid var(--line); padding: 6px 0;
|
| 523 |
+
transition: border-color .15s; }
|
| 524 |
+
.lp-faq-search:focus-within { border-color: var(--violet); }
|
| 525 |
+
.lp-faq-search input { flex: 1; border: 0; background: transparent; color: var(--text);
|
| 526 |
+
font-size: .98rem; font-family: inherit; outline: none; padding: 4px 28px 4px 0;
|
| 527 |
+
-webkit-appearance: none; appearance: none; }
|
| 528 |
+
.lp-faq-search input::placeholder { color: var(--muted); }
|
| 529 |
+
.lp-faq-search input::-webkit-search-cancel-button { -webkit-appearance: none; }
|
| 530 |
+
.lp-faq-search svg { width: 18px; height: 18px; color: var(--muted);
|
| 531 |
+
position: absolute; right: 2px; pointer-events: none; }
|
| 532 |
+
|
| 533 |
+
/* Tabs */
|
| 534 |
+
.lp-faq-tabs { display: flex; gap: 32px; border-bottom: 1px solid var(--line);
|
| 535 |
+
margin: 0 0 4px; }
|
| 536 |
+
.lp-faq-tab { background: transparent; border: 0; padding: 12px 0 14px;
|
| 537 |
+
cursor: pointer; font-family: inherit; font-size: 1rem; color: var(--muted);
|
| 538 |
+
font-weight: 600; position: relative; transition: color .15s; }
|
| 539 |
+
.lp-faq-tab:hover { color: var(--text); }
|
| 540 |
+
.lp-faq-tab.is-active { color: var(--text); }
|
| 541 |
+
.lp-faq-tab.is-active::after { content: ""; position: absolute; left: 0; right: 0;
|
| 542 |
+
bottom: -1px; height: 2.5px; background: var(--violet); border-radius: 2px; }
|
| 543 |
+
|
| 544 |
+
/* Lists + rows */
|
| 545 |
+
.lp-faq-list { display: block; }
|
| 546 |
+
.lp-faq-list.is-hidden { display: none; }
|
| 547 |
+
.lp-faq-item { border-bottom: 1px solid var(--line); padding: 0; }
|
| 548 |
+
.lp-faq-q { display: flex; justify-content: space-between; align-items: center;
|
| 549 |
+
gap: 16px; padding: 22px 0; cursor: pointer; list-style: none;
|
| 550 |
+
font-size: 1.05rem; color: var(--text); font-weight: 500; transition: color .15s; }
|
| 551 |
+
.lp-faq-q::-webkit-details-marker { display: none; }
|
| 552 |
+
.lp-faq-q:hover { color: var(--violet); }
|
| 553 |
+
.lp-faq-qt { flex: 1; min-width: 0; }
|
| 554 |
+
.lp-faq-ico { flex: 0 0 26px; display: inline-flex; color: var(--muted);
|
| 555 |
+
transition: color .15s; }
|
| 556 |
+
.lp-faq-ico svg { width: 26px; height: 26px; }
|
| 557 |
+
.lp-faq-q:hover .lp-faq-ico,
|
| 558 |
+
.lp-faq-item[open] .lp-faq-ico { color: var(--violet); }
|
| 559 |
+
.lp-faq-ico-v { transition: opacity .15s; }
|
| 560 |
+
.lp-faq-item[open] .lp-faq-ico-v { opacity: 0; }
|
| 561 |
+
|
| 562 |
+
.lp-faq-a { padding: 0 0 22px; color: var(--muted); line-height: 1.6;
|
| 563 |
+
font-size: .97rem; max-width: 760px; }
|
| 564 |
+
.lp-faq-a p { margin: 0 0 10px; }
|
| 565 |
+
.lp-faq-a p:last-child { margin-bottom: 0; }
|
| 566 |
+
.lp-faq-a ul { margin: 6px 0 0; padding-left: 20px; }
|
| 567 |
+
.lp-faq-a li { margin-bottom: 4px; }
|
| 568 |
+
.lp-faq-a code { background: rgba(31,25,60,.06); padding: 1px 6px; border-radius: 5px;
|
| 569 |
+
font-size: .9em; }
|
| 570 |
+
.lp-faq-a a { color: var(--violet); text-decoration: none; font-weight: 600; }
|
| 571 |
+
.lp-faq-a a:hover { text-decoration: underline; }
|
| 572 |
+
.lp-faq-a b { color: var(--text); }
|
| 573 |
+
|
| 574 |
+
/* Empty-state message when search filters everything out */
|
| 575 |
+
.lp-faq-empty { color: var(--muted); padding: 28px 0; text-align: center; }
|
| 576 |
+
.lp-faq-empty.is-hidden { display: none; }
|
| 577 |
+
|
| 578 |
+
@media (max-width: 720px) {
|
| 579 |
+
.lp-faq-section { padding-top: 56px; }
|
| 580 |
+
.lp-faq-head { align-items: flex-start; }
|
| 581 |
+
.lp-faq-search { min-width: 100%; }
|
| 582 |
+
.lp-faq-tabs { gap: 22px; }
|
| 583 |
+
}
|
| 584 |
+
|
| 585 |
+
/* ===================================================================== */
|
| 586 |
+
/* Hackathon: Submission compliance scorecard */
|
| 587 |
+
/* ===================================================================== */
|
| 588 |
+
|
| 589 |
+
/* standalone nav link (between the dropdown groups and the CTA) */
|
| 590 |
+
/* Home / Submission share the SAME typography as the Learn/Workspace tops */
|
| 591 |
+
.nav-solo { background: none; border: 0; cursor: pointer; color: var(--muted) !important; font-weight: 600;
|
| 592 |
+
font-size: .92rem; padding: 6px 4px; display: inline-flex; align-items: center; gap: 6px;
|
| 593 |
+
font-family: inherit; text-decoration: none; transition: color .15s; }
|
| 594 |
+
.nav-solo:hover { color: var(--text) !important; }
|
| 595 |
+
|
| 596 |
+
/* submission scorecard */
|
| 597 |
+
.sub-wrap { max-width: 920px; margin: 0 auto; padding: 10px 4px 28px; }
|
| 598 |
+
.sub-group { margin: 20px 0; }
|
| 599 |
+
.sub-h { font-size: .8rem; text-transform: uppercase; letter-spacing: .08em; color: var(--muted); margin: 0 0 10px; }
|
| 600 |
+
.sub-lead { color: var(--text); line-height: 1.6; background: var(--surface); border: 1px solid var(--line);
|
| 601 |
+
border-left: 3px solid var(--violet); border-radius: 12px; padding: 14px 16px; box-shadow: 0 3px 12px rgba(45,32,90,.06); }
|
| 602 |
+
.sub-lead code { background: var(--surface2); padding: 1px 6px; border-radius: 6px; }
|
| 603 |
+
.sub-row { display: flex; gap: 12px; align-items: flex-start; background: var(--surface); border: 1px solid var(--line);
|
| 604 |
+
border-radius: 12px; padding: 12px 14px; margin-bottom: 8px; box-shadow: 0 3px 12px rgba(45,32,90,.06); }
|
| 605 |
+
.sub-pill { flex: 0 0 auto; width: 26px; height: 26px; border-radius: 50%; display: flex; align-items: center;
|
| 606 |
+
justify-content: center; font-weight: 800; font-size: .9rem; }
|
| 607 |
+
.sub-ok { background: rgba(21,137,79,.14); color: var(--mint); }
|
| 608 |
+
.sub-warn { background: rgba(179,112,10,.16); color: var(--amber); }
|
| 609 |
+
.sub-rt { min-width: 0; }
|
| 610 |
+
.sub-title { font-weight: 700; color: var(--text); }
|
| 611 |
+
.sub-ev { color: var(--muted); font-size: .92rem; margin-top: 2px; line-height: 1.5; }
|
| 612 |
+
.sub-ev a { color: var(--cyan); }
|
| 613 |
+
.sub-ev code, .sub-title code { background: var(--surface2); padding: 1px 6px; border-radius: 6px; }
|
| 614 |
+
|
| 615 |
+
/* ===================================================================== */
|
| 616 |
+
/* Reference-style redesign: nav pill · elevated tool card · 2-col input */
|
| 617 |
+
/* ===================================================================== */
|
| 618 |
+
|
| 619 |
+
/* fine-tuned-model pill (top-right of the nav) — links to the model */
|
| 620 |
+
.nav-status { display: inline-flex; align-items: center; gap: 6px; padding: 5px 12px; border-radius: 999px;
|
| 621 |
+
background: rgba(21,137,79,.12); color: var(--mint); font-size: .78rem; font-weight: 700;
|
| 622 |
+
white-space: nowrap; text-decoration: none; transition: filter .15s; }
|
| 623 |
+
.nav-status:hover { filter: brightness(1.05); }
|
| 624 |
+
.nav-status b { font-weight: 800; }
|
| 625 |
+
|
| 626 |
+
/* elevated tool card that overlaps the hero (the agent, up top) */
|
| 627 |
+
#tool-card { max-width: 1000px; margin: -84px auto 0 !important; position: relative; z-index: 5;
|
| 628 |
+
background: #fff !important; border: 1px solid var(--line) !important; border-radius: 20px !important;
|
| 629 |
+
box-shadow: 0 30px 70px rgba(20,12,50,.28) !important; padding: 26px !important; scroll-margin-top: 80px; }
|
| 630 |
+
.tc-head { display: flex; justify-content: space-between; align-items: flex-end; gap: 16px; margin-bottom: 18px; flex-wrap: wrap; }
|
| 631 |
+
.tc-eyebrow { color: var(--cyan); font-weight: 700; letter-spacing: .12em; text-transform: uppercase; font-size: .76rem; margin-bottom: 6px; }
|
| 632 |
+
.tc-title { font-family: "Fraunces", serif; font-size: 1.7rem; color: var(--text); margin: 0; line-height: 1.1; }
|
| 633 |
+
/* "Powered by fine-tuned Gemma 4" — green pill, right side of the tool-card head */
|
| 634 |
+
.tc-poweredby { display: inline-flex; align-items: center; gap: 6px; padding: 7px 14px;
|
| 635 |
+
border-radius: 999px; background: rgba(21,137,79,.10); border: 1px solid rgba(21,137,79,.35);
|
| 636 |
+
color: var(--mint); font-size: .82rem; font-weight: 600; text-decoration: none;
|
| 637 |
+
transition: background .15s; }
|
| 638 |
+
.tc-poweredby:hover { background: rgba(21,137,79,.18); }
|
| 639 |
+
.tc-poweredby b { color: var(--mint); font-weight: 800; }
|
| 640 |
+
|
| 641 |
+
/* ---- mode theme: the ONE decision point recolors the whole workflow card.
|
| 642 |
+
Offline = forest green ("local, sealed"); Online = cyan ("cloud-connected").
|
| 643 |
+
data-mode is set by wireModeTheme() on load + the mode.change JS. ---- */
|
| 644 |
+
#tool-card { --mode-c: #15894f; --mode-soft: rgba(21,137,79,.07);
|
| 645 |
+
--mode-line: rgba(21,137,79,.35); }
|
| 646 |
+
#tool-card[data-mode="online"] { --mode-c: #0e8ea0; --mode-soft: rgba(14,142,160,.09);
|
| 647 |
+
--mode-line: rgba(14,142,160,.4); }
|
| 648 |
+
#tool-card { border-top: 4px solid var(--mode-c) !important;
|
| 649 |
+
background: linear-gradient(var(--mode-soft), transparent 170px), #fff !important;
|
| 650 |
+
transition: border-color .35s ease, background .35s ease; }
|
| 651 |
+
|
| 652 |
+
/* ---- Offline / Online mode toggle, inside the full-width mode band ---- */
|
| 653 |
+
#mode-band { background: var(--mode-soft) !important; border: 1px solid var(--mode-line) !important;
|
| 654 |
+
border-radius: 14px !important; padding: 12px 16px 10px !important; margin-bottom: 6px;
|
| 655 |
+
transition: background .35s ease, border-color .35s ease; }
|
| 656 |
+
/* ONE box only: flatten every Gradio wrapper inside the band (the radio's and
|
| 657 |
+
the note's own block chrome would otherwise draw nested containers).
|
| 658 |
+
:not(label) — Gradio puts data-testid on the radio OPTION labels too, and
|
| 659 |
+
this rule must not strip their button/pill styling below. */
|
| 660 |
+
#mode-band .block, #mode-band .form, #mode-band fieldset, #mode-band .gradio-html,
|
| 661 |
+
#mode-band > div, #mode-band [data-testid]:not(label) {
|
| 662 |
+
background: transparent !important; border: 0 !important; box-shadow: none !important;
|
| 663 |
+
padding: 0 !important; margin: 0 !important; border-radius: 0 !important; }
|
| 664 |
+
/* ONE enclosing pill around both options — the eye lands here; the pill's
|
| 665 |
+
border wears the active mode color. !important throughout: the band's
|
| 666 |
+
flatten rule zeroes block chrome and must not strip the pill itself. */
|
| 667 |
+
#mode-toggle { display: flex !important; justify-content: center; gap: 0;
|
| 668 |
+
width: fit-content; margin: 2px auto 8px !important;
|
| 669 |
+
background: #fff !important; border: 2px solid var(--mode-c) !important;
|
| 670 |
+
border-radius: 999px !important; padding: 4px !important;
|
| 671 |
+
box-shadow: 0 4px 14px rgba(20,12,50,.10) !important;
|
| 672 |
+
transition: border-color .35s ease; }
|
| 673 |
+
#mode-toggle .wrap, #mode-toggle > div { justify-content: center; gap: 6px; }
|
| 674 |
+
/* each option is its OWN button: outlined + raised when idle (clearly
|
| 675 |
+
clickable), filled with the mode color when selected. Double-id selector
|
| 676 |
+
out-ranks the band's flatten rule. */
|
| 677 |
+
#mode-band #mode-toggle label {
|
| 678 |
+
background: #fff !important; border: 1.5px solid var(--line) !important;
|
| 679 |
+
border-radius: 999px !important; padding: 8px 20px !important; cursor: pointer;
|
| 680 |
+
font-weight: 700 !important; color: var(--text) !important;
|
| 681 |
+
box-shadow: 0 1px 3px rgba(20,12,50,.14) !important;
|
| 682 |
+
margin: 0 2px !important; transition: background .25s, color .25s, border-color .25s,
|
| 683 |
+
transform .1s; }
|
| 684 |
+
#mode-band #mode-toggle label:hover { border-color: var(--mode-c) !important; }
|
| 685 |
+
#mode-band #mode-toggle label:active { transform: translateY(1px); }
|
| 686 |
+
#mode-band #mode-toggle label:has(input:checked) {
|
| 687 |
+
background: var(--mode-c) !important; border-color: var(--mode-c) !important;
|
| 688 |
+
box-shadow: inset 0 1px 2px rgba(0,0,0,.15) !important; }
|
| 689 |
+
#mode-band #mode-toggle label:has(input:checked) span { color: #fff !important; }
|
| 690 |
+
/* Visually hide the native radios instead of display:none — display:none drops them from the tab order, leaving the Offline/Online toggle unreachable by keyboard. The label pills stay the click target. */
#mode-toggle input[type="radio"] { position: absolute !important; opacity: 0 !important; width: 1px !important; height: 1px !important; margin: 0 !important; pointer-events: none; }
/* keyboard focus lands on the hidden input, so draw the ring on its enclosing pill */
#mode-band #mode-toggle label:has(input:focus-visible) { outline: 2px solid var(--mode-c); outline-offset: 2px; }
|
| 691 |
+
.mode-note { display: flex; align-items: center; justify-content: center; gap: 10px;
|
| 692 |
+
flex-wrap: wrap; text-align: center; color: var(--muted); font-size: .85rem; margin: 0 0 2px; }
|
| 693 |
+
.mode-note code { background: var(--surface2); padding: 1px 5px; border-radius: 5px; }
|
| 694 |
+
.mode-chip { display: inline-block; padding: 3px 10px; border-radius: 999px;
|
| 695 |
+
background: var(--mode-c); color: #fff; font-size: .68rem; font-weight: 800;
|
| 696 |
+
letter-spacing: .08em; transition: background .35s ease; }
|
| 697 |
+
|
| 698 |
+
/* ---- numbered workflow steps, tied by a dashed tail under each chip ----
|
| 699 |
+
chips + connectors wear the active mode color (green offline / cyan online) */
|
| 700 |
+
.flow-step { position: relative; display: flex; align-items: center; gap: 8px;
|
| 701 |
+
margin: 20px 0 8px; }
|
| 702 |
+
.flow-step::before { content: ""; position: absolute; left: 10px; top: 100%;
|
| 703 |
+
height: 20px; border-left: 2px dashed rgba(109,75,224,.45); }
|
| 704 |
+
#tool-card .flow-step::before { border-color: var(--mode-line); transition: border-color .35s ease; }
|
| 705 |
+
#tool-card .step-chip { background: var(--mode-c); transition: background .35s ease; }
|
| 706 |
+
.flow-t { font-weight: 700; color: var(--text); }
|
| 707 |
+
.flow-sub { color: var(--muted); font-size: .82rem; font-weight: 500; }
|
| 708 |
+
.flow-gcal { margin: -2px 0 10px 30px; }
|
| 709 |
+
.flow-gcal .gcal-state { color: var(--mint); font-size: .85rem; }
|
| 710 |
+
|
| 711 |
+
/* two-column ① upload / ② paste */
|
| 712 |
+
#io-cols { gap: 20px !important; align-items: stretch; }
|
| 713 |
+
.io-col { min-width: 0; }
|
| 714 |
+
.io-label { display: flex; align-items: center; gap: 8px; font-weight: 700; color: var(--text); font-size: .95rem; margin-bottom: 8px; }
|
| 715 |
+
.step-chip { display: inline-flex; align-items: center; justify-content: center; width: 22px; height: 22px;
|
| 716 |
+
border-radius: 50%; background: var(--accent); color: #fff; font-size: .8rem; font-weight: 800; }
|
| 717 |
+
#io-drop { border: 2px dashed rgba(31,25,60,.22) !important; border-radius: 14px !important;
|
| 718 |
+
background: var(--surface2) !important; }
|
| 719 |
+
|
| 720 |
+
/* char counter + helper line under the paste box */
|
| 721 |
+
.rv-help { display: flex; justify-content: space-between; align-items: center; gap: 10px; margin-top: 6px;
|
| 722 |
+
color: var(--muted); font-size: .8rem; }
|
| 723 |
+
.rv-counter { font-variant-numeric: tabular-nums; color: var(--muted); }
|
| 724 |
+
|
| 725 |
+
/* centered primary / secondary actions + sample link */
|
| 726 |
+
#rv-actions { justify-content: center !important; gap: 12px !important; margin-top: 16px; }
|
| 727 |
+
#rv-analyze button { min-width: 200px; }
|
| 728 |
+
.rv-secondary button { background: var(--surface2) !important; color: var(--text) !important;
|
| 729 |
+
border: 1px solid var(--line) !important; box-shadow: none !important; font-weight: 600 !important; }
|
| 730 |
+
.rv-linkbtn { display: flex; justify-content: center; margin-top: 8px; }
|
| 731 |
+
.rv-linkbtn button { background: none !important; border: none !important; box-shadow: none !important;
|
| 732 |
+
color: var(--cyan) !important; font-weight: 600 !important; min-height: 0 !important; }
|
| 733 |
+
|
| 734 |
+
/* privacy-safe trace card */
|
| 735 |
+
.trace-card { background: var(--surface2) !important; border: 1px solid var(--line) !important;
|
| 736 |
+
border-radius: 12px !important; padding: 12px 14px !important; margin-top: 14px; }
|
| 737 |
+
.trace-desc { color: var(--muted); font-size: .82rem; margin-top: 2px; }
|
| 738 |
+
.trace-ok { color: var(--mint); font-size: .85rem; font-weight: 600; margin-top: 6px; }
|
| 739 |
+
|
| 740 |
+
/* screenshot-attached hint */
|
| 741 |
+
.shot-status { color: var(--cyan); font-size: .82rem; font-weight: 600; margin-top: 6px; }
|
| 742 |
+
|
| 743 |
+
/* mobile */
|
| 744 |
+
@media (max-width: 760px) {
|
| 745 |
+
#io-cols { flex-direction: column; }
|
| 746 |
+
#tool-card { margin-top: -56px !important; padding: 18px !important; border-radius: 16px !important; }
|
| 747 |
+
.tc-head { align-items: flex-start; }
|
| 748 |
+
.hx-grid { grid-template-columns: 1fr; }
|
| 749 |
+
.nav-status { display: none; }
|
| 750 |
+
}
|
| 751 |
+
|
| 752 |
+
/* hero trust badges (under the copy, on the dark band) */
|
| 753 |
+
.hero-badges { display: flex; flex-wrap: wrap; gap: 8px; margin-top: 18px; }
|
| 754 |
+
.hbadge { display: inline-flex; align-items: center; gap: 6px; padding: 6px 12px; border-radius: 999px;
|
| 755 |
+
background: rgba(255,255,255,.10); border: 1px solid rgba(255,255,255,.18); color: #fff;
|
| 756 |
+
font-size: .8rem; font-weight: 700; }
|
| 757 |
+
|
| 758 |
+
/* onboarding panel (inside the tool card) — an accordion: open on first visit,
|
| 759 |
+
collapsed (but always reopenable) once the device has memory. */
|
| 760 |
+
#onboard { background: var(--surface2) !important; border: 1px solid var(--line) !important;
|
| 761 |
+
border-radius: 14px !important; padding: 12px 18px !important; margin-bottom: 16px; }
|
| 762 |
+
#onboard .label-wrap span, #onboard > button span {
|
| 763 |
+
font-family: "Fraunces", serif !important; font-size: 1.1rem; font-weight: 700;
|
| 764 |
+
color: var(--text) !important; }
|
| 765 |
+
.ob-sub { color: var(--muted); font-size: .9rem; margin: 4px 0 12px; }
|
| 766 |
+
|
| 767 |
+
/* per-user Google Calendar connect link (in the export bar) */
|
| 768 |
+
.gcal-connect { display: inline-block; margin-top: 6px; color: var(--cyan); font-weight: 600;
|
| 769 |
+
text-decoration: none; cursor: pointer; font-size: .9rem; }
|
| 770 |
+
.gcal-connect:hover { text-decoration: underline; }
|
| 771 |
+
.gcal-state { color: var(--mint); font-weight: 700; font-size: .85rem; margin-left: 4px; }
|
| 772 |
+
|
| 773 |
+
/* ---- condensed results card: events + export in ONE area ---- */
|
| 774 |
+
#rv-resultcard {
|
| 775 |
+
background: var(--surface) !important; border: 1px solid var(--line) !important;
|
| 776 |
+
border-radius: 16px !important; padding: 16px 18px !important;
|
| 777 |
+
box-shadow: var(--shadow); margin-top: 12px;
|
| 778 |
+
}
|
| 779 |
+
/* the export cluster becomes a toolbar inside the card — drop its own panel
|
| 780 |
+
chrome, separate it from the events with a hairline */
|
| 781 |
+
#rv-resultcard #rv-export {
|
| 782 |
+
margin-top: 12px; padding: 12px 0 0 !important;
|
| 783 |
+
background: transparent !important; border: none !important;
|
| 784 |
+
border-top: 1px solid var(--line) !important; border-radius: 0 !important;
|
| 785 |
+
box-shadow: none;
|
| 786 |
+
}
|
| 787 |
+
#rv-resultcard #rv-export button { min-height: 42px !important; }
|
| 788 |
+
/* keep the export toolbar reachable on a phone (re-assert the sticky bar at
|
| 789 |
+
the new, more specific selector) */
|
| 790 |
+
@media (max-width: 640px) {
|
| 791 |
+
#rv-resultcard #rv-export {
|
| 792 |
+
position: sticky; bottom: 0; z-index: 20; padding: 10px !important;
|
| 793 |
+
background: rgba(255,255,255,.94) !important;
|
| 794 |
+
/* -webkit- prefix first: Safari before 18 only honours the prefixed form, and this rule targets phones */ -webkit-backdrop-filter: blur(8px); backdrop-filter: blur(8px); border-radius: 12px 12px 0 0 !important;
|
| 795 |
+
box-shadow: 0 -8px 24px rgba(45,32,90,.18);
|
| 796 |
+
}
|
| 797 |
+
}
|
| 798 |
+
/* prominent location line on the condensed event card */
|
| 799 |
+
.evx-loc { font-size: .92rem; font-weight: 600; color: var(--text); margin-top: 8px; }
|
| 800 |
+
/* arrival-context callout (per-event notes, e.g. "arrive 15 min early") */
|
| 801 |
+
.evx-notes {
|
| 802 |
+
margin-top: 8px; padding: 6px 10px; font-size: .86rem; color: var(--text);
|
| 803 |
+
background: rgba(240,180,60,.12); border-left: 3px solid var(--amber);
|
| 804 |
+
border-radius: 0 8px 8px 0;
|
| 805 |
+
}
|
| 806 |
+
/* the pre-generated .ics widget stays as the no-JS download fallback — slim it */
|
| 807 |
+
#ics-file { margin-top: 8px; }
|
| 808 |
+
#ics-file, #ics-file .file-preview { max-height: 88px; overflow-y: auto; }
|
| 809 |
+
|
| 810 |
+
/* ---- unified "Connect your calendar" block (Step 2a) ---- */
|
| 811 |
+
.cal-connect { display: flex; flex-direction: column; gap: 8px; margin: 4px 0 10px;
|
| 812 |
+
padding: 10px 14px; background: var(--surface2); border-radius: 10px; }
|
| 813 |
+
.cal-provider { display: flex; align-items: baseline; gap: 10px; flex-wrap: wrap;
|
| 814 |
+
font-size: .9rem; }
|
| 815 |
+
.cal-prov-name { font-weight: 600; min-width: 160px; }
|
| 816 |
+
.cal-cap { color: var(--muted); font-size: .82rem; }
|
| 817 |
+
.cal-privacy { color: var(--muted); font-size: .78rem; margin-top: 2px; }
|
| 818 |
+
.cal-provider .gcal-connect { margin-top: 0; }
|
| 819 |
+
.cal-provider .gcal-disconnect { display: none; color: var(--muted);
|
| 820 |
+
font-size: .78rem; text-decoration: none; cursor: pointer; }
|
| 821 |
+
.cal-provider .gcal-disconnect:hover { text-decoration: underline; }
|
| 822 |
+
/* connected: collapse the CTA, show ✓ + a quiet disconnect */
|
| 823 |
+
.cal-provider.is-connected .gcal-connect { display: none; }
|
| 824 |
+
.cal-provider.is-connected .gcal-disconnect { display: inline; }
|
| 825 |
+
.cal-provider.is-connected .cal-cap-online { display: none; }
|
| 826 |
+
/* offline mode: Google sync needs the cloud — swap the CTA for a hint */
|
| 827 |
+
.cal-provider[data-provider="google"] .cal-cap-offline { display: none; }
|
| 828 |
+
#tool-card[data-mode="offline"] .cal-provider[data-provider="google"] .gcal-connect { display: none; }
|
| 829 |
+
#tool-card[data-mode="offline"] .cal-provider[data-provider="google"] .cal-cap-online { display: none; }
|
| 830 |
+
#tool-card[data-mode="offline"] .cal-provider[data-provider="google"]:not(.is-connected) .cal-cap-offline { display: inline; }
|
| 831 |
+
|
| 832 |
+
/* ---- Google connection badge in the export toolbar (#rv-export) ---- */
|
| 833 |
+
.gcal-badge-wrap { margin-top: 8px; }
|
| 834 |
+
.gcal-badge { display: inline-block; font-size: .78rem; color: var(--muted);
|
| 835 |
+
padding: 2px 10px; border: 1px solid var(--line); border-radius: 999px; }
|
| 836 |
+
.gcal-badge.is-on { color: var(--mint); border-color: currentColor; font-weight: 600; }
|
| 837 |
+
/* the Offline workflow hides Google everywhere — badge included */
|
| 838 |
+
#tool-card[data-mode="offline"] .gcal-badge-wrap { display: none; }
|
| 839 |
+
/* ---- processing pipeline card (live agent stepper inside the trace accordion) ---- */
|
| 840 |
+
.pipe-card { position: relative; background: var(--surface); border: 1px solid var(--line);
|
| 841 |
+
border-radius: 16px; padding: 14px 16px; margin-bottom: 10px; box-shadow: var(--shadow); }
|
| 842 |
+
.pipe-head { display: flex; align-items: baseline; justify-content: space-between; gap: 10px; }
|
| 843 |
+
.pipe-title { font-size: .9rem; font-weight: 700; color: var(--text); }
|
| 844 |
+
.pipe-card[data-state="done"] .pipe-title { color: var(--mint); }
|
| 845 |
+
.pipe-card[data-state="error"] .pipe-title { color: var(--coral); }
|
| 846 |
+
/* the base rule forces Hanken Grotesk on everything — the clock must stay mono
|
| 847 |
+
so the ticking digits don't jitter */
|
| 848 |
+
.pipe-clock { font-family: ui-monospace, Menlo, Consolas, monospace !important;
|
| 849 |
+
font-variant-numeric: tabular-nums; font-size: .85rem; color: var(--muted); }
|
| 850 |
+
.pipe-track { position: relative; display: flex; align-items: center; gap: 4px;
|
| 851 |
+
margin-top: 12px; padding: 6px 2px; overflow: hidden; border-radius: 10px; }
|
| 852 |
+
.pipe-stage { flex: 1 1 0; min-width: 0; display: flex; flex-direction: column;
|
| 853 |
+
align-items: center; gap: 4px; padding: 6px 2px; border-radius: 10px;
|
| 854 |
+
background: var(--surface2); transition: background .25s ease; }
|
| 855 |
+
.pipe-badge { width: 24px; height: 24px; border-radius: 50%; display: flex;
|
| 856 |
+
align-items: center; justify-content: center; font-size: .72rem; font-weight: 800;
|
| 857 |
+
background: var(--surface); color: var(--muted); border: 1px solid var(--line);
|
| 858 |
+
transition: background .25s ease, border-color .25s ease, color .25s ease; }
|
| 859 |
+
.pipe-lab { font-size: .66rem; font-weight: 700; letter-spacing: .08em; color: var(--muted); }
|
| 860 |
+
.pipe-chev { flex: 0 0 auto; color: var(--muted); opacity: .5; font-weight: 700; }
|
| 861 |
+
.pipe-stage.is-active { background: rgba(109,75,224,.10); }
|
| 862 |
+
.pipe-stage.is-active .pipe-badge { background: #6d4be0; border-color: #6d4be0; color: #fff;
|
| 863 |
+
--c: #6d4be0; animation: pulse 1.4s infinite; } /* reuses the Activity tab's pulse keyframes */
|
| 864 |
+
.pipe-stage.is-active .pipe-lab { color: var(--text); }
|
| 865 |
+
.pipe-stage.is-done { background: rgba(21,137,79,.08); }
|
| 866 |
+
.pipe-stage.is-done .pipe-badge { background: var(--mint); border-color: var(--mint); color: #fff; }
|
| 867 |
+
.pipe-stage.is-done .pipe-lab { color: var(--mint); }
|
| 868 |
+
.pipe-stage.is-skip { opacity: .45; }
|
| 869 |
+
.pipe-stage.is-skip .pipe-badge { background: transparent; border-style: dashed; }
|
| 870 |
+
.pipe-stage.is-error .pipe-badge { background: var(--coral); border-color: var(--coral); color: #fff; }
|
| 871 |
+
/* indeterminate light sweep across the whole track while running */
|
| 872 |
+
.pipe-shimmer { position: absolute; inset: 0; pointer-events: none; display: none;
|
| 873 |
+
width: 36%; background: linear-gradient(100deg, transparent 0%,
|
| 874 |
+
rgba(109,75,224,.14) 45%, rgba(14,142,160,.14) 55%, transparent 100%); }
|
| 875 |
+
.pipe-card[data-state="running"] .pipe-shimmer { display: block;
|
| 876 |
+
animation: pipe-sweep 2.5s ease-in-out infinite; }
|
| 877 |
+
@keyframes pipe-sweep { from { transform: translateX(-100%); } to { transform: translateX(280%); } }
|
| 878 |
+
.pipe-cap { margin-top: 8px; text-align: center; font-size: .8rem; color: var(--muted); }
|
| 879 |
+
/* static post-run summary: speed / confidence / evidence / counts */
|
| 880 |
+
.pipe-summary { display: flex; flex-wrap: wrap; gap: 8px; margin-top: 12px;
|
| 881 |
+
padding-top: 10px; border-top: 1px solid var(--line); }
|
| 882 |
+
.pipe-chip { padding: 4px 10px; border-radius: 999px; background: var(--surface2);
|
| 883 |
+
border: 1px solid var(--line); font-size: .75rem; font-weight: 700; color: var(--text);
|
| 884 |
+
max-width: 260px; white-space: nowrap; overflow: hidden; text-overflow: ellipsis; }
|
| 885 |
+
.pipe-chip .pipe-k { color: var(--muted); font-weight: 600; margin-right: 4px;
|
| 886 |
+
text-transform: uppercase; font-size: .66rem; letter-spacing: .06em; }
|
| 887 |
+
.pipe-chip b { font-family: ui-monospace, Menlo, Consolas, monospace !important; }
|
| 888 |
+
.pipe-chip.is-high { color: var(--mint); }
|
| 889 |
+
.pipe-chip.is-review { color: var(--amber); }
|
| 890 |
+
@media (prefers-reduced-motion: reduce) {
|
| 891 |
+
.pipe-card[data-state="running"] .pipe-shimmer { display: none !important; }
|
| 892 |
+
.pipe-stage.is-active .pipe-badge { animation: none; }
|
| 893 |
+
}
|
| 894 |
+
|
| 895 |
+
/* ---- full-word pipeline stage labels: small, one line, equal-width boxes ---- */
|
| 896 |
+
.pipe-lab { font-size: .58rem; letter-spacing: .03em; white-space: nowrap; }
|
| 897 |
+
.pipe-stage { padding: 6px 1px; }
|
| 898 |
+
|
| 899 |
+
/* ---- single export surface: the per-event "Add to" links own exporting; the
|
| 900 |
+
header gains an all-events iCal link when 2+ events are found ---- */
|
| 901 |
+
.evx-head { display: flex; align-items: baseline; justify-content: space-between;
|
| 902 |
+
gap: 10px; flex-wrap: wrap; }
|
| 903 |
+
.evx-add-all { font-size: .8rem; font-weight: 700; color: var(--cyan);
|
| 904 |
+
text-decoration: none; white-space: nowrap; }
|
| 905 |
+
.evx-add-all:hover { text-decoration: underline; }
|
| 906 |
+
|
| 907 |
+
/* ---- consistent page width: Agent/Activity/Memory/Feed match the home
|
| 908 |
+
page's 1000px tool card (capping + centering only — internal layout of
|
| 909 |
+
the pages is untouched) ---- */
|
| 910 |
+
.page-wrap { max-width: 1000px; margin-left: auto !important; margin-right: auto !important; width: 100%; }
|
| 911 |
+
|
| 912 |
+
/* ---- Activity tab: metric values in the brand violet (the This-week cards
|
| 913 |
+
and the run tiles both render through .tile-v). Resets the gradient
|
| 914 |
+
text-clip so the color actually shows regardless of the base rule's state. ---- */
|
| 915 |
+
.tile-v { color: var(--violet); background: none;
|
| 916 |
+
-webkit-background-clip: initial; background-clip: initial;
|
| 917 |
+
-webkit-text-fill-color: currentColor; }
|
| 918 |
+
|
| 919 |
+
/* ---- mobile responsiveness fixes (phone) ---- */
|
| 920 |
+
|
| 921 |
+
/* Top nav: on a phone the brand wordmark + 5 links overflow a single row
|
| 922 |
+
(logo overlaps "Home", "Memory" clipped, "Feed" off-screen). Stack the brand
|
| 923 |
+
above a wrapping (not horizontally scrolling) link strip so nothing overlaps and every
|
| 924 |
+
page stays reachable. Reuses the classes from _nav_html(). */
|
| 925 |
+
@media (max-width: 600px) {
|
| 926 |
+
#site-nav .nav-inner { flex-direction: column; align-items: stretch;
|
| 927 |
+
justify-content: flex-start; gap: 6px; padding: 8px 14px; }
|
| 928 |
+
.nav-brand { justify-content: flex-start; font-size: .98rem; }
|
| 929 |
+
/* WRAP, don't scroll: a horizontal overflow-x:auto strip swallowed taps on a
|
| 930 |
+
phone (a touch with any micro-movement reads as a scroll, and links past the
|
| 931 |
+
fold needed scrolling first) — which left Submission/Memory/Feed feeling
|
| 932 |
+
dead. Wrapping keeps every link fully on-screen and plainly tappable. */
|
| 933 |
+
.nav-links { width: 100%; gap: 10px 16px; flex-wrap: wrap; }
|
| 934 |
+
.nav-solo { flex: 0 0 auto; white-space: nowrap; padding: 8px 6px;
|
| 935 |
+
touch-action: manipulation; }
|
| 936 |
+
}
|
| 937 |
+
|
| 938 |
+
/* Activity "Activity by stage" chart shares a 2-col row with the timeline, so on
|
| 939 |
+
a phone it's stuck at half width and the x-axis stage labels collapse into
|
| 940 |
+
garbled overlapping text. Stack the row so the chart renders full-width. */
|
| 941 |
+
@media (max-width: 760px) {
|
| 942 |
+
.act-chart-row { flex-direction: column !important; }
|
| 943 |
+
.act-chart-row > * { width: 100% !important; }
|
| 944 |
+
}
|
| 945 |
+
|
| 946 |
+
/* Footer: keep "on Hugging Face" (its own <a> in .footer-meta) on one line
|
| 947 |
+
instead of breaking as "on Hugging" / "Face". */
|
| 948 |
+
.footer-meta a { white-space: nowrap; }
|
| 949 |
+
|
| 950 |
+
/* Hardware-degraded banner — revealed by app.py's /health probe (inline JS). */
|
| 951 |
+
#status-banner-host { padding: 0; margin: 0; }
|
| 952 |
+
.status-banner {
|
| 953 |
+
display: none;
|
| 954 |
+
padding: 10px 16px;
|
| 955 |
+
text-align: center;
|
| 956 |
+
font-weight: 600;
|
| 957 |
+
font-size: 0.95rem;
|
| 958 |
+
line-height: 1.4;
|
| 959 |
+
background: var(--accent);
|
| 960 |
+
color: #fff;
|
| 961 |
+
}
|
static/logo.png
ADDED
|