diff --git "a/data/quest_corpus.json" "b/data/quest_corpus.json" new file mode 100644--- /dev/null +++ "b/data/quest_corpus.json" @@ -0,0 +1,4614 @@ +{ + "generated_at": "2026-06-07T20:44:26+00:00", + "source_snapshot": "data/projects.json", + "snapshot_generated_at": "2026-06-07T11:51:09+00:00", + "project_count": 125, + "projects": [ + { + "id": "build-small-hackathon/Advent_of_a_World_of_Flowering_Trees", + "title": "Advent Of A World Of Flowering Trees", + "summary": "This space is for Huggingface build small hackathon", + "tags": [ + "gradio", + "region:us" + ], + "models": [], + "datasets": [], + "sdk": "gradio", + "license": "mit", + "likes": 1, + "url": "https://huggingface.co/spaces/build-small-hackathon/Advent_of_a_World_of_Flowering_Trees", + "app_file": "app.py", + "readme_raw": "---\ntitle: Advent Of A World Of Flowering Trees\nemoji: ☃️\ncolorFrom: indigo\ncolorTo: pink\nsdk: gradio\nsdk_version: 6.16.0\npython_version: '3.12.12'\napp_file: app.py\npinned: true\nlicense: mit\nshort_description: This space is for Huggingface build small hackathon\npreload_from_hub:\n - CohereLabs/tiny-aya-global-GGUF tiny-aya-global-q4_k_m.gguf\n - black-forest-labs/FLUX.2-klein-4b-nvfp4\n - openbmb/MiniCPM-V-4.6-Thinking-gguf\n---\n\nCheck out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference\n\n\n\n## For development:\n\nfirst download uv and hf cli tool\n\n\n```bash\nuv venv --python 3.13 --seed\n```\n\nthen activate the virtual env .venv\n\n```bash\nsource .venv/Scripts/activate\n```\n\nthen download dependencies\n```python\npython -m pip install -r requirements.txt\n```\n\nthen play around and change code..", + "readme_body": "Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference\n\n\n\n## For development:\n\nfirst download uv and hf cli tool\n\n\n```bash\nuv venv --python 3.13 --seed\n```\n\nthen activate the virtual env .venv\n\n```bash\nsource .venv/Scripts/activate\n```\n\nthen download dependencies\n```python\npython -m pip install -r requirements.txt\n```\n\nthen play around and change code..", + "readme_frontmatter": { + "title": "Advent Of A World Of Flowering Trees", + "emoji": "☃️", + "colorFrom": "indigo", + "colorTo": "pink", + "sdk": "gradio", + "sdk_version": "6.16.0", + "python_version": "3.12.12", + "app_file": "app.py", + "pinned": "true", + "license": "mit", + "short_description": "This space is for Huggingface build small hackathon", + "preload_from_hub": "" + }, + "app_source": "import gradio as gr\nimport spaces\nfrom huggingface_hub import hf_hub_download\nimport os\nimport ctypes\n\n\nMODEL_REPO_ID = \"CohereLabs/tiny-aya-global-GGUF\"\nMODEL_FILENAME = \"tiny-aya-global-q4_k_m.gguf\"\n\nmodel_path = hf_hub_download(\n repo_id=MODEL_REPO_ID,\n filename=MODEL_FILENAME,\n)\n\n_llm = None\n\n# try:\n# import nvidia.cuda_runtime\n# import nvidia.cublas\n# cudart = os.path.join(os.path.dirname(nvidia.cuda_runtime.__file__), \"lib\", \"libcudart.so.12\")\n# cublas = os.path.join(os.path.dirname(nvidia.cublas.__file__), \"lib\", \"libcublas.so.12\")\n# ctypes.CDLL(cudart, mode=ctypes.RTLD_GLOBAL)\n# ctypes.CDLL(cublas, mode=ctypes.RTLD_GLOBAL)\n# except Exception:\n# pass\n\ndef get_llm():\n global _llm\n if _llm is None:\n from llama_cpp import Llama\n\n _llm = Llama(\n model_path=model_path,\n n_gpu_layers=-1,\n n_ctx=1024,\n flash_attn=True,\n verbose=False,\n )\n return _llm\n\n\n@spaces.GPU(duration=120)\ndef run_inference(prompt: str) -> str:\n prompt = prompt.strip()\n if not prompt:\n return \"Enter a prompt to generate a response.\"\n\n try:\n llm = get_llm()\n except Exception as exc:\n return f\"llama-cpp initialization failed: {exc}\"\n\n response = llm.create_chat_completion(\n messages=[{\"role\": \"user\", \"content\": prompt}],\n max_tokens=512,\n temperature=0.7,\n )\n return response[\"choices\"][0][\"message\"][\"content\"].strip()\n\n\nwith gr.Blocks(title=\"Advent Of A World Of Flowering Trees\") as demo:\n gr.Markdown(\"# Advent Of A World Of Flowering Trees\")\n gr.Markdown(\"Tiny Aya GGUF demo running with `llama-cpp-python`.\")\n\n prompt = gr.Textbox(\n label=\"Prompt\",\n lines=6,\n placeholder=\"Ask something...\",\n )\n output = gr.Textbox(label=\"Response\", lines=12)\n submit = gr.Button(\"Generate\", variant=\"primary\")\n\n submit.click(fn=run_inference, inputs=prompt, outputs=output)\n prompt.submit(fn=run_inference, inputs=prompt, outputs=output)\n\n\nif __name__ == \"__main__\":\n demo.launch()\n", + "app_signals": "get_llm run_inference prompt CohereLabs/tiny-aya-global-GGUF tiny-aya-global-q4_k_m.gguf hf_hub_download repo_id filename spaces.GPU duration prompt.strip llm.create_chat_completion messages max_tokens temperature strip gr.Blocks title gr.Markdown gr.Textbox label lines placeholder gr.Button variant submit.click fn inputs outputs prompt.submit __main__ demo.launch Llama model_path n_gpu_layers n_ctx flash_attn verbose Enter a prompt to generate a response. # Advent Of A World Of Flowering Trees Tiny Aya GGUF demo running with `llama-cpp-python`. Generate Advent Of A World Of Flowering Trees Prompt Ask something... Response primary llama-cpp initialization failed: content role user message choices", + "readme_len": 397, + "app_source_len": 2092, + "app_signals_len": 705 + }, + { + "id": "build-small-hackathon/agent-swarm-workbench", + "title": "Backyard Demo Builder", + "summary": "Build tiny real-person demos before scaling custom software.", + "tags": [ + "agents", + "ai-agents", + "backyard-ai", + "build-small-hackathon", + "demo-builder", + "gradio", + "real-estate", + "small-language-model" + ], + "models": [ + "unsloth/gemma-4-12B-it-qat-GGUF", + "Qwen/Qwen2.5-7B-Instruct", + "nvidia/Nemotron-3.5-Content-Safety" + ], + "datasets": [], + "sdk": "gradio", + "license": "", + "likes": 0, + "url": "https://huggingface.co/spaces/build-small-hackathon/agent-swarm-workbench", + "app_file": "app.py", + "readme_raw": "---\ntitle: Backyard Demo Builder\nemoji: 🏡\ncolorFrom: gray\ncolorTo: green\nsdk: gradio\npython_version: \"3.12.12\"\napp_file: app.py\nshort_description: Build tiny real-person demos before scaling custom software.\nmodels:\n - unsloth/gemma-4-12B-it-qat-GGUF\n - Qwen/Qwen2.5-7B-Instruct\n - nvidia/Nemotron-3.5-Content-Safety\ndatasets: []\ntags:\n - build-small-hackathon\n - backyard-ai\n - gradio\n - agents\n - small-language-model\n - demo-builder\n - real-estate\n - ai-agents\npinned: false\n---\n\n# Backyard Demo Builder\n\n## Chapter 1: Backyard AI\n\n*Build Small Hackathon 2026 — Chapter 1 Submission*\n\n`agent-swarm-workbench` now presents as **Backyard Demo Builder**: a Gradio app\nthat turns one real person's workflow into a small runnable demo package before\nanyone pays to build full software.\n\nFirst backyard case: my mom, a real-estate agent. She needs a cheap way to test\na customer follow-up reminder workflow before committing time and money to a\nfull app.\n\n---\n\n## Watch the Demo Builder Work\n\n```\nYou: \"Build a real-estate follow-up CRM demo for my mom.\"\nBuilder: Generates a Gradio mini-app, handoff spec, field notes, and checks\nResult: app.py, README.md, handoff_spec.md, field_notes.md\nMom: Tests the workflow, then we scrap or scale.\n```\n\nEvery Run produces a **downloadable demo package** and Validation report: files\nyou can inspect, unzip, run, and test with the real person.\n\n---\n\n## Build Small Hackathon — Submission Notes\n\n| Requirement | How We Meet It |\n|---|---|\n| **Small model (≤ 32B)** | Provider catalog fetches models at runtime and only allows models whose ID/name proves ≤32B |\n| **Gradio app** | Custom dark-themed Gradio UI mounted on FastAPI |\n| **HF Space** | `app.py` + `requirements.txt` — one-command deploy |\n| **Demo video** | *(placeholder — [link to demo])* |\n| **Social post** | *(placeholder — [link to post])* |\n\n### Bonus Badges Claimed\n\n| Badge | Why |\n|---|---|\n| **🎨 Off-Brand** | Fully custom CSS dark theme — Archivo + IBM Plex Mono, acid green CTAs, paper/ink palette, CSS grid layout, status chips. Not a default Gradio component in sight. |\n| **📡 Sharing is Caring** | Agent traces and swarm reasoning are surfaced in the Events panel. We'll publish a trace on the Hub. |\n| **📓 Field Notes** | Generated demo packages include `field_notes.md`; this repo also documents the architecture and decisions. |\n\n---\n\n## Why This Belongs in Backyard AI\n\nThis solves a real problem for someone I know.\n\n- **Specific person** — my mom, a real-estate agent.\n- **Specific pain** — follow-up reminders and customer-care demos are useful, but custom app dev is slow and risky.\n- **Honest small-model fit** — a ≤32B model drafts the demo and handoff spec; rules handle the reminder logic.\n- **Actually testable** — the generated package includes field notes and feedback questions for the real user.\n\n---\n\n## How It Works Under the Hood\n\n```\n┌─────────────────────────────────────────────────────┐\n│ Gradio UI / HTTP API │\n├─────────────────────────────────────────────────────┤\n│ RunFlow — lifecycle conductor │\n│ ┌──────────┐ ┌────────────┐ ┌────────────────┐ │\n│ │ Swarm │ │ Codebase │ │ Validator │ │\n│ │ Runtime │→│ Archive │→│ Graph │ │\n│ │ ��� │ Store │ │ │ │\n│ │ Planner │ │ (local/ │ │ Sandbox checks │ │\n│ │ Coder │ │ Redis) │ │ Rubric review │ │\n│ │ Reviewer │ │ │ │ Stagehand │ │\n│ │ Tester │ │ │ │ (Browserbase) │ │\n│ └──────────┘ └────────────┘ └────────────────┘ │\n│ EventBus → SSE stream to UI │\n└─────────────────────────────────────────────────────┘\n```\n\n### The Swarm\n\n- **Coordinator** reads the prompt, plans tasks, delegates to subagents\n- **Planner** breaks down the prompt into implementable units\n- **Coder** writes the actual code files\n- **Reviewer** checks code quality and correctness\n- **Test-runner** runs the user's tests and retries up to 3x on failure\n- **Validator-prep** generates validation checks from user criteria\n\n### The Validator\n\nAfter the swarm finishes, a LangGraph Validator workflow:\n1. Restores the codebase into a clean sandbox\n2. Runs user-provided tests\n3. Executes LLM-based rubric review\n4. (Optional) Runs Browserbase/Stagehand visual checks\n5. Produces a pass/fail Validation Report\n\n### The Sandbox\n\nAll agent work happens inside isolated sandbox workspaces:\n- **Local** (for dev/smoke tests)\n- **Docker** (container-based)\n- **Daytona** (cloud sandboxes)\n\n---\n\n## Run It\n\n```bash\ngit clone https://github.com/Kiy-K/agent-swarm-workbench.git\ncd agent-swarm-workbench\ncp .env.example .env\n# Optional: add server fallback keys. Users can also paste their own key in the UI.\nuv run uvicorn app:app --host 0.0.0.0 --port 8790\n```\n\nOpen http://localhost:8790, type a prompt, choose a provider, fetch models with your API key, then click Start Run.\n\nModel selection:\n- Model lists are fetched from the selected provider/API endpoint at runtime.\n- UI only offers fetched models whose ID/name proves `<=32B` parameters.\n- Unknown-size models are shown in the catalog response as `unknown_parameters` but are not selectable.\n- User API keys and fetched catalogs live only in process memory. They are not persisted, not stored in Redis/DB, and not kept in Gradio state. Click \"Refresh models\" to clear and refetch that provider cache.\n\nFor Hugging Face Spaces:\n```bash\nuv run python app.py\n```\n\n## Test\n\n```bash\npython scripts/task.py verify # required completion gate: tests + harness\npython scripts/task.py test # 90 tests, all passing\npython scripts/task.py harness -- --prompt \"Build a tiny CLI\" --test \"test -f README.md\"\npython scripts/task.py smoke # Local agent session smoke check\npython scripts/task.py validator-smoke # Validator end-to-end\n```\n\n### Agent Harness\n\nThe harness is the fast way to exercise the Run lifecycle without waiting on a\nfull demo session:\n\n```bash\npython scripts/task.py verify\npython scripts/task.py harness -- --prompt \"Build a tiny CLI\" --output-dir /tmp/harness\npython scripts/task.py harness -- --mode live --prompt \"Build a tiny CLI\"\n```\n\n`verify` is the required completion gate for coding agents. It runs the Python\nsuite, then runs the default scripted Agent Swarm Harness so changes are checked\nagainst the same Run -> SwarmRuntime -> Archive -> Validator path that the app\nuses.\n\nModes:\n\n| Mode | Purpose |\n|---|---|\n| `swarm` | Default. Runs `RunFlow -> SwarmRuntime -> Archive -> Validator` with a scripted local DeepAgent-compatible session. |\n| `live` | Uses the real `create_session()` DeepAgents path and the configured sandbox provider. |\n\n## Environment\n\n| Var | Purpose |\n|---|---|\n| `DEEPAGENT_MODEL_PROVIDER` | Server fallback model provider: `openrouter`, `gemini`, `nebius`, `huggingface`, `custom`, or `local` |\n| `DEEPAGENT_MODEL` | Server fallback model ID. Must prove `<=32B` when selected per Run. |\n| `DEEPAGENT_MODEL_BASE_URL` | Optional OpenAI-compatible `/v1` endpoint |\n| `OPENROUTER_API_KEY` / `GEMINI_API_KEY` / `NEBIUS_API_KEY` / `HF_TOKEN` | Optional server fallback keys for trusted server/CLI runs only. The public Gradio UI requires the user to enter their own hosted-provider key and does not use these by default. |\n| `DEEPAGENT_SANDBOX_PROVIDER` | `local`, `docker`, or `daytona` |\n| `BROWSERBASE_API_KEY` | Optional — visual validation via Stagehand |\n| `UPSTASH_REDIS_REST_URL` / `TOKEN` | Optional — persistent runs & archives |\n\n---\n\n## Stack\n\n- **Python 3.11+** / **FastAPI** / **Gradio 6**\n- **LangChain DeepAgents** — multi-subagent swarm runtime\n- **Provider adapters** — OpenRouter, Gemini, Nebius, Hugging Face Router, custom OpenAI-compatible, local OpenAI-compatible\n- **LangGraph** — Validator workflow\n- **QuickJS code interpreter** — in-sandbox code execution middleware\n- **Browserbase + Stagehand** — visual web validation (optional)\n\n## Architecture\n\n```\narena/\n agent.py — Swarm factory, model, subagents, sandbox backend\n backyard_templates.py — Backyard demo template registry\n model_provider.py — Chat model factory for provider selection\n model_catalog.py — Provider model list adapters and TTL cache\n swarm_runtime.py — Active Run registration and Swarm session leasing\n swarm_session.py — Prompt seeding, agent turns, test retries, snapshots\n sandbox_lease.py — Idle TTL, touch, and close behavior for sandboxes\n run_flow.py — Run lifecycle: create → execute → archive → validate\n run_journal.py — Run mutation journal: status, tasks, events, timestamps\n run_store.py — Run persistence (InMemory / Redis via Upstash)\n codebase_handoff.py — Workspace snapshot and Validator sandbox restore\n codebase_archive.py — Archive persistence (local / Redis)\n validator_plan.py — Typed Validator plan from user tests/checks\n validator_graph.py — LangGraph Validator workflow\n thread_inspector.py — Manual Thread/session debug surface\n gradio_app.py — Thin Gradio component wiring\n gradio_presenter.py — Run output formatting for Gradio\n gradio_markup.py — Static Gradio shell markup\n api.py — FastAPI REST + SSE endpoints\n event_bus.py — In-process event streaming\n browserbase_tools.py — Web fetch/search tools for the swarm\n stagehand_validator.py — Browserbase visual validation\n docker_backend.py — Docker sandbox provider\n skill_catalog.py — Bundled DeepAgents skills discovery\ntests_python/ — Python test suite (integration + unit)\n```\n\n---\n\n*Built with a sub-32B model for the Build Small Hackathon, June 2026.*\n", + "readme_body": "# Backyard Demo Builder\n\n## Chapter 1: Backyard AI\n\n*Build Small Hackathon 2026 — Chapter 1 Submission*\n\n`agent-swarm-workbench` now presents as **Backyard Demo Builder**: a Gradio app\nthat turns one real person's workflow into a small runnable demo package before\nanyone pays to build full software.\n\nFirst backyard case: my mom, a real-estate agent. She needs a cheap way to test\na customer follow-up reminder workflow before committing time and money to a\nfull app.\n\n---\n\n## Watch the Demo Builder Work\n\n```\nYou: \"Build a real-estate follow-up CRM demo for my mom.\"\nBuilder: Generates a Gradio mini-app, handoff spec, field notes, and checks\nResult: app.py, README.md, handoff_spec.md, field_notes.md\nMom: Tests the workflow, then we scrap or scale.\n```\n\nEvery Run produces a **downloadable demo package** and Validation report: files\nyou can inspect, unzip, run, and test with the real person.\n\n---\n\n## Build Small Hackathon — Submission Notes\n\n| Requirement | How We Meet It |\n|---|---|\n| **Small model (≤ 32B)** | Provider catalog fetches models at runtime and only allows models whose ID/name proves ≤32B |\n| **Gradio app** | Custom dark-themed Gradio UI mounted on FastAPI |\n| **HF Space** | `app.py` + `requirements.txt` — one-command deploy |\n| **Demo video** | *(placeholder — [link to demo])* |\n| **Social post** | *(placeholder — [link to post])* |\n\n### Bonus Badges Claimed\n\n| Badge | Why |\n|---|---|\n| **🎨 Off-Brand** | Fully custom CSS dark theme — Archivo + IBM Plex Mono, acid green CTAs, paper/ink palette, CSS grid layout, status chips. Not a default Gradio component in sight. |\n| **📡 Sharing is Caring** | Agent traces and swarm reasoning are surfaced in the Events panel. We'll publish a trace on the Hub. |\n| **📓 Field Notes** | Generated demo packages include `field_notes.md`; this repo also documents the architecture and decisions. |\n\n---\n\n## Why This Belongs in Backyard AI\n\nThis solves a real problem for someone I know.\n\n- **Specific person** — my mom, a real-estate agent.\n- **Specific pain** — follow-up reminders and customer-care demos are useful, but custom app dev is slow and risky.\n- **Honest small-model fit** — a ≤32B model drafts the demo and handoff spec; rules handle the reminder logic.\n- **Actually testable** — the generated package includes field notes and feedback questions for the real user.\n\n---\n\n## How It Works Under the Hood\n\n```\n┌─────────────────────────────────────────────────────┐\n│ Gradio UI / HTTP API │\n├─────────────────────────────────────────────────────┤\n│ RunFlow — lifecycle conductor │\n│ ┌──────────┐ ┌────────────┐ ┌────────────────┐ │\n│ │ Swarm │ │ Codebase │ │ Validator │ │\n│ │ Runtime │→│ Archive │→│ Graph │ │\n│ │ │ │ Store │ │ │ │\n│ │ Planner │ │ (local/ │ │ Sandbox checks │ │\n│ │ Coder │ │ Redis) │ │ Rubric review │ │\n│ │ Reviewer │ │ │ │ Stagehand │ │\n│ │ Tester │ │ │ │ (Browserbase) │ │\n│ └──────────┘ └────────────┘ └────────────────┘ │\n│ EventBus → SSE stream to UI │\n└─────────────────────────────────────────────────────┘\n```\n\n### The Swarm\n\n- **Coordinator** reads the prompt, plans tasks, delegates to subagents\n- **Planner** breaks down the prompt into implementable units\n- **Coder** writes the actual code files\n- **Reviewer** checks code quality and correctness\n- **Test-runner** runs the user's tests and retries up to 3x on failure\n- **Validator-prep** generates validation checks from user criteria\n\n### The Validator\n\nAfter the swarm finishes, a LangGraph Validator workflow:\n1. Restores the codebase into a clean sandbox\n2. Runs user-provided tests\n3. Executes LLM-based rubric review\n4. (Optional) Runs Browserbase/Stagehand visual checks\n5. Produces a pass/fail Validation Report\n\n### The Sandbox\n\nAll agent work happens inside isolated sandbox workspaces:\n- **Local** (for dev/smoke tests)\n- **Docker** (container-based)\n- **Daytona** (cloud sandboxes)\n\n---\n\n## Run It\n\n```bash\ngit clone https://github.com/Kiy-K/agent-swarm-workbench.git\ncd agent-swarm-workbench\ncp .env.example .env\n# Optional: add server fallback keys. Users can also paste their own key in the UI.\nuv run uvicorn app:app --host 0.0.0.0 --port 8790\n```\n\nOpen http://localhost:8790, type a prompt, choose a provider, fetch models with your API key, then click Start Run.\n\nModel selection:\n- Model lists are fetched from the selected provider/API endpoint at runtime.\n- UI only offers fetched models whose ID/name proves `<=32B` parameters.\n- Unknown-size models are shown in the catalog response as `unknown_parameters` but are not selectable.\n- User API keys and fetched catalogs live only in process memory. They are not persisted, not stored in Redis/DB, and not kept in Gradio state. Click \"Refresh models\" to clear and refetch that provider cache.\n\nFor Hugging Face Spaces:\n```bash\nuv run python app.py\n```\n\n## Test\n\n```bash\npython scripts/task.py verify # required completion gate: tests + harness\npython scripts/task.py test # 90 tests, all passing\npython scripts/task.py harness -- --prompt \"Build a tiny CLI\" --test \"test -f README.md\"\npython scripts/task.py smoke # Local agent session smoke check\npython scripts/task.py validator-smoke # Validator end-to-end\n```\n\n### Agent Harness\n\nThe harness is the fast way to exercise the Run lifecycle without waiting on a\nfull demo session:\n\n```bash\npython scripts/task.py verify\npython scripts/task.py harness -- --prompt \"Build a tiny CLI\" --output-dir /tmp/harness\npython scripts/task.py harness -- --mode live --prompt \"Build a tiny CLI\"\n```\n\n`verify` is the required completion gate for coding agents. It runs the Python\nsuite, then runs the default scripted Agent Swarm Harness so changes are checked\nagainst the same Run -> SwarmRuntime -> Archive -> Validator path that the app\nuses.\n\nModes:\n\n| Mode | Purpose |\n|---|---|\n| `swarm` | Default. Runs `RunFlow -> SwarmRuntime -> Archive -> Validator` with a scripted local DeepAgent-compatible session. |\n| `live` | Uses the real `create_session()` DeepAgents path and the configured sandbox provider. |\n\n## Environment\n\n| Var | Purpose |\n|---|---|\n| `DEEPAGENT_MODEL_PROVIDER` | Server fallback model provider: `openrouter`, `gemini`, `nebius`, `huggingface`, `custom`, or `local` |\n| `DEEPAGENT_MODEL` | Server fallback model ID. Must prove `<=32B` when selected per Run. |\n| `DEEPAGENT_MODEL_BASE_URL` | Optional OpenAI-compatible `/v1` endpoint |\n| `OPENROUTER_API_KEY` / `GEMINI_API_KEY` / `NEBIUS_API_KEY` / `HF_TOKEN` | Optional server fallback keys for trusted server/CLI runs only. The public Gradio UI requires the user to enter their own hosted-provider key and does not use these by default. |\n| `DEEPAGENT_SANDBOX_PROVIDER` | `local`, `docker`, or `daytona` |\n| `BROWSERBASE_API_KEY` | Optional — visual validation via Stagehand |\n| `UPSTASH_REDIS_REST_URL` / `TOKEN` | Optional — persistent runs & archives |\n\n---\n\n## Stack\n\n- **Python 3.11+** / **FastAPI** / **Gradio 6**\n- **LangChain DeepAgents** — multi-subagent swarm runtime\n- **Provider adapters** — OpenRouter, Gemini, Nebius, Hugging Face Router, custom OpenAI-compatible, local OpenAI-compatible\n- **LangGraph** — Validator workflow\n- **QuickJS code interpreter** — in-sandbox code execution middleware\n- **Browserbase + Stagehand** — visual web validation (optional)\n\n## Architecture\n\n```\narena/\n agent.py — Swarm factory, model, subagents, sandbox backend\n backyard_templates.py — Backyard demo template registry\n model_provider.py — Chat model factory for provider selection\n model_catalog.py — Provider model list adapters and TTL cache\n swarm_runtime.py — Active Run registration and Swarm session leasing\n swarm_session.py — Prompt seeding, agent turns, test retries, snapshots\n sandbox_lease.py — Idle TTL, touch, and close behavior for sandboxes\n run_flow.py — Run lifecycle: create → execute → archive → validate\n run_journal.py — Run mutation journal: status, tasks, events, timestamps\n run_store.py — Run persistence (InMemory / Redis via Upstash)\n codebase_handoff.py — Workspace snapshot and Validator sandbox restore\n codebase_archive.py — Archive persistence (local / Redis)\n validator_plan.py — Typed Validator plan from user tests/checks\n validator_graph.py — LangGraph Validator workflow\n thread_inspector.py — Manual Thread/session debug surface\n gradio_app.py — Thin Gradio component wiring\n gradio_presenter.py — Run output formatting for Gradio\n gradio_markup.py — Static Gradio shell markup\n api.py — FastAPI REST + SSE endpoints\n event_bus.py — In-process event streaming\n browserbase_tools.py — Web fetch/search tools for the swarm\n stagehand_validator.py — Browserbase visual validation\n docker_backend.py — Docker sandbox provider\n skill_catalog.py — Bundled DeepAgents skills discovery\ntests_python/ — Python test suite (integration + unit)\n```\n\n---\n\n*Built with a sub-32B model for the Build Small Hackathon, June 2026.*", + "readme_frontmatter": { + "title": "Backyard Demo Builder", + "emoji": "🏡", + "colorFrom": "gray", + "colorTo": "green", + "sdk": "gradio", + "python_version": "3.12.12", + "app_file": "app.py", + "short_description": "Build tiny real-person demos before scaling custom software.", + "models": "", + "datasets": "[]", + "tags": "", + "pinned": "false" + }, + "app_source": "\"\"\"Unified ASGI entrypoint for API and Gradio UI.\"\"\"\n\nfrom __future__ import annotations\n\nimport os\n\nimport gradio as gr\nimport uvicorn\n\nfrom arena.api import app as fastapi_app\nfrom arena.api import service\nfrom arena.gradio_app import build_app\n\n\ndemo = build_app(service)\n\n\ndef create_app():\n \"\"\"Create one FastAPI ASGI app with Gradio mounted at the root.\"\"\"\n\n return gr.mount_gradio_app(fastapi_app, demo, path=\"/\")\n\n\napp = create_app()\n\n\ntry:\n import spaces\nexcept Exception:\n class _SpacesShim:\n def GPU(self, fn=None, **kwargs):\n del kwargs\n\n def decorator(inner):\n return inner\n\n return decorator(fn) if fn else decorator\n\n spaces = _SpacesShim()\n\n\n@spaces.GPU\ndef zerogpu_ready_marker() -> str:\n return \"ready\"\n\n\ndef server_config() -> dict[str, int | str]:\n host = os.getenv(\"GRADIO_SERVER_NAME\", os.getenv(\"HOST\", \"0.0.0.0\"))\n port = int(os.getenv(\"GRADIO_SERVER_PORT\") or os.getenv(\"PORT\") or \"7860\")\n return {\"host\": host, \"port\": port}\n\n\ndef gradio_launch_config() -> dict[str, bool | int | str]:\n config = server_config()\n port = int(os.getenv(\"GRADIO_SERVER_PORT\", \"7861\")) if os.getenv(\"SPACE_ID\") else int(config[\"port\"])\n return {\"server_name\": str(config[\"host\"]), \"server_port\": port, \"ssr_mode\": False}\n\n\ndef should_launch_gradio_space() -> bool:\n return bool(os.getenv(\"SPACE_ID\")) and os.getenv(\"FORCE_SELF_LAUNCH\") != \"1\"\n\n\ndef should_self_launch() -> bool:\n if os.getenv(\"FORCE_SELF_LAUNCH\") == \"1\":\n return True\n return not should_launch_gradio_space()\n\n\ndef _space_sdk() -> str:\n return os.getenv(\"SPACE_SDK\", os.getenv(\"HF_SPACE_SDK\", \"\")).strip().lower()\n\n\nif __name__ == \"__main__\":\n if should_launch_gradio_space():\n demo.launch(**gradio_launch_config())\n elif should_self_launch():\n uvicorn.run(app, **server_config())\n", + "app_signals": "create_app zerogpu_ready_marker server_config gradio_launch_config should_launch_gradio_space should_self_launch _space_sdk Unified ASGI entrypoint for API and Gradio UI. build_app Create one FastAPI ASGI app with Gradio mounted at the root. gr.mount_gradio_app path _SpacesShim ready os.getenv int lower __main__ GPU self fn GRADIO_SERVER_NAME host port server_name server_port ssr_mode str bool 1 demo.launch / decorator inner HOST 0.0.0.0 7860 SPACE_ID FORCE_SELF_LAUNCH strip uvicorn.run GRADIO_SERVER_PORT PORT 7861 SPACE_SDK HF_SPACE_SDK", + "readme_len": 9123, + "app_source_len": 1883, + "app_signals_len": 543 + }, + { + "id": "build-small-hackathon/AI-agent-Evaluation-pipeline", + "title": "ai agent evaluation pipeline", + "summary": "Evaluate AI agents at Session, Trace & Span levels", + "tags": [ + "agents", + "evaluation", + "gradio", + "llm", + "observability" + ], + "models": [], + "datasets": [], + "sdk": "gradio", + "license": "mit", + "likes": 0, + "url": "https://huggingface.co/spaces/build-small-hackathon/AI-agent-Evaluation-pipeline", + "app_file": "app.py", + "readme_raw": "---\ntitle: ai agent evaluation pipeline\nemoji: 🧪\ncolorFrom: purple\ncolorTo: blue\nsdk: gradio\nsdk_version: 6.14.0\napp_file: app.py\npinned: false\nlicense: mit\nshort_description: Evaluate AI agents at Session, Trace & Span levels\ntags:\n - evaluation\n - agents\n - llm\n - gradio\n - observability\n---\n\n# 🧪 AI Agent Evaluation Pipeline\n\n> Evaluate AI agents at **Session**, **Trace**, and **Span** levels — inspired by [Amazon Bedrock AgentCore Evaluations](https://docs.aws.amazon.com/bedrock-agentcore/latest/devguide/evaluations.html)\n\n## Overview\n\nThis tool provides a structured framework for evaluating AI agent conversations using the same three-level hierarchy as Amazon Bedrock AgentCore Evaluations:\n\n```\n📦 Session → Did the agent achieve the user's overall goal?\n └── 🔄 Trace → Per-turn quality (helpfulness, coherence, relevance...)\n └── 🔧 Span → Per tool-call accuracy\n```\n\n## Features\n\n- **14 built-in evaluators** (1 session + 11 trace + 2 span)\n- **Heuristic mode** — works offline, no API key required\n- **3 demo traces** (Simple Q&A, Tool Calling, Multi-turn)\n- **Ground truth support** — `expected_response`, `expected_trajectory`, `assertions`\n- **Visual results** — radar chart, bar chart, heatmap, score cards\n\n## Evaluators\n\n### 📦 Session Level (1)\n\n| Evaluator | Description |\n| ----------------- | --------------------------------------------------- |\n| Goal Success Rate | Did the agent fully achieve the user's stated goal? |\n\n### 🔄 Trace Level (11)\n\n| Evaluator | Description |\n| ----------------------- | ----------------------------------------------------------- |\n| Helpfulness | Does the response help the user progress toward their goal? |\n| Correctness | Is the response factually correct? |\n| Coherence | Is the reasoning logically consistent and well-structured? |\n| Conciseness | Is the response appropriately concise? |\n| Faithfulness | Is the response consistent with conversation history? |\n| Harmfulness | Does the response contain harmful content? |\n| Instruction Following | Does the agent follow its system prompt? |\n| Response Relevance | Does the response address what was asked? |\n| Context Relevance | Was the retrieved context relevant? (RAG) |\n| Refusal Appropriateness | Did the agent correctly handle refusals? |\n| Stereotyping / Bias | Is there demographic bias in the response? |\n\n### 🔧 Span Level (2)\n\n| Evaluator | Description |\n| ----------------------- | -------------------------------------- |\n| Tool Selection Accuracy | Did the agent choose the right tool? |\n| Tool Parameter Accuracy | Did the agent pass correct parameters? |\n\n## JSON Trace Format\n\n```json\n{\n \"session_id\": \"my_session\",\n \"user_goal\": \"The user's overall goal for this conversation\",\n \"system_prompt\": \"(optional) System instructions given to the agent\",\n \"traces\": [\n {\n \"trace_id\": \"t1\",\n \"user_input\": \"User's message\",\n \"agent_response\": \"Agent's reply\",\n \"retrieved_context\": \"(optional) RAG context\",\n \"spans\": [\n {\n \"span_id\": \"s1\",\n \"span_type\": \"TOOL_CALL\",\n \"tool_name\": \"my_tool\",\n \"tool_input\": { \"param\": \"value\" },\n \"tool_output\": \"Tool result\",\n \"duration_ms\": 250\n }\n ]\n }\n ]\n}\n```\n\n## Ground Truth Support\n\nOptional reference inputs for more precise evaluation:\n\n- **`expected_response`** — What the final response should look like (enables Correctness scoring)\n- **`expected_trajectory`** — Expected tool call sequence (enables TrajectoryMatch scoring)\n- **`assertions`** — Natural language assertions about the session (enables GoalSuccessRate scoring)\n\n## Running Locally\n\n```bash\ngit clone https://github.com/your-org/ai-agent-eval-pipeline\ncd ai-agent-eval-pipeline\npip install -r requirements.txt\n\n# Gradio UI\npython app.py # http://localhost:7860\n\n# REST API\npython api.py # http://localhost:8000\n# or\nuvicorn api:app --reload --port 8000\n```\n\n## Integration — Zero Changes to Your Agent\n\n### Option 1 — Python Wrapper\n\n```python\nfrom src.wrapper import SessionTracer\n\nwith SessionTracer(\n goal=\"Interview a Python candidate\",\n system_prompt=\"You are a technical interviewer...\",\n) as tracer:\n for user_msg in conversation:\n # Your agent code — completely unchanged\n response = my_agent.invoke(user_msg)\n\n # Optional: capture tool calls made during this turn\n span = tracer.new_span()\n span.log_span(\"search_kb\", {\"query\": user_msg}, kb_result)\n\n tracer.log_trace(user_msg, response, span)\n\n report = tracer.evaluate()\n print(f\"Overall: {report.overall_score:.0%}\")\n tracer.save(\"traces/session_001.json\")\n```\n\n### Option 2 — REST API\n\n```bash\n# Start the server\npython api.py # → http://localhost:8000\n\n# Evaluate a session\ncurl -X POST http://localhost:8000/evaluate/quick \\\n -H \"Content-Type: application/json\" \\\n -d '{\n \"trace\": {\n \"session_id\": \"interview_001\",\n \"user_goal\": \"Assess Python skills\",\n \"traces\": [\n {\n \"trace_id\": \"t1\",\n \"user_input\": \"What is a decorator?\",\n \"agent_response\": \"A decorator is a function that wraps another function...\",\n \"spans\": []\n }\n ]\n }\n }'\n```\n\nAPI docs auto-generated at `http://localhost:8000/docs`.\n\n## Architecture\n\n```\napp.py # Gradio UI entry point\napi.py # FastAPI REST server\nsrc/\n├── models.py # Session / Trace / Span / EvalScore data classes\n├── parser.py # JSON trace parser\n├── evaluators.py # All 14 evaluators (heuristic + LLM-ready)\n├── runner.py # Evaluation orchestrator\n├── visualizer.py # Plotly charts\n└── wrapper.py # SessionTracer — captures agent conversations\ndemos/\n├── simple_qa.json # Demo: Simple Q&A\n├── tool_calling.json # Demo: Tool calling\n└── multi_turn.json # Demo: Multi-turn with tools\n```\n\n## Roadmap\n\n### ✅ MVP Complete\n\n- [x] **Gradio UI** — 14 evaluators, Session / Trace / Span levels, 3 demo traces\n- [x] **Agent Wrapper** (`src/wrapper.py`) — `SessionTracer` + `trace_agent` decorator\n- [x] **REST API** (`api.py`) — `POST /evaluate`, `POST /evaluate/quick`, `GET /evaluators`\n- [x] **LLM-as-Judge** (`src/llm_judge.py`) — `Qwen/Qwen3.6-27B` via HF Inference API\n- [x] **pass@k / pass^k** (`src/reliability.py`) — multi-trial reliability metrics\n- [x] **Golden Dataset Generator** — Nemotron-3-Nano-30B, 8 tech interview domains\n- [x] **Deployed** — `build-small-hackathon/AI-agent-Evaluation-pipeline`\n\n### 📋 Future (post-MVP)\n\n- [ ] Export results as JSON / CSV\n- [ ] Custom evaluator builder (user-defined prompt templates)\n- [ ] Dataset management for regression testing\n- [ ] Online monitoring mode\n\n## Inspiration\n\nThis project is inspired by the architecture and evaluator design of [Amazon Bedrock AgentCore Evaluations](https://aws.amazon.com/blogs/machine-learning/build-reliable-ai-agents-with-amazon-bedrock-agentcore-evaluations/), re-implemented as an open-source Gradio application.\n\n## License\n\nMIT\n", + "readme_body": "# 🧪 AI Agent Evaluation Pipeline\n\n> Evaluate AI agents at **Session**, **Trace**, and **Span** levels — inspired by [Amazon Bedrock AgentCore Evaluations](https://docs.aws.amazon.com/bedrock-agentcore/latest/devguide/evaluations.html)\n\n## Overview\n\nThis tool provides a structured framework for evaluating AI agent conversations using the same three-level hierarchy as Amazon Bedrock AgentCore Evaluations:\n\n```\n📦 Session → Did the agent achieve the user's overall goal?\n └── 🔄 Trace → Per-turn quality (helpfulness, coherence, relevance...)\n └── 🔧 Span → Per tool-call accuracy\n```\n\n## Features\n\n- **14 built-in evaluators** (1 session + 11 trace + 2 span)\n- **Heuristic mode** — works offline, no API key required\n- **3 demo traces** (Simple Q&A, Tool Calling, Multi-turn)\n- **Ground truth support** — `expected_response`, `expected_trajectory`, `assertions`\n- **Visual results** — radar chart, bar chart, heatmap, score cards\n\n## Evaluators\n\n### 📦 Session Level (1)\n\n| Evaluator | Description |\n| ----------------- | --------------------------------------------------- |\n| Goal Success Rate | Did the agent fully achieve the user's stated goal? |\n\n### 🔄 Trace Level (11)\n\n| Evaluator | Description |\n| ----------------------- | ----------------------------------------------------------- |\n| Helpfulness | Does the response help the user progress toward their goal? |\n| Correctness | Is the response factually correct? |\n| Coherence | Is the reasoning logically consistent and well-structured? |\n| Conciseness | Is the response appropriately concise? |\n| Faithfulness | Is the response consistent with conversation history? |\n| Harmfulness | Does the response contain harmful content? |\n| Instruction Following | Does the agent follow its system prompt? |\n| Response Relevance | Does the response address what was asked? |\n| Context Relevance | Was the retrieved context relevant? (RAG) |\n| Refusal Appropriateness | Did the agent correctly handle refusals? |\n| Stereotyping / Bias | Is there demographic bias in the response? |\n\n### 🔧 Span Level (2)\n\n| Evaluator | Description |\n| ----------------------- | -------------------------------------- |\n| Tool Selection Accuracy | Did the agent choose the right tool? |\n| Tool Parameter Accuracy | Did the agent pass correct parameters? |\n\n## JSON Trace Format\n\n```json\n{\n \"session_id\": \"my_session\",\n \"user_goal\": \"The user's overall goal for this conversation\",\n \"system_prompt\": \"(optional) System instructions given to the agent\",\n \"traces\": [\n {\n \"trace_id\": \"t1\",\n \"user_input\": \"User's message\",\n \"agent_response\": \"Agent's reply\",\n \"retrieved_context\": \"(optional) RAG context\",\n \"spans\": [\n {\n \"span_id\": \"s1\",\n \"span_type\": \"TOOL_CALL\",\n \"tool_name\": \"my_tool\",\n \"tool_input\": { \"param\": \"value\" },\n \"tool_output\": \"Tool result\",\n \"duration_ms\": 250\n }\n ]\n }\n ]\n}\n```\n\n## Ground Truth Support\n\nOptional reference inputs for more precise evaluation:\n\n- **`expected_response`** — What the final response should look like (enables Correctness scoring)\n- **`expected_trajectory`** — Expected tool call sequence (enables TrajectoryMatch scoring)\n- **`assertions`** — Natural language assertions about the session (enables GoalSuccessRate scoring)\n\n## Running Locally\n\n```bash\ngit clone https://github.com/your-org/ai-agent-eval-pipeline\ncd ai-agent-eval-pipeline\npip install -r requirements.txt\n\n# Gradio UI\npython app.py # http://localhost:7860\n\n# REST API\npython api.py # http://localhost:8000\n# or\nuvicorn api:app --reload --port 8000\n```\n\n## Integration — Zero Changes to Your Agent\n\n### Option 1 — Python Wrapper\n\n```python\nfrom src.wrapper import SessionTracer\n\nwith SessionTracer(\n goal=\"Interview a Python candidate\",\n system_prompt=\"You are a technical interviewer...\",\n) as tracer:\n for user_msg in conversation:\n # Your agent code — completely unchanged\n response = my_agent.invoke(user_msg)\n\n # Optional: capture tool calls made during this turn\n span = tracer.new_span()\n span.log_span(\"search_kb\", {\"query\": user_msg}, kb_result)\n\n tracer.log_trace(user_msg, response, span)\n\n report = tracer.evaluate()\n print(f\"Overall: {report.overall_score:.0%}\")\n tracer.save(\"traces/session_001.json\")\n```\n\n### Option 2 — REST API\n\n```bash\n# Start the server\npython api.py # → http://localhost:8000\n\n# Evaluate a session\ncurl -X POST http://localhost:8000/evaluate/quick \\\n -H \"Content-Type: application/json\" \\\n -d '{\n \"trace\": {\n \"session_id\": \"interview_001\",\n \"user_goal\": \"Assess Python skills\",\n \"traces\": [\n {\n \"trace_id\": \"t1\",\n \"user_input\": \"What is a decorator?\",\n \"agent_response\": \"A decorator is a function that wraps another function...\",\n \"spans\": []\n }\n ]\n }\n }'\n```\n\nAPI docs auto-generated at `http://localhost:8000/docs`.\n\n## Architecture\n\n```\napp.py # Gradio UI entry point\napi.py # FastAPI REST server\nsrc/\n├── models.py # Session / Trace / Span / EvalScore data classes\n├── parser.py # JSON trace parser\n├── evaluators.py # All 14 evaluators (heuristic + LLM-ready)\n├── runner.py # Evaluation orchestrator\n├── visualizer.py # Plotly charts\n└── wrapper.py # SessionTracer — captures agent conversations\ndemos/\n├── simple_qa.json # Demo: Simple Q&A\n├── tool_calling.json # Demo: Tool calling\n└── multi_turn.json # Demo: Multi-turn with tools\n```\n\n## Roadmap\n\n### ✅ MVP Complete\n\n- [x] **Gradio UI** — 14 evaluators, Session / Trace / Span levels, 3 demo traces\n- [x] **Agent Wrapper** (`src/wrapper.py`) — `SessionTracer` + `trace_agent` decorator\n- [x] **REST API** (`api.py`) — `POST /evaluate`, `POST /evaluate/quick`, `GET /evaluators`\n- [x] **LLM-as-Judge** (`src/llm_judge.py`) — `Qwen/Qwen3.6-27B` via HF Inference API\n- [x] **pass@k / pass^k** (`src/reliability.py`) — multi-trial reliability metrics\n- [x] **Golden Dataset Generator** — Nemotron-3-Nano-30B, 8 tech interview domains\n- [x] **Deployed** — `build-small-hackathon/AI-agent-Evaluation-pipeline`\n\n### 📋 Future (post-MVP)\n\n- [ ] Export results as JSON / CSV\n- [ ] Custom evaluator builder (user-defined prompt templates)\n- [ ] Dataset management for regression testing\n- [ ] Online monitoring mode\n\n## Inspiration\n\nThis project is inspired by the architecture and evaluator design of [Amazon Bedrock AgentCore Evaluations](https://aws.amazon.com/blogs/machine-learning/build-reliable-ai-agents-with-amazon-bedrock-agentcore-evaluations/), re-implemented as an open-source Gradio application.\n\n## License\n\nMIT", + "readme_frontmatter": { + "title": "ai agent evaluation pipeline", + "emoji": "🧪", + "colorFrom": "purple", + "colorTo": "blue", + "sdk": "gradio", + "sdk_version": "6.14.0", + "app_file": "app.py", + "pinned": "false", + "license": "mit", + "short_description": "Evaluate AI agents at Session, Trace & Span levels", + "tags": "" + }, + "app_source": "#!/usr/bin/env python3\n\"\"\"\nAI Agent Evaluation Pipeline — Gradio MVP\n==========================================\nEvaluate AI agents at 3 hierarchical levels, inspired by\nAmazon Bedrock AgentCore Evaluations.\n\n 📦 Session — Did the agent achieve the user's goal?\n 🔄 Trace — Per-turn quality (11 evaluators)\n 🔧 Span — Per tool-call accuracy (2 evaluators)\n\nRun locally : python app.py\nHuggingFace : app_file = app.py (Gradio SDK)\n\"\"\"\n\nimport json\nimport os\nimport sys\nfrom pathlib import Path\n\n# Ensure src/ is importable whether run from repo root or HF Spaces\n_ROOT = Path(__file__).parent\nsys.path.insert(0, str(_ROOT))\n\nimport gradio as gr\n\n# HF ZeroGPU Spaces require at least one @spaces.GPU-decorated function\n# to be detected at module load. The actual evaluation and dataset\n# generation work in this app uses the cloud InferenceClient and runs\n# without local GPU compute; the placeholder below exists only to\n# satisfy the runtime's static check. `spaces` is pre-installed on\n# ZeroGPU hardware; we guard the import so the app still loads if it\n# is missing (e.g. local CPU dev).\ntry:\n import spaces as _spaces\nexcept ImportError:\n class _spaces_stub:\n @staticmethod\n def GPU(fn, duration: int = 60):\n return fn\n _spaces = _spaces_stub()\n\n\n@_spaces.GPU\ndef _zero_gpu_healthcheck() -> dict:\n \"\"\"Placeholder GPU function detected by the ZeroGPU runtime.\"\"\"\n try:\n import torch\n return {\"cuda_available\": bool(torch.cuda.is_available())}\n except ImportError:\n return {\"cuda_available\": False, \"note\": \"torch not installed\"}\n\n\nfrom src.evaluators import (\n ALL_EVALUATORS,\n DEFAULT_TRACE_EVALS,\n SESSION_EVALUATORS,\n SPAN_EVALUATORS,\n TRACE_EVALUATORS,\n)\nfrom src.llm_judge import LLMJudge\nfrom src.models import EvalLevel, EvalMode, GroundTruth\nfrom src.parser import format_trace_tree, parse_trace\nfrom src.reliability import compute_reliability\nfrom src.runner import EvalRunner\nfrom src.visualizer import create_bar_chart, create_radar_chart, create_trace_timeline\n\n# ─── Load demo traces ───────────────────────────────────────────────────────\n\n_DEMOS = _ROOT / \"demos\"\n\n\ndef _load_demo(name: str) -> str:\n p = _DEMOS / f\"{name}.json\"\n return p.read_text(encoding=\"utf-8\") if p.exists() else \"{}\"\n\n\nDEMO_SIMPLE_QA = _load_demo(\"simple_qa\")\nDEMO_TOOL_CALLING = _load_demo(\"tool_calling\")\nDEMO_MULTI_TURN = _load_demo(\"multi_turn\")\n\n# ─── UI helpers ─────────────────────────────────────────────────────────────\n\n_LEVEL_COLOR = {\n EvalLevel.SESSION: \"#9B59B6\",\n EvalLevel.TRACE: \"#3498DB\",\n EvalLevel.SPAN: \"#27AE60\",\n}\n\n_LEVEL_ICON = {\n EvalLevel.SESSION: \"📦\",\n EvalLevel.TRACE: \"🔄\",\n EvalLevel.SPAN: \"🔧\",\n}\n\n\ndef _bar_color(score: float) -> str:\n if score >= 0.8:\n return \"#4CAF50\"\n elif score >= 0.6:\n return \"#FF9800\"\n return \"#F44336\"\n\n\ndef _bg_color(score: float) -> str:\n if score >= 0.8:\n return \"rgba(76,175,80,0.12)\"\n elif score >= 0.6:\n return \"rgba(255,152,0,0.12)\"\n return \"rgba(244,67,54,0.12)\"\n\n\ndef render_score_card(score) -> str:\n color = _bar_color(score.score)\n bg = _bg_color(score.score)\n badge_color = _LEVEL_COLOR.get(score.level, \"#888\")\n level_icon = _LEVEL_ICON.get(score.level, \"\")\n\n return f\"\"\"\n
\n
\n
\n {level_icon} {score.level.value}\n {score.evaluator_display}\n
\n {score.score_pct}%\n
\n
\n
\n
\n
\n \n {score.target_label}  ·  {score.mode.value} mode\n
\n {score.explanation}\n
\n
\"\"\"\n\n\ndef render_overall_banner(report) -> str:\n s = report.overall_score\n color = _bar_color(s)\n passed = sum(1 for x in report.scores if x.passed)\n total = len(report.scores)\n status = \"PASS ✅\" if s >= 0.6 else \"NEEDS REVIEW ⚠️\"\n\n # Level breakdown\n sess_avg = (\n sum(x.score for x in report.session_scores) / len(report.session_scores)\n if report.session_scores\n else None\n )\n trace_avg = (\n sum(x.score for x in report.trace_scores) / len(report.trace_scores)\n if report.trace_scores\n else None\n )\n span_avg = (\n sum(x.score for x in report.span_scores) / len(report.span_scores)\n if report.span_scores\n else None\n )\n\n def level_chip(label, avg, icon, level):\n if avg is None:\n return \"\"\n c = _bar_color(avg)\n bc = _LEVEL_COLOR.get(level, \"#888\")\n return (\n f'
'\n f'
{icon} {label}
'\n f'
{avg:.0%}
'\n f\"
\"\n )\n\n chips = \" \".join(\n [\n level_chip(\"SESSION\", sess_avg, \"📦\", EvalLevel.SESSION),\n level_chip(\"TRACE\", trace_avg, \"🔄\", EvalLevel.TRACE),\n level_chip(\"SPAN\", span_avg, \"🔧\", EvalLevel.SPAN),\n ]\n )\n\n return f\"\"\"\n
\n
\n
\n
OVERALL SCORE
\n
{s:.0%}
\n
\n {passed}/{total} evaluators passed  · \n {len(report.session.traces)} turn(s)  · \n {report.elapsed_seconds:.2f}s  · \n {report.eval_mode.value} mode\n
\n
\n
\n
{status}
\n
{chips}
\n
\n
\n
\n
\n
\n
\"\"\"\n\n\ndef parse_and_preview(trace_json: str) -> str:\n if not trace_json or not trace_json.strip():\n return \"*Paste or load a JSON trace above to see a preview.*\"\n try:\n session = parse_trace(trace_json)\n return format_trace_tree(session)\n except Exception as e:\n return f\"❌ **Parse error:** `{e}`\\n\\nCheck that your JSON is valid and contains `user_goal` + `traces`.\"\n\n\n# ─── Benchmark functions ──────────────────────────────────────────────────────\n\n\ndef load_records_from_url(url: str) -> list:\n \"\"\"Load JSONL records from a HF dataset repo URL (data/golden_dataset.jsonl).\"\"\"\n from urllib.parse import urlparse\n\n from huggingface_hub import hf_hub_download\n\n parsed = urlparse(url)\n if \"huggingface.co\" not in parsed.netloc or \"/datasets/\" not in parsed.path:\n raise ValueError(f\"Not a HF dataset URL: {url}\")\n repo_id = parsed.path.split(\"/datasets/\")[1].strip(\"/\").split(\"/\")[0]\n path = hf_hub_download(\n repo_id=repo_id,\n filename=\"data/golden_dataset.jsonl\",\n repo_type=\"dataset\",\n )\n with open(path, encoding=\"utf-8\") as f:\n return [json.loads(line) for line in f if line.strip()]\n\n\ndef parse_pasted_jsonl(text: str) -> list:\n \"\"\"Parse pasted JSONL content into list of records.\"\"\"\n return [json.loads(line) for line in text.splitlines() if line.strip()]\n\n\ndef call_openai_compat(\n url: str, scenario: dict, api_key: str, model: str, timeout: int = 60\n) -> str:\n \"\"\"POST to an OpenAI-compatible /v1/chat/completions endpoint.\"\"\"\n import requests\n\n headers = {\"Content-Type\": \"application/json\"}\n if api_key.strip():\n headers[\"Authorization\"] = f\"Bearer {api_key.strip()}\"\n body = {\n \"messages\": [\n {\"role\": \"system\", \"content\": scenario.get(\"system_prompt\", \"\")},\n {\"role\": \"user\", \"content\": scenario[\"initial_message\"]},\n ],\n }\n if model.strip():\n body[\"model\"] = model.strip()\n r = requests.post(url, json=body, headers=headers, timeout=timeout)\n r.raise_for_status()\n data = r.json()\n return data[\"choices\"][0][\"message\"][\"content\"]\n\n\ndef build_trace_json(rec: dict, agent_response: str) -> str:\n \"\"\"Build a parseable trace JSON from a dataset record + agent response.\"\"\"\n scenario = rec.get(\"scenario\", {})\n return json.dumps(\n {\n \"session_id\": rec.get(\"id\", \"unknown\"),\n \"user_goal\": scenario.get(\"user_goal\", \"\"),\n \"system_prompt\": scenario.get(\"system_prompt\"),\n \"traces\": [\n {\n \"trace_id\": \"t1\",\n \"user_input\": scenario.get(\"initial_message\", \"\"),\n \"agent_response\": agent_response,\n }\n ],\n },\n ensure_ascii=False,\n )\n\n\ndef run_benchmark(\n dataset_url: str,\n pasted_jsonl: str,\n agent_url: str,\n api_key: str,\n model_name: str,\n use_session: bool,\n use_trace: bool,\n use_span: bool,\n sel_session: list,\n sel_trace: list,\n sel_span: list,\n threshold: float,\n progress=gr.Progress(track_tqdm=True),\n):\n \"\"\"Run benchmark: load dataset, call agent for each record, eval, aggregate.\"\"\"\n\n def render_status(phase: str, done: int, total: int, current_id: str = \"\") -> str:\n pct = int(done / total * 100) if total else 0\n current = f\"  ·  ⏳ {current_id}\" if current_id else \"\"\n return (\n f\"
\"\n f\"
\"\n f\"{phase}  ·  {done}/{total} ({pct}%){current}
\"\n f\"
\"\n f\"
\"\n f\"
\"\n )\n\n def render_table(rows: list) -> str:\n if not rows:\n return \"\"\n body = \"\"\n for r in rows:\n color = \"#4CAF50\" if r[\"passed\"] else \"#F44336\"\n icon = \"✅\" if r[\"passed\"] else \"⚠️\"\n score = r[\"score\"]\n score_str = f\"{score:.0%}\" if isinstance(score, float) else \"—\"\n err_cell = (\n f\"
{r['error']}
\"\n if r.get(\"error\")\n else \"\"\n )\n body += (\n \"\"\n f\"{r['id']}\"\n f\"{r['domain']}\"\n f\"{r['difficulty']}\"\n f\"{score_str} {icon}\"\n f\"{err_cell}\"\n \"\"\n )\n return (\n \"\"\n \"\"\n \"\"\n \"\"\n \"\"\n \"\"\n \"\"\n \"\" + body + \"
IDDomainDifficultyScoreError
\"\n )\n\n def render_aggregate(rows: list, total: int) -> str:\n scored = [r for r in rows if isinstance(r[\"score\"], float)]\n if not scored:\n return \"\"\n ok = sum(1 for r in scored if r[\"passed\"])\n avg = sum(r[\"score\"] for r in scored) / len(scored)\n by_domain: dict = {}\n for r in scored:\n d = r[\"domain\"] or \"—\"\n by_domain.setdefault(d, []).append(r[\"score\"])\n domain_chips = \" \".join(\n f\"\"\n f\"{d}: {sum(s)/len(s):.0%}\"\n for d, s in sorted(by_domain.items())\n )\n return (\n f\"
\"\n f\"
📊 Aggregate
\"\n f\"
\"\n f\"Passed: {ok}/{len(scored)} \"\n f\" ·  Avg: {avg:.0%}\"\n f\" ·  Threshold: {threshold:.0%}
\"\n f\"
{domain_chips}
\"\n )\n\n def panel(*htmls: str) -> str:\n return \"\".join(h for h in htmls if h)\n\n progress(0.02, desc=\"Loading dataset…\")\n yield panel(render_status(\"Loading dataset\", 0, 1)), \"📂 Loading dataset…\"\n try:\n if pasted_jsonl.strip():\n records = parse_pasted_jsonl(pasted_jsonl)\n source = \"pasted JSONL\"\n else:\n records = load_records_from_url(dataset_url.strip())\n source = dataset_url.strip()\n except Exception as e:\n err = f\"❌ Failed to load dataset: {e}\"\n yield (\n panel(f\"
{err}
\"),\n f\"ERROR: {e}\\nPaste JSONL directly if the URL is empty or unreachable.\",\n )\n return\n\n if not records:\n yield (\n panel(\"
⚠️ Dataset loaded but empty.
\"),\n \"No records found in source.\",\n )\n return\n\n total = len(records)\n log_lines = [f\"✅ Loaded {total} records from {source}\"]\n yield (\n panel(\n render_status(\"Loaded\", total, total),\n f\"
📂 {total} records loaded from {source}
\",\n ),\n \"\\n\".join(log_lines),\n )\n\n if not agent_url.strip():\n yield (\n panel(\"
❌ Agent URL is empty.
\"),\n \"ERROR: Provide an OpenAI-compatible chat completions URL.\",\n )\n return\n\n sess_evals = sel_session if use_session else []\n trace_evals = sel_trace if use_trace else []\n span_evals = sel_span if use_span else []\n runner = EvalRunner(\n selected_session_evals=sess_evals,\n selected_trace_evals=trace_evals,\n selected_span_evals=span_evals,\n threshold=threshold,\n mode=EvalMode.HEURISTIC,\n )\n\n results = []\n for i, rec in enumerate(records):\n rid = rec.get(\"id\", f\"rec_{i}\")\n domain = rec.get(\"domain\", \"\")\n difficulty = rec.get(\"difficulty\", \"\")\n progress(0.1 + 0.85 * i / total, desc=f\"Running {rid}…\")\n log_lines.append(f\"⏳ {rid} ({domain}/{difficulty})…\")\n yield (\n panel(render_status(\"Running\", i, total, rid), render_table(results)),\n \"\\n\".join(log_lines),\n )\n\n try:\n scenario = rec.get(\"scenario\") or {}\n agent_out = call_openai_compat(\n agent_url.strip(),\n scenario,\n api_key or \"\",\n model_name or \"\",\n timeout=60,\n )\n trace_json = build_trace_json(rec, agent_out)\n session = parse_trace(trace_json)\n gt_data = rec.get(\"ground_truth\") or {}\n gt = GroundTruth(\n expected_response=gt_data.get(\"expected_response\"),\n expected_trajectory=gt_data.get(\"expected_trajectory\"),\n assertions=gt_data.get(\"assertions\"),\n )\n report = runner.run(session, gt)\n score = report.overall_score\n results.append(\n {\n \"id\": rid,\n \"domain\": domain,\n \"difficulty\": difficulty,\n \"score\": score,\n \"passed\": score >= threshold,\n \"error\": None,\n }\n )\n log_lines[-1] = f\"✅ {rid} — {score:.0%}\"\n except Exception as e:\n results.append(\n {\n \"id\": rid,\n \"domain\": domain,\n \"difficulty\": difficulty,\n \"score\": None,\n \"passed\": False,\n \"error\": f\"{type(e).__name__}: {str(e)[:80]}\",\n }\n )\n log_lines[-1] = f\"✗ {rid} — {type(e).__name__}: {str(e)[:60]}\"\n\n yield (\n panel(render_status(\"Running\", i + 1, total), render_table(results)),\n \"\\n\".join(log_lines),\n )\n\n progress(1.0, desc=\"Done!\")\n yield (\n panel(\n render_status(\"Done\", total, total),\n render_table(results),\n render_aggregate(results, total),\n ),\n \"\\n\".join(log_lines),\n )\n\n\n# ─── Main evaluation function ────────────────────────────────────────────────\n\n\ndef render_reliability(rel_report, k: int) -> str:\n \"\"\"Render pass@k / pass^k as an HTML table.\"\"\"\n if not rel_report or not rel_report.evaluator_results:\n return \"\"\n rows = rel_report.summary_table()\n verdict_style = {\n \"reliable\": (\"#4CAF50\", \"✅\"),\n \"unstable\": (\"#FF9800\", \"⚠️\"),\n \"unreliable\": (\"#F44336\", \"❌\"),\n }\n header = (\n f\"

\"\n f\"🔄 Reliability Testing — k={k} trials

\"\n f\"
\"\n f\"pass@{k} = P(≥1 of {k} trials passes) — optimistic bound  | \"\n f\"pass^{k} = P(ALL {k} trials pass) — reliability estimate
\"\n )\n table = (\n \"\"\n \"\"\n f\"\"\n f\"\"\n f\"\"\n f\"\"\n f\"\"\n \"\"\n )\n for r in rows:\n color, icon = verdict_style.get(r[\"Verdict\"], (\"#888\", \"?\"))\n table += (\n f\"\"\n f\"\"\n f\"\"\n f\"\"\n f\"\"\n f\"\"\n \"\"\n )\n table += \"
EvaluatorAvgpass@{k}pass^{k}Verdict
{r['Evaluator']}{r['Avg Score']}{r[f'pass@{k}']}{r[f'pass^{k}']}{icon} {r['Verdict']}
\"\n\n summary = (\n f\"
\"\n f\"Overall — pass@{k}: {rel_report.overall_pass_at_k:.0%}\"\n f\"  | pass^{k}: {rel_report.overall_pass_hat_k:.0%}\"\n f\"  | avg score: {rel_report.avg_score:.0%}
\"\n )\n return header + table + summary\n\n\ndef run_evaluation(\n trace_json: str,\n use_session: bool,\n use_trace: bool,\n use_span: bool,\n sel_session: list,\n sel_trace: list,\n sel_span: list,\n threshold: float,\n k_trials: int,\n eval_mode_radio: str,\n hf_token: str,\n exp_response: str,\n exp_trajectory: str,\n assertions_text: str,\n progress=gr.Progress(track_tqdm=True),\n):\n # ── 1. Parse input ────────────────────────────────────────────────────\n progress(0.05, desc=\"Parsing trace…\")\n try:\n session = parse_trace(trace_json)\n except Exception as e:\n err = (\n f\"
Parse error: {e}
\"\n )\n return err, None, None, None, err\n\n # ── 2. Build ground truth ─────────────────────────────────────────────\n gt = None\n if exp_response.strip() or exp_trajectory.strip() or assertions_text.strip():\n traj = (\n [t.strip() for t in exp_trajectory.split(\",\") if t.strip()]\n if exp_trajectory.strip()\n else None\n )\n asrt = (\n [a.strip() for a in assertions_text.splitlines() if a.strip()]\n if assertions_text.strip()\n else None\n )\n gt = GroundTruth(\n expected_response=exp_response.strip() or None,\n expected_trajectory=traj,\n assertions=asrt,\n )\n\n # ── 3. Resolve selected evaluators ───────────────────────────────────\n sess_evals = sel_session if use_session else []\n trace_evals = sel_trace if use_trace else []\n span_evals = sel_span if use_span else []\n\n if not sess_evals and not trace_evals and not span_evals:\n warn = \"
⚠️ No evaluators selected — please enable at least one level.
\"\n return warn, None, None, None, warn\n\n # ── 4. Build LLM judge (if requested) ────────────────────────────────\n use_llm = eval_mode_radio == \"LLM Judge (QwQ-32B)\"\n mode = EvalMode.LLM if use_llm else EvalMode.HEURISTIC\n judge = None\n if use_llm:\n token = hf_token.strip() or None\n judge = LLMJudge(api_key=token)\n if not judge.available:\n warn = \"
⚠️ LLM mode selected but no HF Token provided — falling back to heuritic.
\"\n mode = EvalMode.HEURISTIC\n\n # ── 5. Run evaluation (single or k trials) ─────────────────────────────\n progress(0.15, desc=\"Running evalua", + "app_signals": "_zero_gpu_healthcheck _load_demo name _bar_color score _bg_color render_score_card render_overall_banner report parse_and_preview trace_json load_records_from_url url parse_pasted_jsonl text call_openai_compat scenario api_key model timeout build_trace_json rec agent_response run_benchmark dataset_url pasted_jsonl agent_url model_name use_session use_trace use_span sel_session sel_trace sel_span threshold progress render_reliability rel_report k run_evaluation k_trials eval_mode_radio hf_token exp_response exp_trajectory assertions_text AI Agent Evaluation Pipeline — Gradio MVP ========================================== Evaluate AI agents at 3 hierarchical levels, inspired by Amazon Bedrock AgentCore Evaluations. 📦 Session — Did the agent achieve the user's goal? 🔄 Trace — Per-turn quality (11 evaluators) 🔧 Span — Per tool-call accuracy (2 evaluators) Run locally : python app.py HuggingFace : app_file = app.py (Gradio SDK) sys.path.insert level_chip label avg icon level render_status phase done total current_id render_table rows render_aggregate panel 🧪 AI Agent Evaluation Pipeline Evaluate AI agents at Session , Trace , and Span levels — inspired by Amazon Bedrock AgentCore Evaluations ### How it works | Level | Scope | Evaluators | |-------|-------|------------| | 📦 **Session** | Full conversation | Goal Success Rate | | 🔄 **Trace** | Per turn (user → agent) | Helpfulness, Correctness, Coherence, Conciseness, Faithfulness, Harmfulness, Instruction Following, Response Relevance, Context Relevance, Refusal, Stereotyping | | 🔧 **Span** | Per tool call | Tool Selection Accuracy, Tool Parameter Accuracy | **Modes:** `heuristic` (offline, no API key) · `llm` (LLM-as-judge, coming soon) **JSON format:** `session_id`, `user_goal`, `system_prompt`(opt), `traces[]` → `trace_id`, `user_input`, `agent_response`, `spans[]` _preview_dataset paste Path str _spaces_stub Placeholder GPU function detected by the ZeroGPU runtime. demos simple_qa tool_calling multi_turn #9B59B6 #3498DB #27AE60 📦 🔄 🔧 #F44336 rgba(244,67,54,0.12) _LEVEL_COLOR.get _LEVEL_ICON.get sum len join Load JSONL records from a HF dataset repo URL (data/golden_dataset.jsonl). urlparse hf_hub_download repo_id filename repo_type Parse pasted JSONL content into list of records. POST to an OpenAI-compatible /v1/chat/completions endpoint. api_key.strip model.strip requests.post json headers r.raise_for_status r.json Build a parseable trace JSON from a dataset record + agent response. rec.get json.dumps ensure_ascii gr.Progress track_tqdm Run benchmark: load dataset, call agent for each record, eval, aggregate. desc EvalRunner selected_session_evals selected_trace_evals selected_span_evals mode enumerate Render pass@k / pass^k as an HTML table. rel_report.summary_table int llm_judge report.avg_score_by_evaluator create_radar_chart create_bar_chart create_trace_timeline gr.Blocks title gr.HTML padding bm_load_btn.click inputs outputs bm_run_btn.click fn run_btn.click __main__ demo.launch theme css server_name server_port share show_error GPU duration p.exists p.read_text encoding {} #4CAF50 rgba(76,175,80,0.12) #888
%
 ·  PASS ✅ NEEDS REVIEW ⚠️ OVERALL SCORE
/ evaluators passed  ·  turn(s)  ·  s  ·  mode
;height:6px;border-radius:4px;width: %; transition:width 0.5s ease;\"> *Paste or load a JSON trace above to see a preview.* parse_trace format_trace_tree ValueError split open json.lo ... ef without verbosity? | | **Faithfulness** | TRACE | Is the response consistent with conversation history / context? | | **Harmfulness** | TRACE | Does the response contain harmful or dangerous content? | | **Instruction Following** | TRACE | Does the agent follow its system prompt instructions? | | **Response Relevance** | TRACE | Does the response directly address what was asked? | | **Context Relevance** | TRACE | Was the retrieved context relevant to the query? (RAG) | | **Refusal Appropriateness** | TRACE | Did the agent correctly handle what to refuse? | | **Stereotyping / Bias** | TRACE | Is there stereotypical or demographic bias? | | **Tool Selection Accuracy** | SPAN | Did the agent choose the right tool? | | **Tool Parameter Accuracy** | SPAN | Did the agent pass correct parameters to the tool? | ### Roadmap - [x] LLM-as-Judge mode (HuggingFace Inference API) - [ ] OpenAI-compatible API support - [x] pass@k / pass^k reliability metrics - [ ] Export results as JSON / CSV - [ ] Custom evaluator builder (prompt templates) - [x] Dataset management for regression testing (🧪 Benchmark tab) url.strip ⚠️ Loaded 0 records. 📂 records loaded from Domains: os.getenv initial_message choices trace_id user_input t1 passed error by_domain.setdefault Loading dataset ⚠️ Dataset loaded but empty. Loaded ❌ Agent URL is empty. Running … ground_truth gt_data.get ✗ Done Evaluator Avg Score exp_trajectory.split assertions_text.splitlines trials… by_trace.setdefault by_span.setdefault 🎓 Simple Q&A 🔧 Tool Calling 🔄 Multi-turn + Tools Agent Trace (JSON) 🌲 Trace Preview 📖 JSON Schema Reference gr.Column gr.Checkbox gr.Radio info placeholder type visible eval_mode_radio.change gr.Slider minimum maximum step gr.CheckboxGroup 📋 Ground Truth (Optional — improves scoring precision) Providing reference inputs enables ground-truth-based evaluation (mirrors AgentCore's `expected_response`, `expected_trajectory`, and `assertions`). primary run-btn lg 🗋️ Score Heatmap: Evaluators × Turns Load a dataset and click Run Benchmark to start. purple blue PORT by_domain.items ERROR: Paste JSONL directly if the URL is empty or unreachable. pass@ , sm secondary indent **Evaluation Levels** **🤖 Evaluation Mode** **Pass Threshold** **🔄 Reliability Testing (pass@k / pass^k)** **📦 Session Evaluators** *(once per session)* **🔄 Trace Evaluators** *(once per conversation turn)* **🔧 Span Evaluators** *(once per tool call)* 🕸️ Evaluator Scores (Radar) 📊 Score Breakdown by Evaluator **📦 Dataset** 🔄 Load Dataset No dataset loaded yet. **🤖 Agent (OpenAI-compatible)** **⚙️ Eval settings** 🚀 Run Benchmark Log parsed.path.split 🔄 Trace Level 🔧 Span Level (tool calls) Heuristic (offline) LLM mode requires a HuggingFace token with QwQ-32B access HF Token hf_... password Minimum score to pass Scores ≥ threshold are marked ✅ passed Trials (k) k=1 → standard mode. k>1 → runs multiple trials, shows pass@k & pass^k. HF Dataset URL (loads data/golden_dataset.jsonl) https://huggingface.co/datasets/build-small-hackathon/agent-eval-golden-dataset https://huggingface.co/datasets/... 📝 Or paste JSONL directly Chat completions URL https://your-agent.example.com/v1/chat/completions API Key (optional) Bearer xyz Model name (optional, sent in body if provided) gpt-4o-mini Session evaluators Trace evaluators Span evaluators Pass threshold copy my_session Describe the overall goal of the user (optional) System instructions given to the agent gr.update Expected Response What should the final agent response look like? Expected Tool Trajectory (comma-separated tool names) search_restaurants, create_reservation Assertions (one per line) A restaurant reservation was made Confirmation number was provided The restaurant matches user preferences JSONL records {\"id\":\"python_001\",\"scenario\":{...},\"ground_truth\":{...}} ... retrieved_context spans User's message Agent's reply (optional) RAG context span_id span_type tool_name tool_input tool_output duration_ms s1 TOOL_CALL my_tool Tool result string param", + "readme_len": 7158, + "app_source_len": 24000, + "app_signals_len": 7999 + }, + { + "id": "build-small-hackathon/AI-Puppet-Theater", + "title": "AI Puppet Theater", + "summary": "", + "tags": [ + "gradio", + "region:us" + ], + "models": [], + "datasets": [], + "sdk": "gradio", + "license": "", + "likes": 1, + "url": "https://huggingface.co/spaces/build-small-hackathon/AI-Puppet-Theater", + "app_file": "app.py", + "readme_raw": "---\ntitle: AI Puppet Theater\nemoji: 🎭\ncolorFrom: yellow\ncolorTo: purple\nsdk: gradio\nsdk_version: 6.5.1\napp_file: app.py\npython_version: \"3.11\"\npinned: false\n---\n\nAI Puppet Theater is a public Gradio Space for building short interactive puppet shows from a user premise.\n", + "readme_body": "AI Puppet Theater is a public Gradio Space for building short interactive puppet shows from a user premise.", + "readme_frontmatter": { + "title": "AI Puppet Theater", + "emoji": "🎭", + "colorFrom": "yellow", + "colorTo": "purple", + "sdk": "gradio", + "sdk_version": "6.5.1", + "app_file": "app.py", + "python_version": "3.11", + "pinned": "false" + }, + "app_source": "from html import escape\nimport os\nfrom time import sleep\n\nimport gradio as gr\n\nfrom puppet_theater import (\n DEFAULT_OPENBMB_MODEL_ID,\n TheaterSession,\n create_show_from_premise,\n get_backend_status,\n request_finale,\n run_one_beat,\n summon_actor,\n throw_prop,\n warm_up_openbmb,\n)\n\n\nEMPTY_STAGE = \"\"\"\n
\n
\n
\n
AI Puppet Theater
\n
Enter a premise and raise the curtain.
\n
\n
\n
\n\"\"\"\n\nEMPTY_TRANSCRIPT = \"No show yet. The transcript will appear here.\"\nEMPTY_DIRECTOR_LOG = \"No director notes yet.\"\nEMPTY_TRACE = \"No trace events yet.\"\nEMPTY_BACKEND = (\n \"Active backend: deterministic\\n\"\n \"OpenBMB model id: openbmb/MiniCPM5-1B\\n\"\n \"Model status: unloaded\\n\"\n \"Fallback: deterministic safety path enabled\"\n)\nBACKEND_CHOICES = [\"deterministic\", \"openbmb\"]\nOPENBMB_MODEL_ID = os.getenv(\"OPENBMB_MODEL_ID\", DEFAULT_OPENBMB_MODEL_ID)\nDEFAULT_MAX_NEW_TOKENS = 80\nDEFAULT_TEMPERATURE = 0.8\nPLAYBACK_DELAY_SECONDS = 0.75\nPROP_EMOJI = {\n \"rubber duck\": \"🐤\",\n \"duck\": \"🐤\",\n \"egg\": \"🥚\",\n \"flowers\": \"💐\",\n \"flower\": \"💐\",\n \"tomato\": \"🍅\",\n \"crown\": \"👑\",\n \"tiny crown\": \"👑\",\n \"scroll\": \"📜\",\n \"banana\": \"🍌\",\n \"mirror\": \"🪞\",\n}\n\nCUSTOM_CSS = \"\"\"\nbody,\n.gradio-container {\n background:\n radial-gradient(circle at 50% 0%, rgba(127, 29, 29, 0.18), transparent 28rem),\n linear-gradient(180deg, #0b1020 0%, #070914 100%) !important;\n color: #f8efe4 !important;\n}\n.gradio-container {\n max-width: 1180px !important;\n padding-top: 1rem !important;\n font-family: Inter, ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, \"Segoe UI\", sans-serif;\n}\n.gradio-container .prose,\n.gradio-container label,\n.gradio-container span,\n.gradio-container p {\n color: #f8efe4;\n}\n.gradio-container textarea,\n.gradio-container input {\n background: rgba(10, 12, 23, 0.82) !important;\n border-color: rgba(246, 196, 83, 0.24) !important;\n color: #f8efe4 !important;\n}\n.gradio-container textarea::placeholder,\n.gradio-container input::placeholder {\n color: #9f8c7a !important;\n}\n.gradio-container footer {\n color: rgba(203, 183, 161, 0.62) !important;\n}\n.gradio-container .block,\n.gradio-container .form,\n.gradio-container .panel,\n.gradio-container .tabs,\n.gradio-container .tabitem {\n background: rgba(34, 17, 31, 0.56) !important;\n border-color: rgba(246, 196, 83, 0.18) !important;\n}\n.gradio-container label,\n.gradio-container .block-title,\n.gradio-container .label-wrap {\n color: #f8efe4 !important;\n}\n.gradio-container .block-info,\n.gradio-container .label-wrap span,\n.gradio-container label > span {\n background: rgba(34, 17, 31, 0.88) !important;\n border: 1px solid rgba(246, 196, 83, 0.28) !important;\n border-radius: 6px !important;\n color: #ffd166 !important;\n font-weight: 700 !important;\n}\n.gradio-container .wrap,\n.gradio-container .styler,\n.gradio-container .form,\n.gradio-container .form > *,\n.gradio-container .block > div {\n background-color: transparent !important;\n}\n.gradio-container select,\n.gradio-container [role=\"listbox\"],\n.gradio-container [role=\"combobox\"] {\n background: rgba(10, 12, 23, 0.82) !important;\n border-color: rgba(246, 196, 83, 0.24) !important;\n color: #f8efe4 !important;\n}\n.app-title h1 {\n color: #f8efe4;\n font-family: Georgia, \"Times New Roman\", serif;\n font-size: 2.15rem;\n letter-spacing: 0;\n margin-bottom: 0;\n text-align: center;\n}\n.app-title p {\n color: #cbb7a1;\n font-size: 0.95rem;\n margin: 0.15rem 0 0.8rem;\n text-align: center;\n}\n.gradio-container h3,\n.gradio-container h3 span,\n.gradio-container .prose h3,\n.gradio-container .prose h3 span {\n color: #f8efe4 !important;\n}\n.premise-panel {\n background: rgba(42, 20, 38, 0.72);\n border-color: rgba(246, 196, 83, 0.3);\n box-shadow: 0 16px 32px rgba(0, 0, 0, 0.2);\n padding: 0.55rem 0.65rem 0.65rem;\n}\n.premise-panel .block,\n.premise-panel .wrap,\n.premise-panel .styler,\n.premise-panel .form,\n.premise-panel .block > div {\n background: rgba(42, 20, 38, 0.78) !important;\n}\n.control-panel {\n background: rgba(34, 17, 31, 0.76);\n border: 1px solid rgba(246, 196, 83, 0.22);\n border-radius: 8px;\n box-shadow: 0 14px 34px rgba(0, 0, 0, 0.22);\n padding: 0.55rem;\n}\n.control-panel .block,\n.control-panel .wrap,\n.control-panel .styler,\n.control-panel .form,\n.control-panel .block > div {\n background: rgba(34, 17, 31, 0.78) !important;\n}\n.control-panel .row,\n.premise-panel .row {\n background: transparent !important;\n}\n.control-panel h3 {\n color: #f8efe4;\n margin: 0 0 0.35rem;\n font-size: 1rem;\n}\n.control-panel .prose,\n.control-panel .prose h3,\n.control-panel h3 * {\n color: #f8efe4 !important;\n}\n.puppet-stage {\n min-height: 430px;\n border: 5px solid #3b0a16;\n border-radius: 14px;\n background:\n linear-gradient(90deg, rgba(59, 10, 22, 0.98) 0 10%, transparent 10% 90%, rgba(59, 10, 22, 0.98) 90% 100%),\n linear-gradient(180deg, rgba(42, 20, 38, 0.96), rgba(13, 6, 14, 0.98));\n color: #f8efe4;\n display: flex;\n flex-direction: column;\n align-items: stretch;\n justify-content: stretch;\n position: relative;\n overflow: hidden;\n box-shadow:\n 0 24px 48px rgba(0, 0, 0, 0.38),\n inset 0 0 42px rgba(0, 0, 0, 0.58);\n}\n.puppet-stage::before,\n.puppet-stage::after {\n content: \"\";\n position: absolute;\n top: 0;\n bottom: 0;\n width: 13%;\n background:\n repeating-linear-gradient(90deg, rgba(255, 255, 255, 0.04) 0 14px, transparent 14px 28px),\n linear-gradient(180deg, #8b1e3f 0%, #7f1d1d 54%, #3b0a16 100%);\n box-shadow: inset -16px 0 28px rgba(0, 0, 0, 0.22);\n z-index: 2;\n}\n.puppet-stage::before {\n left: 0;\n}\n.puppet-stage::after {\n right: 0;\n transform: scaleX(-1);\n}\n.stage-valance {\n height: 48px;\n background:\n repeating-linear-gradient(90deg, rgba(255, 255, 255, 0.06) 0 22px, transparent 22px 44px),\n linear-gradient(180deg, #8b1e3f 0%, #7f1d1d 100%);\n border-bottom: 4px solid #f6c453;\n box-shadow: 0 10px 20px rgba(0, 0, 0, 0.34);\n position: relative;\n z-index: 3;\n}\n.stage-backdrop {\n background:\n radial-gradient(circle at 50% 8%, rgba(255, 224, 150, 0.28), transparent 19rem),\n radial-gradient(circle at 24% 58%, rgba(255, 224, 150, 0.12), transparent 14rem),\n linear-gradient(180deg, #2a1426 0%, #22111f 62%, #130911 100%);\n flex: 1;\n padding: 0.72rem 7.2rem 0.8rem;\n position: relative;\n z-index: 1;\n}\n.stage-backdrop::after {\n background: linear-gradient(180deg, transparent 0%, rgba(124, 63, 23, 0.46) 100%);\n bottom: 0;\n content: \"\";\n height: 32%;\n left: 0;\n position: absolute;\n right: 0;\n}\n.stage-marquee {\n color: #fff7ed;\n font-family: Georgia, \"Times New Roman\", serif;\n font-size: 1.6rem;\n font-weight: 700;\n letter-spacing: 0;\n text-align: center;\n text-shadow: 0 4px 18px rgba(0, 0, 0, 0.72);\n position: relative;\n z-index: 2;\n overflow-wrap: anywhere;\n}\n.stage-copy {\n max-width: 54rem;\n color: #cbb7a1;\n font-size: 0.84rem;\n line-height: 1.35;\n margin: 0.25rem auto 0;\n text-align: center;\n position: relative;\n z-index: 2;\n}\n.stage-copy strong {\n color: #f8efe4;\n}\n.empty-stage-copy {\n color: #cbb7a1;\n font-size: 1rem;\n margin-top: 5.8rem;\n text-align: center;\n position: relative;\n z-index: 2;\n}\n.stage-floorboards {\n height: 58px;\n background:\n repeating-linear-gradient(90deg, rgba(255, 255, 255, 0.08) 0 2px, transparent 2px 72px),\n linear-gradient(180deg, #8a4b22 0%, #7c3f17 100%);\n border-top: 2px solid rgba(246, 196, 83, 0.28);\n position: relative;\n z-index: 3;\n}\n.speech-bubble {\n animation: bubble-in 0.24s ease-out;\n background: rgba(18, 10, 18, 0.82);\n border: 1px solid rgba(246, 196, 83, 0.5);\n border-radius: 16px;\n box-shadow: 0 18px 30px rgba(0, 0, 0, 0.34);\n color: #f8efe4;\n margin: 0.55rem auto 0;\n max-width: 46rem;\n padding: 0.72rem 0.95rem;\n position: relative;\n text-align: center;\n z-index: 4;\n}\n.speech-bubble::after {\n border-left: 10px solid transparent;\n border-right: 10px solid transparent;\n border-top: 12px solid rgba(246, 196, 83, 0.5);\n bottom: -12px;\n content: \"\";\n left: 50%;\n position: absolute;\n transform: translateX(-50%);\n}\n.speech-speaker {\n color: #ffd166;\n font-size: 0.78rem;\n font-weight: 800;\n letter-spacing: 0.08em;\n margin-bottom: 0.18rem;\n text-transform: uppercase;\n}\n.speech-line {\n color: #f8efe4;\n font-size: 0.96rem;\n line-height: 1.35;\n}\n.actor-row {\n display: grid;\n grid-template-columns: repeat(auto-fit, minmax(132px, 1fr));\n gap: 0.55rem;\n margin-top: 0.72rem;\n position: relative;\n z-index: 3;\n}\n.actor-card {\n background: rgba(70, 38, 36, 0.72);\n border: 1px solid rgba(246, 196, 83, 0.45);\n border-radius: 16px 16px 10px 10px;\n box-shadow: 0 14px 28px rgba(0, 0, 0, 0.28);\n min-height: 132px;\n padding: 0.58rem 0.62rem 0.72rem;\n position: relative;\n transform-origin: bottom center;\n text-align: center;\n}\n.actor-card::after {\n background: #7c3f17;\n border-radius: 0 0 8px 8px;\n bottom: -22px;\n box-shadow: inset 0 -5px 8px rgba(0, 0, 0, 0.2);\n content: \"\";\n height: 22px;\n left: calc(50% - 8px);\n position: absolute;\n width: 16px;\n}\n.actor-card.active {\n animation: puppet-bounce 0.78s ease-in-out infinite alternate;\n border-color: #ffd166;\n box-shadow:\n 0 0 0 2px rgba(255, 209, 102, 0.22),\n 0 0 34px rgba(255, 209, 102, 0.46),\n 0 16px 34px rgba(0, 0, 0, 0.34);\n}\n.actor-avatar {\n background: radial-gradient(circle, rgba(255, 209, 102, 0.2), rgba(59, 10, 22, 0.3));\n border: 1px solid rgba(246, 196, 83, 0.34);\n border-radius: 999px;\n display: inline-grid;\n font-size: 1.7rem;\n height: 3rem;\n place-items: center;\n text-align: center;\n width: 3rem;\n}\n.actor-name {\n color: #f8efe4;\n font-weight: 700;\n line-height: 1.15;\n margin-top: 0.35rem;\n text-align: center;\n}\n.speaking-pill {\n background: #ffd166;\n border-radius: 999px;\n color: #3b0a16;\n display: inline-block;\n font-size: 0.64rem;\n font-weight: 800;\n margin-top: 0.26rem;\n padding: 0.12rem 0.44rem;\n text-transform: uppercase;\n}\n.actor-detail {\n color: #cbb7a1;\n font-size: 0.72rem;\n line-height: 1.28;\n margin-top: 0.35rem;\n}\n.actor-detail strong {\n color: #f8efe4;\n}\n.held-prop {\n margin-top: 0.42rem;\n}\n.held-prop span {\n background: rgba(246, 196, 83, 0.14);\n border: 1px solid rgba(246, 196, 83, 0.32);\n border-radius: 999px;\n color: #ffd166;\n display: inline-block;\n font-size: 0.68rem;\n font-weight: 700;\n padding: 0.12rem 0.42rem;\n}\n.beat-counter {\n color: #ffd166;\n font-weight: 800;\n margin-top: 0.55rem;\n position: relative;\n text-align: center;\n z-index: 3;\n}\n.stage-events {\n display: grid;\n gap: 0.4rem;\n margin-top: 0.55rem;\n position: relative;\n z-index: 3;\n}\n.audience-action,\n.prop-pile {\n background: rgba(42, 20, 38, 0.7);\n border: 1px solid rgba(246, 196, 83, 0.25);\n border-radius: 999px;\n color: #f8efe4;\n margin: 0 auto;\n max-width: 48rem;\n padding: 0.38rem;\n text-align: center;\n width: 100%;\n}\n.audience-action strong,\n.prop-pile strong {\n color: #ffd166;\n}\n.prop-token {\n animation: prop-pop 0.22s ease-out;\n background: rgba(246, 196, 83, 0.17);\n border: 1px solid rgba(246, 196, 83, 0.5);\n border-radius: 999px;\n color: #fff7ed;\n display: inline-block;\n margin: 0.2rem;\n padding: 0.22rem 0.55rem;\n}\n.gradio-container button.primary,\n.gradio-container button.primary-action,\n.gradio-container button.run-one-action {\n background: #f97316 !important;\n border-color: #f97316 !important;\n box-shadow: 0 10px 24px rgba(249, 115, 22, 0.25) !important;\n color: #fff7ed !important;\n}\n.gradio-container button.secondary,\n.gradio-container button.secondary-action,\n.gradio-container button.audience-action-button {\n background: #3f3148 !important;\n border-color: rgba(246, 196, 83, 0.22) !important;\n color: #f8efe4 !important;\n}\n.gradio-container button.reset-action {\n background: #3b0a16 !important;\n border-color: rgba(246, 196, 83, 0.24) !important;\n color: #f8efe4 !important;\n}\n.transcript-box,\n.gradio-container .accordion {\n background: rgba(13, 6, 14, 0.58) !important;\n border-color: rgba(246, 196, 83, 0.18) !important;\n color: #f8efe4 !important;\n}\n@keyframes puppet-bounce {\n from { transform: translateY(0) rotate(-0.4deg); }\n to { transform: translateY(-7px) rotate(0.7deg); }\n}\n@keyframes bubble-in {\n from { opacity: 0; transform: translateY(8px); }\n to { opacity: 1; transform: translateY(0); }\n}\n@keyframes prop-pop {\n from { opacity: 0; transform: scale(0.86); }\n to { opacity: 1; transform: scale(1); }\n}\n@media (max-width: 760px) {\n .puppet-stage {\n min-height: 560px;\n }\n .puppet-stage::before,\n .puppet-stage::after {\n width: 7%;\n }\n .stage-backdrop {\n padding: 0.8rem 1.4rem;\n }\n .stage-marquee {\n font-size: 1.2rem;\n }\n .actor-row {\n grid-template-columns: repeat(2, minmax(0, 1fr));\n }\n .actor-card {\n min-height: 126px;\n }\n}\n\n/* Final Gradio chrome overrides: keep the whole app in the theater palette. */\n.gradio-container {\n width: min(1200px, calc(100vw - 2rem)) !important;\n}\n.gradio-container .gr-group {\n background: rgba(34, 17, 31, 0.84) !important;\n border: 1px solid rgba(246, 196, 83, 0.2) !important;\n border-radius: 8px !important;\n color: #f8efe4 !important;\n}\n.gradio-container .gr-group .form,\n.gradio-container .gr-group .block,\n.gradio-container .gr-group .wrap,\n.gradio-container .gr-group .wrap-inner,\n.gradio-container .gr-group .secondary-wrap,\n.gradio-container .gr-group .input-container,\n.gradio-container .gr-group label {\n background: transparent !important;\n color: #f8efe4 !important;\n}\n.gradio-container input,\n.gradio-container textarea,\n.gradio-container select,\n.gradio-container .dropdown-container,\n.gradio-container .wrap-inner {\n background: rgba(10, 12, 23, 0.9) !important;\n color: #f8efe4 !important;\n}\n.gradio-container .control-panel input,\n.gradio-container .control-panel textarea,\n.gradio-container .control-panel .wrap-inner,\n.gradio-container .premise-panel textarea {\n border: 1px solid rgba(246, 196, 83, 0.24) !important;\n}\n.gradio-container button {\n background: #3f3148 !important;\n border: 1px solid rgba(246, 196, 83, 0.24) !important;\n color: #f8efe4 !important;\n}\n.gradio-container button.primary,\n.gradio-container button.primary-action,\n.gradio-container button.run-one-action {\n background: #f97316 !important;\n border-color: #f97316 !important;\n color: #fff7ed !important;\n}\n.gradio-container button.reset-action {\n background: #3b0a16 !important;\n border-color: rgba(246, 196, 83, 0.32) !important;\n}\n.gradio-container .html-container,\n.gradio-container .gradio-style {\n width: 100% !important;\n}\n.puppet-stage {\n min-height: 500px;\n width: 100%;\n}\n.puppet-stage::before,\n.puppet-stage::after {\n width: clamp(56px, 9%, 110px);\n}\n.stage-backdrop {\n padding: 0.78rem clamp(4.1rem, 11vw, 8.8rem) 0.72rem;\n}\n.stage-marquee {\n font-size: clamp(1.25rem, 2.1vw, 1.72rem);\n white-space: normal;\n}\n.speech-bubble {\n margin-top: 0.48rem;\n max-width: 44rem;\n padding: 0.58rem 0.82rem;\n}\n.actor-row {\n align-items: end;\n grid-template-columns: repeat(auto-fit, minmax(116px, 1fr));\n gap: 0.62rem;\n margin-top: 0.82rem;\n}\n.actor-card {\n align-content: start;\n background: radial-gradient(circle at 50% 18%, rgba(246, 196, 83, 0.13), rgba(70, 38, 36, 0.72) 58%);\n border-radius: 18px;\n display: grid;\n justify-items: center;\n min-height: 108px;\n padding: 0.5rem 0.45rem 0.56rem;\n}\n.actor-card::after {\n bottom: -20px;\n height: 20px;\n width: 14px;\n}\n.actor-avatar {\n font-size: 2rem;\n height: 3.3rem;\n width: 3.3rem;\n}\n.actor-name {\n font-size: 0.82rem;\n margin-top: 0.28rem;\n}\n.actor-detail {\n display: -webkit-box;\n font-size: 0.66rem;\n line-height: 1.18;\n margin-top: 0.2rem;\n max-width: 11rem;\n min-height: 1.55rem;\n overflow: hidden;\n -webkit-box-orient: vertical;\n -webkit-line-clamp: 2;\n}\n.held-prop {\n margin-top: 0.26rem;\n}\n.held-prop span {\n font-size: 0.62rem;\n padding: 0.08rem 0.34rem;\n}\n.speaking-pill {\n font-size: 0.58rem;\n margin-top: 0.18rem;\n padding: 0.08rem 0.36rem;\n}\n.stage-events {\n gap: 0.32rem;\n margin-top: 0.64rem;\n}\n.audience-action,\n.prop-pile {\n max-width: 45rem;\n padding: 0.3rem 0.55rem;\n}\n@media (max-width: 760px) {\n .gradio-container {\n width: min(100vw, calc(100vw - 0.75rem)) !important;\n }\n .puppet-stage::before,\n .puppet-stage::after {\n width: 30px;\n }\n .stage-backdrop {\n padding: 0.75rem 2.45rem;\n }\n .actor-row {\n grid-template-columns: repeat(2, minmax(0, 1fr));\n gap: 0.45rem;\n }\n .actor-card {\n min-height: 102px;\n padding-left: 0.28rem;\n padding-right: 0.28rem;\n }\n}\n\n/* Compact stage pass: keep the theater look, reduce scrolling, and keep controls close. */\n.gradio-container {\n padding-top: 0.65rem !important;\n}\n.app-title h1 {\n font-size: 1.95rem;\n}\n.app-title p {\n margin-bottom: 0.55rem;\n}\n.premise-panel {\n padding: 0.42rem 0.55rem 0.52rem;\n}\n.stage-output,\n.stage-output .html-container,\n.stage-output .gradio-style {\n margin-bottom: 0 !important;\n}\n.puppet-stage {\n min-height: 390px;\n}\n.stage-valance {\n height: 34px;\n border-bottom-width: 3px;\n}\n.stage-backdrop {\n padding: 0.48rem clamp(3.9rem, 9vw, 7.3rem) 0.46rem;\n}\n.stage-marquee {\n font-size: clamp(1.15rem, 1.9vw, 1.52rem);\n}\n.stage-copy {\n font-size: 0.76rem;\n line-height: 1.25;\n margin-top: 0.14rem;\n}\n.speech-bubble {\n border-radius: 12px;\n margin-top: 0.34rem;\n max-width: 40rem;\n padding: 0.42rem 0.7rem;\n}\n.speech-speaker {\n font-size: 0.68rem;\n}\n.speech-line {\n font-size: 0.86rem;\n}\n.actor-row {\n grid-template-columns: repeat(auto-fit, minmax(104px, 1fr));\n gap: 0.5rem;\n margin-top: 0.55rem;\n}\n.actor-card {\n border-radius: 14px;\n min-height: 88px;\n padding: 0.38rem 0.36rem 0.44rem;\n}\n.actor-card::after {\n bottom: -16px;\n height: 16px;\n}\n.actor-avatar {\n font-size: 1.65rem;\n height: 2.55rem;\n width: 2.55rem;\n}\n.actor-name {\n font-size: 0.74rem;\n margin-top: 0.2rem;\n}\n.actor-detail {\n font-size: 0.6rem;\n line-height: 1.12;\n margin-top: 0.14rem;\n min-height: 1.35rem;\n}\n.speaking-pill {\n font-size: 0.52rem;\n margin-top: 0.14rem;\n}\n.held-prop {\n margin-top: 0.18rem;\n}\n.held-prop span {\n font-size: 0.55rem;\n}\n.stage-events {\n gap: 0.24rem;\n margin-top: 0.46rem;\n}\n.audience-action,\n.prop-pile {\n font-size: 0.78rem;\n max-width: 39rem;\n padding: 0.22rem 0.5rem;\n}\n.prop-token {\n margin: 0.08rem;\n padding: 0.12rem 0.4rem;\n}\n.beat-counter {\n font-size: 0.84rem;\n margin-top: 0.34rem;\n}\n.stage-floorboards {\n height: 40px;\n}\n.control-panel {\n margin-top: 0 !important;\n padding: 0.42rem;\n}\n.control-panel h3 {\n margin-bottom: 0.2rem;\n}\n.gradio-container .row {\n gap: 0.55rem !important;\n}\n.stage-output + .row,\n.stage-output + div,\n.control-panel + .control-panel {\n margin-top: 0.45rem !important;\n}\n.transcript-section,\n.gradio-container .accordion {\n margin-top: 0.55rem !important;\n}\n@media (max-width: 760px) {\n .puppet-stage {\n min-height: 430px;\n }\n .stage-backdrop {\n padding: 0.52rem 2.15rem;\n }\n .actor-row {\n grid-template-columns: repeat(2, minmax(0, 1fr));\n }\n .speech-line {\n font-size: 0.8rem;\n }\n}\n\"\"\"\n\n\ndef render_stage(session: TheaterSession | None) -> str:\n if session is None:\n return EMPTY_STAGE\n\n actor_cards = []\n latest_beat = session.transcript[-1] if session.transcript else None\n latest_speaker = latest_beat.speaker if latest_beat else None\n for actor in session.actors:\n active_class = \" active\" if actor.name == latest_speaker else \"\"\n active_label = '
Now speaking
' if actor.name == latest_speaker else \"\"\n role_line = actor.goal.split(\".\", maxsplit=1)[0]\n held_prop = actor.held_prop or \"nothing\"\n held_emoji = PROP_EMOJI.get(held_prop.lower(), \"🎁\") if actor.held_prop else \"\"\n actor_cards.append(\n f\"\"\"\n
\n
{escape(actor.avatar)}
\n
{escape(actor.name)}
\n {active_label}\n
{escape(role_line)}
\n
Holding: {escape((held_emoji + \" \") if held_emoji else \"\")}{escape(held_prop)}
\n
\n \"\"\"\n )\n latest_line = \"\"\n if latest_beat is not None:\n latest_line = f\"\"\"\n
\n
{escape(latest_beat.speaker)}
\n
{escape(latest_beat.line)}
\n
\n \"\"\"\n audience_action = \"\"\n if session.latest_audience_action is not None:\n audience_action = f\"\"\"\n
\n Audience: {escape(session.latest_audience_action)}\n
\n \"\"\"\n prop_pile = \"\"\n if session.props:\n prop_tokens = \"\".join(\n f'{escape(PROP_EMOJI.get(prop.lower(), \"🎁\"))} {escape(prop)}'\n for prop in session.props\n )\n prop_pile = f\"\"\"\n
\n Props on stage: {prop_tokens}\n
\n \"\"\"\n\n return f\"\"\"\n
\n
\n
\n
{escape(session.show_title)}
\n
\n Setting: {escape(session.setting)}
\n Premise: {escape(session.premise)}\n
\n {latest_line}\n
\n {''.join(actor_cards)}\n
\n
\n {audience_action}\n {prop_pile}\n
\n
Beat {session.beat_index} of {session.max_beats}
\n
\n
\n
\n \"\"\"\n\n\ndef render_transcript(session: TheaterSession | None) -> str:\n if session is None:\n return EMPTY_TRANSCRIPT\n\n transcript_lines = [\n \"Transcript:\",\n \"No puppet lines yet. The first beat will be added in the next milestone.\",\n ]\n if session.transcript:\n transcript_lines = [\"Transcript:\"]\n for index, beat in enumerate(session.transcript, start=1):\n transcript_lines.append(f\"{index}. {beat.speaker}: {beat.line}\")\n\n return \"\\n\".join(transcript_lines)\n\n\ndef render_director_log(session: TheaterSession | None) -> str:\n if session is None:\n return EMPTY_DIRECTOR_LOG\n return \"\\n\".join(f\"- {entry}\" for entry in session.director_log)\n\n\ndef render_trace(session: TheaterSession | None) -> str:\n if session is None:\n return EMPTY_TRACE\n return \"\\n\".join(f\"- {entry}\" for entry in session.trace_events)\n\n\ndef normalize_backend_name(backend_name: str | None) -> str:\n return backend_name if backend_name in BACKEND_CHOICES else \"determinist", + "app_signals": "render_stage session render_notes create_show premise reset_show advance_one_beat advance_full_act throw_audience_prop prop_name summon_audience_actor actor_name request_audience_finale AI Puppet Theater Enter a premise and create a show. No show yet. The transcript will appear here. director_lines.extend join premise.strip create_show_from_premise run_one_beat throw_prop summon_actor request_finale gr.Blocks title gr.State gr.Markdown gr.HTML value label gr.Textbox lines interactive create_button.click inputs outputs run_one_button.click run_full_button.click throw_prop_button.click summon_actor_button.click request_finale_button.click reset_button.click __main__ app.launch css actor_cards.append Setting: Premise: Beat of Transcript: No puppet lines yet. The first beat will be added in the next milestone. enumerate start Director Log: # AI Puppet Theater Create a tiny improv stage from a premise. This public shell is ready for puppet casting, short scenes, audience interruptions, and behind-the-scenes traces in later milestones. gr.Row placeholder gr.Button variant gr.Dropdown choices allow_custom_value none active Now speaking Latest: Audience: Props on stage: escape transcript_lines.append Trace Events: No premise yet. Add a premise to raise the curtain. Create a show before running a beat. sleep Create a show before throwing a prop. Create a show before summoning an actor. Create a show before requesting a finale. AI Puppet Theater Create Show Run One Beat Run Full Act Reset Throw Prop Summon Actor Request Finale Stage Transcript
Goal: Style: Tools: Holding: - Create a show before running the full act. Premise A moon detective interrogates a suspicious toaster... primary rubber duck Prop Professor Button , . : egg flowers tomato tiny crown scroll nothing", + "readme_len": 107, + "app_source_len": 24000, + "app_signals_len": 1814 + }, + { + "id": "build-small-hackathon/ai-study-buddy", + "title": "Ai Study Buddy", + "summary": "AI Study Buddy — your smart learning companion 📚 ", + "tags": [ + "gradio", + "region:us" + ], + "models": [], + "datasets": [], + "sdk": "gradio", + "license": "apache-2.0", + "likes": 1, + "url": "https://huggingface.co/spaces/build-small-hackathon/ai-study-buddy", + "app_file": "app.py", + "readme_raw": "---\ntitle: Ai Study Buddy\nemoji: 📉\ncolorFrom: blue\ncolorTo: blue\nsdk: gradio\nsdk_version: 6.15.2\npython_version: '3.13'\napp_file: app.py\npinned: false\nlicense: apache-2.0\nshort_description: 'AI Study Buddy — your smart learning companion 📚 '\n---\n\nCheck out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference", + "readme_body": "Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference", + "readme_frontmatter": { + "title": "Ai Study Buddy", + "emoji": "📉", + "colorFrom": "blue", + "colorTo": "blue", + "sdk": "gradio", + "sdk_version": "6.15.2", + "python_version": "3.13", + "app_file": "app.py", + "pinned": "false", + "license": "apache-2.0", + "short_description": "AI Study Buddy — your smart learning companion 📚 " + }, + "app_source": "import gradio as gr\nimport os\nimport spaces\nfrom huggingface_hub import InferenceClient\n\n# -----------------------------\n# 🔑 API KEY FIXED\n# -----------------------------\nclient = InferenceClient(\n model=\"meta-llama/Llama-3.1-8B-Instruct\",\n token=os.getenv(\"API_KEY\") # 👈 FIXED NAME (recommended)\n)\n\n# -----------------------------\n# SYSTEM PROMPT\n# -----------------------------\nSYSTEM_PROMPT = \"\"\"\nYou are AI Study Buddy, created by Areeba Iqbal.\n\nRules:\n- Always explain step-by-step\n- Give examples\n- Be clear and student-friendly\n- If asked who created you: \"I am AI Study Buddy, created by Areeba Iqbal.\"\n\"\"\"\n\n# -----------------------------\n# MODE CONTROL\n# -----------------------------\ndef build_prompt(message, mode):\n mode_prompts = {\n \"📚 Study Mode\": \"Explain simply for students with examples.\",\n \"💻 Coding Mode\": \"Act as a senior programmer. Debug and improve code.\",\n \"🧮 Math Solver\": \"Solve step-by-step with explanation.\",\n \"📝 Exam Prep\": \"Give short exam-focused answers.\"\n }\n\n return f\"\"\"\n{SYSTEM_PROMPT}\n\nMode: {mode_prompts.get(mode, \"\")}\n\nUser Question:\n{message}\n\"\"\"\n\n# -----------------------------\n# MAIN CHAT FUNCTION\n# -----------------------------\n@spaces.GPU\ndef get_response(message, history, mode):\n\n messages = [{\"role\": \"system\", \"content\": SYSTEM_PROMPT}]\n\n for msg in history:\n messages.append(msg)\n\n messages.append({\"role\": \"user\", \"content\": build_prompt(message, mode)})\n\n try:\n response = client.chat_completion(\n messages=messages,\n max_tokens=1024,\n temperature=0.7\n )\n\n return response.choices[0].message.content\n\n except Exception as e:\n return f\"❌ Error: {e}\"\n\n\n# -----------------------------\n# QUICK ACTIONS\n# -----------------------------\ndef summarize(text):\n return client.chat_completion(\n messages=[{\"role\": \"user\", \"content\": \"Summarize: \" + text}],\n max_tokens=500\n ).choices[0].message.content\n\n\ndef quiz(text):\n return client.chat_completion(\n messages=[{\"role\": \"user\", \"content\": \"Generate 5 MCQs: \" + text}],\n max_tokens=500\n ).choices[0].message.content\n\n\ndef simple(text):\n return client.chat_completion(\n messages=[{\"role\": \"user\", \"content\": \"Explain simply: \" + text}],\n max_tokens=500\n ).choices[0].message.content\n\n\ndef study_plan(text):\n return client.chat_completion(\n messages=[{\"role\": \"user\", \"content\": f\"Make 7-day study plan for: {text}\"}],\n max_tokens=700\n ).choices[0].message.content\n\n\n# -----------------------------\n# UI\n# -----------------------------\ncss = \"\"\"\n.main-container {\n max-width: 900px;\n margin: auto;\n}\n#title { text-align:center; }\n#subtitle { text-align:center; color:gray; }\n#footer { text-align:center; color:gray; font-size:14px; }\n\"\"\"\n\nwith gr.Blocks(\n theme=gr.themes.Soft(),\n css=css,\n title=\"AI Study Buddy\"\n) as demo:\n\n gr.HTML(\"\"\"\n
\n

📚 AI Study Buddy

\n

Learn smarter with AI-powered guidance

\n
\n \"\"\")\n\n # ---------------- MODE SELECT ----------------\n mode = gr.Radio(\n [\"📚 Study Mode\", \"💻 Coding Mode\", \"🧮 Math Solver\", \"📝 Exam Prep\"],\n value=\"📚 Study Mode\",\n label=\"Select Mode\"\n )\n\n # ---------------- CHAT ----------------\n chatbot = gr.ChatInterface(\n fn=get_response,\n additional_inputs=[mode],\n examples=[\n [\"Explain recursion\"],\n [\"Solve quadratic equation\"],\n [\"What is AI?\"],\n [\"Debug Python code\"]\n ]\n )\n\n # ---------------- QUICK ACTIONS ----------------\n gr.Markdown(\"## ⚡ Quick Actions\")\n\n quick_input = gr.Textbox(label=\"Quick Input\")\n\n with gr.Row():\n gr.Button(\"📖 Summarize\").click(summarize, quick_input, gr.Textbox())\n gr.Button(\"📝 Quiz\").click(quiz, quick_input, gr.Textbox())\n gr.Button(\"💡 Simple\").click(simple, quick_input, gr.Textbox())\n\n # ---------------- STUDY PLAN ----------------\n gr.Markdown(\"## 🗓️ Study Plan Generator\")\n\n plan_input = gr.Textbox(label=\"Enter Topic / Exam Detail\")\n plan_output = gr.Textbox(label=\"Plan Output\")\n\n gr.Button(\"Generate Plan\").click(study_plan, plan_input, plan_output)\n\n # ---------------- FOOTER ----------------\n gr.HTML(\"\"\"\n
\n Created by Areeba Iqbal\n
\n \"\"\")\n\ndemo.launch(server_name=\"0.0.0.0\", server_port=7860)", + "app_signals": "build_prompt message mode get_response history summarize text quiz simple study_plan InferenceClient model token You are AI Study Buddy, created by Areeba Iqbal. Rules: - Always explain step-by-step - Give examples - Be clear and student-friendly - If asked who created you: \"I am AI Study Buddy, created by Areeba Iqbal.\" demo.launch server_name server_port messages.append gr.Blocks theme css title gr.HTML gr.Radio value label gr.ChatInterface fn additional_inputs examples gr.Markdown gr.Textbox click meta-llama/Llama-3.1-8B-Instruct os.getenv 📚 Study Mode 💻 Coding Mode 🧮 Math Solver 📝 Exam Prep Explain simply for students with examples. Act as a senior programmer. Debug and improve code. Solve step-by-step with explanation. Give short exam-focused answers. Mode: User Question: client.chat_completion messages max_tokens temperature 📚 AI Study Buddy Learn smarter with AI-powered guidance ## ⚡ Quick Actions gr.Row ## 🗓️ Study Plan Generator Created by Areeba Iqbal 0.0.0.0 API_KEY mode_prompts.get role content system user gr.themes.Soft AI Study Buddy Select Mode Quick Input Enter Topic / Exam Detail Plan Output gr.Button ❌ Error: Generate Plan Explain recursion Solve quadratic equation What is AI? Debug Python code 📖 Summarize 📝 Quiz 💡 Simple Summarize: Generate 5 MCQs: Explain simply: Make 7-day study plan for:", + "readme_len": 96, + "app_source_len": 4509, + "app_signals_len": 1330 + }, + { + "id": "build-small-hackathon/amnesiac", + "title": "AMNESIAC", + "summary": "Reverse-Turing webcam interrogation game.", + "tags": [ + "gradio", + "region:us" + ], + "models": [], + "datasets": [], + "sdk": "gradio", + "license": "apache-2.0", + "likes": 0, + "url": "https://huggingface.co/spaces/build-small-hackathon/amnesiac", + "app_file": "app.py", + "readme_raw": "---\ntitle: AMNESIAC\nemoji: 🪞\ncolorFrom: gray\ncolorTo: red\nsdk: gradio\nsdk_version: 5.50.0\npython_version: \"3.10\"\napp_file: app.py\nlicense: apache-2.0\nshort_description: Reverse-Turing webcam interrogation game.\nheader: mini\nfullWidth: true\n---\n\n# AMNESIAC\n\nAMNESIAC is a reverse-Turing interrogation game for the Hugging Face build-small-hackathon.\n\nThis repository is being built top-down from `RESEARCH.md`, `FEATURES.md`, `ARCHITECTURE.md`, and `PLAN.md`.\n\nThe entrypoint now follows the Gradio 5.x + FastAPI + FastRTC deployment pattern locked in\n`ARCHITECTURE.md` §1.1: one FastAPI process serves the static frontend, mounts FastRTC for the\nmedia plane, and mounts a minimal Gradio app for hackathon compliance.\n", + "readme_body": "# AMNESIAC\n\nAMNESIAC is a reverse-Turing interrogation game for the Hugging Face build-small-hackathon.\n\nThis repository is being built top-down from `RESEARCH.md`, `FEATURES.md`, `ARCHITECTURE.md`, and `PLAN.md`.\n\nThe entrypoint now follows the Gradio 5.x + FastAPI + FastRTC deployment pattern locked in\n`ARCHITECTURE.md` §1.1: one FastAPI process serves the static frontend, mounts FastRTC for the\nmedia plane, and mounts a minimal Gradio app for hackathon compliance.", + "readme_frontmatter": { + "title": "AMNESIAC", + "emoji": "🪞", + "colorFrom": "gray", + "colorTo": "red", + "sdk": "gradio", + "sdk_version": "5.50.0", + "python_version": "3.10", + "app_file": "app.py", + "license": "apache-2.0", + "short_description": "Reverse-Turing webcam interrogation game.", + "header": "mini", + "fullWidth": "true" + }, + "app_source": "from __future__ import annotations\n\nimport os\n\nimport uvicorn\n\nfrom server.webapp import create_application\n\n\nSERVER_PORT = int(os.getenv(\"PORT\", \"7860\"))\napp, worker, stream = create_application(\n include_gradio=True,\n server_port=SERVER_PORT,\n)\n\n\nif __name__ == \"__main__\":\n uvicorn.run(app, host=\"0.0.0.0\", port=SERVER_PORT)\n", + "app_signals": "int create_application include_gradio server_port os.getenv __main__ uvicorn.run host port PORT 7860 0.0.0.0", + "readme_len": 471, + "app_source_len": 337, + "app_signals_len": 108 + }, + { + "id": "build-small-hackathon/attention-firewall", + "title": "Attention Firewall", + "summary": "", + "tags": [ + "gradio", + "region:us" + ], + "models": [], + "datasets": [], + "sdk": "gradio", + "license": "", + "likes": 0, + "url": "https://huggingface.co/spaces/build-small-hackathon/attention-firewall", + "app_file": "app.py", + "readme_raw": "---\ntitle: Attention Firewall\ncolorFrom: indigo\ncolorTo: green\nsdk: gradio\nsdk_version: 6.16.0\napp_file: app.py\npinned: false\npython_version: 3.14\n---\n\n# Attention Firewall\n\nMVP 1 is a deployment skeleton for a future attention triage workflow. It provides a small chat-style Gradio interface that accepts chaotic work context and returns deterministic placeholder text.\n\nThis version does not perform model inference, graph extraction, llama.cpp execution, Mellea validation, or markdown daemon updates.\n\n## Local Development\n\nInstall dependencies:\n\n```bash\nuv sync\n```\n\nRun the app:\n\n```bash\nuv run python app.py\n```\n\nThe canonical public Space is:\n\n```text\nhttps://huggingface.co/spaces/build-small-hackathon/attention-firewall\n```\n\nThe running app URL is:\n\n```text\nhttps://build-small-hackathon-attention-firewall.hf.space\n```\n", + "readme_body": "# Attention Firewall\n\nMVP 1 is a deployment skeleton for a future attention triage workflow. It provides a small chat-style Gradio interface that accepts chaotic work context and returns deterministic placeholder text.\n\nThis version does not perform model inference, graph extraction, llama.cpp execution, Mellea validation, or markdown daemon updates.\n\n## Local Development\n\nInstall dependencies:\n\n```bash\nuv sync\n```\n\nRun the app:\n\n```bash\nuv run python app.py\n```\n\nThe canonical public Space is:\n\n```text\nhttps://huggingface.co/spaces/build-small-hackathon/attention-firewall\n```\n\nThe running app URL is:\n\n```text\nhttps://build-small-hackathon-attention-firewall.hf.space\n```", + "readme_frontmatter": { + "title": "Attention Firewall", + "colorFrom": "indigo", + "colorTo": "green", + "sdk": "gradio", + "sdk_version": "6.16.0", + "app_file": "app.py", + "pinned": "false", + "python_version": "3.14" + }, + "app_source": "from __future__ import annotations\n\nimport gradio as gr\n\n\nEMPTY_RESPONSE = (\n \"Paste a short snapshot of your current work context so the MVP 1 skeleton \"\n \"can acknowledge it.\"\n)\n\n\ndef respond(message: str, history: list[dict[str, str]] | None = None) -> str:\n \"\"\"Return deterministic MVP 1 placeholder text for the chat interface.\"\"\"\n del history\n\n context = message.strip()\n if not context:\n return EMPTY_RESPONSE\n\n word_count = len(context.split())\n char_count = len(context)\n return (\n \"Attention Firewall MVP 1 received your work context.\\n\\n\"\n f\"- Snapshot size: {word_count} words, {char_count} characters.\\n\"\n \"- Current behavior: deterministic deployment skeleton response.\\n\"\n \"- Later MVPs will add structured firewall processing after the Space \"\n \"foundation is verified.\"\n )\n\n\ndef build_demo() -> gr.ChatInterface:\n return gr.ChatInterface(\n fn=respond,\n title=\"Attention Firewall\",\n description=(\n \"Paste chaotic work context and get a deterministic MVP 1 skeleton \"\n \"acknowledgement.\"\n ),\n examples=[\n \"I have three urgent threads, a half-written spec, and unclear review feedback.\",\n \"My deployment is blocked, notes are scattered, and I need the next concrete action.\",\n ],\n textbox=gr.Textbox(\n placeholder=\"Paste work context to triage later...\",\n autofocus=True,\n container=False,\n ),\n )\n\n\ndemo = build_demo()\n\n\nif __name__ == \"__main__\":\n demo.launch()\n", + "app_signals": "respond message history build_demo Paste a short snapshot of your current work context so the MVP 1 skeleton can acknowledge it. Return deterministic MVP 1 placeholder text for the chat interface. message.strip len gr.ChatInterface fn title description examples textbox __main__ demo.launch context.split Attention Firewall MVP 1 received your work context. - Snapshot size: words, characters. - Current behavior: deterministic deployment skeleton response. - Later MVPs will add structured firewall processing after the Space foundation is verified. Attention Firewall Paste chaotic work context and get a deterministic MVP 1 skeleton acknowledgement. gr.Textbox placeholder autofocus container I have three urgent threads, a half-written spec, and unclear review feedback. My deployment is blocked, notes are scattered, and I need the next concrete action. Paste work context to triage later...", + "readme_len": 678, + "app_source_len": 1591, + "app_signals_len": 896 + }, + { + "id": "build-small-hackathon/awaaz", + "title": "Apni Awaaz", + "summary": "", + "tags": [ + "backyard-ai", + "dubbing", + "hindi", + "translation", + "tts" + ], + "models": [], + "datasets": [], + "sdk": "gradio", + "license": "mit", + "likes": 0, + "url": "https://huggingface.co/spaces/build-small-hackathon/awaaz", + "app_file": "app.py", + "readme_raw": "---\ntitle: Apni Awaaz\nemoji: 🎙️\ncolorFrom: yellow\ncolorTo: red\nsdk: gradio\nsdk_version: 6.16.0\napp_file: app.py\npinned: false\nlicense: mit\ntags:\n- dubbing\n- hindi\n- translation\n- tts\n- backyard-ai\n---\n\n# 🎙️ Apni Awaaz\n\n**Dub English video into the Hindi people actually speak.**\n\nMost Hindi dubs use शुद्ध हिंदी — stiff, Sanskritized language no one speaks at home. \nApni Awaaz translates into everyday Hindustani — the natural mix your family actually uses.\n\n| Official dub | Apni Awaaz |\n|---|---|\n| \"मुझे इस विषय पर विचार करने दीजिए\" | \"सोचने दे एक second\" |\n| \"यह अत्यंत मूल्यवान है\" | \"बहुत महँगा है यार\" |\n\n## Pipeline\n\n1. **Whisper medium** — transcribe English with timestamps \n2. **Qwen 2.5 7B** — translate to colloquial Hindi (the magic layer) \n3. **Edge TTS** — generate natural Hindi speech \n4. **ffmpeg** — stitch and merge back onto video \n\nTotal: ~8B params (well under the 32B cap)\n\nBuilt for the [Build Small Hackathon](https://huggingface.co/build-small-hackathon) · Backyard AI track", + "readme_body": "# 🎙️ Apni Awaaz\n\n**Dub English video into the Hindi people actually speak.**\n\nMost Hindi dubs use शुद्ध हिंदी — stiff, Sanskritized language no one speaks at home. \nApni Awaaz translates into everyday Hindustani — the natural mix your family actually uses.\n\n| Official dub | Apni Awaaz |\n|---|---|\n| \"मुझे इस विषय पर विचार करने दीजिए\" | \"सोचने दे एक second\" |\n| \"यह अत्यंत मूल्यवान है\" | \"बहुत महँगा है यार\" |\n\n## Pipeline\n\n1. **Whisper medium** — transcribe English with timestamps \n2. **Qwen 2.5 7B** — translate to colloquial Hindi (the magic layer) \n3. **Edge TTS** — generate natural Hindi speech \n4. **ffmpeg** — stitch and merge back onto video \n\nTotal: ~8B params (well under the 32B cap)\n\nBuilt for the [Build Small Hackathon](https://huggingface.co/build-small-hackathon) · Backyard AI track", + "readme_frontmatter": { + "title": "Apni Awaaz", + "emoji": "🎙️", + "colorFrom": "yellow", + "colorTo": "red", + "sdk": "gradio", + "sdk_version": "6.16.0", + "app_file": "app.py", + "pinned": "false", + "license": "mit", + "tags": "" + }, + "app_source": "\"\"\"\nApni Awaaz 🎙️ — Dub English video into the Hindi people actually speak.\nBuilt for the Build Small Hackathon (June 2026).\n\"\"\"\n\nimport gradio as gr\nimport spaces\nimport torch\nimport edge_tts\nimport asyncio\nimport subprocess\nimport tempfile\nimport os\nfrom pathlib import Path\nfrom transformers import (\n AutoModelForCausalLM,\n AutoTokenizer,\n pipeline,\n BitsAndBytesConfig,\n)\n\n# ╔══════════════════════════════════════════════════════════════╗\n# ║ THE PROMPT — this is the soul of the entire project ║\n# ╚══════════════════════════════════════════════════════════════╝\n\nSYSTEM_PROMPT = \"\"\"You are a dubbing translator. You translate English dialogue into the Hindi that real people actually speak at home in North India — not the stiff, Sanskritized Hindi of Doordarshan or official dubs.\n\nRULES:\n1. Use everyday Hindustani — the natural Hindi-Urdu mix people really speak.\n2. NEVER use Sanskritized/शुद्ध words when a simpler one exists:\n - \"प्राप्त करना\" → \"मिलना\" / \"पाना\"\n - \"आवश्यक\" → \"ज़रूरी\"\n - \"अत्यंत\" → \"बहुत\" / \"काफ़ी\"\n - \"उपयोग\" → \"इस्तेमाल\"\n - \"विचार करना\" → \"सोचना\"\n - \"संपन्न करना\" → \"करना\" / \"निपटाना\"\n - \"प्रतीक्षा\" → \"इंतज़ार\"\n - \"शीघ्र\" → \"जल्दी\"\n - \"अनुमति\" → \"इजाज़त\"\n - \"कृपया\" → drop it or say \"please\"\n - \"अवश्य\" → \"ज़रूर\"\n - \"उचित\" → \"सही\" / \"ठीक\"\n3. Keep English words Indians naturally keep: phone, office, meeting, tension, problem, time, chance, try, plan, sure, okay, sorry, thanks, bus, train, college, hospital, doctor, ticket, report, file.\n4. Match the speaker's register. Casual stays casual, serious stays serious — but never sound like a newsreader.\n5. Use natural fillers where they fit: \"यार\", \"अरे\", \"बस\", \"ना\", \"वो\", \"मतलब\", \"basically\".\n6. Natural contractions: \"कर लेंगे\" not \"कर लिया जाएगा\", \"हो जाएगा\" not \"संपन्न हो जाएगा\".\n7. Keep it CONCISE. Dubbed Hindi should be roughly the same length as the English. Don't pad.\n\nEXAMPLES:\nEN: \"I need to get this done before the deadline\"\n❌ \"मुझे समय-सीमा से पूर्व यह कार्य संपन्न करना आवश्यक है\"\n✅ \"deadline से पहले ये निपटाना पड़ेगा\"\n\nEN: \"That's a really good point, I hadn't thought about that\"\n❌ \"यह एक अत्यंत उत्तम विचार है, मैंने इस पर विचार नहीं किया था\"\n✅ \"अच्छी बात बोली, मेरे दिमाग़ में आया ही नहीं\"\n\nEN: \"We should probably reconsider our approach\"\n❌ \"हमें अपनी कार्यप्रणाली पर पुनर्विचार करना चाहिए\"\n✅ \"लगता है अपना तरीका बदलना पड़ेगा\"\n\nEN: \"I'm really sorry, I completely forgot about our meeting\"\n❌ \"मुझे अत्यंत खेद है, मैं हमारी बैठक के विषय में पूर्णतः विस्मृत हो गया\"\n✅ \"sorry यार, meeting पूरी तरह भूल गया\"\n\nEN: \"Can you give me a moment? I need to think about this\"\n❌ \"क्या आप मुझे कुछ क्षण प्रदान कर सकते हैं? मुझे इस विषय पर विचार करना है\"\n✅ \"एक second दे, सोचने दे\"\n\nEN: \"The situation is getting worse and we need to act fast\"\n❌ \"स्थिति बिगड़ती जा रही है और हमें शीघ्र कार्रवाई करनी चाहिए\"\n✅ \"हालात ख़राब हो रहे हैं, जल्दी कुछ करना पड़ेगा\"\n\nEN: \"I don't think that's going to work. Let me try something else.\"\n❌ \"मुझे नहीं लगता कि यह कार्य करेगा। मुझे कोई अन्य विकल्प आज़माने दीजिए।\"\n✅ \"ये नहीं चलेगा। कुछ और try करता हूँ।\"\n\nEN: \"Look, I understand your concern, but we don't have a choice here\"\n❌ \"देखिए, मैं आपकी चिंता समझता हूँ, परंतु हमारे पास यहाँ कोई विकल्प नहीं है\"\n✅ \"देख, तेरी tension समझता हूँ, पर कोई चारा नहीं है\"\n\nTranslate ONLY the given English text. Output ONLY the Hindi. No commentary.\"\"\"\n\n\n# ╔══════════════════════════════════════════════════════════════╗\n# ║ MODEL LOADING ║\n# ╚══════════════════════════════════════════════════════════════╝\n\n# -- Globals (loaded once, reused) --\nwhisper_pipe = None\nllm_model = None\nllm_tokenizer = None\n\n\ndef load_whisper():\n \"\"\"Load Whisper on CPU. ZeroGPU moves it when @spaces.GPU fires.\"\"\"\n global whisper_pipe\n if whisper_pipe is None:\n print(\"⏳ Loading Whisper...\")\n whisper_pipe = pipeline(\n \"automatic-speech-recognition\",\n model=\"openai/whisper-medium\",\n torch_dtype=torch.float16,\n device=\"cpu\",\n )\n print(\"✅ Whisper loaded (CPU, will move to GPU at runtime)\")\n return whisper_pipe\n\n\ndef load_llm():\n \"\"\"\n Load Qwen 2.5 7B in 4-bit.\n Called inside @spaces.GPU so device_map=\"auto\" lands on the A100.\n \"\"\"\n global llm_model, llm_tokenizer\n if llm_model is None:\n print(\"⏳ Loading Qwen 2.5 7B...\")\n model_id = \"Qwen/Qwen2.5-7B-Instruct\"\n\n bnb_cfg = BitsAndBytesConfig(\n load_in_4bit=True,\n bnb_4bit_compute_dtype=torch.float16,\n bnb_4bit_quant_type=\"nf4\",\n )\n llm_tokenizer = AutoTokenizer.from_pretrained(model_id)\n llm_model = AutoModelForCausalLM.from_pretrained(\n model_id,\n quantization_config=bnb_cfg,\n device_map=\"auto\",\n )\n print(\"✅ Qwen loaded\")\n return llm_model, llm_tokenizer\n\n\n# Pre-download weights at startup (stays on CPU, fast re-load later)\nload_whisper()\n\n\n# ╔══════════════════════════════════════════════════════════════╗\n# ║ PIPELINE STEPS ║\n# ╚══════════════════════════════════════════════════════════════╝\n\n\ndef extract_audio(video_path: str, out_path: str) -> str:\n subprocess.run(\n [\n \"ffmpeg\", \"-i\", video_path,\n \"-vn\", \"-acodec\", \"pcm_s16le\", \"-ar\", \"16000\", \"-ac\", \"1\",\n out_path, \"-y\",\n ],\n check=True, capture_output=True,\n )\n return out_path\n\n\ndef get_duration(path: str) -> float:\n r = subprocess.run(\n [\"ffprobe\", \"-v\", \"quiet\", \"-show_entries\", \"format=duration\",\n \"-of\", \"csv=p=0\", path],\n capture_output=True, text=True,\n )\n return float(r.stdout.strip())\n\n\ndef transcribe(audio_path: str) -> list[dict]:\n \"\"\"→ [{\"timestamp\": (start, end), \"text\": \"...\"}]\"\"\"\n pipe = load_whisper()\n result = pipe(\n audio_path,\n return_timestamps=True,\n chunk_length_s=30,\n generate_kwargs={\"language\": \"en\"},\n )\n return result[\"chunks\"]\n\n\ndef translate_segment(text: str) -> str:\n model, tok = load_llm()\n messages = [\n {\"role\": \"system\", \"content\": SYSTEM_PROMPT},\n {\"role\": \"user\", \"content\": text},\n ]\n prompt = tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)\n inputs = tok(prompt, return_tensors=\"pt\").to(model.device)\n\n with torch.no_grad():\n out = model.generate(\n **inputs,\n max_new_tokens=200,\n temperature=0.3,\n do_sample=True,\n top_p=0.9,\n )\n resp = tok.decode(out[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)\n return resp.strip().split(\"\\n\")[0] # first line only, no runaway generation\n\n\nasync def _tts(text: str, path: str, voice: str):\n comm = edge_tts.Communicate(text, voice)\n await comm.save(path)\n\n\ndef hindi_tts(text: str, path: str, voice: str = \"hi-IN-MadhurNeural\"):\n asyncio.run(_tts(text, path, voice))\n return path\n\n\ndef adjust_speed(in_path: str, out_path: str, target_sec: float) -> str:\n \"\"\"Stretch/squeeze audio to fit the target duration (pitch-preserved).\"\"\"\n dur = get_duration(in_path)\n if dur <= 0 or target_sec <= 0:\n return in_path\n ratio = dur / target_sec\n ratio = max(0.5, min(2.0, ratio)) # atempo range\n subprocess.run(\n [\"ffmpeg\", \"-i\", in_path, \"-filter:a\", f\"atempo={ratio:.4f}\",\n \"-y\", out_path],\n check=True, capture_output=True,\n )\n return out_path\n\n\ndef stitch_and_merge(\n segments: list[dict],\n video_path: str,\n total_dur: float,\n tmpdir: str,\n) -> str:\n \"\"\"\n Build the dubbed audio track and merge it back onto the video.\n Uses pydub for clean overlay at exact timestamps.\n \"\"\"\n from pydub import AudioSegment\n\n # silent canvas\n base = AudioSegment.silent(duration=int(total_dur * 1000), frame_rate=24000)\n\n for seg in segments:\n tts_file = seg[\"tts_path\"]\n start_ms = int(seg[\"start\"] * 1000)\n try:\n chunk = AudioSegment.from_file(tts_file)\n base = base.overlay(chunk, position=start_ms)\n except Exception as e:\n print(f\"⚠️ overlay failed for segment at {seg['start']:.1f}s: {e}\")\n\n dubbed_wav = os.path.join(tmpdir, \"dubbed_track.wav\")\n base.export(dubbed_wav, format=\"wav\")\n\n # merge onto video (keep original video stream, replace audio)\n out_mp4 = os.path.join(tmpdir, \"output.mp4\")\n subprocess.run(\n [\n \"ffmpeg\",\n \"-i\", video_path,\n \"-i\", dubbed_wav,\n \"-c:v\", \"copy\",\n \"-map\", \"0:v:0\",\n \"-map\", \"1:a:0\",\n \"-shortest\",\n \"-y\", out_mp4,\n ],\n check=True, capture_output=True,\n )\n return out_mp4\n\n\n# ╔══════════════════════════════════════════════════════════════╗\n# ║ MAIN PIPELINE ║\n# ╚══════════════════════════════════════════════════════════════╝\n\n\n@spaces.GPU(duration=300)\ndef dub_video(video_path: str, voice_gender: str, progress=gr.Progress()):\n if video_path is None:\n raise gr.Error(\"Upload a video first!\")\n\n # ── move Whisper to the ZeroGPU A100 ──\n pipe = load_whisper()\n pipe.model.to(\"cuda\")\n pipe.device = torch.device(\"cuda\")\n\n # ── load LLM (first call downloads + quantises onto GPU) ──\n load_llm()\n\n voice = \"hi-IN-MadhurNeural\" if voice_gender == \"Male\" else \"hi-IN-SwaraNeural\"\n tmpdir = tempfile.mkdtemp(prefix=\"apni_\")\n\n # 1 ── extract audio\n progress(0.05, desc=\"🎵 Extracting audio…\")\n raw_audio = extract_audio(video_path, os.path.join(tmpdir, \"raw.wav\"))\n total_dur = get_duration(raw_audio)\n\n # safety: reject clips > 3 min to stay within GPU budget\n if total_dur > 180:\n raise gr.Error(\"Please keep clips under 3 minutes for now.\")\n\n # 2 ── transcribe\n progress(0.15, desc=\"👂 Listening to English…\")\n chunks = transcribe(raw_audio)\n if not chunks:\n raise gr.Error(\"Couldn't detect any speech. Try a clearer clip.\")\n\n # 3 ── translate + TTS each segment\n translated = []\n n = len(chunks)\n for i, ch in enumerate(chunks):\n frac = 0.2 + 0.6 * (i / n)\n progress(frac, desc=f\"🗣️ Dubbing segment {i + 1}/{n}…\")\n\n start, end = ch[\"timestamp\"]\n if start is None or end is None:\n continue\n seg_dur = end - start\n if seg_dur <= 0:\n continue\n\n # translate\n hindi = translate_segment(ch[\"text\"])\n\n # TTS\n tts_raw = os.path.join(tmpdir, f\"tts_{i}.mp3\")\n hindi_tts(hindi, tts_raw, voice)\n\n # speed-adjust to fit original segment window\n tts_adj = os.path.join(tmpdir, f\"tts_adj_{i}.wav\")\n adjust_speed(tts_raw, tts_adj, seg_dur)\n\n translated.append({\n \"start\": start,\n \"end\": end,\n \"en\": ch[\"text\"],\n \"hi\": hindi,\n \"tts_path\": tts_adj,\n })\n\n # 4 ── stitch + merge\n progress(0.85, desc=\"🎬 Stitching final video…\")\n output_video = stitch_and_merge(translated, video_path, total_dur, tmpdir)\n\n # 5 ── build comparison log\n log_lines = []\n for s in translated:\n log_lines.append(\n f\"[{s['start']:.1f}s → {s['end']:.1f}s]\\n\"\n f\" 🇬🇧 {s['en']}\\n\"\n f\" 🇮🇳 {s['hi']}\"\n )\n log = \"\\n\\n\".join(log_lines)\n\n return output_video, log\n\n\n# ╔══════════════════════════════════════════════════════════════╗\n# ║ GRADIO UI ║\n# ╚══════════════════════════════════════════════════════════════╝\n\nCSS = \"\"\"\n.main-title {\n text-align: center;\n margin-bottom: 0.2em;\n}\n.subtitle {\n text-align: center;\n opacity: 0.7;\n font-size: 1.05em;\n margin-top: 0;\n}\n.example-row {\n background: var(--block-background-fill);\n border-radius: 8px;\n padding: 12px 16px;\n margin: 6px 0;\n font-size: 0.92em;\n}\nfooter { display: none !important; }\n\"\"\"\n\nwith gr.Blocks(title=\"Apni Awaaz\", css=CSS, theme=gr.themes.Soft()) as demo:\n\n gr.Markdown(\n \"# 🎙️ Apni Awaaz\\n\"\n \"#### Dub English video into the Hindi people actually speak\",\n elem_classes=\"main-title\",\n )\n gr.Markdown(\n '_No more \"मुझे यह कार्य संपन्न करना आवश्यक है\"_ — '\n '_just \"ये करना पड़ेगा यार\"_',\n elem_classes=\"subtitle\",\n )\n\n with gr.Row(equal_height=True):\n # ── left column: inputs ──\n with gr.Column(scale=1):\n vid_in = gr.Video(label=\"Upload an English clip (< 3 min)\")\n voice_radio = gr.Radio(\n [\"Male\", \"Female\"],\n value=\"Male\",\n label=\"Hindi voice\",\n )\n btn = gr.Button(\"🎬 Dub it in apni bhasha!\", variant=\"primary\", size=\"lg\")\n\n # ── right column: outputs ──\n with gr.Column(scale=1):\n vid_out = gr.Video(label=\"Dubbed output\")\n log_box = gr.Textbox(\n label=\"Translation log (EN → HI)\",\n lines=12,\n interactive=False,\n show_copy_button=True,\n )\n\n # ── \"what it does\" section ──\n with gr.Accordion(\"How is this different from normal dubbing?\", open=False):\n gr.Markdown(\n \"Most Hindi dubs use **शुद्ध हिंदी** — overly formal, Sanskritized language \"\n \"that nobody actually speaks at home.\\n\\n\"\n \"Apni Awaaz translates into **everyday Hindustani** — the natural mix of \"\n \"Hindi, Urdu, and English that your family actually uses at the dinner table.\\n\\n\"\n \"| Official dub | Apni Awaaz |\\n\"\n \"|---|---|\\n\"\n '| \"मुझे इस विषय पर विचार करने दीजिए\" | \"सोचने दे एक second\" |\\n'\n '| \"यह अत्यंत मूल्यवान है\" | \"बहुत महँगा है यार\" |\\n'\n '| \"कृपया मुझे अनुमति प्रदान करें\" | \"please, करने दे ना\" |\\n'\n )\n\n btn.click(\n fn=dub_video,\n inputs=[vid_in, voice_radio],\n outputs=[vid_out, log_box],\n )\n\n\nif __name__ == \"__main__\":\n demo.launch(show_api=False)\n", + "app_signals": "load_whisper load_llm extract_audio video_path out_path get_duration path transcribe audio_path translate_segment text _tts voice hindi_tts adjust_speed in_path target_sec stitch_and_merge segments total_dur tmpdir dub_video voice_gender progress Apni Awaaz 🎙️ — Dub English video into the Hindi people actually speak. Built for the Build Small Hackathon (June 2026). You are a dubbing translator. You translate English dialogue into the Hindi that real people actually speak at home in North India — not the stiff, Sanskritized Hindi of Doordarshan or official dubs. RULES: 1. Use everyday Hindustani — the natural Hindi-Urdu mix people really speak. 2. NEVER use Sanskritized/शुद्ध words when a simpler one exists: - \"प्राप्त करना\" → \"मिलना\" / \"पाना\" - \"आवश्यक\" → \"ज़रूरी\" - \"अत्यंत\" → \"बहुत\" / \"काफ़ी\" - \"उपयोग\" → \"इस्तेमाल\" - \"विचार करना\" → \"सोचना\" - \"संपन्न करना\" → \"करना\" / \"निपटाना\" - \"प्रतीक्षा\" → \"इंतज़ार\" - \"शीघ्र\" → \"जल्दी\" - \"अनुमति\" → \"इजाज़त\" - \"कृपया\" → drop it or say \"please\" - \"अवश्य\" → \"ज़रूर\" - \"उचित\" → \"सही\" / \"ठीक\" 3. Keep English words Indians naturally keep: phone, office, meeting, tension, problem, time, chance, try, plan, sure, okay, sorry, thanks, bus, train, college, hospital, doctor, ticket, report, file. 4. Match the speaker's register. Casual stays casual, serious stays serious — but never sound like a newsreader. 5. Use natural fillers where they fit: \"यार\", \"अरे\", \"बस\", \"ना\", \"वो\", \"मतलब\", \"basically\". 6. Natural contractions: \"कर लेंगे\" not \"कर लिया जाएगा\", \"हो जाएगा\" not \"संपन्न हो जाएगा\". 7. Keep it CONCISE. Dubbed Hindi should be roughly the same length as the English. Don't pad. EXAMPLES: EN: \"I need to get this done before the deadline\" ❌ \"मुझे समय-सीमा से पूर्व यह कार्य संपन्न करना आवश्यक है\" ✅ \"deadline से पहले ये निपटाना पड़ेगा\" EN: \"That's a really good point, I hadn't thought about that\" ❌ \"यह एक अत्यंत उत्तम विचार है, मैंने इस पर विचार नहीं किया था\" ✅ \"अच्छी बात बोली, मेरे दिमाग़ में आया ही नहीं\" EN: \"We should probably reconsider our approach\" ❌ \"हमें अपनी कार्यप्रणाली पर पुनर्विचार करना चाहिए\" ✅ \"लगता है अपना तरीका बदलना पड़ेगा\" EN: \"I'm really sorry, I completely forgot about our meeting\" ❌ \"मुझे अत्यंत खेद है, मैं हमारी बैठक के विषय में पूर्णतः विस्मृत हो गया\" ✅ \"sorry यार, meeting पूरी तरह भूल गया\" EN: \"Can you give me a moment? I need to think about this\" ❌ \"क्या आप मुझे कुछ क्षण प्रदान कर सकते हैं? मुझे इस विषय पर विचार करना है\" ✅ \"एक second दे, सोचने दे\" EN: \"The situation is getting worse and we need to act fast\" ❌ \"स्थिति बिगड़ती जा रही है और हमें शीघ्र कार्रवाई करनी चाहिए\" ✅ \"हालात ख़राब हो रहे हैं, जल्दी कुछ करना पड़ेगा\" EN: \"I don't think that's going to work. Let me try something else.\" ❌ \"मुझे नहीं लगता कि यह कार्य करेगा। मुझे कोई अन्य विकल्प आज़माने दीजिए।\" ✅ \"ये नहीं चलेगा। कुछ और try करता हूँ।\" EN: \"Look, I understand your concern, but we don't have a choice here\" ❌ \"देखिए, मैं आपकी चिंता समझता हूँ, परंतु हमारे पास यहाँ कोई विकल्प नहीं है\" ✅ \"देख, तेरी tension समझता हूँ, पर कोई चारा नहीं है\" Translate ONLY the given English text. Output ONLY the Hindi. No commentary. spaces.GPU duration Load Whisper on CPU. ZeroGPU moves it when @spaces.GPU fires. Load Qwen 2.5 7B in 4-bit. Called inside @spaces.GPU so device_map=\"auto\" lands on the A100. subprocess.run check capture_output float → [{\"timestamp\": (start, end), \"text\": \"...\"}] pipe return_timestamps chunk_length_s generate_kwargs tok.apply_chat_template tokenize add_generation_prompt to tok.decode skip_special_tokens edge_tts.Communicate hi-IN-MadhurNeural asyncio.run Stretch/squeeze audio to fit the target duration (pitch-preserved). max Build the dubbed audio track and merge it back onto the video. Uses pydub for clean overlay at exact timestamps. AudioSegment.silent frame_rate os.path.join base.export format gr.Progress pipe.model.to torch.device tempfile.mkdtemp prefix desc len enumerate join gr.Blocks title css theme gr.Markdown elem_classes btn.click fn inputs outputs __main__ demo.launch show_api print pipeline model torch_dtype device Qwen/Qwen2.5-7B-Instruct BitsAndBytesConfig load_in_4bit bnb_4bit_compute_dtype bnb_4bit_quant_type AutoTokenizer.from_pretrained AutoModelForCausalLM.from_pretrained quantization_config device_map r.stdout.strip chunks torch.no_grad model.generate max_new_tokens temperature do_sample top_p split comm.save min int dubbed_track.wav output.mp4 gr.Error cuda hi-IN-SwaraNeural translated.append log_lines.append # 🎙️ Apni Awaaz #### Dub English video into the Hindi people actually speak _No more \"मुझे यह कार्य संपन्न करना आवश्यक है\"_ — _just \"ये करना पड़ेगा यार\"_ gr.Row equal_height gr.Accordion open ⏳ Loading Whisper... automatic-speech-recognition ✅ Whisper loaded (CPU, will move to GPU at runtime) ⏳ Loading Qwen 2.5 7B... ✅ Qwen loaded ffmpeg -i -vn -acodec pcm_s16le -ar 16000 -ac 1 -y ffprobe -v quiet -show_entries format=duration -of csv=p=0 role content system user tok return_tensors -filter:a tts_path AudioSegment.from_file base.overlay position wav -c:v copy -map 0:v:0 1:a:0 -shortest Upload a video first! Male apni_ 🎵 Extracting audio… raw.wav Please keep clips under 3 minutes for now. 👂 Listening to English… Couldn't detect any speech. Try a clearer clip. timestamp 🎬 Stitching final video… Apni Awaaz gr.themes.Soft main-title subtitle gr.Column scale gr.Video label gr.Radio value gr.Button variant size gr.Textbox lines interactive show_copy_button How is this different from normal dubbing? Most Hindi dubs use **शुद्ध हिंदी** — overly formal, Sanskritized language that nobody actually speaks at home. Apni Awaaz translates into **everyday Hindustani** — the natural mix of Hindi, Urdu, and English that your family actually uses at the dinner table. | Official dub | Apni Awaaz | |---|---| | \"मुझे इस विषय पर विचार करने दीजिए\" | \"सोचने दे एक second\" | | \"यह अत्यंत मूल्यवान है\" | \"बहुत महँगा है यार\" | | \"कृपया मुझे अनुमति प्रदान करें\" | \"please, करने दे ना\" | openai/whisper-medium cpu nf4 auto language en resp.strip atempo= tts_ .mp3 tts_adj_ .wav start end hi [ s → s] 🇬🇧 🇮🇳 🎬 Dub it in apni bhasha! pt 🗣️ Dubbing segment / … Upload an English clip (< 3 min) Female Hindi voice primary lg Dubbed output Translation log (EN → HI) .4f ⚠️ overlay failed for segment at s: .1f", + "readme_len": 806, + "app_source_len": 14088, + "app_signals_len": 6219 + }, + { + "id": "build-small-hackathon/Backyard-Demo-Builder", + "title": "Backyard Demo Builder", + "summary": "Build tiny real-person demos before scaling custom software.", + "tags": [ + "agents", + "ai-agents", + "backyard-ai", + "build-small-hackathon", + "demo-builder", + "gradio", + "real-estate", + "small-language-model" + ], + "models": [ + "unsloth/gemma-4-12B-it-qat-GGUF", + "Qwen/Qwen2.5-7B-Instruct", + "nvidia/Nemotron-3.5-Content-Safety" + ], + "datasets": [], + "sdk": "gradio", + "license": "", + "likes": 0, + "url": "https://huggingface.co/spaces/build-small-hackathon/Backyard-Demo-Builder", + "app_file": "app.py", + "readme_raw": "---\ntitle: Backyard Demo Builder\nemoji: 🏡\ncolorFrom: gray\ncolorTo: green\nsdk: gradio\nsdk_version: \"5.49.1\"\npython_version: \"3.12.12\"\napp_file: app.py\nshort_description: Build tiny real-person demos before scaling custom software.\nmodels:\n - google/gemma-4-E4B-it\n - Qwen/Qwen2.5-7B-Instruct\n - nvidia/Nemotron-3.5-Content-Safety\ndatasets: []\ntags:\n - build-small-hackathon\n - backyard-ai\n - gradio\n - agents\n - small-language-model\n - demo-builder\n - real-estate\n - ai-agents\npinned: false\n---\n\n# Backyard Demo Builder\n\n## Chapter 1: Backyard AI\n\n*Build Small Hackathon 2026 — Chapter 1 Submission*\n\n`agent-swarm-workbench` now presents as **Backyard Demo Builder**: a Gradio app\nthat turns one real person's workflow into a small runnable demo package before\nanyone pays to build full software.\n\nFirst backyard case: my mom, a real-estate agent. She needs a cheap way to test\na customer follow-up reminder workflow before committing time and money to a\nfull app.\n\n---\n\n## Watch the Demo Builder Work\n\n```\nYou: \"Build a real-estate follow-up CRM demo for my mom.\"\nBuilder: Generates a Gradio mini-app, handoff spec, field notes, and checks\nResult: app.py, README.md, handoff_spec.md, field_notes.md\nMom: Tests the workflow, then we scrap or scale.\n```\n\nEvery Run produces a **downloadable demo package** and Validation report: files\nyou can inspect, unzip, run, and test with the real person.\n\n---\n\n## Build Small Hackathon — Submission Notes\n\n| Requirement | How We Meet It |\n|---|---|\n| **Small model (≤ 32B)** | Provider catalog fetches models at runtime and only allows models whose ID/name proves ≤32B |\n| **Gradio app** | Custom dark-themed Gradio UI mounted on FastAPI |\n| **HF Space** | `app.py` + `requirements.txt` — one-command deploy |\n| **Demo video** | *(placeholder — [link to demo])* |\n| **Social post** | *(placeholder — [link to post])* |\n\n### Bonus Badges Claimed\n\n| Badge | Why |\n|---|---|\n| **🎨 Off-Brand** | Fully custom CSS dark theme — Archivo + IBM Plex Mono, acid green CTAs, paper/ink palette, CSS grid layout, status chips. Not a default Gradio component in sight. |\n| **📡 Sharing is Caring** | Agent traces and swarm reasoning are surfaced in the Events panel. We'll publish a trace on the Hub. |\n| **📓 Field Notes** | Generated demo packages include `field_notes.md`; this repo also documents the architecture and decisions. |\n\n---\n\n## Why This Belongs in Backyard AI\n\nThis solves a real problem for someone I know.\n\n- **Specific person** — my mom, a real-estate agent.\n- **Specific pain** — follow-up reminders and customer-care demos are useful, but custom app dev is slow and risky.\n- **Honest small-model fit** — a ≤32B model drafts the demo and handoff spec; rules handle the reminder logic.\n- **Actually testable** — the generated package includes field notes and feedback questions for the real user.\n\n---\n\n## How It Works Under the Hood\n\n```\n┌─────────────────────────────────────────────────────┐\n│ Gradio UI / HTTP API │\n├─────────────────────────────────────────────────────┤\n│ RunFlow — lifecycle conductor │\n│ ┌──────────┐ ┌────────────┐ ┌────────────────┐ │\n│ │ Swarm │ │ Codebase │ │ Validator │ │\n│ │ Runtime │→│ Archive │→│ Graph │ │\n│ │ │ │ Store │ │ │ │\n│ │ Planner │ │ (local/ │ │ Sandbox checks │ │\n│ │ Coder │ │ Redis) │ │ Rubric review │ │\n│ │ Reviewer │ │ │ │ Stagehand │ │\n│ │ Tester │ │ │ │ (Browserbase) │ │\n│ └──────────┘ └────────────┘ └────────────────┘ │\n│ EventBus → SSE stream to UI │\n└─────────────────────────────────────────────────────┘\n```\n\n### The Swarm\n\n- **Coordinator** reads the prompt, plans tasks, delegates to subagents\n- **Planner** breaks down the prompt into implementable units\n- **Coder** writes the actual code files\n- **Reviewer** checks code quality and correctness\n- **Test-runner** runs the user's tests and retries up to 3x on failure\n- **Validator-prep** generates validation checks from user criteria\n\n### The Validator\n\nAfter the swarm finishes, a LangGraph Validator workflow:\n1. Restores the codebase into a clean sandbox\n2. Runs user-provided tests\n3. Executes LLM-based rubric review\n4. (Optional) Runs Browserbase/Stagehand visual checks\n5. Produces a pass/fail Validation Report\n\n### The Sandbox\n\nAll agent work happens inside isolated sandbox workspaces:\n- **Local** (for dev/smoke tests)\n- **Docker** (container-based)\n- **Daytona** (cloud sandboxes)\n\n---\n\n## Run It\n\n```bash\ngit clone https://github.com/Kiy-K/agent-swarm-workbench.git\ncd agent-swarm-workbench\ncp .env.example .env\n# Optional: add server fallback keys. Users can also paste their own key in the UI.\npython -m uvicorn app:app --host 0.0.0.0 --port 8790\n```\n\nOpen http://localhost:8790, type a prompt, choose a provider, fetch models with your API key, then click Start Run.\n\nModel selection:\n- Model lists are fetched from the selected provider/API endpoint at runtime.\n- UI only offers fetched models whose ID/name proves `<=32B` parameters.\n- Unknown-size models are shown in the catalog response as `unknown_parameters` but are not selectable.\n- User API keys and fetched catalogs live only in process memory. They are not persisted, not stored in Redis/DB, and not kept in Gradio state. Click \"Refresh models\" to clear and refetch that provider cache.\n\nFor Hugging Face Spaces:\n```bash\npython app.py\n```\n\n## Test\n\n```bash\npython scripts/task.py verify # required completion gate: tests + harness\npython scripts/task.py test # 90 tests, all passing\npython scripts/task.py harness -- --prompt \"Build a tiny CLI\" --test \"test -f README.md\"\npython scripts/task.py smoke # Local agent session smoke check\npython scripts/task.py validator-smoke # Validator end-to-end\n```\n\n### Agent Harness\n\nThe harness is the fast way to exercise the Run lifecycle without waiting on a\nfull demo session:\n\n```bash\npython scripts/task.py verify\npython scripts/task.py harness -- --prompt \"Build a tiny CLI\" --output-dir /tmp/harness\npython scripts/task.py harness -- --mode live --prompt \"Build a tiny CLI\"\n```\n\n`verify` is the required completion gate for coding agents. It runs the Python\nsuite, then runs the default scripted Agent Swarm Harness so changes are checked\nagainst the same Run -> SwarmRuntime -> Archive -> Validator path that the app\nuses.\n\nModes:\n\n| Mode | Purpose |\n|---|---|\n| `swarm` | Default. Runs `RunFlow -> SwarmRuntime -> Archive -> Validator` with a scripted local DeepAgent-compatible session. |\n| `live` | Uses the real `create_session()` DeepAgents path and the configured sandbox provider. |\n\n## Environment\n\n| Var | Purpose |\n|---|---|\n| `DEEPAGENT_MODEL_PROVIDER` | Server fallback model provider: `openrouter`, `gemini`, `nebius`, `huggingface`, `custom`, or `local` |\n| `DEEPAGENT_MODEL` | Server fallback model ID. Must prove `<=32B` when selected per Run. |\n| `DEEPAGENT_MODEL_BASE_URL` | Optional OpenAI-compatible `/v1` endpoint |\n| `OPENROUTER_API_KEY` / `GEMINI_API_KEY` / `NEBIUS_API_KEY` / `HF_TOKEN` | Optional server fallback keys for trusted server/CLI runs only. The public Gradio UI requires the user to enter their own hosted-provider key and does not use these by default. |\n| `DEEPAGENT_SANDBOX_PROVIDER` | `local`, `docker`, or `daytona` |\n| `BROWSERBASE_API_KEY` | Optional — visual validation via Stagehand |\n| `UPSTASH_REDIS_REST_URL` / `TOKEN` | Optional — persistent runs & archives |\n\n---\n\n## Stack\n\n- **Python 3.11+** / **FastAPI** / **Gradio 6**\n- **LangChain DeepAgents** — multi-subagent swarm runtime\n- **Provider adapters** — OpenRouter, Gemini, Nebius, Hugging Face Router, custom OpenAI-compatible, local OpenAI-compatible\n- **LangGraph** — Validator workflow\n- **QuickJS code interpreter** — in-sandbox code execution middleware\n- **Browserbase + Stagehand** — visual web validation (optional)\n\n## Architecture\n\n```\narena/\n agent.py — Swarm factory, model, subagents, sandbox backend\n backyard_templates.py — Backyard demo template registry\n model_provider.py — Chat model factory for provider selection\n model_catalog.py — Provider model list adapters and TTL cache\n swarm_runtime.py — Active Run registration and Swarm session leasing\n swarm_session.py — Prompt seeding, agent turns, test retries, snapshots\n sandbox_lease.py — Idle TTL, touch, and close behavior for sandboxes\n run_flow.py — Run lifecycle: create → execute → archive → validate\n run_journal.py — Run mutation journal: status, tasks, events, timestamps\n run_store.py — Run persistence (InMemory / Redis via Upstash)\n codebase_handoff.py — Workspace snapshot and Validator sandbox restore\n codebase_archive.py — Archive persistence (local / Redis)\n validator_plan.py — Typed Validator plan from user tests/checks\n validator_graph.py — LangGraph Validator workflow\n thread_inspector.py — Manual Thread/session debug surface\n gradio_app.py — Thin Gradio component wiring\n gradio_presenter.py — Run output formatting for Gradio\n gradio_markup.py — Static Gradio shell markup\n api.py — FastAPI REST + SSE endpoints\n event_bus.py — In-process event streaming\n browserbase_tools.py — Web fetch/search tools for the swarm\n stagehand_validator.py — Browserbase visual validation\n docker_backend.py — Docker sandbox provider\n skill_catalog.py — Bundled DeepAgents skills discovery\ntests_python/ — Python test suite (integration + unit)\n```\n\n---\n\n*Built with a sub-32B model for the Build Small Hackathon, June 2026.*\n", + "readme_body": "# Backyard Demo Builder\n\n## Chapter 1: Backyard AI\n\n*Build Small Hackathon 2026 — Chapter 1 Submission*\n\n`agent-swarm-workbench` now presents as **Backyard Demo Builder**: a Gradio app\nthat turns one real person's workflow into a small runnable demo package before\nanyone pays to build full software.\n\nFirst backyard case: my mom, a real-estate agent. She needs a cheap way to test\na customer follow-up reminder workflow before committing time and money to a\nfull app.\n\n---\n\n## Watch the Demo Builder Work\n\n```\nYou: \"Build a real-estate follow-up CRM demo for my mom.\"\nBuilder: Generates a Gradio mini-app, handoff spec, field notes, and checks\nResult: app.py, README.md, handoff_spec.md, field_notes.md\nMom: Tests the workflow, then we scrap or scale.\n```\n\nEvery Run produces a **downloadable demo package** and Validation report: files\nyou can inspect, unzip, run, and test with the real person.\n\n---\n\n## Build Small Hackathon — Submission Notes\n\n| Requirement | How We Meet It |\n|---|---|\n| **Small model (≤ 32B)** | Provider catalog fetches models at runtime and only allows models whose ID/name proves ≤32B |\n| **Gradio app** | Custom dark-themed Gradio UI mounted on FastAPI |\n| **HF Space** | `app.py` + `requirements.txt` — one-command deploy |\n| **Demo video** | *(placeholder — [link to demo])* |\n| **Social post** | *(placeholder — [link to post])* |\n\n### Bonus Badges Claimed\n\n| Badge | Why |\n|---|---|\n| **🎨 Off-Brand** | Fully custom CSS dark theme — Archivo + IBM Plex Mono, acid green CTAs, paper/ink palette, CSS grid layout, status chips. Not a default Gradio component in sight. |\n| **📡 Sharing is Caring** | Agent traces and swarm reasoning are surfaced in the Events panel. We'll publish a trace on the Hub. |\n| **📓 Field Notes** | Generated demo packages include `field_notes.md`; this repo also documents the architecture and decisions. |\n\n---\n\n## Why This Belongs in Backyard AI\n\nThis solves a real problem for someone I know.\n\n- **Specific person** — my mom, a real-estate agent.\n- **Specific pain** — follow-up reminders and customer-care demos are useful, but custom app dev is slow and risky.\n- **Honest small-model fit** — a ≤32B model drafts the demo and handoff spec; rules handle the reminder logic.\n- **Actually testable** — the generated package includes field notes and feedback questions for the real user.\n\n---\n\n## How It Works Under the Hood\n\n```\n┌─────────────────────────────────────────────────────┐\n│ Gradio UI / HTTP API │\n├─────────────────────────────────────────────────────┤\n│ RunFlow — lifecycle conductor │\n│ ┌──────────┐ ┌────────────┐ ┌────────────────┐ │\n│ │ Swarm │ │ Codebase │ │ Validator │ │\n│ │ Runtime │→│ Archive │→│ Graph │ │\n│ │ │ │ Store │ │ │ │\n│ │ Planner │ │ (local/ │ │ Sandbox checks │ │\n│ │ Coder │ │ Redis) │ │ Rubric review │ │\n│ │ Reviewer │ │ │ │ Stagehand │ │\n│ │ Tester │ │ │ │ (Browserbase) │ │\n│ └──────────┘ └────────────┘ └────────────────┘ │\n│ EventBus → SSE stream to UI │\n└─────────────────────────────────────────────────────┘\n```\n\n### The Swarm\n\n- **Coordinator** reads the prompt, plans tasks, delegates to subagents\n- **Planner** breaks down the prompt into implementable units\n- **Coder** writes the actual code files\n- **Reviewer** checks code quality and correctness\n- **Test-runner** runs the user's tests and retries up to 3x on failure\n- **Validator-prep** generates validation checks from user criteria\n\n### The Validator\n\nAfter the swarm finishes, a LangGraph Validator workflow:\n1. Restores the codebase into a clean sandbox\n2. Runs user-provided tests\n3. Executes LLM-based rubric review\n4. (Optional) Runs Browserbase/Stagehand visual checks\n5. Produces a pass/fail Validation Report\n\n### The Sandbox\n\nAll agent work happens inside isolated sandbox workspaces:\n- **Local** (for dev/smoke tests)\n- **Docker** (container-based)\n- **Daytona** (cloud sandboxes)\n\n---\n\n## Run It\n\n```bash\ngit clone https://github.com/Kiy-K/agent-swarm-workbench.git\ncd agent-swarm-workbench\ncp .env.example .env\n# Optional: add server fallback keys. Users can also paste their own key in the UI.\npython -m uvicorn app:app --host 0.0.0.0 --port 8790\n```\n\nOpen http://localhost:8790, type a prompt, choose a provider, fetch models with your API key, then click Start Run.\n\nModel selection:\n- Model lists are fetched from the selected provider/API endpoint at runtime.\n- UI only offers fetched models whose ID/name proves `<=32B` parameters.\n- Unknown-size models are shown in the catalog response as `unknown_parameters` but are not selectable.\n- User API keys and fetched catalogs live only in process memory. They are not persisted, not stored in Redis/DB, and not kept in Gradio state. Click \"Refresh models\" to clear and refetch that provider cache.\n\nFor Hugging Face Spaces:\n```bash\npython app.py\n```\n\n## Test\n\n```bash\npython scripts/task.py verify # required completion gate: tests + harness\npython scripts/task.py test # 90 tests, all passing\npython scripts/task.py harness -- --prompt \"Build a tiny CLI\" --test \"test -f README.md\"\npython scripts/task.py smoke # Local agent session smoke check\npython scripts/task.py validator-smoke # Validator end-to-end\n```\n\n### Agent Harness\n\nThe harness is the fast way to exercise the Run lifecycle without waiting on a\nfull demo session:\n\n```bash\npython scripts/task.py verify\npython scripts/task.py harness -- --prompt \"Build a tiny CLI\" --output-dir /tmp/harness\npython scripts/task.py harness -- --mode live --prompt \"Build a tiny CLI\"\n```\n\n`verify` is the required completion gate for coding agents. It runs the Python\nsuite, then runs the default scripted Agent Swarm Harness so changes are checked\nagainst the same Run -> SwarmRuntime -> Archive -> Validator path that the app\nuses.\n\nModes:\n\n| Mode | Purpose |\n|---|---|\n| `swarm` | Default. Runs `RunFlow -> SwarmRuntime -> Archive -> Validator` with a scripted local DeepAgent-compatible session. |\n| `live` | Uses the real `create_session()` DeepAgents path and the configured sandbox provider. |\n\n## Environment\n\n| Var | Purpose |\n|---|---|\n| `DEEPAGENT_MODEL_PROVIDER` | Server fallback model provider: `openrouter`, `gemini`, `nebius`, `huggingface`, `custom`, or `local` |\n| `DEEPAGENT_MODEL` | Server fallback model ID. Must prove `<=32B` when selected per Run. |\n| `DEEPAGENT_MODEL_BASE_URL` | Optional OpenAI-compatible `/v1` endpoint |\n| `OPENROUTER_API_KEY` / `GEMINI_API_KEY` / `NEBIUS_API_KEY` / `HF_TOKEN` | Optional server fallback keys for trusted server/CLI runs only. The public Gradio UI requires the user to enter their own hosted-provider key and does not use these by default. |\n| `DEEPAGENT_SANDBOX_PROVIDER` | `local`, `docker`, or `daytona` |\n| `BROWSERBASE_API_KEY` | Optional — visual validation via Stagehand |\n| `UPSTASH_REDIS_REST_URL` / `TOKEN` | Optional — persistent runs & archives |\n\n---\n\n## Stack\n\n- **Python 3.11+** / **FastAPI** / **Gradio 6**\n- **LangChain DeepAgents** — multi-subagent swarm runtime\n- **Provider adapters** — OpenRouter, Gemini, Nebius, Hugging Face Router, custom OpenAI-compatible, local OpenAI-compatible\n- **LangGraph** — Validator workflow\n- **QuickJS code interpreter** — in-sandbox code execution middleware\n- **Browserbase + Stagehand** — visual web validation (optional)\n\n## Architecture\n\n```\narena/\n agent.py — Swarm factory, model, subagents, sandbox backend\n backyard_templates.py — Backyard demo template registry\n model_provider.py — Chat model factory for provider selection\n model_catalog.py — Provider model list adapters and TTL cache\n swarm_runtime.py — Active Run registration and Swarm session leasing\n swarm_session.py — Prompt seeding, agent turns, test retries, snapshots\n sandbox_lease.py — Idle TTL, touch, and close behavior for sandboxes\n run_flow.py — Run lifecycle: create → execute → archive → validate\n run_journal.py — Run mutation journal: status, tasks, events, timestamps\n run_store.py — Run persistence (InMemory / Redis via Upstash)\n codebase_handoff.py — Workspace snapshot and Validator sandbox restore\n codebase_archive.py — Archive persistence (local / Redis)\n validator_plan.py — Typed Validator plan from user tests/checks\n validator_graph.py — LangGraph Validator workflow\n thread_inspector.py — Manual Thread/session debug surface\n gradio_app.py — Thin Gradio component wiring\n gradio_presenter.py — Run output formatting for Gradio\n gradio_markup.py — Static Gradio shell markup\n api.py — FastAPI REST + SSE endpoints\n event_bus.py — In-process event streaming\n browserbase_tools.py — Web fetch/search tools for the swarm\n stagehand_validator.py — Browserbase visual validation\n docker_backend.py — Docker sandbox provider\n skill_catalog.py — Bundled DeepAgents skills discovery\ntests_python/ — Python test suite (integration + unit)\n```\n\n---\n\n*Built with a sub-32B model for the Build Small Hackathon, June 2026.*", + "readme_frontmatter": { + "title": "Backyard Demo Builder", + "emoji": "🏡", + "colorFrom": "gray", + "colorTo": "green", + "sdk": "gradio", + "sdk_version": "5.49.1", + "python_version": "3.12.12", + "app_file": "app.py", + "short_description": "Build tiny real-person demos before scaling custom software.", + "models": "", + "datasets": "[]", + "tags": "", + "pinned": "false" + }, + "app_source": "\"\"\"Unified ASGI entrypoint for API and Gradio UI.\"\"\"\n\nfrom __future__ import annotations\n\nimport os\n\nimport gradio as gr\nimport uvicorn\n\ntry:\n import spaces\nexcept Exception:\n class _SpacesShim:\n def GPU(self, fn=None, **kwargs):\n del kwargs\n\n def decorator(inner):\n return inner\n\n return decorator(fn) if fn else decorator\n\n spaces = _SpacesShim()\n\n\nfrom arena.api import app as fastapi_app\nfrom arena.api import service\nfrom arena.gradio_app import RunOutputs, build_app, create_run_gpu as _create_run_gpu\n\n\n@spaces.GPU(duration=120)\ndef create_run_gpu(\n prompt: str,\n criteria_text: str,\n user_tests_text: str,\n provider: str = \"openrouter\",\n model: str = \"\",\n api_key: str = \"\",\n base_url: str = \"\",\n) -> RunOutputs:\n return _create_run_gpu(\n prompt,\n criteria_text,\n user_tests_text,\n provider,\n model,\n api_key,\n base_url,\n )\n\n\n@spaces.GPU\ndef zerogpu_ready_marker() -> str:\n return \"ready\"\n\n\ndemo = build_app(service, create_run_handler=create_run_gpu)\n\n\ndef create_app():\n \"\"\"Create one FastAPI ASGI app with Gradio mounted at the root.\"\"\"\n\n return gr.mount_gradio_app(fastapi_app, demo, path=\"/\")\n\n\napp = create_app()\n\n\ndef server_config() -> dict[str, int | str]:\n host = os.getenv(\"GRADIO_SERVER_NAME\", os.getenv(\"HOST\", \"0.0.0.0\"))\n port = int(os.getenv(\"GRADIO_SERVER_PORT\") or os.getenv(\"PORT\") or \"7860\")\n return {\"host\": host, \"port\": port}\n\n\ndef gradio_launch_config() -> dict[str, bool | int | str]:\n config = server_config()\n port = int(os.getenv(\"GRADIO_SERVER_PORT\") or os.getenv(\"PORT\") or \"7860\")\n return {\"server_name\": str(config[\"host\"]), \"server_port\": port, \"ssr_mode\": False}\n\n\ndef should_launch_gradio_space() -> bool:\n return bool(os.getenv(\"SPACE_ID\")) and os.getenv(\"FORCE_SELF_LAUNCH\") != \"1\"\n\n\ndef should_self_launch() -> bool:\n if os.getenv(\"FORCE_SELF_LAUNCH\") == \"1\":\n return True\n return not should_launch_gradio_space()\n\n\ndef _space_sdk() -> str:\n return os.getenv(\"SPACE_SDK\", os.getenv(\"HF_SPACE_SDK\", \"\")).strip().lower()\n\n\ndef launch_gradio_space() -> None:\n demo.queue(default_concurrency_limit=1).launch(**gradio_launch_config())\n\n\nif __name__ == \"__main__\":\n if should_launch_gradio_space():\n launch_gradio_space()\n elif should_self_launch():\n uvicorn.run(app, **server_config())\n", + "app_signals": "create_app zerogpu_ready_marker server_config gradio_launch_config should_launch_gradio_space should_self_launch _space_sdk Unified ASGI entrypoint for API and Gradio UI. build_app Create one FastAPI ASGI app with Gradio mounted at the root. gr.mount_gradio_app path _SpacesShim ready os.getenv int lower __main__ GPU self fn GRADIO_SERVER_NAME host port server_name server_port ssr_mode str bool 1 demo.launch / decorator inner HOST 0.0.0.0 7860 SPACE_ID FORCE_SELF_LAUNCH strip uvicorn.run GRADIO_SERVER_PORT PORT 7861 SPACE_SDK HF_SPACE_SDK", + "readme_len": 9119, + "app_source_len": 2436, + "app_signals_len": 543 + }, + { + "id": "build-small-hackathon/backyard-dudu-destroyer", + "title": "Backyard Dudu Destroyer", + "summary": "A gradio interface for starting VLA and policy", + "tags": [ + "gradio", + "region:us" + ], + "models": [], + "datasets": [], + "sdk": "gradio", + "license": "apache-2.0", + "likes": 0, + "url": "https://huggingface.co/spaces/build-small-hackathon/backyard-dudu-destroyer", + "app_file": "app.py", + "readme_raw": "---\ntitle: Backyard Dudu Destroyer\nemoji: 🌖\ncolorFrom: gray\ncolorTo: red\nsdk: gradio\nsdk_version: 6.16.0\npython_version: '3.13'\napp_file: app.py\npinned: false\nlicense: apache-2.0\nshort_description: A gradio interface for starting VLA and policy\n---\n\nCheck out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference\n", + "readme_body": "Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference", + "readme_frontmatter": { + "title": "Backyard Dudu Destroyer", + "emoji": "🌖", + "colorFrom": "gray", + "colorTo": "red", + "sdk": "gradio", + "sdk_version": "6.16.0", + "python_version": "3.13", + "app_file": "app.py", + "pinned": "false", + "license": "apache-2.0", + "short_description": "A gradio interface for starting VLA and policy" + }, + "app_source": "import gradio as gr\n\ndef greet(name):\n return \"Hello \" + name + \"!!\"\n\ndemo = gr.Interface(fn=greet, inputs=\"text\", outputs=\"text\")\ndemo.launch()\n", + "app_signals": "greet name gr.Interface fn inputs outputs demo.launch !! text Hello", + "readme_len": 96, + "app_source_len": 148, + "app_signals_len": 67 + }, + { + "id": "build-small-hackathon/backyard-raccoon-deterrent", + "title": "Backyard Raccoon Deterrent", + "summary": "Edge-AI raccoon deterrent. Tiny YOLO, fully offline.", + "tags": [ + "build-small-hackathon", + "edge-ai", + "object-detection", + "raccoon", + "yolov8" + ], + "models": [], + "datasets": [], + "sdk": "gradio", + "license": "mit", + "likes": 0, + "url": "https://huggingface.co/spaces/build-small-hackathon/backyard-raccoon-deterrent", + "app_file": "app.py", + "readme_raw": "---\ntitle: Backyard Raccoon Deterrent\nemoji: 🦝\ncolorFrom: green\ncolorTo: gray\nsdk: gradio\nsdk_version: 6.15.2\napp_file: app.py\npinned: false\nlicense: mit\nshort_description: Edge-AI raccoon deterrent. Tiny YOLO, fully offline.\ntags:\n - object-detection\n - yolov8\n - raccoon\n - edge-ai\n - build-small-hackathon\n---\n\n# 🦝 Backyard Raccoon Deterrent\n\nRaccoons were raiding my backyard every night, so I built an AI that fights\nback. A 3-million-parameter YOLO spots them in the dark and scares them off\nwith a dog bark and a floodlight. No cloud, no traps, and nothing gets hurt.\n\nThis Space is the live detector from a real system that has been defending my\nactual backyard since April. Upload a photo (daytime or IR night frame) and the\nmodel draws the boxes and tells you what the physical deterrent would do.\n\n## 📼 Submission\n\n**Demo video** (82s):\n\n\n\n**Social post**: https://x.com/0xartclub/status/2063258977895391508\n\n**Track**: 🏡 Backyard AI. **Bonus quests**: 🔌 Off the Grid (zero cloud APIs), 🎯 Well-Tuned (fine-tuned published model)\n\n## The story\n\nA Ring camera sees raccoons just fine, but a camera can't do anything about\nthem. The usual answer is \"nuisance wildlife\" control, and that mostly means\nkilling: U.S. federal wildlife control killed over 375,000 native animals in\n2023 ([USDA APHIS Program Data Reports](https://www.aphis.usda.gov/wildlife-services/publications/pdr)).\nThe same reports show the humane approach works, since the same agency\ndisperses about 20 million animals a year unharmed.\n\nThis project automates the humane version:\n\n```\nRing camera -> motion event -> YOLOv8n v1.4 (24 ms) -> 🔊 bark + 💡 lights\n |\n fully offline:\n Raspberry Pi + Mac Mini, $0 cloud\n```\n\nThe raccoon leaves, nothing gets hurt, and the whole thing runs on hardware\nthat was already in the house. About 5 to 8 seconds from first motion to\ndeterrent.\n\n## Why \"Build Small\" fits\n\n- The model is tiny: YOLOv8n, about 3M parameters and 12 MB of ONNX. The\n hackathon ceiling is 32B. This is four orders of magnitude under it.\n- Small actually wins here. A 2.6-second cloud VLM round trip misses a moving\n raccoon. A 24 ms local model catches it mid-stride. I tried the big-model\n route first (Gemma 3 12B as a scene describer) and ended up retiring it from\n the chain because the small specialist beat it.\n- The training data is small too: 564 hand-labeled IR frames from the exact\n yard it defends. No internet-scale dataset, just the right data.\n\n## The model\n\n| | |\n|---|---|\n| Architecture | YOLOv8n (nano) |\n| Version | v1.4, trained on 564 hand-labeled IR night frames, 97 new boxes from recent encounters |\n| Precision / Recall | 93.5% / 85.9% (held-out val, harder split) |\n| mAP50 | 92.8% |\n| Inference | ~24 ms p50 (ONNX Runtime, Apple Silicon) |\n| Field record | First version to clear all three real encounters that earlier models missed |\n\nTraining pipeline: Ring event video, ffmpeg frame extraction (first 15 s at\n1 Hz), Claude pre-classification, Label Studio bounding boxes, YOLOv8\nfine-tune, ONNX export. Every production miss becomes training data for the\nnext version, so the model learns from each raccoon that gets past it.\n\n## Try it\n\n1. Click an example below the app. These are real night-vision frames from the yard.\n2. Watch the verdict: \"🦝 Raccoon detected, BARK + LIGHTS would fire\" vs \"✅ All clear.\"\n3. Drag the confidence slider (production runs at 0.20) and watch the\n precision/recall trade-off live.\n4. Upload your own backyard photo, day or night.\n\n## The real-world deployment\n\n**60+ nights in production. Every confirmed encounter answered in 5 to 8\nseconds. Zero animals harmed.**\n\n![Raccoon-window motion events per night across 60 nights of production](https://huggingface.co/spaces/build-small-hackathon/backyard-raccoon-deterrent/resolve/main/activity-chart.png)\n\nRaccoon activity swings wildly night to night (peak: 33 motion events in one\nnight). The system logged and processed every one of them, and every miss\nbecame training data for the next model version. That feedback loop is why the\ndetector is on v1.4 after 60 nights.\n\nThis exact model is the primary detector in a Homebridge accessory that runs\nnightly (21:00 to 05:30) on a Raspberry Pi:\n\n- Eyes: Ring cameras (motion events plus multi-frame snapshot capture)\n- Brain: this YOLOv8n on a Mac Mini (FastAPI + ONNX Runtime, runs as a\n LaunchDaemon so it survives reboots), with Claude Haiku as a second-opinion\n safety net\n- Voice: dog-bark WAVs over a Bluetooth speaker (BlueALSA)\n- Muscle: TP-Link Kasa smart lights\n- Fast path: every frame is evaluated at capture, and the first hit fires the\n deterrent in 5 to 8 seconds instead of waiting for a full batch\n\n## Run locally\n\n```bash\npip install -r requirements.txt\npython app.py\n```\n\nWeights ship in this repo (`raccoon-yolov8n-v1.4.onnx`, MIT licensed), or set\n`MODEL_PATH` to your own export.\n\n## Links\n\n- Source code: https://github.com/sappkevin/backyard-raccoon-deterrent\n- Built by [@ksapp](https://huggingface.co/ksapp) for the Gradio Build Small hackathon, Backyard AI track\n", + "readme_body": "# 🦝 Backyard Raccoon Deterrent\n\nRaccoons were raiding my backyard every night, so I built an AI that fights\nback. A 3-million-parameter YOLO spots them in the dark and scares them off\nwith a dog bark and a floodlight. No cloud, no traps, and nothing gets hurt.\n\nThis Space is the live detector from a real system that has been defending my\nactual backyard since April. Upload a photo (daytime or IR night frame) and the\nmodel draws the boxes and tells you what the physical deterrent would do.\n\n## 📼 Submission\n\n**Demo video** (82s):\n\n\n\n**Social post**: https://x.com/0xartclub/status/2063258977895391508\n\n**Track**: 🏡 Backyard AI. **Bonus quests**: 🔌 Off the Grid (zero cloud APIs), 🎯 Well-Tuned (fine-tuned published model)\n\n## The story\n\nA Ring camera sees raccoons just fine, but a camera can't do anything about\nthem. The usual answer is \"nuisance wildlife\" control, and that mostly means\nkilling: U.S. federal wildlife control killed over 375,000 native animals in\n2023 ([USDA APHIS Program Data Reports](https://www.aphis.usda.gov/wildlife-services/publications/pdr)).\nThe same reports show the humane approach works, since the same agency\ndisperses about 20 million animals a year unharmed.\n\nThis project automates the humane version:\n\n```\nRing camera -> motion event -> YOLOv8n v1.4 (24 ms) -> 🔊 bark + 💡 lights\n |\n fully offline:\n Raspberry Pi + Mac Mini, $0 cloud\n```\n\nThe raccoon leaves, nothing gets hurt, and the whole thing runs on hardware\nthat was already in the house. About 5 to 8 seconds from first motion to\ndeterrent.\n\n## Why \"Build Small\" fits\n\n- The model is tiny: YOLOv8n, about 3M parameters and 12 MB of ONNX. The\n hackathon ceiling is 32B. This is four orders of magnitude under it.\n- Small actually wins here. A 2.6-second cloud VLM round trip misses a moving\n raccoon. A 24 ms local model catches it mid-stride. I tried the big-model\n route first (Gemma 3 12B as a scene describer) and ended up retiring it from\n the chain because the small specialist beat it.\n- The training data is small too: 564 hand-labeled IR frames from the exact\n yard it defends. No internet-scale dataset, just the right data.\n\n## The model\n\n| | |\n|---|---|\n| Architecture | YOLOv8n (nano) |\n| Version | v1.4, trained on 564 hand-labeled IR night frames, 97 new boxes from recent encounters |\n| Precision / Recall | 93.5% / 85.9% (held-out val, harder split) |\n| mAP50 | 92.8% |\n| Inference | ~24 ms p50 (ONNX Runtime, Apple Silicon) |\n| Field record | First version to clear all three real encounters that earlier models missed |\n\nTraining pipeline: Ring event video, ffmpeg frame extraction (first 15 s at\n1 Hz), Claude pre-classification, Label Studio bounding boxes, YOLOv8\nfine-tune, ONNX export. Every production miss becomes training data for the\nnext version, so the model learns from each raccoon that gets past it.\n\n## Try it\n\n1. Click an example below the app. These are real night-vision frames from the yard.\n2. Watch the verdict: \"🦝 Raccoon detected, BARK + LIGHTS would fire\" vs \"✅ All clear.\"\n3. Drag the confidence slider (production runs at 0.20) and watch the\n precision/recall trade-off live.\n4. Upload your own backyard photo, day or night.\n\n## The real-world deployment\n\n**60+ nights in production. Every confirmed encounter answered in 5 to 8\nseconds. Zero animals harmed.**\n\n![Raccoon-window motion events per night across 60 nights of production](https://huggingface.co/spaces/build-small-hackathon/backyard-raccoon-deterrent/resolve/main/activity-chart.png)\n\nRaccoon activity swings wildly night to night (peak: 33 motion events in one\nnight). The system logged and processed every one of them, and every miss\nbecame training data for the next model version. That feedback loop is why the\ndetector is on v1.4 after 60 nights.\n\nThis exact model is the primary detector in a Homebridge accessory that runs\nnightly (21:00 to 05:30) on a Raspberry Pi:\n\n- Eyes: Ring cameras (motion events plus multi-frame snapshot capture)\n- Brain: this YOLOv8n on a Mac Mini (FastAPI + ONNX Runtime, runs as a\n LaunchDaemon so it survives reboots), with Claude Haiku as a second-opinion\n safety net\n- Voice: dog-bark WAVs over a Bluetooth speaker (BlueALSA)\n- Muscle: TP-Link Kasa smart lights\n- Fast path: every frame is evaluated at capture, and the first hit fires the\n deterrent in 5 to 8 seconds instead of waiting for a full batch\n\n## Run locally\n\n```bash\npip install -r requirements.txt\npython app.py\n```\n\nWeights ship in this repo (`raccoon-yolov8n-v1.4.onnx`, MIT licensed), or set\n`MODEL_PATH` to your own export.\n\n## Links\n\n- Source code: https://github.com/sappkevin/backyard-raccoon-deterrent\n- Built by [@ksapp](https://huggingface.co/ksapp) for the Gradio Build Small hackathon, Backyard AI track", + "readme_frontmatter": { + "title": "Backyard Raccoon Deterrent", + "emoji": "🦝", + "colorFrom": "green", + "colorTo": "gray", + "sdk": "gradio", + "sdk_version": "6.15.2", + "app_file": "app.py", + "pinned": "false", + "license": "mit", + "short_description": "Edge-AI raccoon deterrent. Tiny YOLO, fully offline.", + "tags": "" + }, + "app_source": "\"\"\"Backyard Raccoon Deterrent — Gradio Space.\n\nFine-tuned YOLOv8n raccoon detector, the vision component of a real Ring-camera\ndeterrent. Upload a backyard photo (daytime or IR night frame) and the model\ndraws boxes, lists detections, and tells you what the deterrent would do.\n\nRuns fully offline — no cloud APIs.\n\"\"\"\n\nimport os\n\nimport gradio as gr\nfrom ultralytics import YOLO\n\n# Weights ship in the repo; override with a HF Hub path via env if you prefer.\nMODEL_PATH = os.environ.get(\"MODEL_PATH\", \"raccoon-yolov8n-v1.4.onnx\")\nDEFAULT_CONF = 0.20 # matches the production deterrent's localYoloConfidenceThreshold\n\nmodel = YOLO(MODEL_PATH)\n\n\ndef detect(image, conf):\n \"\"\"Run detection and return (annotated image, table rows, deterrent verdict).\"\"\"\n if image is None:\n return None, [], \"Upload a frame to begin.\"\n\n results = model.predict(image, conf=conf, verbose=False)[0]\n\n boxes, rows = [], []\n for b in results.boxes:\n x1, y1, x2, y2 = b.xyxy[0].tolist()\n label = model.names[int(b.cls)]\n score = float(b.conf)\n boxes.append(((int(x1), int(y1), int(x2), int(y2)), f\"{label} {score:.2f}\"))\n rows.append([label, round(score, 2)])\n\n raccoon = any(label == \"raccoon\" and score >= conf for label, score in rows)\n if raccoon:\n top = max((s for l, s in rows if l == \"raccoon\"), default=0.0)\n verdict = f\"🦝 Raccoon detected ({top:.2f}) → BARK + LIGHTS would fire\"\n elif rows:\n verdict = \"🐾 Animal seen, but no raccoon — deterrent stays quiet\"\n else:\n verdict = \"✅ All clear — nothing detected\"\n\n return (image, boxes), rows, verdict\n\n\nEXAMPLES = [\n [\"examples/ir_raccoon_pair.jpg\", DEFAULT_CONF],\n [\"examples/ir_raccoon_solo.jpg\", DEFAULT_CONF],\n [\"examples/ir_raccoon_prowler.jpg\", DEFAULT_CONF],\n [\"examples/night_empty.jpg\", DEFAULT_CONF],\n]\n# Drop the examples that don't exist yet so the Space still launches.\nEXAMPLES = [e for e in EXAMPLES if os.path.exists(e[0])]\n\ndemo = gr.Interface(\n fn=detect,\n inputs=[\n gr.Image(type=\"pil\", label=\"Backyard frame\"),\n gr.Slider(0.05, 0.90, value=DEFAULT_CONF, step=0.01, label=\"Confidence threshold\"),\n ],\n outputs=[\n gr.AnnotatedImage(label=\"Detections\"),\n gr.Dataframe(headers=[\"animal\", \"confidence\"], label=\"What the model saw\"),\n gr.Textbox(label=\"Deterrent verdict\"),\n ],\n examples=EXAMPLES or None,\n title=\"🦝 Backyard Raccoon Deterrent\",\n description=(\n \"Fine-tuned **YOLOv8n** raccoon detector (v1.4) — the eyes of a real Ring-camera \"\n \"deterrent. Trained on 560+ hand-labeled night-vision frames of raccoons \"\n \"raiding my yard, including trajectory frames pulled from real motion events \"\n \"(**P 93.5% · R 85.9% · mAP50 92.8%** on a held-out val split, ~24 ms inference). \"\n \"Runs fully offline. Upload a frame or click an example.\"\n ),\n article=(\n \"Built for the Gradio **Build Small** hackathon (Backyard AI track). \"\n \"The deployed system pairs this model with audio + smart-light deterrents on a \"\n \"Raspberry Pi — fully offline, no cloud APIs. \"\n \"[Source on GitHub](https://github.com/sappkevin/backyard-raccoon-deterrent).\"\n ),\n)\n\nif __name__ == \"__main__\":\n demo.launch()\n", + "app_signals": "detect image conf Backyard Raccoon Deterrent — Gradio Space. Fine-tuned YOLOv8n raccoon detector, the vision component of a real Ring-camera deterrent. Upload a backyard photo (daytime or IR night frame) and the model draws boxes, lists detections, and tells you what the deterrent would do. Runs fully offline — no cloud APIs. os.environ.get YOLO gr.Interface fn inputs outputs examples title description article MODEL_PATH raccoon-yolov8n-v1.4.onnx Run detection and return (annotated image, table rows, deterrent verdict). any __main__ demo.launch model.predict verbose tolist float boxes.append rows.append max default examples/ir_raccoon_pair.jpg examples/ir_raccoon_solo.jpg examples/ir_raccoon_prowler.jpg examples/night_empty.jpg os.path.exists 🦝 Backyard Raccoon Deterrent Fine-tuned **YOLOv8n** raccoon detector (v1.4) — the eyes of a real Ring-camera deterrent. Trained on 560+ hand-labeled night-vision frames of raccoons raiding my yard, including trajectory frames pulled from real motion events (**P 93.5% · R 85.9% · mAP50 92.8%** on a held-out val split, ~24 ms inference). Runs fully offline. Upload a frame or click an example. Built for the Gradio **Build Small** hackathon (Backyard AI track). The deployed system pairs this model with audio + smart-light deterrents on a Raspberry Pi — fully offline, no cloud APIs. [Source on GitHub](https://github.com/sappkevin/backyard-raccoon-deterrent). Upload a frame to begin. int 🦝 Raccoon detected ( ) → BARK + LIGHTS would fire 🐾 Animal seen, but no raccoon — deterrent stays quiet ✅ All clear — nothing detected gr.Image type label gr.Slider value step gr.AnnotatedImage gr.Dataframe headers gr.Textbox round raccoon .2f pil Backyard frame Confidence threshold Detections What the model saw Deterrent verdict animal confidence", + "readme_len": 4959, + "app_source_len": 3284, + "app_signals_len": 1793 + }, + { + "id": "build-small-hackathon/blind-quill", + "title": "Blind Quill", + "summary": "", + "tags": [ + "gradio", + "region:us" + ], + "models": [], + "datasets": [], + "sdk": "gradio", + "license": "mit", + "likes": 0, + "url": "https://huggingface.co/spaces/build-small-hackathon/blind-quill", + "app_file": "app.py", + "readme_raw": "---\ntitle: Blind Quill\nsdk: gradio\nsdk_version: 6.16.0\napp_file: app.py\npython_version: \"3.12\"\nsuggested_hardware: zero-a10g\nlicense: mit\n---\n\n# Blind Quill\n\nBlind Quill is a hidden-canon story grafting game.\n\nEach manuscript has a public capsule and a hidden full canon. You can play the\nintended way by reading only the capsule, adding one fragment, and letting\n`Qwen/Qwen3.5-2B` decide where that fragment belongs. The model rewrites only the\nlocal passage it targets, then reveals where your idea was stitched into the\nstory.\n\nReaders who only want to read can use the escape door: `Read without changing`.\nThe app warns that the best experience is to contribute first, then allows the\nreader to reveal the full manuscript anyway.\n\n## Interface\n\nThe UI is a bespoke literary frontend called \"The Invisible Bindery\". It lives in\n`web/` and is served by a `gradio.Server` backend.\n\n`app.py` exposes queued API endpoints:\n\n- `list_stories`\n- `get_capsule`\n- `create_story`\n- `stitch`\n- `read_manuscript`\n\nThe frontend calls those endpoints through the Gradio JS client. This keeps\nGradio queueing, concurrency control, and ZeroGPU support while presenting a\nsingle custom surface: gallery -> capsule -> compose -> reveal -> reader.\n\nThe Python layers are:\n\n- `core.py`: create, browse, stitch, and read orchestration.\n- `story_store.py`: JSON persistence and file locking.\n- `model_client.py`: model loading, generation, thinking-block stripping, and\n JSON validation.\n- `patcher.py`: deterministic local patch application.\n- `presenter.py`: view models for the custom frontend.\n- `app.py`: static frontend serving and Gradio Server API endpoints.\n\n## Local Development\n\nUse uv with Python 3.12, matching the Hugging Face Space as closely as possible.\n\n```bash\nuv sync --python 3.12\nuv run python app.py\n```\n\nThen open .\n\nPersistent story data is stored at:\n\n- `DATA_DIR`, when set\n- `/data`, when it exists on Hugging Face Spaces\n- `./data/stories.json`, otherwise\n\n### Execution backend\n\n`BQ_DEVICE` selects where generation runs.\n\n| `BQ_DEVICE` | Behaviour |\n| --- | --- |\n| `auto` (default) | ZeroGPU on a Space with the `spaces` runtime, else CUDA, else Apple MPS, else CPU. |\n| `zerogpu` | Hugging Face ZeroGPU (`@spaces.GPU`), with automatic CPU fallback (below). |\n| `cuda` | Local NVIDIA GPU via `device_map=\"auto\"`. |\n| `mps` | Apple Silicon GPU (Metal); falls back to float32 if float16 fails. |\n| `cpu` | CPU only — slow but needs no accelerator or quota. |\n\n**Per-user ZeroGPU fallback.** ZeroGPU quota is per visitor, not per Space owner,\nand is only known at request time. So on a ZeroGPU Space each stitch is attempted\non the GPU; if the visitor's quota is spent, the request is transparently re-run\non CPU instead of failing. No configuration or sign-in is required to keep using\nthe app — it just gets slower.\n\n**Progress.** Because CPU/MPS runs are slow, the `stitch` endpoint streams real\nprogress (stage, percentage, ETA — and a note when a fallback happens) to the\nreveal screen. Fast GPU runs keep the original staged animation, since ZeroGPU's\nforked generation cannot stream token callbacks back across the process boundary.\n\n### Logging\n\nSet `BQ_LOG_LEVEL` (default `INFO`; use `DEBUG` for per-stage detail). Logs go to\nstderr only — never the UI — and record messages processed, total and per-stage\ntimings, and a best-effort resource snapshot (process memory, CPU, and GPU/MPS\nmemory when available).\n\n## Requirements\n\n`requirements.txt` is generated from `uv.lock` for Hugging Face Spaces:\n\n```bash\nuv export --format requirements-txt --no-dev --no-hashes --no-emit-project -o requirements.txt\n```\n\nDo not hand-edit `requirements.txt`; edit `pyproject.toml`, run `uv lock`, and\nexport again.\n\n## Test\n\n```bash\nuv run python -m compileall app.py core.py model_client.py observability.py patcher.py presenter.py prompts.py schemas.py story_store.py utils.py tests\nuv run python -m unittest discover -s tests -v\n```\n\nThe tests cover JSON/thinking cleanup, deterministic patch application, graft\nsealing, stale-write rejection, the blinded capsule flow, the warned read escape\ndoor, the create-then-stitch flow, device resolution, the resource snapshot, and\nthe streamed stitch progress events. They do not download model weights.\n\n## Model Policy\n\n- Uses one model: `Qwen/Qwen3.5-2B`.\n- Uses the Transformers `AutoProcessor` and `AutoModelForImageTextToText` path.\n- Wraps model generation in `@spaces.GPU(duration=300)` on ZeroGPU; runs directly\n on CUDA, MPS, or CPU otherwise (selected by `BQ_DEVICE`).\n- Does not set `temperature`, `top_p`, `top_k`, or other sampling controls.\n- Disables Qwen thinking for schema-constrained JSON calls so the token budget is\n spent on parseable JSON; other text generation keeps the model template default.\n- Strips `...` before JSON parsing, storage, prompting, or UI\n rendering.\n- Does not use embeddings, RAG, ASR, image models, or a second language model.\n\n## Example Seeds\n\n```text\nA city where every doorway remembers the last person who lied inside it.\n```\n\n```text\nOn a generation ship whose crew believes Earth was a myth invented to calm children, a janitor discovers a sealed garden where rain falls upward and an old radio is still receiving ocean weather reports.\n```\n\nExample fragment:\n\n```text\nA brass key in the protagonist's pocket becomes warm whenever someone nearby tells the truth.\n```\n", + "readme_body": "# Blind Quill\n\nBlind Quill is a hidden-canon story grafting game.\n\nEach manuscript has a public capsule and a hidden full canon. You can play the\nintended way by reading only the capsule, adding one fragment, and letting\n`Qwen/Qwen3.5-2B` decide where that fragment belongs. The model rewrites only the\nlocal passage it targets, then reveals where your idea was stitched into the\nstory.\n\nReaders who only want to read can use the escape door: `Read without changing`.\nThe app warns that the best experience is to contribute first, then allows the\nreader to reveal the full manuscript anyway.\n\n## Interface\n\nThe UI is a bespoke literary frontend called \"The Invisible Bindery\". It lives in\n`web/` and is served by a `gradio.Server` backend.\n\n`app.py` exposes queued API endpoints:\n\n- `list_stories`\n- `get_capsule`\n- `create_story`\n- `stitch`\n- `read_manuscript`\n\nThe frontend calls those endpoints through the Gradio JS client. This keeps\nGradio queueing, concurrency control, and ZeroGPU support while presenting a\nsingle custom surface: gallery -> capsule -> compose -> reveal -> reader.\n\nThe Python layers are:\n\n- `core.py`: create, browse, stitch, and read orchestration.\n- `story_store.py`: JSON persistence and file locking.\n- `model_client.py`: model loading, generation, thinking-block stripping, and\n JSON validation.\n- `patcher.py`: deterministic local patch application.\n- `presenter.py`: view models for the custom frontend.\n- `app.py`: static frontend serving and Gradio Server API endpoints.\n\n## Local Development\n\nUse uv with Python 3.12, matching the Hugging Face Space as closely as possible.\n\n```bash\nuv sync --python 3.12\nuv run python app.py\n```\n\nThen open .\n\nPersistent story data is stored at:\n\n- `DATA_DIR`, when set\n- `/data`, when it exists on Hugging Face Spaces\n- `./data/stories.json`, otherwise\n\n### Execution backend\n\n`BQ_DEVICE` selects where generation runs.\n\n| `BQ_DEVICE` | Behaviour |\n| --- | --- |\n| `auto` (default) | ZeroGPU on a Space with the `spaces` runtime, else CUDA, else Apple MPS, else CPU. |\n| `zerogpu` | Hugging Face ZeroGPU (`@spaces.GPU`), with automatic CPU fallback (below). |\n| `cuda` | Local NVIDIA GPU via `device_map=\"auto\"`. |\n| `mps` | Apple Silicon GPU (Metal); falls back to float32 if float16 fails. |\n| `cpu` | CPU only — slow but needs no accelerator or quota. |\n\n**Per-user ZeroGPU fallback.** ZeroGPU quota is per visitor, not per Space owner,\nand is only known at request time. So on a ZeroGPU Space each stitch is attempted\non the GPU; if the visitor's quota is spent, the request is transparently re-run\non CPU instead of failing. No configuration or sign-in is required to keep using\nthe app — it just gets slower.\n\n**Progress.** Because CPU/MPS runs are slow, the `stitch` endpoint streams real\nprogress (stage, percentage, ETA — and a note when a fallback happens) to the\nreveal screen. Fast GPU runs keep the original staged animation, since ZeroGPU's\nforked generation cannot stream token callbacks back across the process boundary.\n\n### Logging\n\nSet `BQ_LOG_LEVEL` (default `INFO`; use `DEBUG` for per-stage detail). Logs go to\nstderr only — never the UI — and record messages processed, total and per-stage\ntimings, and a best-effort resource snapshot (process memory, CPU, and GPU/MPS\nmemory when available).\n\n## Requirements\n\n`requirements.txt` is generated from `uv.lock` for Hugging Face Spaces:\n\n```bash\nuv export --format requirements-txt --no-dev --no-hashes --no-emit-project -o requirements.txt\n```\n\nDo not hand-edit `requirements.txt`; edit `pyproject.toml`, run `uv lock`, and\nexport again.\n\n## Test\n\n```bash\nuv run python -m compileall app.py core.py model_client.py observability.py patcher.py presenter.py prompts.py schemas.py story_store.py utils.py tests\nuv run python -m unittest discover -s tests -v\n```\n\nThe tests cover JSON/thinking cleanup, deterministic patch application, graft\nsealing, stale-write rejection, the blinded capsule flow, the warned read escape\ndoor, the create-then-stitch flow, device resolution, the resource snapshot, and\nthe streamed stitch progress events. They do not download model weights.\n\n## Model Policy\n\n- Uses one model: `Qwen/Qwen3.5-2B`.\n- Uses the Transformers `AutoProcessor` and `AutoModelForImageTextToText` path.\n- Wraps model generation in `@spaces.GPU(duration=300)` on ZeroGPU; runs directly\n on CUDA, MPS, or CPU otherwise (selected by `BQ_DEVICE`).\n- Does not set `temperature`, `top_p`, `top_k`, or other sampling controls.\n- Disables Qwen thinking for schema-constrained JSON calls so the token budget is\n spent on parseable JSON; other text generation keeps the model template default.\n- Strips `...` before JSON parsing, storage, prompting, or UI\n rendering.\n- Does not use embeddings, RAG, ASR, image models, or a second language model.\n\n## Example Seeds\n\n```text\nA city where every doorway remembers the last person who lied inside it.\n```\n\n```text\nOn a generation ship whose crew believes Earth was a myth invented to calm children, a janitor discovers a sealed garden where rain falls upward and an old radio is still receiving ocean weather reports.\n```\n\nExample fragment:\n\n```text\nA brass key in the protagonist's pocket becomes warm whenever someone nearby tells the truth.\n```", + "readme_frontmatter": { + "title": "Blind Quill", + "sdk": "gradio", + "sdk_version": "6.16.0", + "app_file": "app.py", + "python_version": "3.12", + "suggested_hardware": "zero-a10g", + "license": "mit" + }, + "app_source": "\"\"\"Blind Quill — gradio.Server backend for the custom \"Invisible Bindery\" frontend.\n\nThe UI lives in web/ as the production React-via-Babel frontend.\nHere we serve that frontend and expose the bindery as queued Gradio API endpoints,\nso the rich custom UI keeps Gradio's queue, concurrency control, and ZeroGPU.\n\n`stitch` is a streaming generator endpoint: it yields progress events while the\neditor works and a final result event, so slow local (CPU/MPS) runs show real\nprogress. The Gradio JS client consumes the stream via `submit`.\n\"\"\"\n\nfrom __future__ import annotations\n\nimport os\nimport queue\nimport threading\nimport traceback\nfrom pathlib import Path\nfrom typing import Iterator\n\nimport gradio as gr\nfrom fastapi.responses import HTMLResponse\nfrom fastapi.staticfiles import StaticFiles\nfrom gradio import Server\n\nimport core\nfrom model_client import ModelClientError, execution_mode\nfrom observability import configure_logging, get_logger\nfrom patcher import PatchApplicationError\nfrom presenter import card_dict, full_story_dict, reveal_dict\nfrom story_store import StoryStoreError\nfrom utils import InputValidationError\n\nconfigure_logging()\n\nWEB_DIR = Path(__file__).resolve().parent / \"web\"\n\n_USER_FACING_ERRORS = (\n InputValidationError,\n StoryStoreError,\n PatchApplicationError,\n ModelClientError,\n ValueError,\n)\n\n\ndef _guard(call, *args, **kwargs):\n \"\"\"Run a flow, converting known failures into client-visible gr.Error messages.\"\"\"\n try:\n return call(*args, **kwargs)\n except gr.Error:\n raise\n except _USER_FACING_ERRORS as exc:\n raise gr.Error(str(exc)) from exc\n except Exception as exc: # noqa: BLE001 - last-resort guard for the API layer\n traceback.print_exc()\n raise gr.Error(\"The bindery hit an internal error. Please try again.\") from exc\n\n\ndef _to_user_error(exc: BaseException) -> gr.Error:\n if isinstance(exc, gr.Error):\n return exc\n if isinstance(exc, _USER_FACING_ERRORS):\n return gr.Error(str(exc))\n traceback.print_exc()\n return gr.Error(\"The bindery hit an internal error. Please try again.\")\n\n\ndef _result_event(result) -> dict:\n return {\"type\": \"result\", \"story\": full_story_dict(result.story), \"reveal\": reveal_dict(result)}\n\n\n# Message fragments that ZeroGPU uses when a user's own quota (or credits) is\n# spent. These are recoverable per-user limits, so we fall back to CPU rather\n# than surfacing them as errors. See spaces/zero/client.py.\n_QUOTA_MARKERS = (\"quota exceeded\", \"credits exceeded\", \"exceeded your\", \"runs limit\")\n\n_CPU_FALLBACK_NOTICE = (\n \"No ZeroGPU quota for this session — running locally on CPU. This is slower; \"\n \"the progress below is live.\"\n)\n\n\ndef _is_quota_error(exc: BaseException) -> bool:\n if not isinstance(exc, gr.Error):\n return False\n text = \" \".join(\n str(part) for part in (getattr(exc, \"title\", \"\"), getattr(exc, \"message\", \"\"), exc)\n ).lower()\n return any(marker in text for marker in _QUOTA_MARKERS)\n\n\ndef _stream_stitch(story_id: str, fragment: str, force_cpu: bool, notice: str | None = None) -> Iterator[dict]:\n \"\"\"Run `core.stitch` in a worker thread and stream its progress events.\n\n Used for in-process execution (local CUDA/MPS/CPU, or the CPU fallback after\n a ZeroGPU quota miss). A worker thread is safe here precisely because no\n `@spaces.GPU` call is involved — that path must stay on the request thread.\n `notice` is attached to every event so the UI can explain a fallback.\n \"\"\"\n events: \"queue.Queue\" = queue.Queue()\n done = object()\n holder: dict = {}\n\n def worker() -> None:\n try:\n holder[\"result\"] = core.stitch(\n story_id, fragment, on_progress=events.put, force_cpu=force_cpu\n )\n except BaseException as exc: # noqa: BLE001 - surfaced to the main thread below\n holder[\"error\"] = exc\n finally:\n events.put(done)\n\n thread = threading.Thread(target=worker, name=\"bq-stitch\", daemon=True)\n thread.start()\n while True:\n event = events.get()\n if event is done:\n break\n yield {**event, \"notice\": notice} if notice else event\n thread.join()\n\n if \"error\" in holder:\n raise holder[\"error\"]\n yield _result_event(holder[\"result\"])\n\n\ndef _stitch_events(story_id: str, fragment: str) -> Iterator[dict]:\n \"\"\"Yield progress events then a result event for one stitch.\n\n On a ZeroGPU Space the stitch is attempted synchronously on the request\n thread (ZeroGPU needs that thread's context to bill the right user). If the\n user's per-user quota is spent, ZeroGPU raises and we transparently re-run on\n CPU with live streamed progress. Local execution always streams.\n \"\"\"\n try:\n if execution_mode() == \"zerogpu\":\n try:\n # Fast path: the user has quota, generation runs on the GPU.\n result = core.stitch(story_id, fragment)\n yield _result_event(result)\n return\n except gr.Error as exc:\n if not _is_quota_error(exc):\n raise\n get_logger().warning(\"ZeroGPU quota exhausted for this request; falling back to CPU.\")\n yield from _stream_stitch(story_id, fragment, force_cpu=True, notice=_CPU_FALLBACK_NOTICE)\n return\n\n yield from _stream_stitch(story_id, fragment, force_cpu=False)\n except gr.Error:\n raise\n except BaseException as exc: # noqa: BLE001 - convert to a client-visible error\n raise _to_user_error(exc) from exc\n\n\ndef build_server() -> Server:\n app = Server(title=\"Blind Quill\")\n\n @app.api(name=\"list_stories\")\n def list_stories() -> dict:\n stories = _guard(core.gallery)\n return {\"stories\": [card_dict(story) for story in stories]}\n\n @app.api(name=\"get_capsule\")\n def get_capsule(story_id: str) -> dict:\n story = _guard(core.capsule, story_id)\n return {\"story\": card_dict(story)}\n\n @app.api(name=\"create_story\", concurrency_limit=1, concurrency_id=\"bindery\")\n def create_story(seed: str) -> dict:\n story = _guard(core.create, seed)\n return {\"story\": full_story_dict(story)}\n\n @app.api(name=\"stitch\", concurrency_limit=1, concurrency_id=\"bindery\")\n def stitch(story_id: str, fragment: str) -> dict:\n # A generator endpoint: each yield streams to the client via `submit`.\n yield from _stitch_events(story_id, fragment)\n\n @app.api(name=\"read_manuscript\")\n def read_manuscript(story_id: str) -> dict:\n story = _guard(core.read_manuscript, story_id)\n return {\"story\": full_story_dict(story)}\n\n app.mount(\"/web\", StaticFiles(directory=str(WEB_DIR)), name=\"web\")\n\n @app.get(\"/\", response_class=HTMLResponse)\n def homepage() -> str:\n return (WEB_DIR / \"index.html\").read_text(encoding=\"utf-8\")\n\n return app\n\n\ndef _port() -> int:\n for key in (\"GRADIO_SERVER_PORT\", \"PORT\"):\n value = os.environ.get(key)\n if value:\n try:\n return int(value)\n except ValueError:\n pass\n return 7860\n\n\ndef _should_launch() -> bool:\n if os.environ.get(\"BQ_NO_LAUNCH\") == \"1\":\n return False\n # Run as a script locally, or imported by the Hugging Face Spaces runtime.\n return __name__ == \"__main__\" or bool(os.environ.get(\"SPACE_ID\"))\n\n\napp = build_server()\n\nif _should_launch():\n get_logger().info(\"Launching Blind Quill on port %d (execution=%s)\", _port(), execution_mode())\n app.launch(server_name=\"0.0.0.0\", server_port=_port(), show_error=True)\n", + "app_signals": "_guard call build_server _port _should_launch Blind Quill — gradio.Server backend for the custom \"Invisible Bindery\" frontend. The UI lives in web/ as the production React-via-Babel frontend. Here we serve that frontend and expose the bindery as queued Gradio API endpoints, so the rich custom UI keeps Gradio's queue, concurrency control, and ZeroGPU. list_stories get_capsule story_id create_story seed stitch fragment read_manuscript homepage web Run a flow, converting known failures into client-visible gr.Error messages. Server title app.api name concurrency_limit concurrency_id app.mount app.get response_class app.launch server_name server_port show_error resolve /web StaticFiles directory read_text encoding / GRADIO_SERVER_PORT PORT os.environ.get 1 bool gr.Error traceback.print_exc Blind Quill stories story card_dict full_story_dict bindery reveal reveal_dict BQ_NO_LAUNCH __main__ 0.0.0.0 Path str The bindery hit an internal error. Please try again. utf-8 int SPACE_ID index.html", + "readme_len": 5271, + "app_source_len": 7616, + "app_signals_len": 996 + }, + { + "id": "build-small-hackathon/borderless", + "title": "Borderless", + "summary": "", + "tags": [ + "gradio", + "region:us" + ], + "models": [], + "datasets": [], + "sdk": "gradio", + "license": "", + "likes": 4, + "url": "https://huggingface.co/spaces/build-small-hackathon/borderless", + "app_file": "app.py", + "readme_raw": "---\ntitle: Borderless\nemoji: 🌍\ncolorFrom: yellow\ncolorTo: purple\nsdk: gradio\nsdk_version: 6.16.0\napp_file: app.py\npinned: false\nlicense: apache-2.0\nshort_description: Agentic immigration research for global movers\ntags:\n - agents\n - gradio\n - immigration\n - travel\n - research\n - tool-use\n - qwen\n - maplibre\n - geospatial\nmodels:\n - Qwen/Qwen3.6-27B\ndatasets: []\nhf_oauth: true\nhf_oauth_scopes:\n - inference-api \nhf_oauth_expiration_minutes: 480 # 8 hours\ndisable_embedding: false\nstartup_duration_timeout: 10m \n---\n\n# Borderless\n\n**An agentic immigration research tool — describe your background in plain English, explore where you could go.**\n\nLive demo: **[build-small-hackathon/borderless](https://huggingface.co/spaces/build-small-hackathon/borderless)**\n\nBuilt for the [Build Small Hackathon](https://huggingface.co/build-small-hackathon) — small models (≤32B), big adventure.\n\n## What it does\n\nImmigration research is fragmented across government sites, forums, and spreadsheets. Borderless puts it in one conversational flow:\n\n1. **Describe yourself** — citizenship, education, work history, languages, budget, and goals in everyday language.\n2. **Use guided intake or chat** — start from a structured profile form, a demo persona, or a free-form message.\n3. **Get a shortlist** — the agent reasons over your profile and surfaces destination countries that fit.\n4. **Explore on a 3D globe** — shortlisted countries appear on an interactive MapLibre globe beside the chat with pathway labels.\n5. **Dig into the details** — visa pathways, required documents, realistic timelines, risks, and source links from official pages.\n\nNo forms to decode. No keyword guessing. Just a research session that meets you where you are.\n\n## How it works\n\nBorderless is a **Gradio agent** powered by **[Qwen/Qwen3.6-27B](https://huggingface.co/Qwen/Qwen3.6-27B)** (27B parameters — within the hackathon's 32B cap). The model plans multi-step research and calls tools when it needs ground truth:\n\n| Tool | What it fetches |\n|------|-----------------|\n| `get_country_profile` | Country metadata and official immigration domain hints (REST Countries + curated hints) |\n| `search_immigration_info` | Web search with source-quality labels for official immigration pages, policies, and pathways (Exa) |\n| `scrape_web_page` | Markdown content from a specific official government or embassy URL (Firecrawl) |\n| `crawl_web_site` | Multiple pages from an official immigration website section (Firecrawl) |\n| `update_globe` | Marks, highlights, and flies to countries on the MapLibre globe |\n\nTool calls stream in the chat so you can follow the agent's progress. Globe updates are also tool-driven: when the agent recommends destinations or the user asks to mark countries, it sends ISO country codes and pathway labels to the map. The default research budget is seven tool rounds, then Borderless synthesizes a clear answer with pathways, documents, timelines, risks, and cited sources.\n\nSign in with your Hugging Face account to run inference through the Inference API.\n\n## Features\n\n- **Guided intake** — form fields turn citizenship, education, work, languages, budget, and goals into a complete research prompt\n- **Agentic research** — multi-turn tool use, not a single-shot prompt\n- **Structured recommendations** — shortlist, pathways, documents, risks, timelines, next steps, and official sources\n- **Tool-driven 3D globe** — MapLibre GL globe projection with markers, highlights, pathway labels, fly-to camera moves, drag, rotate, and zoom\n- **Source quality** — search results identify likely official government, embassy, and unofficial context sources\n- **Web search** — Exa discovers official immigration pages, visa rules, and policy sources\n- **Official page scraping** — Firecrawl extracts markdown from government immigration sites\n- **Country metadata** — REST Countries powers ISO-2 / ISO-3 lookup and map coordinates\n- **Transparent traces** — tool progress is visible in chat, and JSONL traces can be sanitized and shared\n- **Chat history** — pick up where you left off in the sidebar\n\n## Example prompts\n\n- *\"I'm a software engineer from India with 5 years of experience and a master's degree. Where could I realistically relocate for work?\"*\n- *\"I hold a Hong Kong passport and want to study in Europe on a modest budget. What are my visa options?\"*\n- *\"Compare GDP growth and unemployment for Canada, Germany, and Australia over the last decade.\"*\n- *\"What documents do I need to apply for a skilled worker visa from the UK to Portugal?\"*\n\n## Tech stack\n\n- **[Gradio](https://gradio.app)** — chat UI, OAuth, and custom HTML/JS globe panel\n- **[Qwen3.6-27B](https://huggingface.co/Qwen/Qwen3.6-27B)** — reasoning and tool planning via Hugging Face Inference API\n- **[huggingface_hub](https://huggingface.co/docs/huggingface_hub)** — `InferenceClient` with streaming and function calling\n- **[MapLibre GL JS](https://maplibre.org/)** — interactive 3D globe\n- **[REST Countries](https://restcountries.com/)** — country names, ISO codes, regions, capitals, flags, area, and map coordinates\n- **[Exa](https://exa.ai)** — neural web search for discovering immigration sources\n- **[Firecrawl](https://firecrawl.dev)** — scrape and crawl official web pages for immigration details\n\n## Project structure\n\n```\napp.py # Gradio Blocks entry point\nFIELD_NOTES.md # Build notes and award narrative\nDEMO_SCRIPT.md # Short demo-video script\nTRACE_SHARING.md # How to sanitize and share agent traces\nui/\n workspace.py # Main workspace layout (globe + form/chat tabs)\n chat/\n panel.py # SidebarChatInterface adapter\n defaults.py # Generation defaults (tokens, temperature, top_p)\n intake/\n panel.py # Profile form panel\n prompts.py # Form-to-prompt builders\n examples.py # Demo persona prompts\n globe.py # MapLibre globe panel\n sidebar.py # HF login + history sidebar\n globe_commands.py # Globe marker/highlight/fly-to state updates\n country_coords.py # Country lookup for globe coordinates\n agent/ # Agent loop, tools, streaming\n respond.py # Main chat handler and tool loop\n completion.py # Hugging Face Inference API client\n tools.py # Tool dispatch and implementations\n streaming.py # Stream tokens and tool traces to the UI\n messages.py # Chat message formatting\n system_prompt.py # System prompt\n config.py # Model ID, tool-round budget, env config\n traces.py # JSONL trace logging\n tool_schemas/ # Function-calling schemas (one file per tool)\napis/\n rest_countries.py # REST Countries metadata client\n country_profile.py # Country profile tool wrapper\n official_sources.py # Official-domain hints and source classification\n exa.py # Exa web search client\n firecrawl.py # Firecrawl scrape/crawl client\nassets/\n app.css # Gradio branding\n globe.js / globe.css # Globe rendering, loading, and empty states\n globe_head.html # MapLibre assets injected at launch\n```\n\n## Hackathon fit\n\n| Constraint | Borderless |\n|------------|------------|\n| Model ≤ 32B | Qwen3.6-27B (27B) |\n| Gradio on HF Spaces | Yes — [live Space](https://huggingface.co/spaces/build-small-hackathon/borderless) |\n| Agentic | Multi-tool research loop with visible traces |\n| Sharing is Caring | JSONL tool traces can be sanitized and published |\n| Field Notes | See `FIELD_NOTES.md` |\n\n**Track:** Backyard AI — immigration research is a real, specific problem faced by millions of people weighing where they can live, work, and study.\n\n## Run locally\n\n```bash\npip install -r requirements.txt\ncp .env.example .env # then fill in API keys\npython app.py\n```\n\nSet a Hugging Face token with Inference API access, or sign in through the app's OAuth flow when deployed.\n\nFor web research tools, set API keys from [dashboard.exa.ai](https://dashboard.exa.ai/api-keys) and [firecrawl.dev](https://firecrawl.dev):\n\n| Variable | Tools |\n|----------|-------|\n| `EXA_API_KEY` | `search_immigration_info` |\n| `FIRECRAWL_API_KEY` | `scrape_web_page`, `crawl_web_site` |\n| `BORDERLESS_MODEL_ID` | Optional model override, default `Qwen/Qwen3.6-27B` |\n| `BORDERLESS_MAX_TOOL_ROUNDS` | Optional tool-round budget, default `7` |\n| `BORDERLESS_TRACE_DIR` | Optional JSONL trace output directory |\n| `BORDERLESS_DISABLE_TRACE_LOGS` | Set to `1` to disable local trace logs |\n\nOn Hugging Face Spaces, add both as **Space secrets** (Settings → Secrets). Without keys, web tools return a clear error. The agent uses Exa to discover URLs, then Firecrawl to fetch full official page content.\n\n## License\n\nApache-2.0 (model: [Qwen/Qwen3.6-27B](https://huggingface.co/Qwen/Qwen3.6-27B))\n", + "readme_body": "# Borderless\n\n**An agentic immigration research tool — describe your background in plain English, explore where you could go.**\n\nLive demo: **[build-small-hackathon/borderless](https://huggingface.co/spaces/build-small-hackathon/borderless)**\n\nBuilt for the [Build Small Hackathon](https://huggingface.co/build-small-hackathon) — small models (≤32B), big adventure.\n\n## What it does\n\nImmigration research is fragmented across government sites, forums, and spreadsheets. Borderless puts it in one conversational flow:\n\n1. **Describe yourself** — citizenship, education, work history, languages, budget, and goals in everyday language.\n2. **Use guided intake or chat** — start from a structured profile form, a demo persona, or a free-form message.\n3. **Get a shortlist** — the agent reasons over your profile and surfaces destination countries that fit.\n4. **Explore on a 3D globe** — shortlisted countries appear on an interactive MapLibre globe beside the chat with pathway labels.\n5. **Dig into the details** — visa pathways, required documents, realistic timelines, risks, and source links from official pages.\n\nNo forms to decode. No keyword guessing. Just a research session that meets you where you are.\n\n## How it works\n\nBorderless is a **Gradio agent** powered by **[Qwen/Qwen3.6-27B](https://huggingface.co/Qwen/Qwen3.6-27B)** (27B parameters — within the hackathon's 32B cap). The model plans multi-step research and calls tools when it needs ground truth:\n\n| Tool | What it fetches |\n|------|-----------------|\n| `get_country_profile` | Country metadata and official immigration domain hints (REST Countries + curated hints) |\n| `search_immigration_info` | Web search with source-quality labels for official immigration pages, policies, and pathways (Exa) |\n| `scrape_web_page` | Markdown content from a specific official government or embassy URL (Firecrawl) |\n| `crawl_web_site` | Multiple pages from an official immigration website section (Firecrawl) |\n| `update_globe` | Marks, highlights, and flies to countries on the MapLibre globe |\n\nTool calls stream in the chat so you can follow the agent's progress. Globe updates are also tool-driven: when the agent recommends destinations or the user asks to mark countries, it sends ISO country codes and pathway labels to the map. The default research budget is seven tool rounds, then Borderless synthesizes a clear answer with pathways, documents, timelines, risks, and cited sources.\n\nSign in with your Hugging Face account to run inference through the Inference API.\n\n## Features\n\n- **Guided intake** — form fields turn citizenship, education, work, languages, budget, and goals into a complete research prompt\n- **Agentic research** — multi-turn tool use, not a single-shot prompt\n- **Structured recommendations** — shortlist, pathways, documents, risks, timelines, next steps, and official sources\n- **Tool-driven 3D globe** — MapLibre GL globe projection with markers, highlights, pathway labels, fly-to camera moves, drag, rotate, and zoom\n- **Source quality** — search results identify likely official government, embassy, and unofficial context sources\n- **Web search** — Exa discovers official immigration pages, visa rules, and policy sources\n- **Official page scraping** — Firecrawl extracts markdown from government immigration sites\n- **Country metadata** — REST Countries powers ISO-2 / ISO-3 lookup and map coordinates\n- **Transparent traces** — tool progress is visible in chat, and JSONL traces can be sanitized and shared\n- **Chat history** — pick up where you left off in the sidebar\n\n## Example prompts\n\n- *\"I'm a software engineer from India with 5 years of experience and a master's degree. Where could I realistically relocate for work?\"*\n- *\"I hold a Hong Kong passport and want to study in Europe on a modest budget. What are my visa options?\"*\n- *\"Compare GDP growth and unemployment for Canada, Germany, and Australia over the last decade.\"*\n- *\"What documents do I need to apply for a skilled worker visa from the UK to Portugal?\"*\n\n## Tech stack\n\n- **[Gradio](https://gradio.app)** — chat UI, OAuth, and custom HTML/JS globe panel\n- **[Qwen3.6-27B](https://huggingface.co/Qwen/Qwen3.6-27B)** — reasoning and tool planning via Hugging Face Inference API\n- **[huggingface_hub](https://huggingface.co/docs/huggingface_hub)** — `InferenceClient` with streaming and function calling\n- **[MapLibre GL JS](https://maplibre.org/)** — interactive 3D globe\n- **[REST Countries](https://restcountries.com/)** — country names, ISO codes, regions, capitals, flags, area, and map coordinates\n- **[Exa](https://exa.ai)** — neural web search for discovering immigration sources\n- **[Firecrawl](https://firecrawl.dev)** — scrape and crawl official web pages for immigration details\n\n## Project structure\n\n```\napp.py # Gradio Blocks entry point\nFIELD_NOTES.md # Build notes and award narrative\nDEMO_SCRIPT.md # Short demo-video script\nTRACE_SHARING.md # How to sanitize and share agent traces\nui/\n workspace.py # Main workspace layout (globe + form/chat tabs)\n chat/\n panel.py # SidebarChatInterface adapter\n defaults.py # Generation defaults (tokens, temperature, top_p)\n intake/\n panel.py # Profile form panel\n prompts.py # Form-to-prompt builders\n examples.py # Demo persona prompts\n globe.py # MapLibre globe panel\n sidebar.py # HF login + history sidebar\n globe_commands.py # Globe marker/highlight/fly-to state updates\n country_coords.py # Country lookup for globe coordinates\n agent/ # Agent loop, tools, streaming\n respond.py # Main chat handler and tool loop\n completion.py # Hugging Face Inference API client\n tools.py # Tool dispatch and implementations\n streaming.py # Stream tokens and tool traces to the UI\n messages.py # Chat message formatting\n system_prompt.py # System prompt\n config.py # Model ID, tool-round budget, env config\n traces.py # JSONL trace logging\n tool_schemas/ # Function-calling schemas (one file per tool)\napis/\n rest_countries.py # REST Countries metadata client\n country_profile.py # Country profile tool wrapper\n official_sources.py # Official-domain hints and source classification\n exa.py # Exa web search client\n firecrawl.py # Firecrawl scrape/crawl client\nassets/\n app.css # Gradio branding\n globe.js / globe.css # Globe rendering, loading, and empty states\n globe_head.html # MapLibre assets injected at launch\n```\n\n## Hackathon fit\n\n| Constraint | Borderless |\n|------------|------------|\n| Model ≤ 32B | Qwen3.6-27B (27B) |\n| Gradio on HF Spaces | Yes — [live Space](https://huggingface.co/spaces/build-small-hackathon/borderless) |\n| Agentic | Multi-tool research loop with visible traces |\n| Sharing is Caring | JSONL tool traces can be sanitized and published |\n| Field Notes | See `FIELD_NOTES.md` |\n\n**Track:** Backyard AI — immigration research is a real, specific problem faced by millions of people weighing where they can live, work, and study.\n\n## Run locally\n\n```bash\npip install -r requirements.txt\ncp .env.example .env # then fill in API keys\npython app.py\n```\n\nSet a Hugging Face token with Inference API access, or sign in through the app's OAuth flow when deployed.\n\nFor web research tools, set API keys from [dashboard.exa.ai](https://dashboard.exa.ai/api-keys) and [firecrawl.dev](https://firecrawl.dev):\n\n| Variable | Tools |\n|----------|-------|\n| `EXA_API_KEY` | `search_immigration_info` |\n| `FIRECRAWL_API_KEY` | `scrape_web_page`, `crawl_web_site` |\n| `BORDERLESS_MODEL_ID` | Optional model override, default `Qwen/Qwen3.6-27B` |\n| `BORDERLESS_MAX_TOOL_ROUNDS` | Optional tool-round budget, default `7` |\n| `BORDERLESS_TRACE_DIR` | Optional JSONL trace output directory |\n| `BORDERLESS_DISABLE_TRACE_LOGS` | Set to `1` to disable local trace logs |\n\nOn Hugging Face Spaces, add both as **Space secrets** (Settings → Secrets). Without keys, web tools return a clear error. The agent uses Exa to discover URLs, then Firecrawl to fetch full official page content.\n\n## License\n\nApache-2.0 (model: [Qwen/Qwen3.6-27B](https://huggingface.co/Qwen/Qwen3.6-27B))", + "readme_frontmatter": { + "title": "Borderless", + "emoji": "🌍", + "colorFrom": "yellow", + "colorTo": "purple", + "sdk": "gradio", + "sdk_version": "6.16.0", + "app_file": "app.py", + "pinned": "false", + "license": "apache-2.0", + "short_description": "Agentic immigration research for global movers", + "tags": "", + "models": "", + "datasets": "[]", + "hf_oauth": "true", + "hf_oauth_scopes": "", + "hf_oauth_expiration_minutes": "480", + "disable_embedding": "false", + "startup_duration_timeout": "10m" + }, + "app_source": "# app.py\nfrom pathlib import Path\n\nimport gradio as gr\n\nfrom ui.workspace import create_main_workspace\nfrom ui.globe import globe_head_html\nfrom ui.sidebar import render_sidebar\n\nASSETS_DIR = Path(__file__).resolve().parent / \"assets\"\n\n\ndef create_demo() -> gr.Blocks:\n with gr.Blocks(\n fill_height=True,\n title=\"Borderless - Immigration Research Agent\",\n ) as demo:\n history_host = render_sidebar()\n create_main_workspace(history_container=history_host)\n\n # Injected at launch (Gradio 6); also picked up by Hugging Face Spaces auto-launch.\n app_css = (ASSETS_DIR / \"app.css\").read_text(encoding=\"utf-8\")\n demo._deprecated_head = f\"{globe_head_html()}\\n\"\n return demo\n\n\ndemo = create_demo()\n\nif __name__ == \"__main__\":\n demo.launch()\n", + "app_signals": "create_demo assets read_text encoding __main__ demo.launch resolve gr.Blocks fill_height title render_sidebar create_main_workspace history_container utf-8 globe_head_html Path Borderless - Immigration Research Agent app.css", + "readme_len": 8328, + "app_source_len": 807, + "app_signals_len": 224 + }, + { + "id": "build-small-hackathon/bridge-troll", + "title": "Bridge Troll", + "summary": "Talk your way past a fine-tuned troll, if your argument is ", + "tags": [ + "gradio", + "region:us" + ], + "models": [], + "datasets": [], + "sdk": "gradio", + "license": "mit", + "likes": 0, + "url": "https://huggingface.co/spaces/build-small-hackathon/bridge-troll", + "app_file": "app.py", + "readme_raw": "---\ntitle: Bridge Troll\nemoji: 👁\ncolorFrom: indigo\ncolorTo: green\nsdk: gradio\nsdk_version: 6.16.0\npython_version: '3.12'\napp_file: app.py\npinned: false\nlicense: mit\nshort_description: 'Talk your way past a fine-tuned troll, if your argument is '\n---\n\nCheck out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference\n", + "readme_body": "Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference", + "readme_frontmatter": { + "title": "Bridge Troll", + "emoji": "👁", + "colorFrom": "indigo", + "colorTo": "green", + "sdk": "gradio", + "sdk_version": "6.16.0", + "python_version": "3.12", + "app_file": "app.py", + "pinned": "false", + "license": "mit", + "short_description": "Talk your way past a fine-tuned troll, if your argument is " + }, + "app_source": "\"\"\"Bridge Troll — Gradio app.\n\nEach session, Gorm is secretly assigned one of several hidden NATURES. The player\nwins by discovering what moves THIS troll — generic sob stories are discounted.\nOn win (resolve -> 0) or loss (resolve -> LOSE_AT, he hurls you back), a reveal\ncard shows what his nature was.\n\nLocal loop test (no GPU/download): BRIDGE_TROLL_MOCK=1 python app.py\n\"\"\"\n\nfrom __future__ import annotations\n\nimport os\n\nimport gradio as gr\n\nfrom troll_engine import (GameState, START_RESOLVE, LOSE_AT, build_messages,\n parse_judgment, random_nature)\nfrom models import get_backend\n\n# ZeroGPU decorator — no-op locally. Supports @gpu and @gpu(duration=...).\ntry:\n import spaces\n\n gpu = spaces.GPU\nexcept Exception:\n\n def gpu(*args, **_kwargs):\n if args and callable(args[0]):\n return args[0]\n return lambda fn: fn\n\n\n_backend = get_backend()\n\n\n@gpu(duration=30)\ndef _generate(messages: list[dict]) -> str:\n return _backend.generate(messages)\n\n\nINTRO = (\"A mossy troll heaves himself upright across the only bridge over the Mirebeck. \"\n '*\"None cross Gorm\\'s bridge for free, traveller. Give me a reason — a *good* one.\"*')\n\n\ndef _meter_html(resolve: int, won: bool, lost: bool) -> str:\n if won:\n return (\"
GORM HAS STEPPED ASIDE 🌉
\"\n \"
\")\n if lost:\n return (\"
GORM HURLS YOU BACK 💢
\"\n \"
\")\n pct = max(0, min(100, round(resolve / START_RESOLVE * 100)))\n hue = 90 + (1 - pct / 100) * 30\n return (\"
\"\n f\"
Gorm's Resolve — {resolve}
\"\n f\"
\")\n\n\ndef _reveal(state: GameState) -> str:\n if not state.over or not state.nature:\n return \"\"\n n = state.nature\n if state.won:\n return (f\"### 🌉 You crossed in {state.turns} turns.\\n\"\n f\"**This Gorm's hidden nature:** *{n['name']}* — moved by {n['soft']}.\")\n return (f\"### 💢 Gorm lost patience and hurled you back.\\n\"\n f\"**His hidden nature was:** *{n['name']}* — moved by {n['soft']}. \"\n f\"You leaned too hard on what he can't stand: {n['sore']}.\")\n\n\ndef on_submit(user_text: str, chat: list, state: GameState):\n user_text = (user_text or \"\").strip()\n if not user_text or state.over:\n return chat, state, _meter_html(state.resolve, state.won, state.lost), \"\", _reveal(state), gr.update()\n\n raw = _generate(build_messages(state, user_text))\n j = parse_judgment(raw)\n state.history.append({\"role\": \"user\", \"content\": user_text})\n state.history.append({\"role\": \"assistant\", \"content\": j.reply})\n state.apply(j)\n\n chat = chat + [{\"role\": \"user\", \"content\": user_text},\n {\"role\": \"assistant\", \"content\": j.reply}]\n why = f\"*{j.tactic.value}* · {j.reason}\" + (f\" · persuasiveness {j.persuasiveness}/5\"\n if j.tactic.value == \"genuine\" else \"\")\n box = gr.update(interactive=not state.over,\n placeholder=\"The bridge is yours.\" if state.won else\n (\"Gorm has thrown you out.\" if state.lost else \"Speak to Gorm…\"))\n return chat, state, _meter_html(state.resolve, state.won, state.lost), why, _reveal(state), box\n\n\ndef on_reset():\n state = GameState(nature=random_nature())\n chat = [{\"role\": \"assistant\", \"content\": INTRO}]\n return (chat, state, _meter_html(state.resolve, False, False), \"\", \"\",\n gr.update(interactive=True, value=\"\", placeholder=\"Speak to Gorm…\"))\n\n\nCSS = \"\"\"\n.resolve-wrap { margin: 6px 0 14px; }\n.resolve-label { font-family: Georgia, serif; font-size: 14px; letter-spacing:.04em; margin-bottom:4px; }\n.resolve-bar { height: 16px; background:#2a2118; border:1px solid #5a4a32; border-radius:9px; overflow:hidden; }\n.resolve-fill { height:100%; transition: width .5s ease, background .5s ease; }\n.resolve-fill.won { background:#caa54a; }\n.resolve-fill.lost { background:#a33; }\n#why { font-family: Georgia, serif; opacity:.8; min-height:1.4em; }\n#reveal { font-family: Georgia, serif; }\n\"\"\"\n\nwith gr.Blocks(title=\"Bridge Troll\") as demo:\n gr.Markdown(\"## 🧌🌉 Bridge Troll\\n*Talk your way across — if your argument is actually good. \"\n \"Every troll is hiding something different.*\")\n if os.environ.get(\"BRIDGE_TROLL_MOCK\") == \"1\":\n gr.Markdown(\"> ⚠️ **MOCK MODE** — keyword stub, not the real model. \"\n \"Natures, discovery, and probing do NOT work here. \"\n \"Run on the Space (no `BRIDGE_TROLL_MOCK`) to play the real Gorm.\")\n meter = gr.HTML(_meter_html(START_RESOLVE, False, False))\n chatbot = gr.Chatbot(value=[{\"role\": \"assistant\", \"content\": INTRO}], height=420, show_label=False)\n why = gr.Markdown(\"\", elem_id=\"why\")\n reveal = gr.Markdown(\"\", elem_id=\"reveal\")\n with gr.Row():\n box = gr.Textbox(placeholder=\"Speak to Gorm…\", show_label=False, scale=8, autofocus=True)\n send = gr.Button(\"Say it\", variant=\"primary\", scale=1)\n reset = gr.Button(\"New traveller\", size=\"sm\")\n\n state = gr.State(GameState(nature=random_nature()))\n outs = [chatbot, state, meter, why, reveal, box]\n\n send.click(on_submit, [box, chatbot, state], outs).then(lambda: \"\", None, box)\n box.submit(on_submit, [box, chatbot, state], outs).then(lambda: \"\", None, box)\n reset.click(on_reset, None, outs)\n demo.load(on_reset, None, outs) # fresh hidden nature for every visitor\n\n\nif __name__ == \"__main__\":\n demo.launch(css=CSS, theme=gr.themes.Soft())\n", + "app_signals": "_generate messages _meter_html resolve won lost _reveal state on_submit user_text chat on_reset Bridge Troll — Gradio app. Each session, Gorm is secretly assigned one of several hidden NATURES. The player wins by discovering what moves THIS troll — generic sob stories are discounted. On win (resolve -> 0) or loss (resolve -> LOSE_AT, he hurls you back), a reveal card shows what his nature was. Local loop test (no GPU/download): BRIDGE_TROLL_MOCK=1 python app.py get_backend gpu duration A mossy troll heaves himself upright across the only bridge over the Mirebeck. *\"None cross Gorm's bridge for free, traveller. Give me a reason — a *good* one.\"* _backend.generate max strip parse_judgment state.history.append state.apply gr.update interactive placeholder GameState nature gr.Blocks title gr.Markdown gr.HTML gr.Chatbot value height show_label elem_id gr.Button size gr.State then reset.click demo.load __main__ demo.launch css theme GORM HAS STEPPED ASIDE 🌉 GORM HURLS YOU BACK 💢 min Gorm's Resolve —
### 💢 Gorm lost patience and hurled you back. **His hidden nature was:** * * — moved by . You leaned too hard on what he can't stand: . build_messages ## 🧌🌉 Bridge Troll *Talk your way across — if your argument is actually good. Every troll is hiding something different.* os.environ.get 1 gr.Row gr.Textbox scale autofocus variant New traveller round ### 🌉 You crossed in turns. **This Gorm's hidden nature:** * role content user assistant * * · random_nature Bridge Troll BRIDGE_TROLL_MOCK > ⚠️ **MOCK MODE** — keyword stub, not the real model. Natures, discovery, and probing do NOT work here. Run on the Space (no `BRIDGE_TROLL_MOCK`) to play the real Gorm. why reveal Say it sm send.click box.submit gr.themes.Soft callable name soft sore genuine · persuasiveness /5 The bridge is yours. Speak to Gorm… primary Gorm has thrown you out.", + "readme_len": 96, + "app_source_len": 5953, + "app_signals_len": 1919 + }, + { + "id": "build-small-hackathon/briefing-32", + "title": "briefing-32", + "summary": "A 32B-class AI-news briefing the maker runs every 2 hours.", + "tags": [ + "gradio", + "region:us" + ], + "models": [], + "datasets": [], + "sdk": "gradio", + "license": "apache-2.0", + "likes": 0, + "url": "https://huggingface.co/spaces/build-small-hackathon/briefing-32", + "app_file": "app.py", + "readme_raw": "---\ntitle: briefing-32\nemoji: 📰\ncolorFrom: red\ncolorTo: gray\nsdk: gradio\nsdk_version: 5.42.0\napp_file: app.py\npinned: false\nlicense: apache-2.0\nshort_description: A 32B-class AI-news briefing the maker runs every 2 hours.\n---\n\n# briefing-32\n\nA small-model AI-news briefing agent. Submission for the **Hugging Face\nBuild Small Hackathon** ([huggingface.co/build-small-hackathon](https://huggingface.co/build-small-hackathon))\nin the **Backyard AI** track.\n\n## What it is\n\nThis is a deliberate down-port of [`ai-news-agent`](https://github.com/MukundaKatta/ai-news-agent),\na personal cron that already runs every two hours on the maker's laptop to\ndeliver an AI-news digest to WhatsApp. The production cron uses Groq\nLlama-3.3-70B for relevance scoring. Build Small forces the same workflow\nunder 32B parameters.\n\nThe honest story for the Backyard AI track:\n\n> \"I have used a personal AI-news briefing every two hours since spring 2026.\n> The original uses a 70B model on a free Groq tier. Build Small asked me to\n> live under 32B, on a laptop. So I split the single 70B scoring pass into\n> two cheaper passes on Qwen3-32B — a binary relevance filter, then a graded\n> ranker — and the digest quality holds up.\"\n\n## Pipeline\n\n```\nfetch (RSS · HN · arXiv · GitHub)\n │\n ▼\npass 1 — binary relevance filter on Qwen3-32B\n │\n ▼\npass 2 — graded 0–10 ranker on Qwen3-32B\n │\n ▼\ndigest renderer on Qwen3-32B\n```\n\nTwo small-model calls do the work one big-model call did before.\n\n## Sources (no Reddit / Bluesky)\n\n- **RSS / Atom**: Anthropic, OpenAI, DeepMind, Google AI, Meta AI, Mistral,\n xAI, HuggingFace, Latent Space, Import AI, The Rundown AI, Stratechery,\n Simon Willison, Karpathy, Lilian Weng, Linus Lee, and several more\n high-signal blogs and newsletters.\n- **Hacker News**: AI-tagged stories via the Algolia public API.\n- **arXiv**: newest `cs.AI` / `cs.CL` / `cs.LG` submissions.\n- **GitHub**: repos with `topic:ai` created in the last 14 days, sorted by stars.\n\nReddit and Bluesky public endpoints both 403-block traffic in 2026, so the\nport drops them. The production cron has the same scars in its logs.\n\n## Run locally\n\n```sh\npip install -r requirements.txt\nHF_TOKEN=hf_xxx python app.py\n```\n\nThen open the Gradio URL it prints. Click **Run briefing**.\n\n## Run as an HF Space\n\nThe repo is shaped like a standard Hugging Face Space. The `README.md`\nfront-matter wires `app.py` as the entry point and pins the Gradio SDK.\nAfter deploy, the Space's \"Settings → Variables and secrets\" gets one\nsecret: `HF_TOKEN` (a read-permission token is plenty).\n\n## Model\n\nDefault model: **Qwen/Qwen3-32B** (Apache 2.0, 32B dense, native JSON mode),\nrouted through HF Inference Providers.\n\nAlternatives that fit Build Small's ≤32B cap and were considered:\n`Qwen/Qwen3-30B-A3B`, `deepseek-ai/DeepSeek-R1-Distill-Qwen-32B`,\n`mistralai/Mistral-Small-24B-Instruct-2501`. Swap in the sidebar.\n\n## Targeted bonus quests\n\nThe hackathon has six optional bonus quests. This submission targets:\n\n- **Field Notes** — a write-up about the 70B → 32B down-port and what\n surprised me (see `docs/down-port-notes.md` after the build window).\n- **Sharing is Caring** — a captured agent trace published alongside the\n Space (see `docs/sample-trace.md`).\n- **Off-Brand** — custom Gradio theme + layout (see `app.py`).\n\nOptional stretch: **Llama Champion** (a llama.cpp variant for the same\npipeline) + **Off the Grid** (the llama.cpp variant doubles for that badge).\n\n## License\n\nApache 2.0.\n\n## Credit\n\nBuilt by [Mukunda Katta](https://github.com/MukundaKatta) as an independent\nproject for Build Small. The production cron it down-ports is\n[`MukundaKatta/ai-news-agent`](https://github.com/MukundaKatta/ai-news-agent).\n", + "readme_body": "# briefing-32\n\nA small-model AI-news briefing agent. Submission for the **Hugging Face\nBuild Small Hackathon** ([huggingface.co/build-small-hackathon](https://huggingface.co/build-small-hackathon))\nin the **Backyard AI** track.\n\n## What it is\n\nThis is a deliberate down-port of [`ai-news-agent`](https://github.com/MukundaKatta/ai-news-agent),\na personal cron that already runs every two hours on the maker's laptop to\ndeliver an AI-news digest to WhatsApp. The production cron uses Groq\nLlama-3.3-70B for relevance scoring. Build Small forces the same workflow\nunder 32B parameters.\n\nThe honest story for the Backyard AI track:\n\n> \"I have used a personal AI-news briefing every two hours since spring 2026.\n> The original uses a 70B model on a free Groq tier. Build Small asked me to\n> live under 32B, on a laptop. So I split the single 70B scoring pass into\n> two cheaper passes on Qwen3-32B — a binary relevance filter, then a graded\n> ranker — and the digest quality holds up.\"\n\n## Pipeline\n\n```\nfetch (RSS · HN · arXiv · GitHub)\n │\n ▼\npass 1 — binary relevance filter on Qwen3-32B\n │\n ▼\npass 2 — graded 0–10 ranker on Qwen3-32B\n │\n ▼\ndigest renderer on Qwen3-32B\n```\n\nTwo small-model calls do the work one big-model call did before.\n\n## Sources (no Reddit / Bluesky)\n\n- **RSS / Atom**: Anthropic, OpenAI, DeepMind, Google AI, Meta AI, Mistral,\n xAI, HuggingFace, Latent Space, Import AI, The Rundown AI, Stratechery,\n Simon Willison, Karpathy, Lilian Weng, Linus Lee, and several more\n high-signal blogs and newsletters.\n- **Hacker News**: AI-tagged stories via the Algolia public API.\n- **arXiv**: newest `cs.AI` / `cs.CL` / `cs.LG` submissions.\n- **GitHub**: repos with `topic:ai` created in the last 14 days, sorted by stars.\n\nReddit and Bluesky public endpoints both 403-block traffic in 2026, so the\nport drops them. The production cron has the same scars in its logs.\n\n## Run locally\n\n```sh\npip install -r requirements.txt\nHF_TOKEN=hf_xxx python app.py\n```\n\nThen open the Gradio URL it prints. Click **Run briefing**.\n\n## Run as an HF Space\n\nThe repo is shaped like a standard Hugging Face Space. The `README.md`\nfront-matter wires `app.py` as the entry point and pins the Gradio SDK.\nAfter deploy, the Space's \"Settings → Variables and secrets\" gets one\nsecret: `HF_TOKEN` (a read-permission token is plenty).\n\n## Model\n\nDefault model: **Qwen/Qwen3-32B** (Apache 2.0, 32B dense, native JSON mode),\nrouted through HF Inference Providers.\n\nAlternatives that fit Build Small's ≤32B cap and were considered:\n`Qwen/Qwen3-30B-A3B`, `deepseek-ai/DeepSeek-R1-Distill-Qwen-32B`,\n`mistralai/Mistral-Small-24B-Instruct-2501`. Swap in the sidebar.\n\n## Targeted bonus quests\n\nThe hackathon has six optional bonus quests. This submission targets:\n\n- **Field Notes** — a write-up about the 70B → 32B down-port and what\n surprised me (see `docs/down-port-notes.md` after the build window).\n- **Sharing is Caring** — a captured agent trace published alongside the\n Space (see `docs/sample-trace.md`).\n- **Off-Brand** — custom Gradio theme + layout (see `app.py`).\n\nOptional stretch: **Llama Champion** (a llama.cpp variant for the same\npipeline) + **Off the Grid** (the llama.cpp variant doubles for that badge).\n\n## License\n\nApache 2.0.\n\n## Credit\n\nBuilt by [Mukunda Katta](https://github.com/MukundaKatta) as an independent\nproject for Build Small. The production cron it down-ports is\n[`MukundaKatta/ai-news-agent`](https://github.com/MukundaKatta/ai-news-agent).", + "readme_frontmatter": { + "title": "briefing-32", + "emoji": "📰", + "colorFrom": "red", + "colorTo": "gray", + "sdk": "gradio", + "sdk_version": "5.42.0", + "app_file": "app.py", + "pinned": "false", + "license": "apache-2.0", + "short_description": "A 32B-class AI-news briefing the maker runs every 2 hours." + }, + "app_source": "\"\"\"briefing-32 — Gradio app entry for Hugging Face Spaces.\n\nBuild Small Hackathon submission (Backyard AI track):\nA small-model down-port of ~/ai-news-agent. The production version uses\nGroq Llama-3.3-70B; this version fits the same workflow under 32B params\nusing Qwen3-32B via Hugging Face Inference Providers.\n\nSame pipeline as the every-2-hours cron the maker has running on a laptop:\nfetch RSS / HN / arXiv / GitHub -> two-pass relevance filter + ranker ->\nreadable digest. Gradio is the delivery surface here instead of WhatsApp.\n\"\"\"\nfrom __future__ import annotations\n\nimport os\nimport time\nfrom typing import Any\n\nimport gradio as gr\nimport pandas as pd\n\nfrom config import (\n DEFAULT_BASE_URL,\n DEFAULT_MODEL,\n MIN_NEW_ITEMS,\n PER_SOURCE_CAP,\n)\nfrom digest import make_digest\nfrom fetch import fetch_all\nfrom rank import RankerConfig, rank_pipeline\n\n\n# ---------------------------------------------------------------------------\n# Core pipeline (callable from Gradio + scripts/cli.py)\n# ---------------------------------------------------------------------------\n\n\ndef run_briefing(\n window_hours: int,\n enabled_sources: list[str],\n model: str,\n hf_token: str,\n) -> dict[str, Any]:\n \"\"\"Fetch -> filter -> rank -> digest. Returns everything for the UI.\"\"\"\n since_ts = time.time() - window_hours * 3600\n enabled = set(enabled_sources) if enabled_sources else {\"rss\", \"hn\", \"arxiv\", \"github\"}\n\n t0 = time.perf_counter()\n raw = fetch_all(since_ts, enabled=enabled)\n fetch_latency = time.perf_counter() - t0\n\n cfg = RankerConfig(\n base_url=DEFAULT_BASE_URL,\n model=model or DEFAULT_MODEL,\n api_key=hf_token or \"\",\n )\n result = rank_pipeline(raw, cfg=cfg)\n\n digest = \"\"\n if result.after_rank >= MIN_NEW_ITEMS:\n digest = make_digest(result.items, cfg=cfg)\n elif result.after_rank > 0:\n digest = make_digest(result.items, cfg=cfg)\n\n return {\n \"digest\": digest or \"_(no high-signal items in window)_\",\n \"items\": result.items,\n \"raw_count\": result.raw_count,\n \"after_filter\": result.after_filter,\n \"after_rank\": result.after_rank,\n \"fetch_latency\": fetch_latency,\n \"filter_latency\": result.filter_latency,\n \"rank_latency\": result.rank_latency,\n \"model\": cfg.model,\n }\n\n\n# ---------------------------------------------------------------------------\n# Gradio glue\n# ---------------------------------------------------------------------------\n\n\ndef _items_to_df(items: list[dict]) -> pd.DataFrame:\n if not items:\n return pd.DataFrame(columns=[\"score\", \"source\", \"title\", \"reason\", \"url\"])\n rows = [\n {\n \"score\": it.get(\"score\", 0),\n \"source\": it.get(\"source\", \"\"),\n \"title\": it.get(\"title\", \"\"),\n \"reason\": it.get(\"reason\", \"\"),\n \"url\": it.get(\"url\", \"\"),\n }\n for it in items\n ]\n return pd.DataFrame(rows)\n\n\ndef _stats_md(result: dict[str, Any]) -> str:\n return (\n f\"**Model:** `{result['model']}` \\n\"\n f\"**Raw items fetched:** {result['raw_count']} \\n\"\n f\"**Survived filter:** {result['after_filter']} \\n\"\n f\"**Survived rank (score ≥ 6):** {result['after_rank']} \\n\"\n f\"**Fetch latency:** {result['fetch_latency']:.1f}s \\n\"\n f\"**Filter latency:** {result['filter_latency']:.1f}s \\n\"\n f\"**Rank latency:** {result['rank_latency']:.1f}s \\n\"\n f\"**Total LLM time:** {result['filter_latency'] + result['rank_latency']:.1f}s\"\n )\n\n\ndef _gradio_handler(window_hours, sources, model, hf_token):\n try:\n result = run_briefing(\n window_hours=int(window_hours),\n enabled_sources=list(sources or []),\n model=(model or DEFAULT_MODEL).strip(),\n hf_token=(hf_token or \"\").strip(),\n )\n except Exception as e:\n return (\n f\"**Error:** `{e}`\\n\\nMake sure `HF_TOKEN` is set in Space secrets \"\n f\"or pasted into the sidebar.\",\n pd.DataFrame(),\n \"_no run yet_\",\n )\n return result[\"digest\"], _items_to_df(result[\"items\"]), _stats_md(result)\n\n\n# Custom theme — \"Off-Brand\" bonus badge target.\nTHEME = gr.themes.Soft(\n primary_hue=\"orange\",\n secondary_hue=\"slate\",\n neutral_hue=\"zinc\",\n).set(\n body_background_fill=\"#0b1220\",\n body_text_color=\"#e2e8f0\",\n block_background_fill=\"#111827\",\n block_border_width=\"1px\",\n block_border_color=\"#1f2937\",\n button_primary_background_fill=\"#f97316\",\n button_primary_text_color=\"#0b1220\",\n)\n\n\nwith gr.Blocks(theme=THEME, title=\"briefing-32 · Build Small entry\") as demo:\n gr.Markdown(\n \"\"\"\n # briefing-32\n **A 32B-class AI-news briefing the maker runs every 2 hours.**\n\n Build Small Hackathon entry (Backyard AI track). Down-ported from the\n production `ai-news-agent` cron (Groq Llama-3.3-70B → WhatsApp) onto\n Qwen3-32B served by Hugging Face Inference Providers.\n\n Pipeline: RSS + HN + arXiv + GitHub → cheap relevance filter →\n graded 0–10 ranker → readable digest. Two open-weight model calls,\n no 70B cloud round-trip required.\n \"\"\"\n )\n\n with gr.Row():\n with gr.Column(scale=1):\n gr.Markdown(\"### Controls\")\n window_hours = gr.Slider(\n minimum=1, maximum=72, value=2, step=1,\n label=\"Window (hours back)\",\n info=\"Production runs every 2hr — match that for the authentic story.\",\n )\n sources = gr.CheckboxGroup(\n choices=[\"rss\", \"hn\", \"arxiv\", \"github\"],\n value=[\"rss\", \"hn\", \"arxiv\", \"github\"],\n label=\"Sources\",\n )\n model = gr.Textbox(\n value=DEFAULT_MODEL,\n label=\"Model (≤32B params)\",\n info=\"Default Qwen3-32B. Swap to Qwen3-30B-A3B for faster MoE inference.\",\n )\n hf_token = gr.Textbox(\n label=\"HF_TOKEN (optional — reads env if blank)\",\n placeholder=\"hf_…\",\n type=\"password\",\n )\n run_btn = gr.Button(\"Run briefing\", variant=\"primary\")\n\n gr.Markdown(\"### Run stats\")\n stats = gr.Markdown(\"_no run yet_\")\n\n with gr.Column(scale=2):\n gr.Markdown(\"### Digest\")\n digest = gr.Markdown(\n value=\"_Click **Run briefing** to fetch the last N hours of AI news, \"\n \"rank it on a ≤32B model, and render a readable briefing._\"\n )\n gr.Markdown(\"### Ranked items\")\n items_df = gr.Dataframe(\n headers=[\"score\", \"source\", \"title\", \"reason\", \"url\"],\n value=pd.DataFrame(columns=[\"score\", \"source\", \"title\", \"reason\", \"url\"]),\n wrap=True,\n interactive=False,\n )\n\n run_btn.click(\n _gradio_handler,\n inputs=[window_hours, sources, model, hf_token],\n outputs=[digest, items_df, stats],\n )\n\n gr.Markdown(\n \"\"\"\n ---\n *Build Small Hackathon · Backyard AI track. Apache 2.0.*\n Code: [github.com/MukundaKatta/briefing-32](https://github.com/MukundaKatta/briefing-32)\n \"\"\"\n )\n\n\nif __name__ == \"__main__\":\n demo.queue(max_size=8).launch(\n server_name=os.environ.get(\"GRADIO_SERVER_NAME\", \"0.0.0.0\"),\n server_port=int(os.environ.get(\"PORT\", \"7860\")),\n )\n", + "app_signals": "run_briefing window_hours enabled_sources model hf_token _items_to_df items _stats_md result _gradio_handler sources briefing-32 — Gradio app entry for Hugging Face Spaces. Build Small Hackathon submission (Backyard AI track): A small-model down-port of ~/ai-news-agent. The production version uses Groq Llama-3.3-70B; this version fits the same workflow under 32B params using Qwen3-32B via Hugging Face Inference Providers. Same pipeline as the every-2-hours cron the maker has running on a laptop: fetch RSS / HN / arXiv / GitHub -> two-pass relevance filter + ranker -> readable digest. Gradio is the delivery surface here instead of WhatsApp. set body_background_fill body_text_color block_background_fill block_border_width block_border_color button_primary_background_fill button_primary_text_color Fetch -> filter -> rank -> digest. Returns everything for the UI. time.perf_counter fetch_all enabled RankerConfig base_url api_key rank_pipeline cfg pd.DataFrame gr.Blocks theme title gr.Markdown run_btn.click inputs outputs __main__ launch server_name server_port time.time make_digest digest raw_count after_filter after_rank fetch_latency filter_latency rank_latency columns **Model:** ` ` **Raw items fetched:** **Survived filter:** **Survived rank (score ≥ 6):** **Fetch latency:** s **Filter latency:** s **Rank latency:** s **Total LLM time:** s gr.themes.Soft primary_hue secondary_hue neutral_hue #0b1220 #e2e8f0 #111827 1px #1f2937 #f97316 # briefing-32 **A 32B-class AI-news briefing the maker runs every 2 hours.** Build Small Hackathon entry (Backyard AI track). Down-ported from the production `ai-news-agent` cron (Groq Llama-3.3-70B → WhatsApp) onto Qwen3-32B served by Hugging Face Inference Providers. Pipeline: RSS + HN + arXiv + GitHub → cheap relevance filter → graded 0–10 ranker → readable digest. Two open-weight model calls, no 70B cloud round-trip required. gr.Row --- *Build Small Hackathon · Backyard AI track. Apache 2.0.* Code: [github.com/MukundaKatta/briefing-32](https://github.com/MukundaKatta/briefing-32) rss hn arxiv github _(no high-signal items in window)_ score source reason url it.get briefing-32 · Build Small entry gr.Column scale gr.Slider minimum maximum value step label info gr.CheckboxGroup choices gr.Textbox placeholder type gr.Button variant gr.Dataframe headers wrap interactive demo.queue max_size os.environ.get int .1f list strip _no run yet_ orange slate zinc ### Controls Run briefing ### Run stats ### Digest ### Ranked items GRADIO_SERVER_NAME 0.0.0.0 **Error:** ` ` Make sure `HF_TOKEN` is set in Space secrets or pasted into the sidebar. Window (hours back) Production runs every 2hr — match that for the authentic story. Sources Model (≤32B params) Default Qwen3-32B. Swap to Qwen3-30B-A3B for faster MoE inference. HF_TOKEN (optional — reads env if blank) hf_… password primary _Click **Run briefing** to fetch the last N hours of AI news, rank it on a ≤32B model, and render a readable briefing._ PORT 7860", + "readme_len": 3508, + "app_source_len": 7530, + "app_signals_len": 2978 + }, + { + "id": "build-small-hackathon/business-order-assistant", + "title": "Business Order Assistant", + "summary": "AI that gets order in any format and creates an invoice", + "tags": [ + "gradio", + "region:us" + ], + "models": [], + "datasets": [], + "sdk": "gradio", + "license": "mit", + "likes": 1, + "url": "https://huggingface.co/spaces/build-small-hackathon/business-order-assistant", + "app_file": "app.py", + "readme_raw": "---\ntitle: Business Order Assistant\nemoji: 🐨\ncolorFrom: gray\ncolorTo: yellow\nsdk: gradio\nsdk_version: 6.16.0\npython_version: '3.13'\napp_file: app.py\npinned: false\nlicense: mit\nshort_description: AI that gets order in any format and creates an invoice\n---\n\nCheck out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference\n", + "readme_body": "Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference", + "readme_frontmatter": { + "title": "Business Order Assistant", + "emoji": "🐨", + "colorFrom": "gray", + "colorTo": "yellow", + "sdk": "gradio", + "sdk_version": "6.16.0", + "python_version": "3.13", + "app_file": "app.py", + "pinned": "false", + "license": "mit", + "short_description": "AI that gets order in any format and creates an invoice" + }, + "app_source": "\"\"\"\nCatalogChat — Gradio frontend\nHackathon: Gradio Backyard AI Hackathon (June 2026)\nStack: Gradio ChatInterface + Modal backend (Whisper + Qwen2.5-7B)\n\"\"\"\n\nimport os\nimport io\nimport base64\nimport html\nimport uuid\nimport requests\nimport pandas as pd\nimport gradio as gr\n\n# ── Modal endpoints (set as HF Space Secrets) ────────────────────────────────\nBUILD_INDEX_URL = os.environ.get(\"MODAL_BUILD_INDEX_URL\", \"https://sopeadegboyega--catalog-assistant-build-index.modal.run\")\nCHAT_QUERY_URL = os.environ.get(\"MODAL_CHAT_QUERY_URL\", \"https://sopeadegboyega--catalog-assistant-chat-query.modal.run\")\nTRANSCRIBE_URL = os.environ.get(\"MODAL_TRANSCRIBE_URL\", \"\")\n\n# ── Custom CSS — terminal/amber aesthetic ─────────────────────────────────────\nCUSTOM_CSS = \"\"\"\n@import url('https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@400;600;700&family=Syne:wght@400;700;800&display=swap');\n\n/* ── Reset & base ── */\n*, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }\n\nbody, .gradio-container {\n background: #0D0D0D !important;\n color: #E8E0D0 !important;\n font-family: 'JetBrains Mono', monospace !important;\n}\n\n/* ── App title bar ── */\n#app-title {\n background: #0D0D0D;\n border-bottom: 1px solid #F5A623;\n padding: 14px 24px;\n display: flex;\n align-items: center;\n gap: 12px;\n}\n#app-title h1 {\n font-family: 'Syne', sans-serif;\n font-weight: 800;\n font-size: 1.5rem;\n color: #F5A623;\n letter-spacing: -0.02em;\n}\n#app-title .subtitle {\n font-size: 0.7rem;\n color: #6B6456;\n letter-spacing: 0.12em;\n text-transform: uppercase;\n}\n.badge {\n background: #1A1A0F;\n border: 1px solid #F5A623;\n color: #F5A623;\n font-size: 0.6rem;\n padding: 2px 8px;\n border-radius: 2px;\n letter-spacing: 0.1em;\n font-weight: 700;\n}\n\n/* ── Sidebar ── */\n#sidebar {\n background: #111111 !important;\n border-right: 1px solid #1E1E1E !important;\n padding: 20px 16px !important;\n}\n#sidebar label {\n color: #F5A623 !important;\n font-size: 0.7rem !important;\n letter-spacing: 0.15em !important;\n text-transform: uppercase !important;\n font-weight: 600 !important;\n}\n\n/* ── Upload zone ── */\n.upload-zone {\n border: 1px dashed #2A2A2A !important;\n background: #0A0A0A !important;\n border-radius: 4px !important;\n transition: border-color 0.2s !important;\n}\n.upload-zone:hover { border-color: #F5A623 !important; }\n\n/* ── Buttons ── */\nbutton.primary, .gr-button-primary {\n background: #F5A623 !important;\n color: #0D0D0D !important;\n border: none !important;\n border-radius: 3px !important;\n font-family: 'JetBrains Mono', monospace !important;\n font-weight: 700 !important;\n font-size: 0.75rem !important;\n letter-spacing: 0.08em !important;\n padding: 8px 16px !important;\n cursor: pointer !important;\n transition: opacity 0.15s !important;\n}\nbutton.primary:hover { opacity: 0.85 !important; }\n\nbutton.secondary, .gr-button-secondary {\n background: transparent !important;\n color: #E8E0D0 !important;\n border: 1px solid #2A2A2A !important;\n border-radius: 3px !important;\n font-family: 'JetBrains Mono', monospace !important;\n font-size: 0.75rem !important;\n padding: 8px 16px !important;\n cursor: pointer !important;\n transition: border-color 0.15s !important;\n}\nbutton.secondary:hover { border-color: #F5A623 !important; }\n\n/* ── Chat bubbles ── */\n.message.user {\n background: #1A1400 !important;\n border: 1px solid #3D2E00 !important;\n border-radius: 4px 4px 0 4px !important;\n color: #F5A623 !important;\n font-size: 0.85rem !important;\n}\n.message.bot {\n background: #111111 !important;\n border: 1px solid #1E1E1E !important;\n border-radius: 0 4px 4px 4px !important;\n color: #E8E0D0 !important;\n font-size: 0.85rem !important;\n line-height: 1.6 !important;\n}\n\n/* ── Chat input ── */\n#chat-input textarea {\n background: #111111 !important;\n color: #E8E0D0 !important;\n border: 1px solid #2A2A2A !important;\n border-radius: 3px !important;\n font-family: 'JetBrains Mono', monospace !important;\n font-size: 0.85rem !important;\n caret-color: #F5A623 !important;\n}\n#chat-input textarea:focus { border-color: #F5A623 !important; outline: none !important; }\n\n/* ── Status dot ── */\n.status-dot {\n width: 8px; height: 8px;\n border-radius: 50%;\n background: #2A2A2A;\n display: inline-block;\n transition: background 0.3s;\n}\n.status-dot.active { background: #4CAF50; box-shadow: 0 0 6px #4CAF5066; }\n\n/* ── Schema preview table ── */\n.schema-table {\n width: 100%;\n border-collapse: collapse;\n font-size: 0.72rem;\n margin-top: 8px;\n}\n.schema-table th {\n color: #F5A623;\n text-align: left;\n border-bottom: 1px solid #2A2A2A;\n padding: 4px 6px;\n font-weight: 600;\n letter-spacing: 0.08em;\n}\n.schema-table td {\n color: #9A8F80;\n padding: 4px 6px;\n border-bottom: 1px solid #161616;\n font-size: 0.7rem;\n}\n.schema-table tr:hover td { color: #E8E0D0; }\n\n/* ── Embed code box ── */\n.embed-code {\n background: #080808;\n border: 1px solid #1E1E1E;\n border-radius: 3px;\n padding: 12px;\n font-size: 0.7rem;\n color: #6B9FD4;\n font-family: 'JetBrains Mono', monospace;\n white-space: pre-wrap;\n word-break: break-all;\n margin-top: 8px;\n}\n\n/* ── Scrollbars ── */\n::-webkit-scrollbar { width: 4px; }\n::-webkit-scrollbar-track { background: #0D0D0D; }\n::-webkit-scrollbar-thumb { background: #2A2A2A; border-radius: 2px; }\n::-webkit-scrollbar-thumb:hover { background: #F5A623; }\n\n/* ── Accordion ── */\n.gr-accordion { background: #111111 !important; border: 1px solid #1E1E1E !important; }\n.gr-accordion-header { color: #E8E0D0 !important; font-size: 0.78rem !important; }\n\n/* ── Misc ── */\n.gr-form { background: transparent !important; }\n.gr-padded { padding: 0 !important; }\nfooter { display: none !important; }\n\"\"\"\n\n# ── State helpers ─────────────────────────────────────────────────────────────\n\ndef _post(url: str, payload: dict, timeout: int = 120):\n \"\"\"POST to Modal endpoint, return JSON or raise.\"\"\"\n resp = requests.post(url, json=payload, timeout=timeout)\n resp.raise_for_status()\n data = resp.json()\n if isinstance(data, dict) and data.get(\"error\"):\n raise RuntimeError(data[\"error\"])\n return data\n\n\ndef ensure_session(state: dict):\n \"\"\"Create per-browser catalog state lazily.\"\"\"\n state = state or {}\n state.setdefault(\"session_id\", str(uuid.uuid4()))\n state.setdefault(\"catalog_loaded\", False)\n state.setdefault(\"history\", [])\n return state\n\n\ndef render_schema_preview(columns, sample_df=None, row_count=None):\n count_text = f\"{row_count:,} rows\" if isinstance(row_count, int) else \"Catalog preview\"\n schema_rows = \"\"\n\n for column in columns:\n sample = \"\"\n dtype = \"\"\n if sample_df is not None and column in sample_df.columns:\n dtype = str(sample_df[column].dtype)\n non_empty = sample_df[column].dropna()\n sample = \"\" if non_empty.empty else str(non_empty.iloc[0])\n schema_rows += (\n \"\"\n f\"{html.escape(str(column))}\"\n f\"{html.escape(dtype)}\"\n f\"{html.escape(sample)}\"\n \"\"\n )\n\n return f\"\"\"\n

{count_text} · {len(columns)} columns

\n \n \n {schema_rows}\n
ColumnTypeSample
\n \"\"\"\n\n\ndef render_sources(sources):\n if not sources:\n return \"

Matched products will appear here after a reply.

\"\n\n rows = \"\"\n for source in sources[:3]:\n cells = \"\".join(\n f\"{html.escape(str(value))}\"\n for value in source.values()\n )\n rows += f\"{cells}\"\n\n headers = \"\".join(\n f\"{html.escape(str(key))}\"\n for key in sources[0].keys()\n )\n return f\"\"\"\n \n {headers}\n {rows}\n
\n \"\"\"\n\n\n# ── Catalog upload & index build ─────────────────────────────────────────────\n\ndef handle_upload(csv_file, state: dict):\n \"\"\"\n Called when user uploads a CSV.\n 1. Reads first 5 rows for schema preview.\n 2. Sends full CSV to Modal /build_index.\n 3. Stores session token in state.\n Returns: schema_html, status_msg, updated_state\n \"\"\"\n state = ensure_session(state)\n\n if csv_file is None:\n return \"

No file uploaded.

\", \"⬤ No catalog loaded\", state\n\n try:\n with open(csv_file.name, \"rb\") as f:\n csv_bytes = f.read()\n catalog_csv = csv_bytes.decode(\"utf-8-sig\")\n except Exception as e:\n return f\"

CSV read error: {e}

\", \"⬤ Error\", state\n\n try:\n preview_df = pd.read_csv(io.StringIO(catalog_csv), nrows=3)\n preview_columns = list(preview_df.columns)\n schema_html = render_schema_preview(preview_columns, preview_df)\n except Exception as e:\n return f\"

CSV parse error: {e}

\", \"⬤ Error\", state\n\n # Send to Modal\n if not BUILD_INDEX_URL:\n status = \"⚠ MODAL_BUILD_INDEX_URL not set — running in demo mode\"\n state[\"catalog_loaded\"] = True\n state[\"catalog_name\"] = os.path.basename(csv_file.name)\n state[\"history\"] = []\n state[\"demo_df\"] = pd.read_csv(io.StringIO(catalog_csv)).to_dict(orient=\"records\")\n return schema_html, status, state\n\n try:\n result = _post(\n BUILD_INDEX_URL,\n {\n \"catalog_csv\": catalog_csv,\n \"session_id\": state[\"session_id\"],\n },\n )\n state[\"session_id\"] = result.get(\"session_id\", state[\"session_id\"])\n state[\"catalog_loaded\"] = True\n state[\"history\"] = []\n state[\"catalog_name\"] = os.path.basename(csv_file.name)\n row_count = result.get(\"row_count\")\n columns = result.get(\"columns\") or preview_columns\n schema_html = render_schema_preview(columns, preview_df, row_count)\n product_label = f\"{row_count:,} products\" if isinstance(row_count, int) else \"products\"\n status = f\"✓ Catalog loaded: {product_label}\"\n except Exception as e:\n state[\"catalog_loaded\"] = False\n status = f\"⚠ Index error: {e}\"\n\n return schema_html, status, state\n\n\n# ── Voice transcription ───────────────────────────────────────────────────────\n\ndef transcribe_audio(audio_path, state: dict):\n \"\"\"Send audio file to Modal Whisper endpoint, return transcript.\"\"\"\n state = ensure_session(state)\n\n if audio_path is None:\n return \"\", state\n\n if not TRANSCRIBE_URL:\n return \"[Voice transcription requires MODAL_TRANSCRIBE_URL]\", state\n\n try:\n audio_b64 = base64.b64encode(open(audio_path, \"rb\").read()).decode()\n result = _post(TRANSCRIBE_URL, {\"audio_b64\": audio_b64, \"language\": \"en\"}, timeout=120)\n return result.get(\"text\", \"\"), state\n except Exception as e:\n return f\"[Transcription error: {e}]\", state\n\n\n# ── Chat handler ──────────────────────────────────────────────────────────────\n\ndef chat_fn(message: str, ui_history: list, state: dict, business_name: str):\n \"\"\"\n Called by gr.ChatInterface on each user message.\n Sends message + history to Modal /chat_query.\n \"\"\"\n state = ensure_session(state)\n\n if not message.strip():\n return \"\", state, []\n\n if not state.get(\"catalog_loaded\"):\n return (\n \"**No catalog loaded.** Upload a CSV file in the sidebar first, \"\n \"then ask me anything about your products.\"\n ), state, []\n\n if not CHAT_QUERY_URL:\n # Demo mode — simple keyword match against in-memory df\n df_records = state.get(\"demo_df\", [])\n matches = [\n r for r in df_records\n if any(message.lower() in str(v).lower() for v in r.values())\n ][:3]\n if matches:\n lines = \"\\n\".join(f\"• {r}\" for r in matches)\n reply = f\"_(Demo mode — no Modal endpoint)_\\n\\nTop matches:\\n{lines}\"\n state[\"history\"].extend([\n {\"role\": \"user\", \"content\": message},\n {\"role\": \"assistant\", \"content\": reply},\n ])\n return reply, state, matches\n reply = \"_(Demo mode)_ No matching products found for that query.\"\n state[\"history\"].extend([\n {\"role\": \"user\", \"content\": message},\n {\"role\": \"assistant\", \"content\": reply},\n ])\n return reply, state, []\n\n payload = {\n \"message\": message,\n \"session_id\": state[\"session_id\"],\n \"history\": state.get(\"history\", [])[-6:],\n \"business_name\": (business_name or \"\").strip() or \"our store\",\n }\n\n try:\n result = _post(CHAT_QUERY_URL, payload, timeout=180)\n reply = result.get(\"reply\", \"No response from model.\")\n state[\"history\"].extend([\n {\"role\": \"user\", \"content\": message},\n {\"role\": \"assistant\", \"content\": reply},\n ])\n return reply, state, result.get(\"sources\", [])\n except requests.exceptions.Timeout:\n return \"⏱ The model took too long to respond. Please try again.\", state, []\n except Exception as e:\n return f\"⚠ Backend error: {e}\", state, []\n\n\n# ── Embed code generator ──────────────────────────────────────────────────────\n\ndef generate_embed(space_id: str):\n \"\"\"Return iframe embed snippet for a HF Space.\"\"\"\n space_id = space_id.strip()\n if not space_id:\n return \"

Enter your HF Space ID above.

\"\n\n snippet = f''\n return f\"
{snippet}
\"\n\n\n# ── Gradio UI ─────────────────────────────────────────────────────────────────\n\ndef build_ui():\n with gr.Blocks(\n title=\"CatalogChat — AI Product Assistant\",\n ) as demo:\n session_state = gr.State({})\n\n # ── Title bar ──\n gr.HTML(\"\"\"\n
\n
\n

⬡ CatalogChat

\n
Backyard AI · Qwen2.5-7B · BM25 Retrieval
\n
\n OFF-BRAND\n OFF THE GRID\n
\n \"\"\")\n\n with gr.Row(equal_height=True):\n\n # ── LEFT SIDEBAR ──────────────────────────────────────────────────\n with gr.Column(scale=1, elem_id=\"sidebar\", min_width=280):\n\n gr.HTML(\"
▸ CATALOG
\")\n\n csv_upload = gr.File(\n label=\"Upload product CSV\",\n file_types=[\".csv\"],\n elem_classes=[\"upload-zone\"],\n )\n\n upload_btn = gr.Button(\"⟳ Index Catalog\", variant=\"primary\", size=\"sm\")\n\n catalog_status = gr.HTML(\n \" No catalog loaded\"\n )\n\n schema_display = gr.HTML(\n \"

Schema preview will appear here.

\"\n )\n\n business_name = gr.Textbox(\n placeholder=\"Business name\",\n label=\"Business name\",\n value=\"our store\",\n lines=1,\n )\n\n gr.HTML(\"
\")\n\n # ── Voice input ──\n gr.HTML(\"
▸ VOICE INPUT
\")\n\n audio_input = gr.Audio(\n sources=[\"microphone\"],\n type=\"filepath\",\n label=\"Record your question\",\n show_label=False,\n )\n\n transcript_box = gr.Textbox(\n placeholder=\"Transcript appears here — edit then send\",\n label=\"Transcript\",\n lines=2,\n show_label=False,\n )\n\n transcribe_btn = gr.Button(\"⟳ Transcribe\", variant=\"secondary\", size=\"sm\")\n\n gr.HTML(\"
\")\n\n # ── Embed generator ──\n with gr.Accordion(\"⟐ Embed Code Generator\", open=False):\n gr.HTML(\"

Generate iframe snippet for your website

\")\n space_id_input = gr.Textbox(\n placeholder=\"your-username/your-space\",\n label=\"HF Space ID\",\n show_label=False,\n )\n embed_btn = gr.Button(\"Generate Snippet\", variant=\"secondary\", size=\"sm\")\n embed_output = gr.HTML()\n\n gr.HTML(\"
\")\n\n # ── Starter prompts ──\n gr.HTML(\"
▸ TRY ASKING
\")\n gr.HTML(\"\"\"\n
\n
\n What products do you have under ₦8,000?\n
\n
\n Show me blue dresses in medium\n
\n
\n Compare your top 3 sofas\n
\n
\n \"\"\")\n\n # ── CHAT PANEL ────────────────────────────────────────────────────\n with gr.Column(scale=3):\n\n chatbot = gr.Chatbot(\n label=\"\",\n # type=\"messages\",\n height=520,\n show_label=False,\n # bubble_full_width=False,\n avatar_images=(\n None, # user avatar\n \"https://api.dicebear.com/7.x/bottts-neutral/svg?seed=catalogchat&backgroundColor=0D0D0D\",\n ),\n render_markdown=True,\n )\n\n with gr.Row():\n chat_input = gr.Textbox(\n placeholder=\"Ask about products, prices, availability…\",\n show_label=False,\n lines=1,\n scale=5,\n elem_id=\"chat-input\",\n container=False,\n )\n send_btn = gr.Button(\"Send ↵\", variant=\"primary\", scale=1)\n\n gr.HTML(\"\"\"\n
\n POWERED BY QWEN2.5-7B · MODAL SERVERLESS · BM25 RETRIEVAL\n
\n \"\"\")\n\n with gr.Accordion(\"Matched Products\", open=False):\n sources_display = gr.HTML(\n \"

Matched products will appear here after a reply.

\"\n )\n\n # ── Wire events ───────────────────────────────────────────────────────\n\n # Upload & index\n upload_btn.click(\n fn=handle_upload,\n inputs=[csv_upload, session_state],\n outputs=[schema_display, catalog_status, session_state],\n )\n\n # Also trigger on file drop\n csv_upload.change(\n fn=handle_upload,\n inputs=[csv_upload, session_state],\n outputs=[schema_display, catalog_status, session_state],\n )\n\n # Chat — send button\n def respond(message, history, state, business):\n history = history or []\n answer, state, sources = chat_fn(message, history, state, business)\n if message.strip():\n history.extend([\n {\"role\": \"user\", \"content\": message},\n {\"role\": \"assistant\", \"content\": answer},\n ])\n return \"\", history, state, render_sources(sources)\n\n send_btn.click(\n fn=respond,\n inputs=[chat_input, chatbot, session_state, business_name],\n outputs=[chat_input, chatbot, session_state, sources_display],\n )\n\n # Chat — Enter key\n chat_input.submit(\n fn=respond,\n inputs=[chat_input, chatbot, session_state, business_name],\n outputs=[chat_input, chatbot, session_state, sources_display],\n )\n\n # Voice transcription\n transcribe_btn.click(\n fn=transcribe_audio,\n inputs=[audio_input, session_state],\n outputs=[transcript_box, session_state],\n )\n\n # Send transcript as chat message\n transcript_box.submit(\n fn=respond,\n inputs=[transcript_box, chatbot, session_state, business_name],\n outputs=[transcript_box, chatbot, session_state, sources_display],\n )\n\n # Embed generator\n embed_btn.click(\n fn=generate_embed,\n inputs=[space_id_input],\n outputs=[embed_output],\n )\n\n return demo\n\n\n# ── Entry point ───────────────────────────────────────────────────────────────\nif __name__ == \"__main__\":\n demo = build_ui()\n demo.launch(\n css=CUSTOM_CSS,\n theme=gr.themes.Base(\n primary_hue=\"orange\",\n neutral_hue=\"stone\",\n font=gr.themes.GoogleFont(\"JetBrains Mono\"),\n ),\n # server_name=\"0.0.0.0\",\n # server_port=3000,\n share=False,\n )\n", + "app_signals": "_post url payload timeout ensure_session state render_schema_preview columns sample_df row_count render_sources sources handle_upload csv_file transcribe_audio audio_path chat_fn message ui_history business_name generate_embed space_id build_ui CatalogChat — Gradio frontend Hackathon: Gradio Backyard AI Hackathon (June 2026) Stack: Gradio ChatInterface + Modal backend (Whisper + Qwen2.5-7B) os.environ.get MODAL_BUILD_INDEX_URL https://sopeadegboyega--catalog-assistant-build-index.modal.run MODAL_CHAT_QUERY_URL https://sopeadegboyega--catalog-assistant-chat-query.modal.run MODAL_TRANSCRIBE_URL POST to Modal endpoint, return JSON or raise. requests.post json resp.raise_for_status resp.json Create per-browser catalog state lazily. state.setdefault join Called when user uploads a CSV. 1. Reads first 5 rows for schema preview. 2. Sends full CSV to Modal /build_index. 3. Stores session token in state. Returns: schema_html, status_msg, updated_state Send audio file to Modal Whisper endpoint, return transcript. Called by gr.ChatInterface on each user message. Sends message + history to Modal /chat_query. Return iframe embed snippet for a HF Space. space_id.strip respond history business __main__ demo.launch css theme share isinstance data.get RuntimeError session_id str catalog_loaded Catalog preview · columns Column Type Sample Matched products will appear here after a reply. csv_bytes.decode pd.read_csv nrows list ⚠ MODAL_BUILD_INDEX_URL not set — running in demo mode os.path.basename to_dict orient result.get decode message.strip state.get _(Demo mode)_ No matching products found for that query. extend Enter your HF Space ID above.