Spaces:
Running on Zero
Running on Zero
agharsallah
feat(observability): enhance logging setup to route Python warnings and improve telemetry
3e954d6 | # Copy to .env and fill in your values. | |
| # | |
| # The app requires real infrastructure to run: a DATABASE_URL (durable event | |
| # store) and a live inference backend. There is no offline/stub product mode — | |
| # the deterministic stub now exists only as the test suite's mock data. Pick one | |
| # of two inference backends (the Lab's §00 selector chooses per run): the small | |
| # models you serve yourself from `modal/` (see modal/README.md), or Hugging Face's | |
| # serverless Inference Providers (just an HF_TOKEN). Either is small-model-only | |
| # (<=32B); there is no generic OpenAI/cloud path. | |
| # ── Live models: the OpenAI-compatible small models served on Modal ──────────── | |
| # (ADR-0014 / 0015 / 0019). Setting MODAL_WORKSPACE activates the live path. Each | |
| # profile in config/models.yaml binds to a model by its *catalogue key* | |
| # (modal/catalogue.py — the single source of truth for what is deployed); the | |
| # engine derives that endpoint's URL as | |
| # https://${MODAL_WORKSPACE}--<app>-<endpoint>.modal.run/v1 | |
| # so the workspace is the only deploy-specific value and is never hard-coded. | |
| # Calls route through the LiteLLM gateway, which meters real per-call cost into | |
| # the Governor's hourly_budget_usd. MODAL_LLM_KEY is the endpoint bearer token — | |
| # a self-served vLLM endpoint accepts any token (default "EMPTY"). | |
| # MODAL_WORKSPACE=your-modal-workspace | |
| # MODAL_LLM_KEY=EMPTY | |
| MODAL_WORKSPACE= | |
| MODAL_LLM_KEY= | |
| # Alternatively, point at a single explicit OpenAI-compatible endpoint (also | |
| # activates the live path) — one model served on Modal, or a local llama.cpp / | |
| # vLLM box. Used as the base_url for every profile. | |
| # MODAL_LLM_BASE_URL=https://your-workspace--google-llms-gemma-4-12b.modal.run/v1 | |
| MODAL_LLM_BASE_URL= | |
| # ── Live models: Hugging Face serverless Inference Providers (ADR-0024) ───────── | |
| # A SECOND inference backend, next to Modal — no GPUs to deploy: one HF_TOKEN makes | |
| # many small instruct models callable through HF's OpenAI-compatible router. Setting | |
| # HF_TOKEN activates the live path for the Hugging Face backend; the Lab's "Inference | |
| # backend" selector (§00) chooses Modal vs Hugging Face per run, and the per-agent | |
| # pickers then offer that backend's catalogue (src/models/hf_catalogue.py). Calls route | |
| # through the same LiteLLM gateway as Modal, metering real cost into the Governor. | |
| # HF_TOKEN=hf_xxxxxxxxxxxxxxxxxxxxxxxx | |
| HF_TOKEN= | |
| # Optional: point the HF backend at a self-hosted TGI / dedicated HF Inference Endpoint | |
| # (same OpenAI-compatible REST API) instead of the public router. Activates live on its | |
| # own (HF_TOKEN then becomes that endpoint's bearer, if it needs one). | |
| # HF_INFERENCE_BASE_URL=https://your-endpoint.endpoints.huggingface.cloud/v1 | |
| HF_INFERENCE_BASE_URL= | |
| # Optional per-profile model override (ADR-0010). Each agent's manifest picks a | |
| # logical profile (tiny/fast/balanced/strong); these override the model string | |
| # bound to that profile in config/models.yaml (highest priority). Every model | |
| # must be <=32B; tiny <=4B. Values are LiteLLM model strings — for a custom | |
| # OpenAI-compatible endpoint that is `openai/<served_model_id>`. | |
| # MODEL_TINY=openai/nvidia/NVIDIA-Nemotron-3-Nano-4B-BF16 | |
| # MODEL_FAST=openai/openbmb/MiniCPM4.1-8B | |
| # MODEL_BALANCED=openai/google/gemma-4-12B | |
| # MODEL_STRONG=openai/google/gemma-4-26B-A4B-it | |
| # Durable event store backend (ADR-0014 Postgres) — REQUIRED. The append-only | |
| # ledger is persisted through SQLAlchemy; the app refuses to run without a URL | |
| # (there is no in-memory fallback). Neon (managed Postgres) form: | |
| # DATABASE_URL=postgresql+psycopg://USER:PASSWORD@HOST/DB?sslmode=require | |
| # A local SQLite URL also works (e.g. a single-box run without a server): | |
| # DATABASE_URL=sqlite:///runs/events.db | |
| DATABASE_URL= | |
| # Semantic memory index (ADR-0018). Truthy activates a derived vector lens over | |
| # the ledger. Embeddings run LOCALLY via sentence-transformers (no API key; | |
| # cached after first use); sentence-transformers + mem0 ship as core deps. | |
| # Override the backend/embedder with a MEMORY_INDEX_CONFIG JSON blob (passed | |
| # verbatim to mem0; see docs/architecture/memory-stack.md). | |
| # MEMORY_INDEX=1 | |
| # MEMORY_INDEX_CONFIG={"embedder": {"provider": "huggingface", "config": {"model": "sentence-transformers/all-MiniLM-L6-v2"}}} | |
| MEMORY_INDEX= | |
| MEMORY_INDEX_CONFIG= | |
| # Hosted mem0 platform backend (ADR-0020), opt-in. Set MEMORY_INDEX=cloud (or | |
| # MEMORY_INDEX_BACKEND=cloud) to use mem0's managed service instead of the local | |
| # embedder. NOTE: this sends ledger event text to mem0's servers — a deliberate | |
| # departure from the off-the-grid default; the local backend stays the default. | |
| # MEMORY_INDEX=cloud | |
| # MEM0_API_KEY=m0-... # required for the hosted backend | |
| MEM0_API_KEY= | |
| # Optional hosted scoping: | |
| # MEM0_ORG_ID= | |
| # MEM0_PROJECT_ID= | |
| # MEM0_HOST= | |
| # MEMORY_INDEX_BACKEND= # set to "cloud" to force the hosted backend | |
| # Gradio server port (auto-detects a free port in 7960-8059 if unset). | |
| GRADIO_SERVER_PORT= | |
| # ── Observability / logging (ADR-0024) ──────────────────────────────────────── | |
| # Logs stream to the CLI (stdout) AND the in-app Telemetry panel. Raise the level | |
| # to watch agent/LLM activity live while running `uv run app.py` — DEBUG surfaces | |
| # full prompts + memory. Python warnings are routed through this stream too. | |
| # MAL_LOG_LEVEL=DEBUG # DEBUG | INFO (default) | WARNING | ERROR | |
| # MAL_LOG_FORMAT=text # text (human, default) | json (one object per line) | |
| # MAL_TRACING=memory # off | console | memory (default) | both | |
| MAL_LOG_LEVEL= | |
| MAL_LOG_FORMAT= | |
| MAL_TRACING= | |