multi-agent-lab / .env.example
agharsallah
feat(observability): enhance logging setup to route Python warnings and improve telemetry
3e954d6
Raw
History Blame Contribute Delete
5.68 kB
# Copy to .env and fill in your values.
#
# The app requires real infrastructure to run: a DATABASE_URL (durable event
# store) and a live inference backend. There is no offline/stub product mode —
# the deterministic stub now exists only as the test suite's mock data. Pick one
# of two inference backends (the Lab's §00 selector chooses per run): the small
# models you serve yourself from `modal/` (see modal/README.md), or Hugging Face's
# serverless Inference Providers (just an HF_TOKEN). Either is small-model-only
# (<=32B); there is no generic OpenAI/cloud path.
# ── Live models: the OpenAI-compatible small models served on Modal ────────────
# (ADR-0014 / 0015 / 0019). Setting MODAL_WORKSPACE activates the live path. Each
# profile in config/models.yaml binds to a model by its *catalogue key*
# (modal/catalogue.py — the single source of truth for what is deployed); the
# engine derives that endpoint's URL as
# https://${MODAL_WORKSPACE}--<app>-<endpoint>.modal.run/v1
# so the workspace is the only deploy-specific value and is never hard-coded.
# Calls route through the LiteLLM gateway, which meters real per-call cost into
# the Governor's hourly_budget_usd. MODAL_LLM_KEY is the endpoint bearer token —
# a self-served vLLM endpoint accepts any token (default "EMPTY").
# MODAL_WORKSPACE=your-modal-workspace
# MODAL_LLM_KEY=EMPTY
MODAL_WORKSPACE=
MODAL_LLM_KEY=
# Alternatively, point at a single explicit OpenAI-compatible endpoint (also
# activates the live path) — one model served on Modal, or a local llama.cpp /
# vLLM box. Used as the base_url for every profile.
# MODAL_LLM_BASE_URL=https://your-workspace--google-llms-gemma-4-12b.modal.run/v1
MODAL_LLM_BASE_URL=
# ── Live models: Hugging Face serverless Inference Providers (ADR-0024) ─────────
# A SECOND inference backend, next to Modal — no GPUs to deploy: one HF_TOKEN makes
# many small instruct models callable through HF's OpenAI-compatible router. Setting
# HF_TOKEN activates the live path for the Hugging Face backend; the Lab's "Inference
# backend" selector (§00) chooses Modal vs Hugging Face per run, and the per-agent
# pickers then offer that backend's catalogue (src/models/hf_catalogue.py). Calls route
# through the same LiteLLM gateway as Modal, metering real cost into the Governor.
# HF_TOKEN=hf_xxxxxxxxxxxxxxxxxxxxxxxx
HF_TOKEN=
# Optional: point the HF backend at a self-hosted TGI / dedicated HF Inference Endpoint
# (same OpenAI-compatible REST API) instead of the public router. Activates live on its
# own (HF_TOKEN then becomes that endpoint's bearer, if it needs one).
# HF_INFERENCE_BASE_URL=https://your-endpoint.endpoints.huggingface.cloud/v1
HF_INFERENCE_BASE_URL=
# Optional per-profile model override (ADR-0010). Each agent's manifest picks a
# logical profile (tiny/fast/balanced/strong); these override the model string
# bound to that profile in config/models.yaml (highest priority). Every model
# must be <=32B; tiny <=4B. Values are LiteLLM model strings — for a custom
# OpenAI-compatible endpoint that is `openai/<served_model_id>`.
# MODEL_TINY=openai/nvidia/NVIDIA-Nemotron-3-Nano-4B-BF16
# MODEL_FAST=openai/openbmb/MiniCPM4.1-8B
# MODEL_BALANCED=openai/google/gemma-4-12B
# MODEL_STRONG=openai/google/gemma-4-26B-A4B-it
# Durable event store backend (ADR-0014 Postgres) — REQUIRED. The append-only
# ledger is persisted through SQLAlchemy; the app refuses to run without a URL
# (there is no in-memory fallback). Neon (managed Postgres) form:
# DATABASE_URL=postgresql+psycopg://USER:PASSWORD@HOST/DB?sslmode=require
# A local SQLite URL also works (e.g. a single-box run without a server):
# DATABASE_URL=sqlite:///runs/events.db
DATABASE_URL=
# Semantic memory index (ADR-0018). Truthy activates a derived vector lens over
# the ledger. Embeddings run LOCALLY via sentence-transformers (no API key;
# cached after first use); sentence-transformers + mem0 ship as core deps.
# Override the backend/embedder with a MEMORY_INDEX_CONFIG JSON blob (passed
# verbatim to mem0; see docs/architecture/memory-stack.md).
# MEMORY_INDEX=1
# MEMORY_INDEX_CONFIG={"embedder": {"provider": "huggingface", "config": {"model": "sentence-transformers/all-MiniLM-L6-v2"}}}
MEMORY_INDEX=
MEMORY_INDEX_CONFIG=
# Hosted mem0 platform backend (ADR-0020), opt-in. Set MEMORY_INDEX=cloud (or
# MEMORY_INDEX_BACKEND=cloud) to use mem0's managed service instead of the local
# embedder. NOTE: this sends ledger event text to mem0's servers — a deliberate
# departure from the off-the-grid default; the local backend stays the default.
# MEMORY_INDEX=cloud
# MEM0_API_KEY=m0-... # required for the hosted backend
MEM0_API_KEY=
# Optional hosted scoping:
# MEM0_ORG_ID=
# MEM0_PROJECT_ID=
# MEM0_HOST=
# MEMORY_INDEX_BACKEND= # set to "cloud" to force the hosted backend
# Gradio server port (auto-detects a free port in 7960-8059 if unset).
GRADIO_SERVER_PORT=
# ── Observability / logging (ADR-0024) ────────────────────────────────────────
# Logs stream to the CLI (stdout) AND the in-app Telemetry panel. Raise the level
# to watch agent/LLM activity live while running `uv run app.py` — DEBUG surfaces
# full prompts + memory. Python warnings are routed through this stream too.
# MAL_LOG_LEVEL: DEBUG | INFO (default) | WARNING | ERROR
# MAL_LOG_LEVEL=DEBUG
# MAL_LOG_FORMAT: text (human, default) | json (one object per line)
# MAL_LOG_FORMAT=text
# MAL_TRACING: off | console | memory (default) | both
# MAL_TRACING=memory
MAL_LOG_LEVEL=
MAL_LOG_FORMAT=
MAL_TRACING=