# --- Preset selection (models.yaml is the source of truth) ---
ACTIVE_MODEL=minicpm5-1b
# ALLOW_MODEL_SWITCH defaults to true when unset (models.yaml). On a Hugging Face Space, set it to false to pin one model for visitors.
# ALLOW_MODEL_SWITCH=false
# MODEL_PRESETS_PATH=./models.yaml

# --- Agent outputs ---
# AGENT_OUTPUTS_DIR=/tmp/agent_outputs
# AGENT_TRACES_DIR=outputs/traces
# SKILLS_DIR=./skills

# --- ResearchMind (MemRAG + scraper) ---
# RESEARCHMIND_DATA_DIR=outputs/researchmind
# RESEARCHMIND_EMBED_MODEL=all-MiniLM-L6-v2
# RESEARCHMIND_EMBED_DEVICE=cpu
# INFERENCE_DEVICE=auto
# RESEARCHMIND_AUTO_SEARCH=false
# RESEARCHMIND_TOP_K=5
# RESEARCHMIND_CHUNK_SIZE=512
# RESEARCHMIND_CHUNK_OVERLAP=128

# --- Legacy single-model overrides (optional; applied to ACTIVE_MODEL only) ---
# INFERENCE_BACKEND=transformers
# MODEL_ID=openbmb/MiniCPM5-1B
# TRUST_REMOTE_CODE=true

# --- Local dev: switch backends/models in Gradio Settings (Classic + Studio) ---
# ALLOW_MODEL_SWITCH=true
# ACTIVE_MODEL=minicpm-v-4.6 # transformers default (or minicpm5-1b)
# switch in UI to minicpm-v-4.6-gguf for llama.cpp / Llama Champion track

# --- llama.cpp presets (optional) ---
# ACTIVE_MODEL=minicpm-v-4.6-gguf
# ACTIVE_MODEL=qwen3b-gguf
# INFERENCE_BACKEND=llama_cpp
# MODEL_REPO=Qwen/Qwen2.5-3B-Instruct-GGUF
# MODEL_FILE=qwen2.5-3b-instruct-q4_k_m.gguf
# N_CTX=4096
# N_GPU_LAYERS=0
# Optional: local GGUF path instead of Hub download
# MODEL_PATH=./models/qwen2.5-3b-instruct-q4_k_m.gguf
# Optional: local fine-tuned merged weights
# ACTIVE_MODEL=gemma-merged-local
# MODEL_ID=./gemma_merged_model

# --- Modal (research/modal/finetune_app.py) ---
# Create secret: modal secret create huggingface HF_TOKEN=hf_...
# HF_TOKEN=hf_...
# --- Fine-tuning (research/finetune.py) ---
# FINETUNE_PRESET=minicpm5-1b
# FINETUNE_MODEL=openbmb/MiniCPM5-1B
# FINETUNE_DATASET=./research/data/education-lesson-chat.jsonl
# FINETUNE_DATASET=tatsu-lab/alpaca
# FINETUNE_DATASET_CONFIG=
# FINETUNE_DATASET_SPLIT=train
# FINETUNE_MAX_SAMPLES=500
# FINETUNE_OUT=./models/finetuned/minicpm5-1b-lora
# FINETUNE_FORMAT=chat
# After training, point Gradio at the adapter preset:
# ACTIVE_MODEL=minicpm5-1b-lesson-lora

# --- EchoCoach / Language lessons (voice stack) ---
# VOICE_PRESETS_PATH=./voice_models.yaml
# Default (Cohere-free): Whisper ASR + OpenBMB language-lesson LoRA coach
# ECHOCOACH_ASR_PRESET=whisper-cpp-base
# ECHOCOACH_COACH_MODEL=minicpm5-1b-language-lesson-hub
# ECHOCOACH_COACH_FALLBACK=minicpm5-1b-language-lesson-lora,minicpm5-1b
# Optional Cohere Labs partner demo (GPU Space + HF gated models):
# ECHOCOACH_ASR_PRESET=cohere-transcribe
# ECHOCOACH_COACH_MODEL=tiny-aya-global
# ECHOCOACH_TTS_PRESET=piper-multilingual
# ECHOCOACH_REALTIME_TTS_PRESET=vibevoice-realtime-0.5b
# Dev fallback (CPU, no LoRA):
# ECHOCOACH_ASR_PRESET=whisper-cpp-tiny
# ECHOCOACH_COACH_MODEL=minicpm5-1b
# ECHOCOACH_MAX_SECONDS=30
# ECHOCOACH_CAPTURE_DEVICE= # optional ALSA/PipeWire device (e.g. pipewire, alsa_input.pci-...)
# ECHOCOACH_VOICE_PROFILE=pipeline # pipeline (default) or omni for MiniCPM-o attempt
# ECHOCOACH_OMNI_MODEL=openbmb/MiniCPM-o-4_5
# PIPER_VOICES_DIR=~/.local/share/piper/voices
BASE=openbmb/MiniCPM5-1B