File size: 3,188 Bytes
9a7964b
a31982f
727cb75
 
9a7964b
 
a31982f
 
 
 
f173e0f
e7fd66f
 
 
81da2d5
 
e7fd66f
 
 
 
 
a31982f
f173e0f
9a7964b
 
 
727cb75
 
 
 
 
a31982f
727cb75
a31982f
 
 
 
 
 
 
 
 
 
9a7964b
 
 
59e2c8a
6cea344
 
 
 
59e2c8a
 
 
 
 
 
 
 
 
 
 
 
 
9939b9d
1719c2a
aac5f23
 
 
 
 
9939b9d
 
1719c2a
9939b9d
aac5f23
9939b9d
1719c2a
 
 
29e2c18
 
1719c2a
 
59e2c8a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# --- Preset selection (models.yaml is the source of truth) ---
ACTIVE_MODEL=minicpm5-1b
# Defaults to true when unset (see models.yaml). On a Hugging Face Space, set this to false to pin one model for visitors.
# ALLOW_MODEL_SWITCH=false
# MODEL_PRESETS_PATH=./models.yaml

# --- Agent outputs ---
# AGENT_OUTPUTS_DIR=/tmp/agent_outputs
# AGENT_TRACES_DIR=outputs/traces
# SKILLS_DIR=./skills

# --- ResearchMind (MemRAG + scraper) ---
# RESEARCHMIND_DATA_DIR=outputs/researchmind
# RESEARCHMIND_EMBED_MODEL=all-MiniLM-L6-v2
# RESEARCHMIND_EMBED_DEVICE=cpu
# INFERENCE_DEVICE=auto
# RESEARCHMIND_AUTO_SEARCH=false
# RESEARCHMIND_TOP_K=5
# RESEARCHMIND_CHUNK_SIZE=512
# RESEARCHMIND_CHUNK_OVERLAP=128

# --- Legacy single-model overrides (optional; applied to ACTIVE_MODEL only) ---
# INFERENCE_BACKEND=transformers
# MODEL_ID=openbmb/MiniCPM5-1B
# TRUST_REMOTE_CODE=true

# --- Local dev: switch backends/models in Gradio Settings (Classic + Studio) ---
# ALLOW_MODEL_SWITCH=true
# transformers default (or use minicpm5-1b):
# ACTIVE_MODEL=minicpm-v-4.6
# Switch in the UI to minicpm-v-4.6-gguf for llama.cpp / Llama Champion track.

# --- llama.cpp presets (optional) ---
# ACTIVE_MODEL=minicpm-v-4.6-gguf
# ACTIVE_MODEL=qwen3b-gguf
# INFERENCE_BACKEND=llama_cpp
# MODEL_REPO=Qwen/Qwen2.5-3B-Instruct-GGUF
# MODEL_FILE=qwen2.5-3b-instruct-q4_k_m.gguf
# N_CTX=4096
# N_GPU_LAYERS=0

# Optional: local GGUF path instead of Hub download
# MODEL_PATH=./models/qwen2.5-3b-instruct-q4_k_m.gguf

# Optional: local fine-tuned merged weights
# ACTIVE_MODEL=gemma-merged-local
# MODEL_ID=./gemma_merged_model

# --- Modal (research/modal/finetune_app.py) ---
# Create secret: modal secret create huggingface HF_TOKEN=<token>
# HF_TOKEN=hf_...

# --- Fine-tuning (research/finetune.py) ---
# FINETUNE_PRESET=minicpm5-1b
# FINETUNE_MODEL=openbmb/MiniCPM5-1B
# FINETUNE_DATASET=./research/data/education-lesson-chat.jsonl
# FINETUNE_DATASET=tatsu-lab/alpaca
# FINETUNE_DATASET_CONFIG=
# FINETUNE_DATASET_SPLIT=train
# FINETUNE_MAX_SAMPLES=500
# FINETUNE_OUT=./models/finetuned/minicpm5-1b-lora
# FINETUNE_FORMAT=chat
# After training, point Gradio at the adapter preset:
# ACTIVE_MODEL=minicpm5-1b-lesson-lora

# --- EchoCoach / Language lessons (voice stack) ---
# VOICE_PRESETS_PATH=./voice_models.yaml
# Default (Cohere-free): Whisper ASR + OpenBMB language-lesson LoRA coach
# ECHOCOACH_ASR_PRESET=whisper-cpp-base
# ECHOCOACH_COACH_MODEL=minicpm5-1b-language-lesson-hub
# ECHOCOACH_COACH_FALLBACK=minicpm5-1b-language-lesson-lora,minicpm5-1b
# Optional Cohere Labs partner demo (GPU Space + HF gated models):
# ECHOCOACH_ASR_PRESET=cohere-transcribe
# ECHOCOACH_COACH_MODEL=tiny-aya-global
# ECHOCOACH_TTS_PRESET=piper-multilingual
# ECHOCOACH_REALTIME_TTS_PRESET=vibevoice-realtime-0.5b
# Dev fallback (CPU, no LoRA):
# ECHOCOACH_ASR_PRESET=whisper-cpp-tiny
# ECHOCOACH_COACH_MODEL=minicpm5-1b
# ECHOCOACH_MAX_SECONDS=30
# Optional ALSA/PipeWire capture device (e.g. pipewire, alsa_input.pci-...):
# ECHOCOACH_CAPTURE_DEVICE=
# Voice profile: pipeline (default), or omni for the MiniCPM-o attempt:
# ECHOCOACH_VOICE_PROFILE=pipeline
# ECHOCOACH_OMNI_MODEL=openbmb/MiniCPM-o-4_5
# PIPER_VOICES_DIR=~/.local/share/piper/voices

BASE=openbmb/MiniCPM5-1B