{
"servers": [
{
"name": "research_fast",
"url": "http://127.0.0.1:1234/v1",
"model": "qwen/qwen3-4b-2507",
"timeout": 120.0,
"max_concurrent": 2,
"weight": 1.0,
"enabled": true,
"description": "Fast Qwen 4B model for research tasks - quick exploration and data gathering"
},
{
"name": "thinking_analysis",
"url": "http://127.0.0.1:1234/v1",
"model": "qwen/qwen3-4b-thinking-2507",
"timeout": 150.0,
"max_concurrent": 2,
"weight": 1.2,
"enabled": true,
"description": "Qwen Thinking model for analysis and critique - reasoning focused"
},
{
"name": "synthesis_quality",
"url": "http://127.0.0.1:1234/v1",
"model": "google/gemma-3-12b",
"timeout": 180.0,
"max_concurrent": 1,
"weight": 1.5,
"enabled": true,
"description": "Gemma 12B model for synthesis - high-quality final output generation"
}
],
"agent_mapping": {
"research": "research_fast",
"analysis": "thinking_analysis",
"synthesis": "synthesis_quality",
"critic": "thinking_analysis"
},
"load_balance_strategy": "agent_type_mapping",
"health_check_interval": 30.0,
"failover_enabled": true,
"debug_mode": false,
"notes": {
"setup": "Single LM Studio server with multiple models",
"models_required": [
"qwen/qwen3-4b-2507",
"qwen/qwen3-4b-thinking-2507",
"google/gemma-3-12b"
],
"expected_behavior": "Agents use different models but same server endpoint",
"performance": "Parallel requests with model switching handled by LM Studio"
}
}