{
"servers": [
{
"name": "research_fast",
"url": "http://127.0.0.1:1234/v1",
"model": "qwen/qwen3-4b-2507",
"timeout": 120.0,
"max_concurrent": 2,
"weight": 1.0,
"enabled": true,
"description": "Fast Qwen 4B model for research tasks - quick exploration and data gathering"
},
{
"name": "thinking_analysis",
"url": "http://127.0.0.1:1234/v1",
"model": "qwen/qwen3-4b-thinking-2507",
"timeout": 150.0,
"max_concurrent": 2,
"weight": 1.2,
"enabled": true,
"description": "Qwen Thinking model for analysis and critique - reasoning focused"
},
{
"name": "synthesis_quality",
"url": "http://127.0.0.1:1234/v1",
"model": "google/gemma-3-12b",
"timeout": 180.0,
"max_concurrent": 1,
"weight": 1.5,
"enabled": true,
"description": "Gemma 12B model for synthesis - high-quality final output generation"
}
],
"agent_mapping": {
"research": "research_fast",
"analysis": "thinking_analysis",
"synthesis": "synthesis_quality",
"critic": "thinking_analysis"
},
"load_balance_strategy": "agent_type_mapping",
"health_check_interval": 30.0,
"failover_enabled": true,
"debug_mode": false,
"notes": {
"setup": "Single LM Studio server with multiple models",
"models_required": [
"qwen/qwen3-4b-2507",
"qwen/qwen3-4b-thinking-2507",
"google/gemma-3-12b"
],
"expected_behavior": "Agents use different models but same server endpoint",
"performance": "Parallel requests with model switching handled by LM Studio"
}
}