Spaces:
Build error
Build error
Dmitry Beresnev
Refactors the C++ LLM manager into modular components, moves Python modules under python/, and keeps the current control-plane behavior intact. The C++ server now has clearer separation for config, model lifecycle, runtime services, request parsing, HTTP helpers, and server routing, while Docker build/runtime paths were updated to compile multiple C++ files and load Python code from the new package folder.
332826f

[server]
host = "0.0.0.0"
port = 7860

[worker]
default_model = "QuantFactory/Qwen2.5-7B-Instruct-GGUF:q4_k_m"
llama_server_bin = "/usr/local/bin/llama-server"
host = "127.0.0.1"
bind_host = "0.0.0.0"
base_port = 8080
switch_timeout_sec = 300

[llama]
n_ctx = 8192
threads = 4
ngl = 0
batch = 128
ubatch = 64

[auth]
header = "Authorization"
scheme = "Bearer"

[limits]
default_max_tokens = 256
max_tokens_per_request = 2048
request_timeout_sec = 30

[queue]
max_size = 100
max_tokens = 20000
admin_quota = 3
retry_after_sec = 5

[scheduler]
max_concurrent = 1

[streaming]
enabled = false

[rate_limit]
requests_per_minute = 60
estimated_tokens_per_minute = 6000

[[api_keys]]
key_id = "admin-main"
secret = "change-me-admin"
role = "admin"
enabled = true

[[api_keys]]
key_id = "user-main"
secret = "change-me-user"
role = "user"
enabled = true