Spaces:
Building
Building
File size: 847 Bytes
# Address and port for the service's own server.
# "0.0.0.0" is the IPv4 wildcard address; used as a bind address it means
# "listen on every interface".
[server]
host = "0.0.0.0"
port = 7860
# Settings for the model worker (a llama-server binary, per `llama_server_bin`).
# NOTE(review): this table has both `host` and `bind_host`. Presumably `host`
# is the address the service uses to reach the worker and `bind_host` is the
# address the worker listens on — confirm against the config loader.
# NOTE(review): if `bind_host` is a listen address, "0.0.0.0" exposes the
# worker on every interface even though `host` only targets loopback;
# confirm that is intended.
[worker]
# Model reference; the part after ":" looks like a quantization tag — TODO confirm.
default_model = "QuantFactory/Qwen2.5-7B-Instruct-GGUF:q4_k_m"
llama_server_bin = "/usr/local/bin/llama-server"
host = "127.0.0.1"
bind_host = "0.0.0.0"
# Named "base" port; presumably worker ports are allocated starting here — confirm.
base_port = 8080
switch_timeout_sec = 300  # presumably seconds, per the `_sec` suffix
# Inference parameters.
# NOTE(review): the key names mirror llama.cpp options (context size, threads,
# GPU layers, batch, micro-batch); presumably they are forwarded to
# llama-server — confirm against the code that builds its command line.
[llama]
n_ctx = 8192
threads = 4
# Presumably the number of layers offloaded to the GPU; 0 would mean CPU-only.
ngl = 0
batch = 128
ubatch = 64
# Where and how API credentials are read from a request.
# Presumably the two values combine into `Authorization: Bearer <secret>`
# — confirm against the auth middleware.
[auth]
header = "Authorization"
scheme = "Bearer"
# Per-request limits. The default token budget (256) is below the per-request
# cap (2048).
# NOTE(review): [llama] sets ngl = 0 and threads = 4; if that means CPU-only
# inference, 30 s may be short for requests near the 2048-token cap — confirm.
[limits]
default_max_tokens = 256
max_tokens_per_request = 2048
request_timeout_sec = 30  # presumably seconds, per the `_sec` suffix
# Request queue bounds.
# NOTE(review): the meaning of `max_tokens` (presumably total tokens across
# queued requests) and `admin_quota` (presumably slots reserved for the
# "admin" role) is not visible here — confirm against the queue code.
[queue]
max_size = 100
max_tokens = 20000
admin_quota = 3
# Presumably seconds; likely the back-off hint sent to rejected clients — confirm.
retry_after_sec = 5
# Scheduler settings.
# Presumably the number of requests processed at once; 1 would serialize
# all inference — confirm against the scheduler.
[scheduler]
max_concurrent = 1
# Streaming toggle; currently off.
# NOTE(review): presumably controls streamed (incremental) responses — confirm.
[streaming]
enabled = false
# Rate limits, expressed per minute.
# NOTE(review): the scope (per key, per IP, or global) and how tokens are
# "estimated" are not visible here — confirm against the rate limiter.
[rate_limit]
requests_per_minute = 60
estimated_tokens_per_minute = 6000
# API key entry with the "admin" role.
# NOTE(review): the secret is a plain-text placeholder ("change-me-...") and
# the key is enabled. Replace it with a real secret before deploying,
# ideally injected from outside this file.
[[api_keys]]
key_id = "admin-main"
secret = "change-me-admin"
role = "admin"
enabled = true
# API key entry with the "user" role.
# NOTE(review): the secret is a plain-text placeholder ("change-me-...") and
# the key is enabled. Replace it with a real secret before deploying,
# ideally injected from outside this file.
[[api_keys]]
key_id = "user-main"
secret = "change-me-user"
role = "user"
enabled = true