File size: 847 Bytes
d9ce859
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
332826f
 
 
d9ce859
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# Network endpoint of the service itself.
# NOTE(review): presumably the address/port the front-end listens on; confirm
# against the code that loads this file.
[server]
# 0.0.0.0 is the IPv4 wildcard address (all interfaces, not just loopback).
host = "0.0.0.0"
port = 7860

# Settings for the llama-server worker.
# NOTE(review): key meanings below are inferred from their names; confirm
# against the code that loads this file.
[worker]
# Model reference written as "<repo>:<tag>". Presumably a GGUF repository plus
# a quantization tag; verify how the loader resolves it.
default_model = "QuantFactory/Qwen2.5-7B-Instruct-GGUF:q4_k_m"
# Absolute path to the llama-server executable.
llama_server_bin = "/usr/local/bin/llama-server"
# NOTE(review): host is loopback while bind_host is the all-interfaces
# wildcard. If bind_host is the address the worker listens on, the worker port
# is reachable from other machines without passing through [auth]. Confirm
# this is intended.
host = "127.0.0.1"
bind_host = "0.0.0.0"
# Presumably the first port handed to a worker; TODO confirm.
base_port = 8080
# Seconds, per the _sec suffix. Presumably the time allowed for a model switch.
switch_timeout_sec = 300

# Inference tuning values.
# NOTE(review): the key names resemble llama.cpp options (context size,
# threads, GPU layers, batch, micro-batch); presumably they are forwarded to
# llama-server. Confirm the exact mapping in the launcher code.
[llama]
n_ctx = 8192
threads = 4
# If this maps to llama.cpp's GPU-layer count, 0 means no layers are offloaded.
ngl = 0
batch = 128
ubatch = 64

# How API credentials are expected to arrive.
# NOTE(review): with these values a client would presumably send
# "Authorization: Bearer <secret>"; confirm against the request handler.
[auth]
header = "Authorization"
scheme = "Bearer"

# Per-request limits.
[limits]
# Presumably applied when a request does not specify its own token limit.
default_max_tokens = 256
# Presumably the upper bound a single request may ask for.
max_tokens_per_request = 2048
# Seconds, per the _sec suffix.
# NOTE(review): check that this is long enough for a request at the
# max_tokens_per_request cap, and for one that triggers a model switch
# ([worker].switch_timeout_sec is 300).
request_timeout_sec = 30

# Request queue bounds.
# NOTE(review): semantics are inferred from the key names; confirm in the
# queue implementation.
[queue]
# Presumably the maximum number of queued requests.
max_size = 100
# Presumably a cap on total tokens across queued requests.
max_tokens = 20000
# Presumably related to keys with role = "admin" (see [[api_keys]]); the
# exact meaning of the quota is not visible in this file.
admin_quota = 3
# Seconds, per the _sec suffix. Presumably the retry delay advertised to
# rejected clients.
retry_after_sec = 5

# Scheduler settings.
[scheduler]
# Presumably the number of requests processed at the same time; 1 would mean
# strictly one at a time. TODO confirm.
max_concurrent = 1

# Streaming toggle.
[streaming]
# Presumably controls streamed (incremental) responses; currently off.
enabled = false

# Rate limits, expressed per minute according to the key names.
# NOTE(review): the scope (per key, per client, or global) is not visible in
# this file; confirm in the limiter.
[rate_limit]
requests_per_minute = 60
estimated_tokens_per_minute = 6000

# API key entry with the "admin" role.
# NOTE(review): the secret reads as a placeholder ("change-me-...") and the
# entry is enabled. Replace it before deployment and avoid committing a real
# secret to version control.
[[api_keys]]
key_id = "admin-main"
secret = "change-me-admin"
role = "admin"
enabled = true

# API key entry with the "user" role.
# NOTE(review): the secret reads as a placeholder ("change-me-...") and the
# entry is enabled. Replace it before deployment and avoid committing a real
# secret to version control.
[[api_keys]]
key_id = "user-main"
secret = "change-me-user"
role = "user"
enabled = true