AdarshJi committed on
Commit
2033856
·
verified ·
1 Parent(s): bc7f410

Update server.py

Browse files
Files changed (1) hide show
  1. server.py +251 -215
server.py CHANGED
@@ -6,290 +6,306 @@ import httpx
6
  import json
7
  from fastapi import FastAPI, Request, HTTPException
8
  from fastapi.responses import StreamingResponse
9
-
10
-
11
-
12
-
13
-
14
-
15
-
16
- def GROQM() -> list:
17
- R = [
18
- {
19
- "id": "openai/gpt-oss-120b",
20
- "owned_by": "OpenAI",
21
- "context_window": 131072,
22
- "max_completion_tokens": 65536
23
- },
24
- {
25
- "id": "moonshotai/kimi-k2-instruct",
26
- "owned_by": "Moonshot AI",
27
- "context_window": 131072,
28
- "max_completion_tokens": 16384
29
- },
30
- {
31
- "id": "canopylabs/orpheus-v1-english",
32
- "owned_by": "Canopy Labs",
33
- "context_window": 4000,
34
- "max_completion_tokens": 50000
35
- },
36
- {
37
- "id": "llama-3.1-8b-instant",
38
- "owned_by": "Meta",
39
- "context_window": 131072,
40
- "max_completion_tokens": 131072
41
- },
42
- {
43
- "id": "whisper-large-v3",
44
- "owned_by": "OpenAI",
45
- "context_window": 448,
46
- "max_completion_tokens": 448
47
- },
48
- {
49
- "id": "meta-llama/llama-4-scout-17b-16e-instruct",
50
- "owned_by": "Meta",
51
- "context_window": 131072,
52
- "max_completion_tokens": 8192
53
- },
54
- {
55
- "id": "allam-2-7b",
56
- "owned_by": "SDAIA",
57
- "context_window": 4096,
58
- "max_completion_tokens": 4096
59
- },
60
- {
61
- "id": "groq/compound",
62
- "owned_by": "Groq",
63
- "context_window": 131072,
64
- "max_completion_tokens": 8192
65
- },
66
- {
67
- "id": "canopylabs/orpheus-arabic-saudi",
68
- "owned_by": "Canopy Labs",
69
- "context_window": 4000,
70
- "max_completion_tokens": 50000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  },
72
  {
73
- "id": "llama-3.3-70b-versatile",
74
- "owned_by": "Meta",
75
- "context_window": 131072,
76
- "max_completion_tokens": 32768
77
  },
78
  {
79
- "id": "qwen/qwen3-32b",
80
- "owned_by": "Alibaba Cloud",
81
- "context_window": 131072,
82
- "max_completion_tokens": 40960
83
  },
84
  {
85
- "id": "meta-llama/llama-prompt-guard-2-22m",
86
- "owned_by": "Meta",
87
- "context_window": 512,
88
- "max_completion_tokens": 512
89
  },
90
  {
91
- "id": "groq/compound-mini",
92
- "owned_by": "Groq",
93
- "context_window": 131072,
94
- "max_completion_tokens": 8192
95
  },
96
  {
97
- "id": "meta-llama/llama-guard-4-12b",
98
- "owned_by": "Meta",
99
- "context_window": 131072,
100
- "max_completion_tokens": 1024
101
  },
102
  {
103
- "id": "openai/gpt-oss-20b",
104
- "owned_by": "OpenAI",
105
- "context_window": 131072,
106
- "max_completion_tokens": 65536
107
  },
108
  {
109
  "id": "openai/gpt-oss-safeguard-20b",
110
- "owned_by": "OpenAI",
111
- "context_window": 131072,
112
- "max_completion_tokens": 65536
113
- },
114
- {
115
- "id": "meta-llama/llama-4-maverick-17b-128e-instruct",
116
- "owned_by": "Meta",
117
- "context_window": 131072,
118
- "max_completion_tokens": 8192
119
  },
120
  {
121
- "id": "moonshotai/kimi-k2-instruct-0905",
122
- "owned_by": "Moonshot AI",
123
- "context_window": 262144,
124
- "max_completion_tokens": 16384
125
  }
126
- ]
127
- return R
128
-
129
-
130
-
131
-
132
- def LLMCM() -> list:
133
- R = [
134
- {
135
- "id": "@cf/aisingapore/gemma-sea-lion-v4-27b-it",
136
- "owned_by": "AI Singapore",
137
- "context_window": None,
138
- "max_completion_tokens": None
139
- },
140
- {
141
- "id": "@cf/deepseek-ai/deepseek-r1-distill-qwen-32b",
142
- "owned_by": "DeepSeek AI",
143
- "context_window": None,
144
- "max_completion_tokens": None
145
- },
146
  {
147
- "id": "@cf/defog/sqlcoder-7b-2",
148
- "owned_by": "Defog",
149
- "context_window": None,
150
- "max_completion_tokens": None
151
  },
152
  {
153
- "id": "@cf/google/gemma-2b-it-lora",
154
- "owned_by": "Google",
155
- "context_window": None,
156
- "max_completion_tokens": None
157
  },
158
  {
159
- "id": "@cf/google/gemma-3-12b-it",
160
- "owned_by": "Google",
161
- "context_window": None,
162
- "max_completion_tokens": None
163
  },
164
  {
165
- "id": "@cf/ibm-granite/granite-4.0-h-micro",
166
- "owned_by": "IBM",
167
- "context_window": None,
168
- "max_completion_tokens": None
169
  },
170
  {
171
- "id": "@cf/meta/llama-2-7b-chat-fp16",
172
- "owned_by": "Meta",
173
- "context_window": None,
174
- "max_completion_tokens": None
175
  },
176
  {
177
- "id": "@cf/meta/llama-2-7b-chat-int8",
178
- "owned_by": "Meta",
179
- "context_window": None,
180
- "max_completion_tokens": None
181
  },
182
  {
183
- "id": "@cf/meta/llama-3-8b-instruct",
184
- "owned_by": "Meta",
185
- "context_window": None,
186
- "max_completion_tokens": None
187
  },
188
  {
189
- "id": "@cf/meta/llama-3-8b-instruct-awq",
190
- "owned_by": "Meta",
191
- "context_window": None,
192
- "max_completion_tokens": None
193
  },
194
  {
195
- "id": "@cf/meta/llama-3.1-70b-instruct",
196
- "owned_by": "Meta",
197
- "context_window": None,
198
- "max_completion_tokens": None
199
  },
200
  {
201
- "id": "@cf/meta/llama-3.1-8b-instruct",
202
- "owned_by": "Meta",
203
- "context_window": None,
204
- "max_completion_tokens": None
205
  },
206
  {
207
- "id": "@cf/meta/llama-3.2-1b-instruct",
208
- "owned_by": "Meta",
209
- "context_window": None,
210
- "max_completion_tokens": None
211
  },
212
  {
213
- "id": "@cf/meta/llama-3.2-3b-instruct",
214
- "owned_by": "Meta",
215
- "context_window": None,
216
- "max_completion_tokens": None
217
  },
218
  {
219
- "id": "@cf/meta/llama-3.3-70b-instruct-fp8-fast",
220
- "owned_by": "Meta",
221
- "context_window": None,
222
- "max_completion_tokens": None
223
  },
224
  {
225
- "id": "@cf/meta/llama-4-scout-17b-16e-instruct",
226
- "owned_by": "Meta",
227
- "context_window": None,
228
- "max_completion_tokens": None
229
  },
230
  {
231
- "id": "@cf/microsoft/phi-2",
232
- "owned_by": "Microsoft",
233
- "context_window": None,
234
- "max_completion_tokens": None
235
  },
236
  {
237
- "id": "@cf/mistral/mistral-7b-instruct-v0.2-lora",
238
- "owned_by": "Mistral AI",
239
- "context_window": None,
240
- "max_completion_tokens": None
241
  },
242
  {
243
- "id": "@cf/mistralai/mistral-small-3.1-24b-instruct",
244
- "owned_by": "Mistral AI",
245
- "context_window": None,
246
- "max_completion_tokens": None
247
  },
248
  {
249
- "id": "@cf/qwen/qwen2.5-coder-32b-instruct",
250
- "owned_by": "Alibaba Cloud",
251
- "context_window": None,
252
- "max_completion_tokens": None
253
  },
254
  {
255
- "id": "@cf/qwen/qwen3-30b-a3b-fp8",
256
- "owned_by": "Alibaba Cloud",
257
- "context_window": None,
258
- "max_completion_tokens": None
259
  },
260
  {
261
- "id": "@cf/qwen/qwq-32b",
262
- "owned_by": "Alibaba Cloud",
263
- "context_window": None,
264
- "max_completion_tokens": None
265
  },
266
  {
267
- "id": "@hf/google/gemma-7b-it",
268
- "owned_by": "Google",
269
- "context_window": None,
270
- "max_completion_tokens": None
271
  },
272
  {
273
- "id": "@hf/meta-llama/meta-llama-3-8b-instruct",
274
- "owned_by": "Meta",
275
- "context_window": None,
276
- "max_completion_tokens": None
277
  }
278
  ]
279
- return R
280
-
281
-
 
 
 
 
 
282
 
283
 
284
 
285
  try:
286
- MODEL_NAMES = {"GROQ" : GROQM() , "LLMC" : LLMCM()}
287
  except Exception:
288
  MODEL_NAMES = {"GROQ": "GROQ-FALLBACK", "LLMC": "LLMC-FALLBACK"}
289
 
290
 
291
  class Config:
292
- DEFAULT_PROVIDER = "GROQ"
293
  DEFAULT_MODEL = "llama-3.3-70b-versatile"
294
  DEFAULT_TEMPERATURE = 0.7
295
  CHUNK_SIZE = 1000
@@ -299,7 +315,7 @@ class Config:
299
  STREAM_BATCH_BYTES = 0
300
 
301
  PROVIDERS: Dict[str, Dict[str, Any]] = {
302
- "GROQ": {
303
  "AUTH": True,
304
  "BASE_URL": "https://api.groq.com/openai/v1/chat/completions",
305
  "DEFAULT_MODEL": "qwen/qwen3-32b",
@@ -312,8 +328,8 @@ PROVIDERS: Dict[str, Dict[str, Any]] = {
312
  "stream": "{stream}",
313
  },
314
  },
315
- "LLMC": {
316
- "AUTH": True,
317
  "BASE_URL": "https://llmchat.in/inference/stream?model={model}",
318
  "DEFAULT_MODEL": "@cf/meta/llama-3.1-8b-instruct",
319
  "HEADERS": {
@@ -324,6 +340,26 @@ PROVIDERS: Dict[str, Dict[str, Any]] = {
324
  },
325
  "PAYLOAD": {"messages": "{messages}", "stream": "{stream}"},
326
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
327
  }
328
 
329
  _placeholder_re = re.compile(r"\{(.*?)\}")
 
6
  import json
7
  from fastapi import FastAPI, Request, HTTPException
8
  from fastapi.responses import StreamingResponse
9
+ import requests
10
+
11
+
12
+
13
def get_models():
    """Return the static catalogue of models, grouped by provider key.

    Returns:
        A dict with two entries:

        * ``"Providers"`` -- list of provider keys (``"1"`` .. ``"5"``), in
          catalogue order.
        * ``"Models"`` -- dict mapping each provider key to a list of
          ``{"id": <model id>, "owned_by": <vendor name>}`` dicts.

    Each model id appears at most once per provider; the previous hand-written
    listing repeated four ids under provider "5".
    """
    # (model id, owner) pairs per provider key. Keeping them as tuples makes
    # the table compact; they are expanded into dicts below.
    catalogue = {
        "1": [
            ("openai/gpt-oss-120b", "OpenAI"),
            ("moonshotai/kimi-k2-instruct", "Moonshot AI"),
            ("canopylabs/orpheus-v1-english", "Canopy Labs"),
            ("llama-3.1-8b-instant", "Meta"),
            ("whisper-large-v3", "OpenAI"),
            ("meta-llama/llama-4-scout-17b-16e-instruct", "Meta"),
            ("allam-2-7b", "SDAIA"),
            ("groq/compound", "Groq"),
            ("canopylabs/orpheus-arabic-saudi", "Canopy Labs"),
            ("llama-3.3-70b-versatile", "Meta"),
            ("qwen/qwen3-32b", "Alibaba Cloud"),
            ("meta-llama/llama-prompt-guard-2-22m", "Meta"),
            ("groq/compound-mini", "Groq"),
            ("meta-llama/llama-guard-4-12b", "Meta"),
            ("openai/gpt-oss-20b", "OpenAI"),
            ("openai/gpt-oss-safeguard-20b", "OpenAI"),
            ("meta-llama/llama-4-maverick-17b-128e-instruct", "Meta"),
            ("moonshotai/kimi-k2-instruct-0905", "Moonshot AI"),
        ],
        "2": [
            ("aisingapore/gemma-sea-lion-v4-27b-it", "AI Singapore"),
            ("defog/sqlcoder-7b-2", "Defog"),
            ("ibm-granite/granite-4.0-h-micro", "IBM"),
            ("meta/llama-3.1-8b-instruct", "Meta"),
            ("microsoft/phi-2", "Microsoft"),
            ("qwen/qwen3-30b-a3b-fp8", "Alibaba Cloud"),
            ("qwen/qwq-32b", "Alibaba Cloud"),
        ],
        "3": [
            ("zai-org/glm-4.6", "Zhipu AI"),
            ("openai/gpt-5-nano-2025-08-07", "OpenAI"),
            ("deepseek-ai/deepseek-v3.2-thinking", "DeepSeek AI"),
            ("nvidia/nvidia-nemotron-3-nano-30b-a3b", "NVIDIA"),
            ("nvidia/nvidia-nemotron-3-nano-30b-a3b-thinking", "NVIDIA"),
            ("openai/gpt-5-mini-2025-08-07", "OpenAI"),
            ("qwen/qwen3-vl-235b-a22b-thinking", "Alibaba Cloud"),
            ("qwen/qwen3-vl-235b-a22b-instruct", "Alibaba Cloud"),
            ("perplexity/sonar", "Perplexity"),
            ("moonshotai/kimi-k2.5", "Moonshot AI"),
            ("anthropic/claude-haiku-4-5-20251001", "Anthropic"),
            ("google/gemini-2.5-flash-lite", "Google"),
            ("moonshotai/kimi-k2-thinking", "Moonshot AI"),
            ("mistralai/devstral-2-123b-instruct-2512", "Mistral AI"),
            ("mistralai/mistral-large-3-675b-instruct-2512", "Mistral AI"),
            ("openai/gpt-oss-safeguard-20b", "OpenAI"),
            ("openai/gpt-oss-120b", "OpenAI"),
        ],
        "4": [
            ("qwen3-4b-thinking-2507", "Alibaba Cloud"),
        ],
        "5": [
            ("meta/llama-3.1-70b-instruct", "Meta"),
            ("qwen/qwen2.5-coder-32b-instruct", "Alibaba Cloud"),
            ("deepseek-ai/deepseek-r1-distill-qwen-32b", "DeepSeek AI"),
            ("meta/llama-4-scout-17b-16e-instruct", "Meta"),
            ("google/gemma-3-12b-it", "Google"),
            ("mistralai/mistral-small-3.1-24b-instruct", "Mistral AI"),
            ("meta/llama-3.3-70b-instruct-fp8-fast", "Meta"),
            ("meta/llama-3.2-3b-instruct", "Meta"),
            ("meta/llama-3.2-1b-instruct", "Meta"),
            ("meta-llama/meta-llama-3-8b-instruct", "Meta"),
            ("meta/llama-3-8b-instruct", "Meta"),
            ("meta/llama-2-7b-chat-int8", "Meta"),
            ("meta/llama-2-7b-chat-fp16", "Meta"),
            ("meta/llama-3-8b-instruct-awq", "Meta"),
            ("google/gemma-7b-it", "Google"),
            ("google/gemma-2b-it-lora", "Google"),
            ("mistral/mistral-7b-instruct-v0.2", "Mistral AI"),
            ("mistral/mistral-7b-instruct-v0.2-lora", "Mistral AI"),
        ],
    }

    mord = {
        # Derived from the catalogue keys so the two can never drift apart.
        "Providers": list(catalogue),
        "Models": {
            provider: [
                {"id": model_id, "owned_by": owner}
                for model_id, owner in entries
            ]
            for provider, entries in catalogue.items()
        },
    }

    return mord
298
 
299
 
300
 
301
  try:
302
+ MODEL_NAMES = get_models()
303
  except Exception:
304
  MODEL_NAMES = {"GROQ": "GROQ-FALLBACK", "LLMC": "LLMC-FALLBACK"}
305
 
306
 
307
  class Config:
308
+ DEFAULT_PROVIDER = "1"
309
  DEFAULT_MODEL = "llama-3.3-70b-versatile"
310
  DEFAULT_TEMPERATURE = 0.7
311
  CHUNK_SIZE = 1000
 
315
  STREAM_BATCH_BYTES = 0
316
 
317
  PROVIDERS: Dict[str, Dict[str, Any]] = {
318
+ "1": {
319
  "AUTH": True,
320
  "BASE_URL": "https://api.groq.com/openai/v1/chat/completions",
321
  "DEFAULT_MODEL": "qwen/qwen3-32b",
 
328
  "stream": "{stream}",
329
  },
330
  },
331
+ "2": {
332
+ "AUTH": False,
333
  "BASE_URL": "https://llmchat.in/inference/stream?model={model}",
334
  "DEFAULT_MODEL": "@cf/meta/llama-3.1-8b-instruct",
335
  "HEADERS": {
 
340
  },
341
  "PAYLOAD": {"messages": "{messages}", "stream": "{stream}"},
342
  },
343
+ "3": {
344
+ "AUTH": False,
345
+ "BASE_URL": "https://adarshji-md.hf.space/gen",
346
+ "DEFAULT_MODEL": "openai/gpt-oss-120b",
347
+ "PAYLOAD": {"api_key": "LOL", "provider": "1","messages": "{messages}","model" : "{model}","stream": "{stream}"},
348
+ },
349
+ "4": {
350
+ "AUTH": False,
351
+ "BASE_URL": "https://adarshji-md.hf.space/gen",
352
+ "DEFAULT_MODEL": "qwen3-4b-thinking-2507",
353
+ "PAYLOAD": {"api_key": "LOL", "provider": "2","messages": "{messages}","model" : "{model}","stream": "{stream}"},
354
+
355
+ },
356
+ "5": {
357
+ "AUTH": False,
358
+ "BASE_URL": "https://adarshji-md.hf.space/gen",
359
+ "DEFAULT_MODEL": "deepseek-ai/deepseek-r1-distill-qwen-32b",
360
+ "PAYLOAD": {"api_key": "LOL", "provider": "3","messages": "{messages}","model" : "{model}","stream": "{stream}"},
361
+
362
+ },
363
  }
364
 
365
  _placeholder_re = re.compile(r"\{(.*?)\}")