AdarshJi commited on
Commit
033253e
·
verified ·
1 Parent(s): 56447dc

Create server.py

Browse files
Files changed (1) hide show
  1. server.py +835 -0
server.py ADDED
@@ -0,0 +1,835 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # server_sse.py
2
+ import asyncio
3
+ import inspect
4
+ import logging
5
+ from dataclasses import dataclass
6
+ from typing import Any, Dict, List, Optional, AsyncGenerator,Tuple
7
+ import time
8
+ import json
9
+ import uuid
10
+ import aiohttp
11
+ from fastapi import FastAPI, HTTPException, Request
12
+ from fastapi.responses import StreamingResponse, JSONResponse
13
+ from curl_cffi.requests import Session
14
+
15
+
16
+ # try fast json library
17
+ try:
18
+ import orjson as _jsonlib # type: ignore
19
+ def _loads(b: bytes):
20
+ return _jsonlib.loads(b)
21
+ def _dumps(obj) -> str:
22
+ # orjson.dumps returns bytes
23
+ return _jsonlib.dumps(obj).decode("utf-8")
24
+ except Exception:
25
+ import json as _jsonlib # type: ignore
26
+ def _loads(b: bytes):
27
+ return _jsonlib.loads(b)
28
+ def _dumps(obj) -> str:
29
+ return _jsonlib.dumps(obj)
30
+
31
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("chat-server-sse")

# Shared curl_cffi session impersonating Chrome, reused by every provider
# call (kept as a module-level global on purpose).
Requests = Session(impersonate="chrome110")

app = FastAPI()
38
+
39
+
40
# Model catalog for the FREEGPT (llmchat.in) provider.
# Each entry maps a provider tag ("@cf" cloudflare / "@hf" huggingface) and
# model id to the max_tokens limit sent upstream (None = no explicit limit).
# FIX: exact duplicate entries removed — lookups use next(first match), so
# the duplicates were dead weight and polluted the /model listing.
M2 = [
    {"tag": "@cf", "model": "meta/llama-3.1-70b-instruct", "max_tokens": 8192},
    {"tag": "@cf", "model": "qwen/qwen2.5-coder-32b-instruct", "max_tokens": 8192},
    {"tag": "@cf", "model": "deepseek-ai/deepseek-r1-distill-qwen-32b", "max_tokens": 40960},
    {"tag": "@cf", "model": "meta/llama-4-scout-17b-16e-instruct", "max_tokens": 40960},
    {"tag": "@cf", "model": "google/gemma-3-12b-it", "max_tokens": 40960},
    {"tag": "@cf", "model": "mistralai/mistral-small-3.1-24b-instruct", "max_tokens": 40960},
    {"tag": "@cf", "model": "meta/llama-3.3-70b-instruct-fp8-fast", "max_tokens": 8192},
    {"tag": "@cf", "model": "meta/llama-3.2-3b-instruct", "max_tokens": 40960},
    {"tag": "@cf", "model": "meta/llama-3.2-1b-instruct", "max_tokens": 40960},
    {"tag": "@hf", "model": "meta-llama/meta-llama-3-8b-instruct", "max_tokens": 4391},
    {"tag": "@cf", "model": "meta/llama-3-8b-instruct", "max_tokens": 4391},
    {"tag": "@cf", "model": "meta/llama-2-7b-chat-int8", "max_tokens": 4391},
    {"tag": "@cf", "model": "meta/llama-2-7b-chat-fp16", "max_tokens": None},
    {"tag": "@cf", "model": "meta/llama-3-8b-instruct-awq", "max_tokens": 4391},
    {"tag": "@hf", "model": "google/gemma-7b-it", "max_tokens": None},
    {"tag": "@cf", "model": "google/gemma-2b-it-lora", "max_tokens": 4391},
    {"tag": "@hf", "model": "mistral/mistral-7b-instruct-v0.2", "max_tokens": 8192},
    {"tag": "@cf", "model": "mistral/mistral-7b-instruct-v0.2-lora", "max_tokens": 8192},
]
164
+
165
def FREEGPT(
    RQ: Any,
    messages: List[Dict],
    model: str = "deepseek-ai/deepseek-r1-distill-qwen-32b",
    max_token: int = 40960,
    stream: bool = True,
    timeout: Optional[float] = None,
):
    """
    Stream completion fragments from the llmchat.in SSE endpoint.

    Args:
        RQ: a requests-like session object with .post().
        messages: chat messages in plain role/content form.
        model: model id; resolved against M2, with a small fallback model.
        max_token: caller's token limit, used only when M2 has no limit.
        stream / timeout: forwarded to the HTTP request.

    Yields:
        str fragments from each SSE event's "response" field.
    """
    # Resolve "<tag>/<model>" from the catalog; unknown models fall back
    # to a small default.
    md = next(
        (item["tag"] + "/" + item["model"] for item in M2 if item["model"] == model),
        "@cf/meta/llama-3.2-1b-instruct",
    )
    URL = f"https://llmchat.in/inference/stream?model={md}"

    headers = {
        "Accept": "text/event-stream,*/*",
        "Content-Type": "application/json",
        "Origin": "https://llmchat.in",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144.0.0.0 Safari/537.36",
        "Cache-Control": "no-cache",
        "Accept-Encoding": "identity",
        "cf-ray": "9cba9edd9f909aaf-SIN",
    }

    # FIX: the original built "max_tokens" twice (the catalog value simply
    # overwrote the caller's); make the precedence explicit instead.
    payload = {"messages": messages, "stream": stream}
    catalog_limit = next(
        (item["max_tokens"] for item in M2 if item["model"] == model and item["max_tokens"] is not None),
        None,
    )
    if catalog_limit is not None:
        payload["max_tokens"] = catalog_limit
    elif max_token is not None:
        payload["max_tokens"] = max_token

    try:
        RESP = RQ.post(url=URL, json=payload, headers=headers, timeout=timeout, stream=stream)
    except Exception:
        # Best-effort provider: log and end the stream instead of raising.
        logger.exception("FREEGPT request failed")
        return
    if RESP.status_code != 200:
        logger.warning("FREEGPT non-200 status: %s", RESP.status_code)
        return

    for raw in RESP.iter_lines():
        if not raw:
            continue
        # errors="replace" cannot raise, so no secondary decode path needed.
        line = raw.decode("utf-8", errors="replace").strip()
        if not line.startswith("data:"):
            continue
        # FIX: strip the "data:" prefix robustly — split('data: ')[1] raised
        # IndexError whenever the payload had no space after the colon.
        data_json = line[len("data:"):].lstrip()
        try:
            data = json.loads(data_json)
        except (ValueError, TypeError):
            continue
        if isinstance(data, dict) and "response" in data:
            yield data["response"]
228
+
229
+
230
+
231
class CONV:
    """Converts between plain role/content messages and the UI "parts" format."""

    def __init__(self, default_system: str = ""):
        # System prompt used when no override is supplied.
        self.default_system = default_system

    @staticmethod
    def _make_id() -> str:
        """Return a fresh 20-character hex message id."""
        return uuid.uuid4().hex[:20]

    def alpaca_to_msg(
        self,
        alpaca_obj: Dict[str, Any],
        insert_system: bool = True,
        system_override: Optional[str] = None,
        skip_empty: bool = True,
    ) -> Tuple[List[Dict[str, str]], float]:
        """
        Flatten "parts"-style messages into [{"role", "content"}, ...].

        Only parts with type == "text" contribute; fragments are joined with
        blank lines.  Returns (messages, elapsed_seconds).
        """
        started = time.perf_counter()
        converted: List[Dict[str, str]] = []

        system_text = self.default_system if system_override is None else system_override
        if insert_system and system_text is not None:
            converted.append({"role": "system", "content": system_text})

        for message in alpaca_obj:
            role = (message.get("role") or "").strip().lower()
            if role not in ("user", "assistant", "system"):
                role = "user"

            # Gather textual fragments in order, trimming trailing whitespace.
            fragments = [
                part["text"].rstrip()
                for part in (message.get("parts") or [])
                if isinstance(part, dict)
                and part.get("type") == "text"
                and isinstance(part.get("text"), str)
                and part.get("text")
            ]

            if fragments:
                converted.append({"role": role, "content": "\n\n".join(fragments)})
            elif not skip_empty:
                # Preserve the role even when there is no text.
                converted.append({"role": role, "content": ""})

        return converted, time.perf_counter() - started

    def msg_to_alpaca(
        self,
        msg_list: List[Dict[str, Any]],
        include_step_start: bool = True,
        assistant_state_done: bool = True,
        preserve_ids: bool = False,
        skip_empty_text_parts: bool = False,
    ) -> Tuple[Dict[str, List[Dict[str, Any]]], float]:
        """
        Expand role/content messages into the UI "parts" format.

        Assistant messages optionally get a leading step-start part and a
        "done" state on their text part.  Returns (messages, elapsed_seconds).
        """
        started = time.perf_counter()
        converted: List[Dict[str, Any]] = []

        for entry in msg_list:
            if isinstance(entry, dict):
                role = (entry.get("role") or "user").strip().lower()
                body = entry.get("content", "")
                original_id = entry.get("id") if preserve_ids else None
            else:
                # Bare strings (or other scalars) become user messages.
                role, body, original_id = "user", str(entry), None

            if role not in ("user", "assistant"):
                role = "user"

            parts: List[Dict[str, Any]] = []
            if role == "assistant" and include_step_start:
                parts.append({"type": "step-start"})

            if isinstance(body, str) and (not skip_empty_text_parts or body.strip()):
                text_part: Dict[str, Any] = {"type": "text", "text": body}
                if role == "assistant" and assistant_state_done:
                    text_part["state"] = "done"
                parts.append(text_part)

            keep_original = isinstance(original_id, str) and original_id != ""
            converted.append(
                {
                    "id": original_id if keep_original else self._make_id(),
                    "role": role,
                    "parts": parts,
                    "metadata": {"custom": {}},
                }
            )

        return converted, time.perf_counter() - started
338
+
339
+
340
# Models routed through the Adarsh_Personal provider.
# FIX: two list entries were missing trailing commas, so Python silently
# concatenated three adjacent string literals into one bogus model id and
# made "kimi-k2-thinking", "devstral-2" and "mistral-large-3" unselectable.
M1 = [
    "zai-org/glm-4.6",
    "openai/gpt-5-nano-2025-08-07",
    "deepseek-ai/deepseek-v3.2-thinking",
    "nvidia/nvidia-nemotron-3-nano-30b-a3b",
    "nvidia/nvidia-nemotron-3-nano-30b-a3b-thinking",
    "openai/gpt-5-mini-2025-08-07",
    "qwen/qwen3-vl-235b-a22b-thinking",
    "qwen/qwen3-vl-235b-a22b-instruct",
    "perplexity/sonar",
    "moonshotai/kimi-k2.5",
    "anthropic/claude-haiku-4-5-20251001",  # deprecating model
    "google/gemini-2.5-flash-lite",
    "moonshotai/kimi-k2-thinking",
    "mistralai/devstral-2-123b-instruct-2512",  # good model
    "mistralai/mistral-large-3-675b-instruct-2512",
    "openai/gpt-oss-safeguard-20b",
    "openai/gpt-oss-120b",
]
360
+
361
+
362
def Adarsh_Personal(
    RQ: Any,
    messages: List[Dict],
    model: str = "deepseek-ai/deepseek-r1-distill-qwen-32b",
    max_token: int = 40960,
    stream: bool = True,
    timeout: Optional[float] = None,
):
    """
    Stream chat deltas from the hadadxyz-ai HF space.

    Reasoning deltas are wrapped in <think> ... </think> markers so the
    caller receives a single text stream.  Yields str fragments.
    `max_token` is accepted for interface parity but not sent upstream.
    """
    in_reasoning = False
    URL = "https://hadadxyz-ai.hf.space/api/mz1a85y5n80zy5127hgsba5f3a9c2d1Np0x300vcgduqxb7ep084fygd016c9a2d16fa8b3c41gut432pvjctr75hhspjae25d6f7a8b9c0d1e2pjf43v16f3a4b5c6dd7e8fba2bdx9a0b6dv1c2d7e2b4c9f83d6a4f1bb6c152f9pe3c7a88qv5d91f3c2b765g134bp9a41ne4yx4b3vda8w074"

    # Convert plain role/content messages into the "parts" format the UI expects.
    NEW_MSGS, _ = CONV().msg_to_alpaca(messages, include_step_start=True, assistant_state_done=True)

    # NOTE(review): session/client/turnstile identifiers below are hard-coded,
    # presumably captured from one browser session — confirm they stay valid.
    payload = {
        "tools": {},
        "modelId": model,
        "sessionId": "sess_7ef524b9_mlfe4ped",
        "clientId": "7ef524b98a963b507ec9f4000fdea38c-mlfe4pea",
        "requestId": "req_7ef524b9_mlfg1cpq_jjxb7p",
        "clientIp": "122.161.52.54",
        "realIp": "122.161.52.54",
        "forwardedFor": "122.161.52.54",
        "userAgent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144.0.0.0 Safari/537.36",
        "id": "DEFAULT_THREAD_ID",
        "messages": NEW_MSGS,
        "trigger": "submit-message",
        "metadata": {},
    }

    headers = {
        "Accept": "text/event-stream, */*",
        "Content-Type": "application/json",
        "Origin": "https://hadadxyz-ai.hf.space",
        "User-Agent": payload["userAgent"],
        "Cache-Control": "no-cache",
        "Accept-Encoding": "identity",
        "x-turnstile-token": "mlfe5357-zq9depfzhpb-e18cbvzrpid",
        "x-turnstile-verified": "true",
    }

    try:
        RESP = RQ.post(URL, json=payload, headers=headers, stream=stream, timeout=timeout)
    except Exception:
        # FIX: the request was previously unguarded; a network error blew up
        # the whole SSE response.  Log and end the stream instead.
        logger.exception("Adarsh_Personal request failed")
        return
    if RESP.status_code != 200:
        logger.warning("Adarsh_Personal non-200 status: %s", RESP.status_code)
        return

    for raw in RESP.iter_lines():
        if not raw:
            continue
        # errors="replace" cannot raise, so one decode path suffices.
        line = raw.decode("utf-8", errors="replace").strip()
        if not line.startswith("data:"):
            continue
        # FIX: split('data: ')[1] raised IndexError when the event had no
        # space after "data:"; slice the prefix off instead.
        try:
            data = json.loads(line[len("data:"):].lstrip())
        except (ValueError, TypeError):
            continue
        if not isinstance(data, dict):
            continue

        kind = data.get("type")
        if kind == "reasoning-delta":
            if not in_reasoning:
                in_reasoning = True
                yield "<think>\n"
            if "delta" in data:
                yield data["delta"]
        elif kind == "text-delta":
            if in_reasoning:
                in_reasoning = False
                yield "\n</think>\n"
            if "delta" in data:
                yield data["delta"]
448
+
449
# Models routed through the QWEN provider (single fixed model upstream).
M3 = ["qwen3-4b-thinking-2507"]
450
+
451
def QWEN(
    RQ: Any,
    messages: List[Dict],
    model: str = "NONE",
    max_token: int = 40960,
    stream: bool = True,
    timeout: Optional[float] = None,
):
    """
    Stream chat output from the teichai qwen3-4b-thinking HF space.

    Reasoning chunks are wrapped in <think> ... </think> markers; yields
    str fragments.  `model` and `max_token` are accepted for interface
    parity — the upstream endpoint serves one fixed model.
    """

    def _events(rq: Any, msgs: List[Dict], to: Optional[float] = None):
        """Yield parsed JSON objects from the upstream SSE /api/chat stream."""
        API_URL = "https://teichai-qwen3-4b-thinking-2507-claude-4-5-opus.hf.space/api/chat"
        payload = {"messages": msgs, "searchEnabled": False}
        headers = {
            "Accept": "*/*",
            "Content-Type": "application/json",
            "Origin": "https://teichai-qwen3-4b-thinking-2507-claude-4-5-opus.hf.space",
            "Referer": "https://teichai-qwen3-4b-thinking-2507-claude-4-5-opus.hf.space/",
            "User-Agent": "python-requests/2.x",
        }

        resp = rq.post(API_URL, headers=headers, json=payload, stream=stream, timeout=to)

        buffered: List[str] = []
        for raw in resp.iter_lines():
            if raw is None:
                continue
            line = raw.decode("utf-8", errors="replace").strip()

            if line == "":
                # A blank line terminates one SSE event: flush the buffer.
                if not buffered:
                    continue
                data_text = "".join(buffered)
                buffered = []
                if data_text == "[DONE]":
                    break
                try:
                    yield json.loads(data_text)
                except json.JSONDecodeError:
                    pass
                continue

            if line.startswith("data:"):
                buffered.append(line[len("data:"):].lstrip())

    in_reasoning = False
    # FIX: the inner generator was called with message=... which raised
    # TypeError (its parameter is messages); the provider never produced
    # a single chunk.
    for event in _events(rq=RQ, msgs=messages, to=timeout):
        if event.get("type") == "reasoning":
            if not in_reasoning:
                in_reasoning = True
                yield "<think>\n"
            if "content" in event:
                yield event["content"]
        else:
            if in_reasoning:
                in_reasoning = False
                yield "\n</think>\n\n"
            if "content" in event:
                yield event["content"]
519
+
520
+
521
+
522
+
523
+
524
+
525
+
526
+
527
+
528
+
529
+
530
+
531
+
532
+
533
+
534
+
535
+
536
+
537
+
538
+
539
+
540
+
541
+
542
+
543
+
544
+
545
+
546
# Provider registry: client-selected key -> handler callable + the model
# list that handler actually serves.
# FIX: the model lists were crossed — QWEN talks to the qwen3-4b space
# (M3) and FREEGPT resolves its models against the M2 catalog, but they
# were paired with each other's lists.
PROVIDERS: Dict[str, Dict[str, Any]] = {
    "1": {"__func__": Adarsh_Personal, "models": M1},
    "2": {"__func__": QWEN, "models": M3},
    "3": {"__func__": FREEGPT, "models": M2},
}

# Filled on startup to avoid per-request introspection of provider callables.
PROVIDER_META: Dict[str, Dict[str, Any]] = {}
554
+
555
class Config:
    """Server-wide defaults applied when a request payload omits a field."""

    DEFAULT_PROVIDER = "1"
    # NOTE(review): this default model id does not appear in M1/M2/M3 —
    # providers fall back to their own defaults for unknown models; confirm
    # this is intentional.
    DEFAULT_MODEL = "llama-3.3-70b-versatile"
    DEFAULT_MAX_TOKENS = 512
    # Accepted from clients but not forwarded by any provider in this file.
    DEFAULT_TEMPERATURE = 0.7
    # Per-request provider timeout, in seconds.
    TIMEOUT = 30.0
    STREAM = True
562
+
563
+
564
@dataclass
class ChatRequest:
    """Normalized chat request parsed from a loosely-shaped JSON payload."""

    api_key: str
    messages: List[Dict[str, Any]]
    model: Optional[str] = None
    provider: str = Config.DEFAULT_PROVIDER
    max_tokens: int = Config.DEFAULT_MAX_TOKENS
    temperature: float = Config.DEFAULT_TEMPERATURE
    stream: bool = Config.STREAM

    @staticmethod
    def from_dict(payload: Dict[str, Any]) -> "ChatRequest":
        """Build a ChatRequest, accepting several alias spellings per field."""
        key = payload.get("api_key") or payload.get("key") or payload.get("apikey")
        msgs = payload.get("messages") or payload.get("message") or payload.get("msgs")
        if msgs is None:
            msgs = []
        elif isinstance(msgs, dict):
            # A single message object is promoted to a one-element list.
            msgs = [msgs]
        return ChatRequest(
            api_key=key,
            messages=msgs,
            model=payload.get("model_name") or payload.get("model"),
            # keep "1", "2", "3" style keys even if sent as ints
            provider=str(payload.get("provider") or Config.DEFAULT_PROVIDER),
            max_tokens=payload.get("max_tokens", Config.DEFAULT_MAX_TOKENS),
            temperature=payload.get("temperature", Config.DEFAULT_TEMPERATURE),
            stream=payload.get("stream", Config.STREAM),
        )
597
+
598
+
599
# Shared aiohttp session; created on startup and closed on shutdown.
GLOBAL_AIOHTTP: Optional[aiohttp.ClientSession] = None
600
+
601
+
602
@app.on_event("startup")
async def on_startup():
    """Create the shared aiohttp session and precompute provider call metadata."""
    global GLOBAL_AIOHTTP, PROVIDER_META
    logger.info("Starting up - creating global aiohttp session and analyzing providers")
    GLOBAL_AIOHTTP = aiohttp.ClientSession()

    # Classify each provider callable once so request handling never needs
    # per-call introspection.
    for key, entry in PROVIDERS.items():
        fn = entry["__func__"]
        is_async_gen = inspect.isasyncgenfunction(fn)
        is_coro = inspect.iscoroutinefunction(fn)
        is_gen = inspect.isgeneratorfunction(fn)
        PROVIDER_META[key] = {
            "func": fn,
            "is_async_gen_fn": is_async_gen,
            "is_coroutine_fn": is_coro,
            "is_generator_fn": is_gen,
            # a plain sync callable when none of the above apply
            "is_sync": not (is_coro or is_async_gen or is_gen),
        }
    logger.info("Provider metadata prepared")
619
+
620
+
621
@app.on_event("shutdown")
async def on_shutdown():
    """Close the shared aiohttp session if one was created and is still open."""
    global GLOBAL_AIOHTTP
    logger.info("Shutting down - closing global aiohttp session")
    session = GLOBAL_AIOHTTP
    if session is not None and not session.closed:
        await session.close()
627
+
628
+
629
async def _call_provider_and_iterate(
    provider_key: str,
    messages: List[Dict],
    model: str,
    max_token: int,
    stream_flag: bool,
    timeout: float,
) -> AsyncGenerator[bytes, None]:
    """
    Invoke the provider selected by `provider_key` and yield its output as
    raw bytes.  The caller turns these bytes into SSE events.

    Raises:
        ValueError: if the provider key is unknown.
    Timeouts and provider errors are reported in-band as a final
    "[server_timeout] ..." / "[server_error] ..." chunk.
    """
    if provider_key not in PROVIDER_META:
        raise ValueError(f"Unknown provider '{provider_key}'")

    meta = PROVIDER_META[provider_key]
    func = meta["func"]

    def _to_bytes(item: Any) -> Optional[bytes]:
        """Normalize one provider chunk to bytes; None chunks are dropped."""
        if item is None:
            return None
        if isinstance(item, bytes):
            return item
        if isinstance(item, str):
            return item.encode("utf-8")
        return str(item).encode("utf-8")

    def _iter_result(res: Any):
        """Yield chunks from a provider return value of any supported shape."""
        if res is None:
            return
        if isinstance(res, (list, tuple)):
            yield from res
            return
        if inspect.isgenerator(res) or (hasattr(res, "__iter__") and not isinstance(res, (str, bytes, dict))):
            yield from res
            return
        yield res

    try:
        if meta["is_async_gen_fn"]:
            # FIX: providers take messages=/model= (lowercase); the old
            # Message=/Model= keywords would have raised TypeError here.
            agen = func(Requests, messages=messages, model=model, max_token=max_token, stream=stream_flag, timeout=timeout)
            async for item in agen:
                chunk = _to_bytes(item)
                if chunk is not None:
                    yield chunk
            return

        if meta["is_coroutine_fn"]:
            res = await asyncio.wait_for(
                func(Requests, messages=messages, model=model, max_token=max_token, stream=stream_flag, timeout=timeout),
                timeout=timeout,
            )
            for item in _iter_result(res):
                chunk = _to_bytes(item)
                if chunk is not None:
                    yield chunk
            return

        # Sync function or generator function: call it off the event loop.
        # FIX: the old code also created an unawaited coroutine for this path
        # ("coroutine was never awaited" warnings).
        # NOTE(review): for generator providers this moves only the *call*
        # into a thread; iteration below still runs on the event loop and
        # may block on network reads — confirm acceptable.
        sync_res = await asyncio.wait_for(
            asyncio.to_thread(func, Requests, messages, model, max_token, stream_flag, timeout),
            timeout=timeout,
        )
        for item in _iter_result(sync_res):
            chunk = _to_bytes(item)
            if chunk is not None:
                yield chunk

    except asyncio.TimeoutError:
        err = f"[server_timeout] provider {provider_key} exceeded {timeout}s\n"
        logger.warning(err.strip())
        yield err.encode("utf-8")
    except Exception as e:
        logger.exception("Provider error")
        err = f"[server_error] {type(e).__name__}: {e}\n"
        yield err.encode("utf-8")
750
+
751
+
752
@app.post("/chat")
async def chat_endpoint(request: Request):
    """
    POST /chat — run the selected provider over the supplied messages.

    stream=True  -> SSE stream of  data: {"response": "..."}  events,
                    terminated by a bare "[DONE]" line (no "data:" prefix).
    stream=False -> single JSON body {"text": "<full output>"}.

    Raises:
        HTTPException 400: invalid JSON body, or missing api_key/messages.
    """
    try:
        payload = _loads(await request.body())
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"invalid json: {e}")

    req = ChatRequest.from_dict(payload)
    if not req.api_key or not req.messages:
        raise HTTPException(status_code=400, detail="api_key and messages required")

    def _chunk_text(chunk: Any) -> str:
        """Decode a provider chunk (bytes or anything else) to text."""
        if isinstance(chunk, bytes):
            return chunk.decode("utf-8", errors="ignore")
        return str(chunk)

    provider_iter = _call_provider_and_iterate(
        provider_key=req.provider,
        messages=req.messages,
        model=req.model or Config.DEFAULT_MODEL,
        max_token=req.max_tokens,
        stream_flag=req.stream,
        timeout=Config.TIMEOUT,
    )

    if req.stream:
        async def sse_stream_gen():
            # One SSE event per provider chunk.  _dumps already falls back to
            # the stdlib json module, and {"response": <str>} is always
            # serializable, so the old per-chunk fallback import is gone.
            async for raw_chunk in provider_iter:
                json_str = _dumps({"response": _chunk_text(raw_chunk)})
                yield f"data: {json_str}\n\n".encode("utf-8")
            # Final terminator, deliberately sent as a bare line.
            yield b"[DONE]\n"

        return StreamingResponse(sse_stream_gen(), media_type="text/event-stream")

    # Non-stream: collect everything and return one JSON body.
    collected = [_chunk_text(chunk) async for chunk in provider_iter]
    return JSONResponse({"text": "".join(collected)})
825
+
826
+
827
@app.get("/model")
async def model():
    """Return every known model id, grouped by provider model list."""
    return {"models": [M1, M2, M3]}
831
+
832
+
833
@app.get("/health")
async def health_check():
    """Liveness probe endpoint."""
    return {"status": "ok"}