AdarshJi committed on
Commit
573009b
·
verified ·
1 Parent(s): cce209a

Create main.py

Browse files
Files changed (1) hide show
  1. main.py +678 -0
main.py ADDED
@@ -0,0 +1,678 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from flask import Flask, request, jsonify, Response, stream_with_context
3
+ from dataclasses import dataclass
4
+ from typing import Any, Dict, List, Optional
5
+ import time
6
+ import uuid
7
+ from curl_cffi.requests import Session
8
+
9
+
10
+
11
+
12
+
13
+
14
+
15
# Model table for the FREEGPT provider (provider id "3").
# Each entry carries the gateway tag ("@cf" Cloudflare / "@hf" HuggingFace),
# the model slug, and the max_tokens value to request (None = server default).
# Duplicate entries are kept as-is; lookups take the first match.
M3 = [
    {"tag": "@cf", "model": "meta/llama-3.1-70b-instruct", "max_tokens": 8192},
    {"tag": "@cf", "model": "qwen/qwen2.5-coder-32b-instruct", "max_tokens": 8192},
    {"tag": "@cf", "model": "deepseek-ai/deepseek-r1-distill-qwen-32b", "max_tokens": 40960},
    {"tag": "@cf", "model": "meta/llama-4-scout-17b-16e-instruct", "max_tokens": 40960},
    {"tag": "@cf", "model": "google/gemma-3-12b-it", "max_tokens": 40960},
    {"tag": "@cf", "model": "mistralai/mistral-small-3.1-24b-instruct", "max_tokens": 40960},
    {"tag": "@cf", "model": "meta/llama-3.3-70b-instruct-fp8-fast", "max_tokens": 8192},
    {"tag": "@cf", "model": "meta/llama-3.2-3b-instruct", "max_tokens": 40960},
    {"tag": "@cf", "model": "meta/llama-3.2-1b-instruct", "max_tokens": 40960},
    {"tag": "@hf", "model": "meta-llama/meta-llama-3-8b-instruct", "max_tokens": 4391},
    {"tag": "@cf", "model": "meta/llama-3-8b-instruct", "max_tokens": 4391},
    {"tag": "@cf", "model": "meta/llama-2-7b-chat-int8", "max_tokens": 4391},
    {"tag": "@cf", "model": "meta/llama-2-7b-chat-fp16", "max_tokens": None},
    {"tag": "@cf", "model": "meta/llama-3-8b-instruct-awq", "max_tokens": 4391},
    {"tag": "@hf", "model": "meta-llama/meta-llama-3-8b-instruct", "max_tokens": 4391},
    {"tag": "@cf", "model": "meta/llama-3-8b-instruct", "max_tokens": 4391},
    {"tag": "@cf", "model": "meta/llama-2-7b-chat-int8", "max_tokens": 4391},
    {"tag": "@cf", "model": "meta/llama-3-8b-instruct-awq", "max_tokens": 4391},
    {"tag": "@hf", "model": "google/gemma-7b-it", "max_tokens": None},
    {"tag": "@cf", "model": "google/gemma-2b-it-lora", "max_tokens": 4391},
    {"tag": "@hf", "model": "mistral/mistral-7b-instruct-v0.2", "max_tokens": 8192},
    {"tag": "@cf", "model": "mistral/mistral-7b-instruct-v0.2-lora", "max_tokens": 8192},
]
139
+
140
def FREEGPT(
    RQ: Any,
    messages: List[Dict],
    model: str = "deepseek-ai/deepseek-r1-distill-qwen-32b",
    max_token: int = 40960,
    stream: bool = True,
    timeout: Optional[float] = None,
):
    """Stream completion text from the llmchat.in inference endpoint.

    Parameters
    ----------
    RQ : a requests-like session providing ``post(...)``
    messages : chat messages in the plain ``{"role", "content"}`` form
    model : model slug looked up in the M3 table
    max_token : caller's token cap; the model's own table entry wins if set
    stream : passed through to the HTTP layer and the upstream payload
    timeout : request timeout in seconds (None = no timeout)

    Yields response fragments (strings) as SSE events arrive; yields
    nothing on transport failure or a non-200 status (best-effort API).
    """
    # BUG FIX: the original searched MODELS — a list of plain strings that
    # belongs to the Adarsh_Personal provider — so item["model"] raised
    # TypeError.  This provider's table is M3 (see PROVIDERS["3"]).
    entry = next((item for item in M3 if item["model"] == model), None)
    if entry is not None:
        qualified = f"{entry['tag']}/{entry['model']}"
    else:
        qualified = "@cf/meta/llama-3.2-1b-instruct"  # fallback model

    URL = f"https://llmchat.in/inference/stream?model={qualified}"

    headers = {
        "Accept": "text/event-stream,*/*",
        "Content-Type": "application/json",
        "Origin": "https://llmchat.in",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144.0.0.0 Safari/537.36",
        "Cache-Control": "no-cache",
        "Accept-Encoding": "identity",
        "cf-ray": "9cba9edd9f909aaf-SIN",
    }

    payload = {"messages": messages, "stream": stream}
    # The model's table max_tokens (when known) overrides the caller's cap,
    # matching the original double-splat precedence.
    if max_token is not None:
        payload["max_tokens"] = max_token
    if entry is not None and entry.get("max_tokens") is not None:
        payload["max_tokens"] = entry["max_tokens"]

    try:
        RESP = RQ.post(url=URL, json=payload, headers=headers, timeout=timeout, stream=stream)
        print(RESP.status_code)
    except Exception:
        # Transport failure: end the stream silently (best-effort contract).
        return

    if RESP.status_code != 200:
        print(RESP.status_code)
        return

    for raw in RESP.iter_lines():
        if not raw:
            continue
        # errors="replace" never raises, so no latin-1 fallback is needed.
        line = raw.decode("utf-8", errors="replace").strip()
        if not line.startswith("data:"):
            continue
        # BUG FIX: split('data: ')[1] raised IndexError on "data:" frames
        # without a space; slice the prefix off instead.
        try:
            data = json.loads(line[5:].strip())
        except json.JSONDecodeError:
            continue  # ignore non-JSON frames such as "[DONE]"
        if isinstance(data, dict) and "response" in data:
            yield data["response"]
203
+
204
+
205
+
206
+
207
+
208
+
209
+ M2 = ["qwen3-4b-thinking-2507"]
210
+
211
+
212
def QWEN(
    RQ: Any,
    messages: List[Dict],
    model: str = "NONE",
    max_token: int = 40960,
    stream: bool = True,
    timeout: Optional[float] = None,
):
    """Stream a chat completion from the hosted Qwen3-4B-thinking space.

    Reasoning events are wrapped in ``<think>...</think>`` markers; plain
    text events are yielded verbatim.  ``model`` and ``max_token`` exist
    only for interface parity with the other providers — the upstream API
    ignores them.

    Parameters
    ----------
    RQ : a requests-like session providing ``post(...)``
    messages : chat messages in the plain ``{"role", "content"}`` form
    stream : passed through to the HTTP layer
    timeout : request timeout in seconds (None = no timeout)
    """

    def _events(rq: Any, msgs: List[Dict], req_timeout: Optional[float] = None):
        """Yield decoded JSON SSE events from the upstream /api/chat endpoint."""
        api_url = "https://teichai-qwen3-4b-thinking-2507-claude-4-5-opus.hf.space/api/chat"
        payload = {"messages": msgs, "searchEnabled": False}
        headers = {"Accept": "*/*", "Content-Type": "application/json", "Origin": "https://teichai-qwen3-4b-thinking-2507-claude-4-5-opus.hf.space", "Referer": "https://teichai-qwen3-4b-thinking-2507-claude-4-5-opus.hf.space/", "User-Agent": "python-requests/2.x"}

        # BUG FIX: the original timed the request with t(), a name that was
        # never defined anywhere, so every call raised NameError.
        started = time.perf_counter()
        resp = rq.post(api_url, headers=headers, json=payload, stream=stream, timeout=req_timeout)
        print(time.perf_counter() - started)

        buffer_lines: List[str] = []
        for raw in resp.iter_lines():
            if raw is None:
                continue
            line = raw.decode("utf-8", errors="replace").strip()

            if line == "":
                # A blank line terminates one SSE event; flush the buffer.
                if not buffer_lines:
                    continue
                data_text = "".join(buffer_lines)
                buffer_lines = []
                if data_text == "[DONE]":
                    break
                try:
                    yield json.loads(data_text)
                except json.JSONDecodeError:
                    pass  # skip malformed frames
                continue

            if line.startswith("data:"):
                buffer_lines.append(line[len("data:"):].lstrip())

    in_reasoning = False
    for event in _events(RQ, messages, req_timeout=timeout):
        if event.get("type") == "reasoning":
            if not in_reasoning:
                in_reasoning = True
                yield "<think>\n"
            yield event.get("content", "")
        else:
            if in_reasoning:
                in_reasoning = False
                yield "\n</think>\n\n"
            if "content" in event:
                yield event["content"]
280
+
281
+
282
+
283
+
284
+
285
+
286
+
287
class CONV:
    """Converts between plain chat messages (``{"role", "content"}``) and
    the "alpaca" UI wire format (``{"id", "role", "parts", "metadata"}``).

    BUG FIX: the original return annotations used ``Tuple``, which was
    never imported, so merely executing the class body raised NameError.
    The builtin ``tuple[...]`` generic is used instead.
    """

    def __init__(self, default_system: str = ""):
        # System prompt that alpaca_to_msg inserts unless overridden per call.
        self.default_system = default_system

    @staticmethod
    def _make_id() -> str:
        """Return a short random message id (20 hex characters)."""
        return uuid.uuid4().hex[:20]

    def alpaca_to_msg(
        self,
        alpaca_obj: List[Dict[str, Any]],
        insert_system: bool = True,
        system_override: Optional[str] = None,
        skip_empty: bool = True,
    ) -> tuple[List[Dict[str, str]], float]:
        """Flatten alpaca-format messages into plain role/content dicts.

        Parameters
        ----------
        alpaca_obj : list of alpaca messages, each with a "parts" list
        insert_system : prepend a system message when a system text exists
        system_override : system text to use instead of the default
        skip_empty : drop messages that carry no text parts

        Returns ``(messages, elapsed_seconds)``.
        """
        t0 = time.perf_counter()

        out: List[Dict[str, str]] = []
        sys_text = system_override if system_override is not None else self.default_system
        if insert_system and sys_text is not None:
            out.append({"role": "system", "content": sys_text})

        append = out.append  # hoist the bound method out of the loop
        for m in alpaca_obj:
            role = (m.get("role") or "").strip().lower()
            if role not in ("user", "assistant", "system"):
                role = "user"  # unknown roles default to user

            parts = m.get("parts") or []
            # Collect textual parts in order, trimming trailing whitespace.
            texts: List[str] = []
            for p in parts:
                if isinstance(p, dict) and p.get("type") == "text":
                    txt = p.get("text", "")
                    if isinstance(txt, str) and txt:
                        texts.append(txt.rstrip())

            if not texts and skip_empty:
                continue

            if texts:
                append({"role": role, "content": "\n\n".join(texts)})
            else:
                # When not skipping, keep an empty message to preserve the role.
                append({"role": role, "content": ""})

        return out, time.perf_counter() - t0

    def msg_to_alpaca(
        self,
        msg_list: List[Dict[str, Any]],
        include_step_start: bool = True,
        assistant_state_done: bool = True,
        preserve_ids: bool = False,
        skip_empty_text_parts: bool = False,
    ) -> tuple[List[Dict[str, Any]], float]:
        """Expand plain role/content messages into the alpaca wire format.

        Parameters
        ----------
        msg_list : plain messages; non-dict entries become user messages
        include_step_start : prepend a {"type": "step-start"} part to
            assistant messages
        assistant_state_done : mark assistant text parts with state="done"
        preserve_ids : reuse each entry's "id" field when present
        skip_empty_text_parts : omit the text part when content is blank

        Returns ``(alpaca_messages, elapsed_seconds)``.
        """
        t0 = time.perf_counter()

        out_messages: List[Dict[str, Any]] = []
        append = out_messages.append

        for entry in msg_list:
            # Accept both dicts and bare strings (treated as user text).
            if not isinstance(entry, dict):
                role = "user"
                content = str(entry)
                entry_id = None
            else:
                role = (entry.get("role") or "user").strip().lower()
                content = entry.get("content", "")
                entry_id = entry.get("id") if preserve_ids else None

            if role not in ("user", "assistant"):
                role = "user"

            parts: List[Dict[str, Any]] = []
            if role == "assistant" and include_step_start:
                parts.append({"type": "step-start"})

            # Add the text part unless it is blank and blanks are skipped.
            if isinstance(content, str):
                if not skip_empty_text_parts or content.strip() != "":
                    text_part: Dict[str, Any] = {"type": "text", "text": content}
                    if role == "assistant" and assistant_state_done:
                        text_part["state"] = "done"
                    parts.append(text_part)

            append({
                # Reuse a non-empty preserved string id; otherwise mint one.
                "id": entry_id if (entry_id is not None and isinstance(entry_id, str) and entry_id != "") else self._make_id(),
                "role": role,
                "parts": parts,
                "metadata": {"custom": {}},
            })

        return out_messages, time.perf_counter() - t0
394
+
395
+
396
+
397
+
398
+
399
+
400
# Models exposed by the Adarsh_Personal provider (provider id "1").
# BUG FIX: two entries were missing trailing commas, so Python's implicit
# string concatenation silently fused "moonshotai/kimi-k2-thinking",
# "mistralai/devstral-2-123b-instruct-2512" and
# "mistralai/mistral-large-3-675b-instruct-2512" into one bogus name.
MODELS = [
    "zai-org/glm-4.6",
    "openai/gpt-5-nano-2025-08-07",
    "deepseek-ai/deepseek-v3.2-thinking",
    "nvidia/nvidia-nemotron-3-nano-30b-a3b",
    "nvidia/nvidia-nemotron-3-nano-30b-a3b-thinking",
    "openai/gpt-5-mini-2025-08-07",
    "qwen/qwen3-vl-235b-a22b-thinking",
    "qwen/qwen3-vl-235b-a22b-instruct",
    "perplexity/sonar",
    "moonshotai/kimi-k2.5",
    "anthropic/claude-haiku-4-5-20251001",  # deprecating model
    "google/gemini-2.5-flash-lite",
    "moonshotai/kimi-k2-thinking",
    "mistralai/devstral-2-123b-instruct-2512",  # good model
    "mistralai/mistral-large-3-675b-instruct-2512",
    "openai/gpt-oss-safeguard-20b",
    "openai/gpt-oss-120b",
]

# BUG FIX: PROVIDERS and /info reference M1, which was never defined and
# made the module fail at import time.  M1 is this provider's model list.
M1 = MODELS
420
+
421
+
422
def Adarsh_Personal(
    RQ: Any,
    messages: List[Dict],
    model: str = "deepseek-ai/deepseek-r1-distill-qwen-32b",
    max_token: int = 40960,
    stream: bool = True,
    timeout: Optional[float] = None,
):
    """Stream a chat completion from the hadadxyz-ai space.

    ``messages`` (plain ``{"role", "content"}`` dicts) are converted to the
    upstream "alpaca" wire format via CONV before posting.  Reasoning
    deltas are wrapped in ``<think>...</think>``; text deltas are yielded
    verbatim.  ``max_token`` is accepted for interface parity but is not
    sent upstream.  Yields nothing on a non-200 response.
    """
    URL = "https://hadadxyz-ai.hf.space/api/mz1a85y5n80zy5127hgsba5f3a9c2d1Np0x300vcgduqxb7ep084fygd016c9a2d16fa8b3c41gut432pvjctr75hhspjae25d6f7a8b9c0d1e2pjf43v16f3a4b5c6dd7e8fba2bdx9a0b6dv1c2d7e2b4c9f83d6a4f1bb6c152f9pe3c7a88qv5d91f3c2b765g134bp9a41ne4yx4b3vda8w074"

    # Conversion elapsed time is unused; keep only the message list.
    new_msgs, _ = CONV().msg_to_alpaca(messages, include_step_start=True, assistant_state_done=True)

    payload = {
        "tools": {},
        "modelId": model,
        "sessionId": "sess_7ef524b9_mlfe4ped",
        "clientId": "7ef524b98a963b507ec9f4000fdea38c-mlfe4pea",
        "requestId": "req_7ef524b9_mlfg1cpq_jjxb7p",
        "clientIp": "122.161.52.54",
        "realIp": "122.161.52.54",
        "forwardedFor": "122.161.52.54",
        "userAgent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144.0.0.0 Safari/537.36",
        "id": "DEFAULT_THREAD_ID",
        "messages": new_msgs,
        "trigger": "submit-message",
        "metadata": {},
    }

    headers = {
        "Accept": "text/event-stream, */*",
        "Content-Type": "application/json",
        "Origin": "https://hadadxyz-ai.hf.space",
        "User-Agent": payload["userAgent"],
        "Cache-Control": "no-cache",
        "Accept-Encoding": "identity",
        "x-turnstile-token": "mlfe5357-zq9depfzhpb-e18cbvzrpid",
        "x-turnstile-verified": "true",
    }

    RESP = RQ.post(URL, json=payload, headers=headers, stream=stream, timeout=timeout)
    if RESP.status_code != 200:
        return

    in_reasoning = False
    for raw in RESP.iter_lines():
        if not raw:
            continue
        # errors="replace" never raises, so no latin-1 fallback is needed.
        line = raw.decode("utf-8", errors="replace").strip()
        if not line.startswith("data:"):
            continue
        # BUG FIX: the original split on the literal 'data: ' and indexed
        # [1], which raised IndexError on "data:" frames without a space.
        try:
            data = json.loads(line[5:].strip())
        except json.JSONDecodeError:
            continue
        if not isinstance(data, dict):
            continue

        kind = data.get("type")
        if kind == "reasoning-delta":
            if not in_reasoning:
                in_reasoning = True
                yield "<think>\n"
            if "delta" in data:
                yield data["delta"]
        elif kind == "text-delta":
            if in_reasoning:
                in_reasoning = False
                yield "\n</think>\n"
            if "delta" in data:
                yield data["delta"]

    # ROBUSTNESS: close the think block if the stream ended mid-reasoning.
    if in_reasoning:
        yield "\n</think>\n"
508
+
509
+ import uuid
510
+
511
+
512
+
513
+
514
+
515
+
516
+
517
+
518
+
519
+
520
+
521
+
522
+
523
+
524
+
525
+
526
+
527
+
528
+
529
+
530
+
531
+
532
+
533
+ # ---------------------------------------------------------------------
534
+ # App & Session
535
+ # ---------------------------------------------------------------------
536
+
537
# Flask application plus a single shared curl_cffi session; the session
# impersonates a Chrome 110 TLS fingerprint for all upstream provider calls.
app = Flask(__name__)
RQ = Session(impersonate="chrome110")
539
+
540
+
541
+ # ---------------------------------------------------------------------
542
+ # Config
543
+ # ---------------------------------------------------------------------
544
+
545
class Config:
    """Defaults applied when a request payload omits a field."""
    DEFAULT_PROVIDER = "1"   # key into PROVIDERS
    DEFAULT_MODEL = "llama-3.3-70b-versatile"
    DEFAULT_MAX_TOKENS = 512
    DEFAULT_TEMPERATURE = 0.7
    TIMEOUT = 30.0           # upstream request timeout, seconds
    STREAM = True            # stream responses unless the caller opts out
552
+
553
+
554
# Provider id -> streaming generator function and the model list it serves.
# NOTE(review): M1 is not defined anywhere in this file as written — it is
# presumably the Adarsh_Personal model list (MODELS); confirm, otherwise
# this dict raises NameError at import time.
PROVIDERS: Dict[str, Dict[str, Any]] = {
    "1": {"func": Adarsh_Personal, "models": M1},
    "2": {"func": QWEN, "models": M2},
    "3": {"func": FREEGPT, "models": M3},
}
559
+
560
+
561
+ # ---------------------------------------------------------------------
562
+ # Request Schema
563
+ # ---------------------------------------------------------------------
564
+
565
+ @dataclass(slots=True)
566
+ class ChatRequest:
567
+ api_key: str
568
+ messages: List[Dict[str, Any]]
569
+ model: str
570
+ provider: str
571
+ max_tokens: int
572
+ temperature: float
573
+ stream: bool
574
+
575
+ @classmethod
576
+ def from_dict(cls, payload: Dict[str, Any]) -> "ChatRequest":
577
+ api_key = payload.get("api_key") or payload.get("key") or payload.get("apikey")
578
+ messages = payload.get("messages") or payload.get("message") or payload.get("msgs") or []
579
+ model = payload.get("model") or payload.get("model_name") or Config.DEFAULT_MODEL
580
+ provider = str(payload.get("provider", Config.DEFAULT_PROVIDER))
581
+
582
+ if isinstance(messages, dict):
583
+ messages = [messages]
584
+
585
+ return cls(
586
+ api_key=api_key,
587
+ messages=messages,
588
+ model=model,
589
+ provider=provider,
590
+ max_tokens=int(payload.get("max_tokens", Config.DEFAULT_MAX_TOKENS)),
591
+ temperature=float(payload.get("temperature", Config.DEFAULT_TEMPERATURE)),
592
+ stream=bool(payload.get("stream", Config.STREAM)),
593
+ )
594
+
595
+
596
+ # ---------------------------------------------------------------------
597
+ # Streaming Generator
598
+ # ---------------------------------------------------------------------
599
+
600
def stream_chat(req: ChatRequest):
    """Yield SSE-formatted chunks for the requested provider.

    Emits ``data: {"response": ...}`` frames followed by ``data: [DONE]``;
    an unknown provider or an upstream exception produces a single error
    frame instead.
    """
    entry = PROVIDERS.get(req.provider)
    if entry is None:
        yield json.dumps({"error": "Invalid provider"}) + "\n"
        return

    try:
        pieces = entry["func"](
            RQ,
            req.messages,
            req.model,
            req.max_tokens,
            req.stream,
            Config.TIMEOUT,
        )
        for piece in pieces:
            if piece:
                yield "data: " + json.dumps({"response": piece}) + "\n\n"
        yield "data: [DONE]\n\n"
    except Exception as exc:
        yield "data: " + json.dumps({"error": str(exc)}) + "\n\n"
624
+
625
+
626
+ # ---------------------------------------------------------------------
627
+ # Routes
628
+ # ---------------------------------------------------------------------
629
+
630
@app.route("/gen", methods=["POST"])
def generate():
    """POST /gen — run a chat completion.

    Streams SSE when ``stream`` is true (default); otherwise collects the
    stream into a single JSON response.
    """
    payload = request.get_json(silent=True)
    if not payload:
        return jsonify({"error": "Invalid JSON body"}), 400

    req = ChatRequest.from_dict(payload)

    if not req.api_key or not req.messages:
        return jsonify({"error": "api_key and messages are required"}), 400

    if req.stream:
        return Response(
            stream_with_context(stream_chat(req)),
            content_type="text/event-stream",
            headers={
                "Cache-Control": "no-cache",
                "X-Accel-Buffering": "no",  # disable proxy buffering
            },
        )

    # Non-stream fallback: drain the SSE generator into one string.
    final = []
    for part in stream_chat(req):
        if not part.startswith("data:"):
            continue
        body = part[5:].strip()
        # BUG FIX: the original ran json.loads on every frame including
        # "data: [DONE]", which raised JSONDecodeError and returned a 500.
        if body == "[DONE]":
            continue
        try:
            data = json.loads(body)
        except json.JSONDecodeError:
            continue
        if "response" in data:
            final.append(data["response"])

    return jsonify({"response": "".join(final)})
660
+
661
+
662
@app.route("/info", methods=["GET"])
def info():
    """GET /info — list the models available under each provider id.

    NOTE(review): M1 is not defined in this file as written (presumably
    the Adarsh_Personal model list, MODELS) — confirm it exists.
    """
    return jsonify({
        "providers": {
            "1": M1,
            "2": M2,
            "3": M3,
        }
    })
671
+
672
+
673
+ # ---------------------------------------------------------------------
674
+ # Run
675
+ # ---------------------------------------------------------------------
676
+
677
# Development entry point; serve behind a production WSGI server instead
# of app.run() when deploying.
if __name__ == "__main__":
    app.run(host="0.0.0.0", port=5550, threaded=True)