"""OpenAI-style Flask proxy that fans chat requests out to several LLM backends.

Providers (see ``Config.PROVIDERS``):
    "1" -> GROQ            (api.groq.com, needs a real API key)
    "2" -> Adarsh_Personal (hadadxyz-ai.hf.space relay)
    "3" -> QWEN            (teichai qwen3-4b HF space)
    "4" -> FREEGPT         (llmchat.in Cloudflare/HF workers)

Each provider function is a generator yielding plain-text chunks; the Flask
layer re-wraps them as SSE ``data: {"response": ...}`` events.
"""

import json
import time
import uuid
from dataclasses import dataclass
from typing import Any, Dict, List, Optional, Tuple

from curl_cffi.requests import Session
from flask import Flask, Response, jsonify, request, stream_with_context


def get_models():
    """Return the static provider -> model catalogue served by /v1/models."""
    return {
        "Providers": ["1", "2", "3", "4"],
        "Models": {
            "1": [
                {"id": "openai/gpt-oss-120b", "owned_by": "OpenAI"},
                {"id": "moonshotai/kimi-k2-instruct", "owned_by": "Moonshot AI"},
                {"id": "llama-3.1-8b-instant", "owned_by": "Meta"},
                {"id": "whisper-large-v3", "owned_by": "OpenAI"},
                {"id": "meta-llama/llama-4-scout-17b-16e-instruct", "owned_by": "Meta"},
                {"id": "allam-2-7b", "owned_by": "SDAIA"},
                {"id": "groq/compound", "owned_by": "Groq"},
                {"id": "llama-3.3-70b-versatile", "owned_by": "Meta"},
                {"id": "qwen/qwen3-32b", "owned_by": "Alibaba Cloud"},
                {"id": "meta-llama/llama-prompt-guard-2-22m", "owned_by": "Meta"},
                {"id": "groq/compound-mini", "owned_by": "Groq"},
                {"id": "meta-llama/llama-guard-4-12b", "owned_by": "Meta"},
                {"id": "openai/gpt-oss-20b", "owned_by": "OpenAI"},
                {"id": "openai/gpt-oss-safeguard-20b", "owned_by": "OpenAI"},
                {"id": "meta-llama/llama-4-maverick-17b-128e-instruct", "owned_by": "Meta"},
                {"id": "moonshotai/kimi-k2-instruct-0905", "owned_by": "Moonshot AI"},
            ],
            "2": [
                {"id": "zai-org/glm-4.6", "owned_by": "Zhipu AI"},
                {"id": "openai/gpt-5-nano-2025-08-07", "owned_by": "OpenAI"},
                {"id": "deepseek-ai/deepseek-v3.2-thinking", "owned_by": "DeepSeek AI"},
                {"id": "nvidia/nvidia-nemotron-3-nano-30b-a3b", "owned_by": "NVIDIA"},
                {"id": "nvidia/nvidia-nemotron-3-nano-30b-a3b-thinking", "owned_by": "NVIDIA"},
                {"id": "openai/gpt-5-mini-2025-08-07", "owned_by": "OpenAI"},
                {"id": "qwen/qwen3-vl-235b-a22b-thinking", "owned_by": "Alibaba Cloud"},
                {"id": "qwen/qwen3-vl-235b-a22b-instruct", "owned_by": "Alibaba Cloud"},
                {"id": "perplexity/sonar", "owned_by": "Perplexity"},
                {"id": "moonshotai/kimi-k2.5", "owned_by": "Moonshot AI"},
                {"id": "anthropic/claude-haiku-4-5-20251001", "owned_by": "Anthropic"},
                {"id": "google/gemini-2.5-flash-lite", "owned_by": "Google"},
                {"id": "moonshotai/kimi-k2-thinking", "owned_by": "Moonshot AI"},
                {"id": "mistralai/devstral-2-123b-instruct-2512", "owned_by": "Mistral AI"},
                {"id": "mistralai/mistral-large-3-675b-instruct-2512", "owned_by": "Mistral AI"},
                {"id": "openai/gpt-oss-safeguard-20b", "owned_by": "OpenAI"},
                {"id": "openai/gpt-oss-120b", "owned_by": "OpenAI"},
            ],
            "3": [
                {"id": "qwen3-4b-thinking-2507", "owned_by": "Alibaba Cloud"},
            ],
            "4": [
                {"id": "meta/llama-3.1-70b-instruct", "owned_by": "Meta"},
                {"id": "qwen/qwen2.5-coder-32b-instruct", "owned_by": "Alibaba Cloud"},
                {"id": "deepseek-ai/deepseek-r1-distill-qwen-32b", "owned_by": "DeepSeek AI"},
                {"id": "meta/llama-4-scout-17b-16e-instruct", "owned_by": "Meta"},
                {"id": "google/gemma-3-12b-it", "owned_by": "Google"},
                {"id": "mistralai/mistral-small-3.1-24b-instruct", "owned_by": "Mistral AI"},
                {"id": "meta/llama-3.3-70b-instruct-fp8-fast", "owned_by": "Meta"},
                {"id": "meta/llama-3.2-3b-instruct", "owned_by": "Meta"},
                {"id": "meta/llama-3.2-1b-instruct", "owned_by": "Meta"},
                {"id": "meta-llama/meta-llama-3-8b-instruct", "owned_by": "Meta"},
                {"id": "meta/llama-3-8b-instruct", "owned_by": "Meta"},
                {"id": "meta/llama-2-7b-chat-int8", "owned_by": "Meta"},
                {"id": "meta/llama-2-7b-chat-fp16", "owned_by": "Meta"},
                {"id": "meta/llama-3-8b-instruct-awq", "owned_by": "Meta"},
                # NOTE(review): the four entries below duplicate earlier ids —
                # preserved as-is because /v1/models is purely informational.
                {"id": "meta-llama/meta-llama-3-8b-instruct", "owned_by": "Meta"},
                {"id": "meta/llama-3-8b-instruct", "owned_by": "Meta"},
                {"id": "meta/llama-2-7b-chat-int8", "owned_by": "Meta"},
                {"id": "meta/llama-3-8b-instruct-awq", "owned_by": "Meta"},
                {"id": "google/gemma-7b-it", "owned_by": "Google"},
                {"id": "google/gemma-2b-it-lora", "owned_by": "Google"},
                {"id": "mistral/mistral-7b-instruct-v0.2", "owned_by": "Mistral AI"},
                {"id": "mistral/mistral-7b-instruct-v0.2-lora", "owned_by": "Mistral AI"},
            ],
        },
    }


# FREEGPT (llmchat.in) routing table: backend tag, model id, and the hard
# max_tokens cap the backend accepts (None = no known cap).
M3 = [
    {"tag": "@cf", "model": "meta/llama-3.1-70b-instruct", "max_tokens": 8192},
    {"tag": "@cf", "model": "qwen/qwen2.5-coder-32b-instruct", "max_tokens": 8192},
    {"tag": "@cf", "model": "deepseek-ai/deepseek-r1-distill-qwen-32b", "max_tokens": 40960},
    {"tag": "@cf", "model": "meta/llama-4-scout-17b-16e-instruct", "max_tokens": 40960},
    {"tag": "@cf", "model": "google/gemma-3-12b-it", "max_tokens": 40960},
    {"tag": "@cf", "model": "mistralai/mistral-small-3.1-24b-instruct", "max_tokens": 40960},
    {"tag": "@cf", "model": "meta/llama-3.3-70b-instruct-fp8-fast", "max_tokens": 8192},
    {"tag": "@cf", "model": "meta/llama-3.2-3b-instruct", "max_tokens": 40960},
    {"tag": "@cf", "model": "meta/llama-3.2-1b-instruct", "max_tokens": 40960},
    {"tag": "@hf", "model": "meta-llama/meta-llama-3-8b-instruct", "max_tokens": 4391},
    {"tag": "@cf", "model": "meta/llama-3-8b-instruct", "max_tokens": 4391},
    {"tag": "@cf", "model": "meta/llama-2-7b-chat-int8", "max_tokens": 4391},
    {"tag": "@cf", "model": "meta/llama-2-7b-chat-fp16", "max_tokens": None},
    {"tag": "@cf", "model": "meta/llama-3-8b-instruct-awq", "max_tokens": 4391},
    # Duplicates preserved from the original table; lookups use the first match.
    {"tag": "@hf", "model": "meta-llama/meta-llama-3-8b-instruct", "max_tokens": 4391},
    {"tag": "@cf", "model": "meta/llama-3-8b-instruct", "max_tokens": 4391},
    {"tag": "@cf", "model": "meta/llama-2-7b-chat-int8", "max_tokens": 4391},
    {"tag": "@cf", "model": "meta/llama-3-8b-instruct-awq", "max_tokens": 4391},
    {"tag": "@hf", "model": "google/gemma-7b-it", "max_tokens": None},
    {"tag": "@cf", "model": "google/gemma-2b-it-lora", "max_tokens": 4391},
    {"tag": "@hf", "model": "mistral/mistral-7b-instruct-v0.2", "max_tokens": 8192},
    {"tag": "@cf", "model": "mistral/mistral-7b-instruct-v0.2-lora", "max_tokens": 8192},
]


def FREEGPT(
    RQ: Any,
    api_key: str,
    messages: List[Dict],
    model: str = "deepseek-ai/deepseek-r1-distill-qwen-32b",
    max_token: int = 40960,
    stream: bool = True,
    timeout: Optional[float] = None,
):
    """Stream completion text from llmchat.in.

    Yields plain-text chunks (the ``response`` field of each SSE event).
    ``api_key`` is unused by this backend but kept for a uniform provider
    signature. Unknown models fall back to ``@cf/meta/llama-3.2-1b-instruct``.
    """
    md = next(
        (item["tag"] + "/" + item["model"] for item in M3 if item["model"] == model),
        "@cf/meta/llama-3.2-1b-instruct",
    )
    URL = f"https://llmchat.in/inference/stream?model={md}"
    headers = {
        "Accept": "text/event-stream,*/*",
        "Content-Type": "application/json",
        "Origin": "https://llmchat.in",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144.0.0.0 Safari/537.36",
        "Cache-Control": "no-cache",
        "Accept-Encoding": "identity",
        "cf-ray": "9cba9edd9f909aaf-SIN",
    }
    # The backend's own cap (from M3) overrides the caller's max_token; the
    # original built the same precedence with two duplicated dict keys.
    table_cap = next(
        (item["max_tokens"] for item in M3
         if item["model"] == model and item["max_tokens"] is not None),
        None,
    )
    effective_max = table_cap if table_cap is not None else max_token
    payload = {"messages": messages, "stream": stream}
    if effective_max is not None:
        payload["max_tokens"] = effective_max

    try:
        RESP = RQ.post(url=URL, json=payload, headers=headers, timeout=timeout, stream=stream)
        print(RESP.status_code)
    except Exception:
        # Best-effort backend: network failure simply ends the stream.
        return
    if RESP.status_code == 200:
        for raw in RESP.iter_lines():
            if not raw:
                continue
            # errors="replace" never raises, so no fallback decode is needed.
            line = raw.decode("utf-8", errors="replace").strip()
            if not line.startswith("data:"):
                continue
            # Slice off the prefix instead of split('data: ')[1], which raised
            # IndexError on "data:x" and mis-split payloads containing "data: ".
            data_json = line[len("data:"):].lstrip()
            try:
                data = json.loads(data_json)
            except json.JSONDecodeError:
                continue
            if isinstance(data, dict) and "response" in data:
                yield data["response"]
    else:
        yield "AN ERROR OCCURRED"


# Models served by the QWEN provider (teichai HF space).
M2 = ["qwen3-4b-thinking-2507"]


def QWEN(
    RQ: Any,
    api_key: str,
    messages: List[Dict],
    model: str = "NONE",
    max_token: int = 40960,
    stream: bool = True,
    timeout: Optional[float] = None,
):
    """Stream from the teichai qwen3-4b HF space.

    Reasoning chunks are prefixed once with "\\n"; the switch back to answer
    text inserts "\\n\\n\\n". ``api_key``/``model``/``max_token`` are ignored by
    this backend (single fixed model) but kept for signature uniformity.
    """

    def _events(rq: Any, msgs: list, timeout: Optional[float] = None):
        """Yield parsed JSON objects from the space's SSE response."""
        API_URL = "https://teichai-qwen3-4b-thinking-2507-claude-4-5-opus.hf.space/api/chat"
        payload = {"messages": msgs, "searchEnabled": False}
        headers = {
            "Accept": "*/*",
            "Content-Type": "application/json",
            "Origin": "https://teichai-qwen3-4b-thinking-2507-claude-4-5-opus.hf.space",
            "Referer": "https://teichai-qwen3-4b-thinking-2507-claude-4-5-opus.hf.space/",
            "User-Agent": "python-requests/2.x",
        }
        resp = rq.post(API_URL, headers=headers, json=payload, stream=stream, timeout=timeout)
        buffer_lines = []
        for raw in resp.iter_lines():
            if raw is None:
                continue
            # NOTE: empty bytes (b"") must fall through — a blank line is the
            # SSE event terminator that flushes the buffer below.
            line = raw.decode("utf-8", errors="replace").strip()
            if line == "":
                if not buffer_lines:
                    continue
                # Joined with "" (not "\n") to match the original parser;
                # this space emits one fragment per data: line.
                data_text = "".join(buffer_lines)
                buffer_lines = []
                if data_text == "[DONE]":
                    break
                try:
                    yield json.loads(data_text)
                except json.JSONDecodeError:
                    pass
                continue
            if line.startswith("data:"):
                buffer_lines.append(line[len("data:"):].lstrip())

    in_reasoning = False
    for obj in _events(RQ, messages, timeout=timeout):
        if obj["type"] == "reasoning":
            if not in_reasoning:
                in_reasoning = True
                yield "\n"
            yield obj["content"]
        else:
            if in_reasoning:
                in_reasoning = False
                yield "\n\n\n"
            if "content" in obj:
                yield obj["content"]


class CONV:
    """Converters between OpenAI-style message lists and the "alpaca"/parts
    format used by the hadadxyz relay (role + typed ``parts`` + metadata)."""

    def __init__(self, default_system: str = ""):
        # System prompt injected by alpaca_to_msg when no override is given.
        self.default_system = default_system

    @staticmethod
    def _make_id() -> str:
        """Random 20-hex-char message id."""
        return uuid.uuid4().hex[:20]

    def alpaca_to_msg(
        self,
        alpaca_obj: Dict[str, Any],
        insert_system: bool = True,
        system_override: Optional[str] = None,
        skip_empty: bool = True,
    ) -> Tuple[List[Dict[str, str]], float]:
        """Convert parts-format messages to flat {role, content} dicts.

        Returns (messages, elapsed_seconds). Unknown roles become "user";
        only ``{"type": "text"}`` parts are kept, joined with blank lines.
        NOTE(review): despite the Dict annotation, ``alpaca_obj`` is iterated
        directly as a sequence of message dicts — confirm against callers.
        """
        t0 = time.perf_counter()
        out: List[Dict[str, str]] = []
        sys_text = system_override if system_override is not None else self.default_system
        if insert_system and sys_text is not None:
            out.append({"role": "system", "content": sys_text})
        append = out.append  # hoisted bound method (hot loop)
        for m in alpaca_obj:
            role = (m.get("role") or "").strip().lower()
            if role not in ("user", "assistant", "system"):
                role = "user"
            parts = m.get("parts") or []
            texts: List[str] = []
            for p in parts:
                # Only non-empty string text parts count; trailing whitespace trimmed.
                if isinstance(p, dict) and p.get("type") == "text":
                    txt = p.get("text", "")
                    if isinstance(txt, str) and txt:
                        texts.append(txt.rstrip())
            if not texts and skip_empty:
                continue
            if texts:
                append({"role": role, "content": "\n\n".join(texts)})
            else:
                # skip_empty=False: keep the role with empty content.
                append({"role": role, "content": ""})
        return out, time.perf_counter() - t0

    def msg_to_alpaca(
        self,
        msg_list: List[Dict[str, Any]],
        include_step_start: bool = True,
        assistant_state_done: bool = True,
        preserve_ids: bool = False,
        skip_empty_text_parts: bool = False,
    ) -> Tuple[Dict[str, List[Dict[str, Any]]], float]:
        """Convert flat {role, content} dicts to the relay's parts format.

        Returns (messages, elapsed_seconds). Non-dict entries are coerced to
        user messages; roles other than user/assistant become "user".
        """
        t0 = time.perf_counter()
        out_messages: List[Dict[str, Any]] = []
        append = out_messages.append
        for entry in msg_list:
            if not isinstance(entry, dict):
                role, content, entry_id = "user", str(entry), None
            else:
                role = (entry.get("role") or "user").strip().lower()
                content = entry.get("content", "")
                entry_id = entry.get("id") if preserve_ids else None
            if role not in ("user", "assistant"):
                role = "user"
            parts: List[Dict[str, Any]] = []
            if role == "assistant" and include_step_start:
                parts.append({"type": "step-start"})
            if isinstance(content, str):
                if not skip_empty_text_parts or content.strip() != "":
                    text_part: Dict[str, Any] = {"type": "text", "text": content}
                    if role == "assistant" and assistant_state_done:
                        text_part["state"] = "done"
                    parts.append(text_part)
            append({
                "id": entry_id if (entry_id is not None and isinstance(entry_id, str) and entry_id != "") else self._make_id(),
                "role": role,
                "parts": parts,
                "metadata": {"custom": {}},
            })
        return out_messages, time.perf_counter() - t0


# Models served by the Adarsh_Personal provider. FIX: the original list was
# missing two commas, so "moonshotai/kimi-k2-thinking" and
# "mistralai/devstral-2-123b-instruct-2512" were silently concatenated with
# their neighbours into two bogus ids.
M1 = [
    "zai-org/glm-4.6",
    "openai/gpt-5-nano-2025-08-07",
    "deepseek-ai/deepseek-v3.2-thinking",
    "nvidia/nvidia-nemotron-3-nano-30b-a3b",
    "nvidia/nvidia-nemotron-3-nano-30b-a3b-thinking",
    "openai/gpt-5-mini-2025-08-07",
    "qwen/qwen3-vl-235b-a22b-thinking",
    "qwen/qwen3-vl-235b-a22b-instruct",
    "perplexity/sonar",
    "moonshotai/kimi-k2.5",
    "anthropic/claude-haiku-4-5-20251001",  # deprecating model
    "google/gemini-2.5-flash-lite",
    "moonshotai/kimi-k2-thinking",
    "mistralai/devstral-2-123b-instruct-2512",  # good model
    "mistralai/mistral-large-3-675b-instruct-2512",
    "openai/gpt-oss-safeguard-20b",
    "openai/gpt-oss-120b",
]


def Adarsh_Personal(
    RQ: Any,
    api_key: str,
    messages: List[Dict],
    model: str = "deepseek-ai/deepseek-r1-distill-qwen-32b",
    max_token: int = 40960,
    stream: bool = True,
    timeout: Optional[float] = None,
):
    """Stream from the hadadxyz-ai HF space relay.

    Messages are converted to the relay's parts format first. Reasoning
    deltas are prefixed once with "\\n"; the switch to answer text inserts
    "\\n\\n". ``api_key``/``max_token`` are unused by this backend.
    """
    in_reasoning = False
    URL = "https://hadadxyz-ai.hf.space/api/mz1a85y5n80zy5127hgsba5f3a9c2d1Np0x300vcgduqxb7ep084fygd016c9a2d16fa8b3c41gut432pvjctr75hhspjae25d6f7a8b9c0d1e2pjf43v16f3a4b5c6dd7e8fba2bdx9a0b6dv1c2d7e2b4c9f83d6a4f1bb6c152f9pe3c7a88qv5d91f3c2b765g134bp9a41ne4yx4b3vda8w074"
    NEW_MSGS, _elapsed = CONV().msg_to_alpaca(
        messages, include_step_start=True, assistant_state_done=True
    )
    payload = {
        "tools": {},
        "modelId": model,
        # Hard-coded session/client identity expected by the relay.
        "sessionId": "sess_7ef524b9_mlfe4ped",
        "clientId": "7ef524b98a963b507ec9f4000fdea38c-mlfe4pea",
        "requestId": "req_7ef524b9_mlfg1cpq_jjxb7p",
        "clientIp": "122.161.52.54",
        "realIp": "122.161.52.54",
        "forwardedFor": "122.161.52.54",
        "userAgent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144.0.0.0 Safari/537.36",
        "id": "DEFAULT_THREAD_ID",
        "messages": NEW_MSGS,
        "trigger": "submit-message",
        "metadata": {},
    }
    headers = {
        "Accept": "text/event-stream, */*",
        "Content-Type": "application/json",
        "Origin": "https://hadadxyz-ai.hf.space",
        "User-Agent": payload["userAgent"],
        "Cache-Control": "no-cache",
        "Accept-Encoding": "identity",
        "x-turnstile-token": "mlfe5357-zq9depfzhpb-e18cbvzrpid",
        "x-turnstile-verified": "true",
    }
    RESP = RQ.post(URL, json=payload, headers=headers, stream=stream, timeout=timeout)
    if RESP.status_code != 200:
        return
    for raw in RESP.iter_lines():
        if not raw:
            continue
        line = raw.decode("utf-8", errors="replace").strip()
        if not line.startswith("data:"):
            continue
        data_json = line[len("data:"):].lstrip()
        try:
            data = json.loads(data_json)
        except json.JSONDecodeError:
            continue
        kind = data.get("type") if isinstance(data, dict) else None
        if kind == "reasoning-delta":
            if not in_reasoning:
                in_reasoning = True
                yield "\n"
            if "delta" in data:
                yield data["delta"]
        elif kind == "text-delta":
            if in_reasoning:
                in_reasoning = False
                yield "\n\n"
            if "delta" in data:
                yield data["delta"]


def GROQ(
    RQ: Any,
    api_key: str,
    messages: List[Dict],
    model: str = "deepseek-ai/deepseek-r1-distill-qwen-32b",
    max_token: int = 40960,
    stream: bool = True,
    timeout: Optional[float] = None,
):
    """Stream an OpenAI-compatible chat completion from api.groq.com.

    Requires a real ``api_key``. ``reasoning`` deltas are prefixed once with
    "\\n"; the switch to regular content inserts "\\n\\n".
    """
    in_reasoning = False
    URL = "https://api.groq.com/openai/v1/chat/completions"
    payload = {
        "model": model,
        "messages": messages,
        "temperature": 0.9,
        "stop": None,
        "stream": stream,
    }
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    RESP = RQ.post(URL, json=payload, headers=headers, stream=stream, timeout=timeout)
    if RESP.status_code != 200:
        return
    for raw in RESP.iter_lines():
        if not raw:
            continue
        line = raw.decode("utf-8", errors="replace").strip()
        if not line.startswith("data:"):
            continue
        data_json = line[len("data:"):].lstrip()
        # Check the sentinel BEFORE json.loads; the original parsed first and,
        # on failure, fell through to reuse a stale/unbound `data` object.
        if data_json == "[DONE]":
            continue
        try:
            data = json.loads(data_json)
        except json.JSONDecodeError:
            continue
        try:
            delta = data["choices"][0]["delta"]
        except (KeyError, IndexError, TypeError):
            continue
        reasoning = delta.get("reasoning")
        if reasoning:
            if not in_reasoning:
                in_reasoning = True
                yield "\n"
            yield reasoning
        else:
            if in_reasoning:
                in_reasoning = False
                yield "\n\n"
            content = delta.get("content")
            if content is not None:
                yield content


# ---------------------------------------------------------------------
# App & Session
# ---------------------------------------------------------------------
app = Flask(__name__)
RQ = Session(impersonate="chrome110")  # shared TLS-impersonating HTTP session


# ---------------------------------------------------------------------
# Config
# ---------------------------------------------------------------------
class Config:
    """Static service defaults and the provider dispatch table."""

    DEFAULT_PROVIDER = "1"
    DEFAULT_MODEL = "llama-3.3-70b-versatile"
    DEFAULT_MAX_TOKENS = 512
    DEFAULT_TEMPERATURE = 0.7
    TIMEOUT = 30.0
    STREAM = True

    # provider id -> generator function + its model list (None = unrestricted)
    PROVIDERS: Dict[str, Dict[str, Any]] = {
        "1": {"func": GROQ, "models": None},
        "2": {"func": Adarsh_Personal, "models": M1},
        "3": {"func": QWEN, "models": M2},
        "4": {"func": FREEGPT, "models": M3},
    }


# ---------------------------------------------------------------------
# Request Schema
# ---------------------------------------------------------------------
@dataclass
class ChatRequest:
    """Normalized /v1/chat/completions request body."""

    api_key: str
    messages: List[Dict[str, Any]]
    model: str
    provider: str
    max_tokens: int
    temperature: float
    stream: bool

    @classmethod
    def from_dict(cls, payload: Dict[str, Any]) -> "ChatRequest":
        """Build a ChatRequest from a loosely-shaped JSON payload.

        Accepts several aliases (key/apikey, message/msgs, model_name) and
        wraps a single message dict in a list.
        """
        api_key = payload.get("api_key") or payload.get("key") or payload.get("apikey")
        messages = payload.get("messages") or payload.get("message") or payload.get("msgs") or []
        model = payload.get("model") or payload.get("model_name") or Config.DEFAULT_MODEL
        provider = str(payload.get("provider", Config.DEFAULT_PROVIDER))
        if isinstance(messages, dict):
            messages = [messages]
        return cls(
            api_key=api_key,
            messages=messages,
            model=model,
            provider=provider,
            max_tokens=int(payload.get("max_tokens", Config.DEFAULT_MAX_TOKENS)),
            temperature=float(payload.get("temperature", Config.DEFAULT_TEMPERATURE)),
            stream=bool(payload.get("stream", Config.STREAM)),
        )


# ---------------------------------------------------------------------
# Streaming Generator
# ---------------------------------------------------------------------
def stream_chat(req: ChatRequest):
    """Dispatch to the selected provider and re-emit its chunks as SSE lines."""
    # FIX: the original referenced a bare `PROVIDERS`, which is only defined
    # as Config.PROVIDERS — a guaranteed NameError on the first request.
    provider = Config.PROVIDERS.get(req.provider)
    if not provider:
        yield json.dumps({"error": "Invalid provider"}) + "\n"
        return
    try:
        for chunk in provider["func"](
            RQ,
            req.api_key,
            req.messages,
            req.model,
            req.max_tokens,
            req.stream,
            Config.TIMEOUT,
        ):
            if not chunk:
                continue
            yield f"data: {json.dumps({'response': chunk})}\n\n"
        yield "data: [DONE]\n\n"
    except Exception as e:
        yield f"data: {json.dumps({'error': str(e)})}\n\n"


# ---------------------------------------------------------------------
# Routes
# ---------------------------------------------------------------------
@app.route("/v1/chat/completions", methods=["POST"])
def generate():
    """Chat endpoint: SSE stream by default, joined text when stream=false."""
    payload = request.get_json(silent=True)
    if not payload:
        return jsonify({"error": "Invalid JSON body"}), 400

    req = ChatRequest.from_dict(payload)
    if not req.api_key or not req.messages:
        return jsonify({"error": "api_key and messages are required"}), 400

    if req.stream:
        return Response(
            stream_with_context(stream_chat(req)),
            content_type="text/event-stream",
            headers={
                "Cache-Control": "no-cache",
                "X-Accel-Buffering": "no",  # disable proxy buffering for SSE
            },
        )

    # Non-stream fallback: drain the generator and join the text chunks.
    final = []
    for part in stream_chat(req):
        if not part.startswith("data:"):
            continue
        body = part[len("data:"):].strip()
        # FIX: the original fed the "[DONE]" sentinel (and any malformed
        # event) straight into json.loads, turning every non-stream request
        # into an HTTP 500.
        if body == "[DONE]":
            continue
        try:
            data = json.loads(body)
        except json.JSONDecodeError:
            continue
        if "response" in data:
            final.append(data["response"])
    return jsonify({"response": "".join(final)})


@app.route("/v1/models", methods=["GET"])
def info():
    """Model catalogue endpoint."""
    return jsonify({"models": get_models()})


# ---------------------------------------------------------------------
# Run
# ---------------------------------------------------------------------
if __name__ == "__main__":
    app.run()