| | import json |
| | from flask import Flask, request, jsonify, Response, stream_with_context |
| | from dataclasses import dataclass |
| | from typing import Any, Dict, List, Optional ,Tuple |
| | import time |
| | import uuid |
| | from curl_cffi.requests import Session |
| |
|
def get_models():
    """Return the catalog of chat providers and their models.

    The result maps "Providers" to the ordered list of provider ids and
    "Models" to a dict of provider id -> list of {"id", "owned_by"} entries.
    """

    def _entries(pairs):
        # Expand (model_id, owner) tuples into the advertised dict shape.
        return [{"id": model_id, "owned_by": owner} for model_id, owner in pairs]

    # Provider "1": Groq-hosted models.
    provider_1 = _entries([
        ("openai/gpt-oss-120b", "OpenAI"),
        ("moonshotai/kimi-k2-instruct", "Moonshot AI"),
        ("llama-3.1-8b-instant", "Meta"),
        ("whisper-large-v3", "OpenAI"),
        ("meta-llama/llama-4-scout-17b-16e-instruct", "Meta"),
        ("allam-2-7b", "SDAIA"),
        ("groq/compound", "Groq"),
        ("llama-3.3-70b-versatile", "Meta"),
        ("qwen/qwen3-32b", "Alibaba Cloud"),
        ("meta-llama/llama-prompt-guard-2-22m", "Meta"),
        ("groq/compound-mini", "Groq"),
        ("meta-llama/llama-guard-4-12b", "Meta"),
        ("openai/gpt-oss-20b", "OpenAI"),
        ("openai/gpt-oss-safeguard-20b", "OpenAI"),
        ("meta-llama/llama-4-maverick-17b-128e-instruct", "Meta"),
        ("moonshotai/kimi-k2-instruct-0905", "Moonshot AI"),
    ])

    # Provider "2": models served by the hadadxyz upstream.
    provider_2 = _entries([
        ("zai-org/glm-4.6", "Zhipu AI"),
        ("openai/gpt-5-nano-2025-08-07", "OpenAI"),
        ("deepseek-ai/deepseek-v3.2-thinking", "DeepSeek AI"),
        ("nvidia/nvidia-nemotron-3-nano-30b-a3b", "NVIDIA"),
        ("nvidia/nvidia-nemotron-3-nano-30b-a3b-thinking", "NVIDIA"),
        ("openai/gpt-5-mini-2025-08-07", "OpenAI"),
        ("qwen/qwen3-vl-235b-a22b-thinking", "Alibaba Cloud"),
        ("qwen/qwen3-vl-235b-a22b-instruct", "Alibaba Cloud"),
        ("perplexity/sonar", "Perplexity"),
        ("moonshotai/kimi-k2.5", "Moonshot AI"),
        ("anthropic/claude-haiku-4-5-20251001", "Anthropic"),
        ("google/gemini-2.5-flash-lite", "Google"),
        ("moonshotai/kimi-k2-thinking", "Moonshot AI"),
        ("mistralai/devstral-2-123b-instruct-2512", "Mistral AI"),
        ("mistralai/mistral-large-3-675b-instruct-2512", "Mistral AI"),
        ("openai/gpt-oss-safeguard-20b", "OpenAI"),
        ("openai/gpt-oss-120b", "OpenAI"),
    ])

    # Provider "3": single Hugging Face space model.
    provider_3 = _entries([
        ("qwen3-4b-thinking-2507", "Alibaba Cloud"),
    ])

    # Provider "4": llmchat.in routes (duplicates mirror the M3 registry).
    provider_4 = _entries([
        ("meta/llama-3.1-70b-instruct", "Meta"),
        ("qwen/qwen2.5-coder-32b-instruct", "Alibaba Cloud"),
        ("deepseek-ai/deepseek-r1-distill-qwen-32b", "DeepSeek AI"),
        ("meta/llama-4-scout-17b-16e-instruct", "Meta"),
        ("google/gemma-3-12b-it", "Google"),
        ("mistralai/mistral-small-3.1-24b-instruct", "Mistral AI"),
        ("meta/llama-3.3-70b-instruct-fp8-fast", "Meta"),
        ("meta/llama-3.2-3b-instruct", "Meta"),
        ("meta/llama-3.2-1b-instruct", "Meta"),
        ("meta-llama/meta-llama-3-8b-instruct", "Meta"),
        ("meta/llama-3-8b-instruct", "Meta"),
        ("meta/llama-2-7b-chat-int8", "Meta"),
        ("meta/llama-2-7b-chat-fp16", "Meta"),
        ("meta/llama-3-8b-instruct-awq", "Meta"),
        ("meta-llama/meta-llama-3-8b-instruct", "Meta"),
        ("meta/llama-3-8b-instruct", "Meta"),
        ("meta/llama-2-7b-chat-int8", "Meta"),
        ("meta/llama-3-8b-instruct-awq", "Meta"),
        ("google/gemma-7b-it", "Google"),
        ("google/gemma-2b-it-lora", "Google"),
        ("mistral/mistral-7b-instruct-v0.2", "Mistral AI"),
        ("mistral/mistral-7b-instruct-v0.2-lora", "Mistral AI"),
    ])

    return {
        "Providers": ["1", "2", "3", "4"],
        "Models": {
            "1": provider_1,
            "2": provider_2,
            "3": provider_3,
            "4": provider_4,
        },
    }
| |
|
| |
|
| |
|
| |
|
| |
|
# Route registry for the FREEGPT/llmchat.in provider: each entry maps a model
# id to its upstream tag ("@cf" = Cloudflare Workers AI, "@hf" = Hugging Face)
# and the max_tokens value to request (None = leave the upstream default).
# NOTE(review): several entries are duplicated — preserved as-is.
M3 = [
    {"tag": tag, "model": model, "max_tokens": max_tokens}
    for tag, model, max_tokens in [
        ("@cf", "meta/llama-3.1-70b-instruct", 8192),
        ("@cf", "qwen/qwen2.5-coder-32b-instruct", 8192),
        ("@cf", "deepseek-ai/deepseek-r1-distill-qwen-32b", 40960),
        ("@cf", "meta/llama-4-scout-17b-16e-instruct", 40960),
        ("@cf", "google/gemma-3-12b-it", 40960),
        ("@cf", "mistralai/mistral-small-3.1-24b-instruct", 40960),
        ("@cf", "meta/llama-3.3-70b-instruct-fp8-fast", 8192),
        ("@cf", "meta/llama-3.2-3b-instruct", 40960),
        ("@cf", "meta/llama-3.2-1b-instruct", 40960),
        ("@hf", "meta-llama/meta-llama-3-8b-instruct", 4391),
        ("@cf", "meta/llama-3-8b-instruct", 4391),
        ("@cf", "meta/llama-2-7b-chat-int8", 4391),
        ("@cf", "meta/llama-2-7b-chat-fp16", None),
        ("@cf", "meta/llama-3-8b-instruct-awq", 4391),
        ("@hf", "meta-llama/meta-llama-3-8b-instruct", 4391),
        ("@cf", "meta/llama-3-8b-instruct", 4391),
        ("@cf", "meta/llama-2-7b-chat-int8", 4391),
        ("@cf", "meta/llama-3-8b-instruct-awq", 4391),
        ("@hf", "google/gemma-7b-it", None),
        ("@cf", "google/gemma-2b-it-lora", 4391),
        ("@hf", "mistral/mistral-7b-instruct-v0.2", 8192),
        ("@cf", "mistral/mistral-7b-instruct-v0.2-lora", 8192),
    ]
]
| |
|
def FREEGPT(
    RQ: Any,
    api_key: str,
    messages: List[Dict],
    model: str = "deepseek-ai/deepseek-r1-distill-qwen-32b",
    max_token: int = 40960,
    stream: bool = True,
    timeout: Optional[float] = None,
):
    """Stream completion text chunks from the llmchat.in inference endpoint.

    Resolves *model* to a "tag/model" route via the M3 registry (falling back
    to a small default route) and yields each SSE "response" fragment.
    `api_key` is unused by this upstream but kept for signature parity with
    the other provider functions.

    Yields:
        str: text fragments, or the literal "AN ERROR OCCURED" on a non-200.
    """
    route = next(
        (f'{item["tag"]}/{item["model"]}' for item in M3 if item["model"] == model),
        "@cf/meta/llama-3.2-1b-instruct",
    )
    url = f"https://llmchat.in/inference/stream?model={route}"

    headers = {
        "Accept": "text/event-stream,*/*",
        "Content-Type": "application/json",
        "Origin": "https://llmchat.in",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144.0.0.0 Safari/537.36",
        "Cache-Control": "no-cache",
        "Accept-Encoding": "identity",
        "cf-ray": "9cba9edd9f909aaf-SIN",
    }

    # Prefer the registry's max_tokens for this model; otherwise use the
    # caller's value. (Replaces the original double dict-spread, which built
    # the same result less readably.)
    registry_max = next(
        (item["max_tokens"] for item in M3
         if item["model"] == model and item["max_tokens"] is not None),
        None,
    )
    effective_max = registry_max if registry_max is not None else max_token

    payload: Dict[str, Any] = {"messages": messages, "stream": stream}
    if effective_max is not None:
        payload["max_tokens"] = effective_max

    try:
        resp = RQ.post(url=url, json=payload, headers=headers, timeout=timeout, stream=stream)
    except Exception:
        # Network/transport failure: end the stream silently (original behavior).
        return

    if resp.status_code != 200:
        yield "AN ERROR OCCURED"
        return

    for raw in resp.iter_lines():
        if not raw:
            continue
        line = raw.decode("utf-8", errors="replace").strip()
        if not line.startswith("data:"):
            continue
        # Strip the "data:" prefix robustly — the original split on "data: "
        # (with a space) and raised IndexError on "data:{...}" frames.
        data_json = line[len("data:"):].lstrip()
        try:
            data = json.loads(data_json)
        except ValueError:
            continue
        if isinstance(data, dict) and "response" in data:
            yield data["response"]
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
M2 = ["qwen3-4b-thinking-2507"]  # model list for the QWEN provider (single HF-space model)
| |
|
| |
|
def QWEN(
    RQ: Any,
    api_key: str,
    messages: List[Dict],
    model: str = "NONE",
    max_token: int = 40960,
    stream: bool = True,
    timeout: Optional[float] = None,
):
    """Stream text from the teichai qwen3 Hugging Face space.

    Reasoning events are wrapped in "<think>...</think>" markers; regular
    text events are yielded as-is. `api_key`, `model` and `max_token` are
    unused by this upstream but kept for signature parity with the other
    provider functions.
    """

    def _events(rq: Any, msgs: List[Dict], tmo: Optional[float] = None):
        """Yield parsed JSON event dicts from the space's SSE endpoint."""
        api_url = "https://teichai-qwen3-4b-thinking-2507-claude-4-5-opus.hf.space/api/chat"
        payload = {"messages": msgs, "searchEnabled": False}
        headers = {
            "Accept": "*/*",
            "Content-Type": "application/json",
            "Origin": "https://teichai-qwen3-4b-thinking-2507-claude-4-5-opus.hf.space",
            "Referer": "https://teichai-qwen3-4b-thinking-2507-claude-4-5-opus.hf.space/",
            "User-Agent": "python-requests/2.x",
        }

        resp = rq.post(api_url, headers=headers, json=payload, stream=stream, timeout=tmo)

        # SSE events may span multiple "data:" lines; a blank line ends one event.
        buffer_lines: List[str] = []
        for raw in resp.iter_lines():
            if raw is None:
                continue
            line = raw.decode("utf-8", errors="replace").strip()
            if line == "":
                if not buffer_lines:
                    continue
                data_text = "".join(buffer_lines)
                buffer_lines = []
                if data_text == "[DONE]":
                    break
                try:
                    yield json.loads(data_text)
                except json.JSONDecodeError:
                    pass
                continue
            if line.startswith("data:"):
                buffer_lines.append(line[len("data:"):].lstrip())

    in_reasoning = False
    for event in _events(RQ, messages, timeout):
        if not isinstance(event, dict):
            continue
        # .get avoids the original's uncaught KeyError on events without "type".
        if event.get("type") == "reasoning":
            if not in_reasoning:
                in_reasoning = True
                yield "<think>\n"
            yield event.get("content", "")
        else:
            if in_reasoning:
                in_reasoning = False
                yield "\n</think>\n\n"
            if "content" in event:
                yield event["content"]
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
class CONV:
    """Converters between OpenAI-style chat messages and the parts-based
    message format used by the hadadxyz upstream (Adarsh_Personal)."""

    def __init__(self, default_system: str = ""):
        # System prompt inserted by alpaca_to_msg when no override is given.
        self.default_system = default_system

    @staticmethod
    def _make_id() -> str:
        """Return a short random message id (20 hex characters)."""
        return uuid.uuid4().hex[:20]

    def alpaca_to_msg(
        self,
        # Fixed annotation: the method iterates a *list* of message dicts,
        # not a single Dict as originally annotated.
        alpaca_obj: List[Dict[str, Any]],
        insert_system: bool = True,
        system_override: Optional[str] = None,
        skip_empty: bool = True,
    ) -> Tuple[List[Dict[str, str]], float]:
        """Convert parts-format messages into {"role", "content"} messages.

        Text parts of each message are joined with blank lines; messages with
        no text parts are dropped when *skip_empty* is true. Returns the
        converted list and the elapsed conversion time in seconds.
        """
        t0 = time.perf_counter()

        out: List[Dict[str, str]] = []
        sys_text = system_override if system_override is not None else self.default_system
        if insert_system and sys_text is not None:
            out.append({"role": "system", "content": sys_text})

        msgs = alpaca_obj
        append = out.append
        for m in msgs:
            role = (m.get("role") or "").strip().lower()
            if role not in ("user", "assistant", "system"):
                role = "user"  # unknown roles are coerced to "user"

            parts = m.get("parts") or []
            texts: List[str] = []
            for p in parts:
                if isinstance(p, dict) and p.get("type") == "text":
                    txt = p.get("text", "")
                    if isinstance(txt, str) and txt:
                        texts.append(txt.rstrip())

            if not texts and skip_empty:
                continue

            if texts:
                content = "\n\n".join(texts)
                append({"role": role, "content": content})
            else:
                # skip_empty is False: keep the message with empty content.
                append({"role": role, "content": ""})

        elapsed = time.perf_counter() - t0
        return out, elapsed

    def msg_to_alpaca(
        self,
        msg_list: List[Dict[str, Any]],
        include_step_start: bool = True,
        assistant_state_done: bool = True,
        preserve_ids: bool = False,
        skip_empty_text_parts: bool = False,
    # Fixed annotation: the method returns a *list* of message dicts,
    # not a Dict[str, List[...]] as originally annotated.
    ) -> Tuple[List[Dict[str, Any]], float]:
        """Convert {"role", "content"} messages into parts-format messages.

        Assistant messages optionally get a leading {"type": "step-start"}
        part and a "state": "done" marker on their text part. Returns the
        converted list and the elapsed conversion time in seconds.
        """
        t0 = time.perf_counter()

        out_messages: List[Dict[str, Any]] = []
        append = out_messages.append

        for entry in msg_list:
            if not isinstance(entry, dict):
                # Non-dict entries are treated as plain user text.
                role = "user"
                content = str(entry)
                entry_id = None
            else:
                role = (entry.get("role") or "user").strip().lower()
                content = entry.get("content", "")
                entry_id = entry.get("id") if preserve_ids else None

            if role not in ("user", "assistant"):
                role = "user"  # system/other roles are coerced to "user"

            parts: List[Dict[str, Any]] = []
            if role == "assistant" and include_step_start:
                parts.append({"type": "step-start"})

            if isinstance(content, str):
                if not skip_empty_text_parts or content.strip() != "":
                    text_part: Dict[str, Any] = {"type": "text", "text": content}
                    if role == "assistant" and assistant_state_done:
                        text_part["state"] = "done"
                    parts.append(text_part)

            msg_obj: Dict[str, Any] = {
                "id": entry_id if (entry_id is not None and isinstance(entry_id, str) and entry_id != "") else self._make_id(),
                "role": role,
                "parts": parts,
                "metadata": {"custom": {}},
            }

            append(msg_obj)

        elapsed = time.perf_counter() - t0
        return out_messages, elapsed
| |
|
| |
|
| | |
| |
|
| |
|
| |
|
# Models accepted by the Adarsh_Personal (hadadxyz) provider.
# FIX: the original list was missing commas after "moonshotai/kimi-k2-thinking"
# and "mistralai/devstral-2-123b-instruct-2512", so implicit string
# concatenation silently merged three model ids into one garbage entry.
M1 = [
    "zai-org/glm-4.6",
    "openai/gpt-5-nano-2025-08-07",
    "deepseek-ai/deepseek-v3.2-thinking",
    "nvidia/nvidia-nemotron-3-nano-30b-a3b",
    "nvidia/nvidia-nemotron-3-nano-30b-a3b-thinking",
    "openai/gpt-5-mini-2025-08-07",
    "qwen/qwen3-vl-235b-a22b-thinking",
    "qwen/qwen3-vl-235b-a22b-instruct",
    "perplexity/sonar",
    "moonshotai/kimi-k2.5",
    "anthropic/claude-haiku-4-5-20251001",
    "google/gemini-2.5-flash-lite",
    "moonshotai/kimi-k2-thinking",
    "mistralai/devstral-2-123b-instruct-2512",
    "mistralai/mistral-large-3-675b-instruct-2512",
    "openai/gpt-oss-safeguard-20b",
    "openai/gpt-oss-120b",
]
| |
|
| |
|
def Adarsh_Personal(
    RQ: Any,
    api_key: str,
    messages: List[Dict],
    model: str = "deepseek-ai/deepseek-r1-distill-qwen-32b",
    max_token: int = 40960,
    stream: bool = True,
    timeout: Optional[float] = None,
):
    """Stream text from the hadadxyz HF space, wrapping reasoning deltas in
    "<think>...</think>" markers.

    `api_key` and `max_token` are unused by this upstream but kept for
    signature parity with the other provider functions. Produces no chunks
    on a non-200 response (original behavior).
    """
    in_reasoning = False
    URL = "https://hadadxyz-ai.hf.space/api/mz1a85y5n80zy5127hgsba5f3a9c2d1Np0x300vcgduqxb7ep084fygd016c9a2d16fa8b3c41gut432pvjctr75hhspjae25d6f7a8b9c0d1e2pjf43v16f3a4b5c6dd7e8fba2bdx9a0b6dv1c2d7e2b4c9f83d6a4f1bb6c152f9pe3c7a88qv5d91f3c2b765g134bp9a41ne4yx4b3vda8w074"

    # Upstream expects parts-format messages; elapsed time is discarded.
    new_msgs, _ = CONV().msg_to_alpaca(messages, include_step_start=True, assistant_state_done=True)

    payload = {
        "tools": {},
        "modelId": model,
        "sessionId": "sess_7ef524b9_mlfe4ped",
        "clientId": "7ef524b98a963b507ec9f4000fdea38c-mlfe4pea",
        "requestId": "req_7ef524b9_mlfg1cpq_jjxb7p",
        "clientIp": "122.161.52.54",
        "realIp": "122.161.52.54",
        "forwardedFor": "122.161.52.54",
        "userAgent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144.0.0.0 Safari/537.36",
        "id": "DEFAULT_THREAD_ID",
        "messages": new_msgs,
        "trigger": "submit-message",
        "metadata": {},
    }

    headers = {
        "Accept": "text/event-stream, */*",
        "Content-Type": "application/json",
        "Origin": "https://hadadxyz-ai.hf.space",
        "User-Agent": payload["userAgent"],
        "Cache-Control": "no-cache",
        "Accept-Encoding": "identity",
        "x-turnstile-token": "mlfe5357-zq9depfzhpb-e18cbvzrpid",
        "x-turnstile-verified": "true",
    }

    resp = RQ.post(URL, json=payload, headers=headers, stream=stream, timeout=timeout)

    if resp.status_code != 200:
        return

    for raw in resp.iter_lines():
        if not raw:
            continue
        line = raw.decode("utf-8", errors="replace").strip()
        if not line.startswith("data:"):
            continue
        # Strip the "data:" prefix robustly — the original split on "data: "
        # (with a space) and raised IndexError on "data:{...}" frames.
        try:
            data = json.loads(line[len("data:"):].lstrip())
        except ValueError:
            continue
        if not isinstance(data, dict):
            continue

        dtype = data.get("type")
        if dtype == "reasoning-delta":
            if not in_reasoning:
                in_reasoning = True
                yield "<think>\n"
            if "delta" in data:
                yield data["delta"]
        elif dtype == "text-delta":
            if in_reasoning:
                in_reasoning = False
                yield "\n</think>\n"
            if "delta" in data:
                yield data["delta"]
| | |
import uuid  # NOTE(review): duplicate of the top-of-file `import uuid`; harmless but removable
| |
|
| |
|
def GROQ(
    RQ: Any,
    api_key: str,
    messages: List[Dict],
    model: str = "deepseek-ai/deepseek-r1-distill-qwen-32b",
    max_token: int = 40960,
    stream: bool = True,
    timeout: Optional[float] = None,
):
    """Stream text from Groq's OpenAI-compatible chat completions API.

    Reasoning deltas are wrapped in "<think>...</think>" markers. `max_token`
    is unused (original behavior: not forwarded). Produces no chunks on a
    non-200 response.
    """
    in_reasoning = False
    URL = "https://api.groq.com/openai/v1/chat/completions"

    payload = {
        "model": model,
        "messages": messages,
        "temperature": 0.9,
        "stop": None,
        "stream": stream,
    }

    headers = {
        "Authorization": f"Bearer {api_key}", "Content-Type": "application/json"
    }

    resp = RQ.post(URL, json=payload, headers=headers, stream=stream, timeout=timeout)

    if resp.status_code != 200:
        return

    for raw in resp.iter_lines():
        if not raw:
            continue
        line = raw.decode("utf-8", errors="replace").strip()
        if not line.startswith("data:"):
            continue
        data_json = line[len("data:"):].lstrip()
        if data_json == "[DONE]":
            continue
        try:
            data = json.loads(data_json)
        except ValueError:
            # FIX: the original fell through here without `continue` and then
            # dereferenced a stale (or unbound) `data` from a prior iteration.
            continue

        try:
            delta = data["choices"][0]["delta"]
        except (KeyError, IndexError, TypeError):
            continue

        reasoning = delta.get("reasoning")
        if reasoning:
            if not in_reasoning:
                in_reasoning = True
                yield "<think>\n"
            yield reasoning
        elif "reasoning" not in delta:
            # Mirrors the original control flow: content is only emitted on
            # chunks that carry no "reasoning" key at all.
            if in_reasoning:
                in_reasoning = False
                yield "</think>\n\n"
            content = delta.get("content")
            if content:
                yield content
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| | |
| | |
| | |
| |
|
# Flask application and a single shared curl_cffi session (impersonates
# Chrome 110 TLS/fingerprint); the session is passed to every provider call.
app = Flask(__name__)
RQ = Session(impersonate="chrome110")
| |
|
| |
|
| | |
| | |
| | |
| |
|
class Config:
    """Service-wide defaults applied when a request omits a field."""
    DEFAULT_PROVIDER = "1"        # key into PROVIDERS
    DEFAULT_MODEL = "llama-3.3-70b-versatile"
    DEFAULT_MAX_TOKENS = 512
    DEFAULT_TEMPERATURE = 0.7     # NOTE(review): parsed but not forwarded by providers
    TIMEOUT = 30.0                # seconds, passed to every upstream request
    STREAM = True
| |
|
| |
|
# Dispatch table: provider id -> generator function plus its model list
# (None = no local model list kept for that provider).
PROVIDERS: Dict[str, Dict[str, Any]] = {
    "1": {"func": GROQ, "models": None},           # Groq OpenAI-compatible API
    "2": {"func": Adarsh_Personal, "models": M1},  # hadadxyz HF space
    "3": {"func": QWEN, "models": M2},             # teichai HF space
    "4": {"func": FREEGPT, "models": M3},          # llmchat.in
}
| |
|
| |
|
| | |
| | |
| | |
| |
|
@dataclass
class ChatRequest:
    """A normalized chat-completion request parsed from a JSON payload."""

    api_key: str
    messages: List[Dict[str, Any]]
    model: str
    provider: str
    max_tokens: int
    temperature: float
    stream: bool

    @classmethod
    def from_dict(cls, payload: Dict[str, Any]) -> "ChatRequest":
        """Build a ChatRequest, accepting several alias keys and filling defaults."""

        def first_truthy(*keys):
            # First truthy value among the aliases, else None.
            for key in keys:
                value = payload.get(key)
                if value:
                    return value
            return None

        msgs = first_truthy("messages", "message", "msgs") or []
        if isinstance(msgs, dict):
            msgs = [msgs]  # allow a single message object

        return cls(
            api_key=first_truthy("api_key", "key", "apikey"),
            messages=msgs,
            model=first_truthy("model", "model_name") or Config.DEFAULT_MODEL,
            provider=str(payload.get("provider", Config.DEFAULT_PROVIDER)),
            max_tokens=int(payload.get("max_tokens", Config.DEFAULT_MAX_TOKENS)),
            temperature=float(payload.get("temperature", Config.DEFAULT_TEMPERATURE)),
            stream=bool(payload.get("stream", Config.STREAM)),
        )
| |
|
| |
|
| | |
| | |
| | |
| |
|
def stream_chat(req: ChatRequest):
    """Yield SSE-formatted chunks for *req* from its provider's generator.

    Emits `data: {"response": ...}` frames, a terminal `data: [DONE]` frame,
    and an error frame if the provider raises.
    """
    entry = PROVIDERS.get(req.provider)
    if not entry:
        yield json.dumps({"error": "Invalid provider"}) + "\n"
        return

    try:
        chunks = entry["func"](
            RQ,
            req.api_key,
            req.messages,
            req.model,
            req.max_tokens,
            req.stream,
            Config.TIMEOUT,
        )
        for chunk in chunks:
            if chunk:
                yield f"data: {json.dumps({'response': chunk})}\n\n"
        yield "data: [DONE]\n\n"
    except Exception as exc:
        # Boundary handler: surface the failure to the client as an SSE frame.
        yield f"data: {json.dumps({'error': str(exc)})}\n\n"
| |
|
| |
|
| | |
| | |
| | |
| |
|
@app.route("/v1/chat/completions", methods=["POST"])
def generate():
    """Chat completions endpoint.

    Streams SSE when `stream` is true; otherwise drains the generator and
    returns the joined text as a single JSON response.
    """
    payload = request.get_json(silent=True)
    if not payload:
        return jsonify({"error": "Invalid JSON body"}), 400

    req = ChatRequest.from_dict(payload)

    if not req.api_key or not req.messages:
        return jsonify({"error": "api_key and messages are required"}), 400

    if req.stream:
        return Response(
            stream_with_context(stream_chat(req)),
            content_type="text/event-stream",
            headers={
                "Cache-Control": "no-cache",
                "X-Accel-Buffering": "no",
            },
        )

    # Non-streaming: collect the "response" fragments from the SSE frames.
    final = []
    for part in stream_chat(req):
        if not part.startswith("data:"):
            continue
        body = part[len("data:"):].strip()
        if body == "[DONE]":
            # FIX: the original passed "[DONE]" to json.loads, which raised
            # an uncaught ValueError and turned every non-stream call into a 500.
            continue
        try:
            data = json.loads(body)
        except ValueError:
            continue
        if "response" in data:
            final.append(data["response"])

    return jsonify({"response": "".join(final)})
| |
|
| |
|
@app.route("/v1/models", methods=["GET"])
def info():
    """Return the full provider/model catalog as JSON."""
    catalog = get_models()
    return jsonify({"models": catalog})
| |
|
| |
|
| | |
| | |
| | |
| |
|
| | |
| | |
| |
|
if __name__ == "__main__":
    # Development entry point; run behind a WSGI server in production.
    app.run()