# Spaces:
# Sleeping
# Sleeping
| from dataclasses import dataclass | |
| from typing import List, Dict, Any, AsyncGenerator, Optional | |
| import re | |
| import orjson | |
| import httpx | |
| import json | |
| from fastapi import FastAPI, Request, HTTPException | |
| from fastapi.responses import StreamingResponse | |
def get_models():
    """Build the static provider/model catalog served by the models endpoint.

    Returns:
        dict with two keys:
          * "Providers": ordered list of provider ids ("1".."5").
          * "Models": mapping of provider id -> list of
            {"id": <model id>, "owned_by": <vendor>} entries.

    Note: the previous hand-written literal listed four models twice under
    provider "5"; the catalog below keeps each model once.
    """
    # (model id, owner) pairs per provider — compact so adding a model is a
    # one-line change.  Order is preserved in the returned lists.
    catalog: Dict[str, List[tuple]] = {
        "1": [
            ("openai/gpt-oss-120b", "OpenAI"),
            ("moonshotai/kimi-k2-instruct", "Moonshot AI"),
            ("canopylabs/orpheus-v1-english", "Canopy Labs"),
            ("llama-3.1-8b-instant", "Meta"),
            ("whisper-large-v3", "OpenAI"),
            ("meta-llama/llama-4-scout-17b-16e-instruct", "Meta"),
            ("allam-2-7b", "SDAIA"),
            ("groq/compound", "Groq"),
            ("canopylabs/orpheus-arabic-saudi", "Canopy Labs"),
            ("llama-3.3-70b-versatile", "Meta"),
            ("qwen/qwen3-32b", "Alibaba Cloud"),
            ("meta-llama/llama-prompt-guard-2-22m", "Meta"),
            ("groq/compound-mini", "Groq"),
            ("meta-llama/llama-guard-4-12b", "Meta"),
            ("openai/gpt-oss-20b", "OpenAI"),
            ("openai/gpt-oss-safeguard-20b", "OpenAI"),
            ("meta-llama/llama-4-maverick-17b-128e-instruct", "Meta"),
            ("moonshotai/kimi-k2-instruct-0905", "Moonshot AI"),
        ],
        "2": [
            ("aisingapore/gemma-sea-lion-v4-27b-it", "AI Singapore"),
            ("defog/sqlcoder-7b-2", "Defog"),
            ("ibm-granite/granite-4.0-h-micro", "IBM"),
            ("meta/llama-3.1-8b-instruct", "Meta"),
            ("microsoft/phi-2", "Microsoft"),
            ("qwen/qwen3-30b-a3b-fp8", "Alibaba Cloud"),
            ("qwen/qwq-32b", "Alibaba Cloud"),
        ],
        "3": [
            ("zai-org/glm-4.6", "Zhipu AI"),
            ("openai/gpt-5-nano-2025-08-07", "OpenAI"),
            ("deepseek-ai/deepseek-v3.2-thinking", "DeepSeek AI"),
            ("nvidia/nvidia-nemotron-3-nano-30b-a3b", "NVIDIA"),
            ("nvidia/nvidia-nemotron-3-nano-30b-a3b-thinking", "NVIDIA"),
            ("openai/gpt-5-mini-2025-08-07", "OpenAI"),
            ("qwen/qwen3-vl-235b-a22b-thinking", "Alibaba Cloud"),
            ("qwen/qwen3-vl-235b-a22b-instruct", "Alibaba Cloud"),
            ("perplexity/sonar", "Perplexity"),
            ("moonshotai/kimi-k2.5", "Moonshot AI"),
            ("anthropic/claude-haiku-4-5-20251001", "Anthropic"),
            ("google/gemini-2.5-flash-lite", "Google"),
            ("moonshotai/kimi-k2-thinking", "Moonshot AI"),
            ("mistralai/devstral-2-123b-instruct-2512", "Mistral AI"),
            ("mistralai/mistral-large-3-675b-instruct-2512", "Mistral AI"),
            ("openai/gpt-oss-safeguard-20b", "OpenAI"),
            ("openai/gpt-oss-120b", "OpenAI"),
        ],
        "4": [
            ("qwen3-4b-thinking-2507", "Alibaba Cloud"),
        ],
        "5": [
            ("meta/llama-3.1-70b-instruct", "Meta"),
            ("qwen/qwen2.5-coder-32b-instruct", "Alibaba Cloud"),
            ("deepseek-ai/deepseek-r1-distill-qwen-32b", "DeepSeek AI"),
            ("meta/llama-4-scout-17b-16e-instruct", "Meta"),
            ("google/gemma-3-12b-it", "Google"),
            ("mistralai/mistral-small-3.1-24b-instruct", "Mistral AI"),
            ("meta/llama-3.3-70b-instruct-fp8-fast", "Meta"),
            ("meta/llama-3.2-3b-instruct", "Meta"),
            ("meta/llama-3.2-1b-instruct", "Meta"),
            ("meta-llama/meta-llama-3-8b-instruct", "Meta"),
            ("meta/llama-3-8b-instruct", "Meta"),
            ("meta/llama-2-7b-chat-int8", "Meta"),
            ("meta/llama-2-7b-chat-fp16", "Meta"),
            ("meta/llama-3-8b-instruct-awq", "Meta"),
            ("google/gemma-7b-it", "Google"),
            ("google/gemma-2b-it-lora", "Google"),
            ("mistral/mistral-7b-instruct-v0.2", "Mistral AI"),
            ("mistral/mistral-7b-instruct-v0.2-lora", "Mistral AI"),
        ],
    }
    models = {
        provider: [{"id": model_id, "owned_by": owner} for model_id, owner in entries]
        for provider, entries in catalog.items()
    }
    return {"Providers": list(catalog), "Models": models}
# Build the catalog once at import time; served verbatim by the models endpoint.
try:
    MODEL_NAMES = get_models()
except Exception:
    # NOTE(review): this fallback's shape ({"GROQ": ..., "LLMC": ...}) does not
    # match get_models()'s {"Providers": ..., "Models": ...} structure, so
    # consumers would see a different schema if this path were ever taken.
    # get_models() only builds literals, so this branch looks unreachable —
    # confirm intent before relying on it.
    MODEL_NAMES = {"GROQ": "GROQ-FALLBACK", "LLMC": "LLMC-FALLBACK"}
class Config:
    """Central tunables for the proxy (class-level constants only)."""

    # Provider key used when a request names none.
    DEFAULT_PROVIDER = "1"
    # Last-resort model when neither the request nor the provider config has one.
    DEFAULT_MODEL = "llama-3.3-70b-versatile"
    DEFAULT_TEMPERATURE = 0.7
    # Byte chunk size when reading upstream streaming responses.
    CHUNK_SIZE = 1000
    # httpx connection-pool cap shared by all requests.
    MAX_CONNECTIONS = 200
    HTTP2 = True
    # Upstream request timeout, seconds.
    TIMEOUT = 30.0
    # Minimum buffered bytes before a stream flush; 0 means flush every chunk.
    STREAM_BATCH_BYTES = 0
# Provider registry, keyed by provider id (looked up case-insensitively via
# .upper() in ChatService._get_provider_config).  Each entry may carry:
#   AUTH          - whether the caller's api_key is expected upstream
#   BASE_URL      - endpoint URL (may contain a {model} placeholder)
#   DEFAULT_MODEL - model used when the request does not name one
#   HEADERS       - header templates ({API} expands to the caller's api key)
#   PAYLOAD       - JSON body template ({messages}/{model}/{stream}/...)
PROVIDERS: Dict[str, Dict[str, Any]] = {
    # Groq's OpenAI-compatible endpoint (Bearer key required).
    "1": {
        "AUTH": True,
        "BASE_URL": "https://api.groq.com/openai/v1/chat/completions",
        "DEFAULT_MODEL": "qwen/qwen3-32b",
        "HEADERS": {"Authorization": "Bearer {API}", "Content-Type": "application/json"},
        "PAYLOAD": {
            "model": "{model}",
            "messages": "{messages}",
            "temperature": "{temperature}",
            "stop": None,
            "stream": "{stream}",
        },
    },
    # llmchat.in streaming endpoint — no auth; the model rides in the URL.
    "2": {
        "AUTH": False,
        "BASE_URL": "https://llmchat.in/inference/stream?model={model}",
        "DEFAULT_MODEL": "@cf/meta/llama-3.1-8b-instruct",
        "HEADERS": {
            "Content-Type": "application/json",
            "Accept": "*/*",
            "Origin": "https://llmchat.in",
            "Referer": "https://llmchat.in/",
        },
        "PAYLOAD": {"messages": "{messages}", "stream": "{stream}"},
    },
    # Providers 3-5 all relay through the same Hugging Face Space with a
    # hard-coded upstream api_key and only the inner "provider" selector varies.
    "3": {
        "AUTH": False,
        "BASE_URL": "https://adarshji-md.hf.space/gen",
        "DEFAULT_MODEL": "openai/gpt-oss-120b",
        "PAYLOAD": {"api_key": "LOL", "provider": "1", "messages": "{messages}", "model": "{model}", "stream": "{stream}"},
    },
    "4": {
        "AUTH": False,
        "BASE_URL": "https://adarshji-md.hf.space/gen",
        "DEFAULT_MODEL": "qwen3-4b-thinking-2507",
        "PAYLOAD": {"api_key": "LOL", "provider": "2", "messages": "{messages}", "model": "{model}", "stream": "{stream}"},
    },
    "5": {
        "AUTH": False,
        "BASE_URL": "https://adarshji-md.hf.space/gen",
        "DEFAULT_MODEL": "deepseek-ai/deepseek-r1-distill-qwen-32b",
        "PAYLOAD": {"api_key": "LOL", "provider": "3", "messages": "{messages}", "model": "{model}", "stream": "{stream}"},
    },
}
_placeholder_re = re.compile(r"\{(.*?)\}")


def apply_values_to_template(template: Any, values: Dict[str, Any]) -> Any:
    """Recursively fill ``{placeholder}`` markers in a template structure.

    Dicts and lists are walked element by element.  A string that consists of
    exactly one placeholder is replaced by the raw value (preserving its type,
    e.g. a messages list); any other string is rendered with ``str.format``,
    where non-primitive values are JSON-encoded first.  Unknown placeholders
    and formatting failures leave the template text untouched.
    """
    if isinstance(template, dict):
        return {key: apply_values_to_template(item, values) for key, item in template.items()}
    if isinstance(template, list):
        return [apply_values_to_template(item, values) for item in template]
    if not isinstance(template, str):
        # Numbers, booleans, None, etc. pass through unchanged.
        return template

    whole = _placeholder_re.fullmatch(template.strip())
    if whole is not None:
        # Whole-string placeholder: hand back the raw value so lists/dicts
        # (like the messages array) keep their type.
        return values.get(whole.group(1), template)

    rendered: Dict[str, Any] = {}
    for key, val in values.items():
        if isinstance(val, (str, int, float, bool, type(None))):
            rendered[key] = val
        else:
            rendered[key] = orjson.dumps(val).decode("utf-8")
    try:
        return template.format(**rendered)
    except Exception:
        # Missing keys or stray braces: return the template verbatim.
        return template
def build_values_from_request(req: "ChatRequest") -> Dict[str, Any]:
    """Flatten a ChatRequest into the placeholder-value map used by templates.

    Both ``API``/``api_key`` and ``message``/``messages`` aliases are exposed
    because different provider templates use different spellings.
    """
    model = req.model if req.model else None  # normalize "" -> None
    return {
        "api_key": req.api_key,
        "API": req.api_key,
        "messages": req.messages,
        "message": req.messages,
        "model": model,
        "temperature": req.temperature,
        "stream": req.stream,
    }
@dataclass
class ChatRequest:
    """Normalized chat-completion request.

    The ``@dataclass`` decorator was missing: the class had only annotations
    and no generated ``__init__``, so the keyword construction in
    ``from_dict`` raised ``TypeError`` at runtime.
    """

    # Upstream API key (may be None for providers with AUTH=False).
    api_key: str
    # OpenAI-style message dicts.
    messages: List[Dict[str, Any]]
    # Optional model id; provider default applies when None.
    model: Optional[str] = None
    provider: str = Config.DEFAULT_PROVIDER
    temperature: float = Config.DEFAULT_TEMPERATURE
    stream: bool = True

    @staticmethod
    def from_dict(payload: Dict[str, Any]) -> "ChatRequest":
        """Build a ChatRequest from a loosely-shaped JSON body.

        Accepts several aliases for the key/messages/model fields, coerces a
        single message dict into a one-element list, and upper-cases the
        provider id to match the registry lookup.
        """
        api_key = payload.get("api_key") or payload.get("key") or payload.get("apikey")
        messages = payload.get("messages") or payload.get("message") or payload.get("msgs")
        model = payload.get("model_name") or payload.get("model")
        provider = (payload.get("provider") or Config.DEFAULT_PROVIDER).upper()
        temperature = payload.get("temperature", Config.DEFAULT_TEMPERATURE)
        stream = payload.get("stream", True)
        if messages is None:
            messages = []
        if isinstance(messages, dict):
            messages = [messages]
        return ChatRequest(api_key=api_key, messages=messages, model=model, provider=provider, temperature=temperature, stream=stream)
class AsyncUpstreamClient:
    """Async HTTP client wrapper around a shared httpx connection pool.

    Fixes over the previous version of ``stream_post``: removed bare
    ``except:`` clauses, removed always-executing debug ``print`` calls, and
    stopped reusing a *stale* ``data_json``/``data`` from a previous line when
    the current line failed to parse (which could re-emit old tokens).
    """

    def __init__(self):
        # One pooled client reused for every upstream call.
        limits = httpx.Limits(max_connections=Config.MAX_CONNECTIONS)
        self._client = httpx.AsyncClient(timeout=Config.TIMEOUT, limits=limits, http2=Config.HTTP2)

    def _prepare_headers(self, headers_template: Dict[str, str], values: Dict[str, Any]) -> Dict[str, str]:
        """Render a header template, dropping entries that resolve to None."""
        headers: Dict[str, str] = {}
        for name, template in headers_template.items():
            rendered = apply_values_to_template(template, values)
            if rendered is None:
                continue
            headers[name] = rendered if isinstance(rendered, str) else str(rendered)
        return headers

    async def close(self):
        """Close the pooled connections (call on application shutdown)."""
        await self._client.aclose()

    async def post_json(self, url: str, headers: Dict[str, str], payload: Any) -> Dict[str, Any]:
        """Non-streaming POST; raises httpx.HTTPStatusError on 4xx/5xx."""
        resp = await self._client.post(url, headers=headers, json=payload)
        resp.raise_for_status()
        return resp.json()

    def _is_metadata_blob(self, obj: Dict[str, Any]) -> bool:
        """Heuristic: True when obj looks like stream metadata (ids, usage,
        tool calls, or a choices entry with no content) rather than a token."""
        if not isinstance(obj, dict):
            return False
        if ("id" in obj and "object" in obj) or "x_groq" in obj or "tool_calls" in obj or ("usage" in obj and isinstance(obj.get("usage"), dict)):
            return True
        if obj.get("choices") and isinstance(obj.get("choices"), list):
            try:
                c0 = obj["choices"][0]
                delta = c0.get("delta", {}) if isinstance(c0, dict) else {}
                content = delta.get("content") or (c0.get("message", {}) or {}).get("content")
                if not content:
                    return True
            except Exception:
                return False
        return False

    async def stream_post(self, url: str, headers: Dict[str, str], payload: Any) -> AsyncGenerator[bytes, None]:
        """Stream an upstream SSE/NDJSON response, re-emitting it as NDJSON
        frames of the form ``{"response": <text>}\\n``.

        Reasoning tokens (OpenAI-style ``delta.reasoning``) are wrapped in a
        single ``<think>\\n`` ... ``</think>\\n\\n`` pair around the run of
        reasoning output. Lines that are not JSON (keep-alives, ``[DONE]``)
        are skipped.
        """
        async with self._client.stream("POST", url, headers=headers, json=payload) as resp:
            resp.raise_for_status()
            buf = b""
            in_reasoning = False  # True while a <think> block is open
            async for chunk in resp.aiter_bytes(chunk_size=Config.CHUNK_SIZE):
                if not chunk:
                    continue
                buf += chunk
                # SSE events are separated by a blank line.
                while b"\n\n" in buf:
                    event, buf = buf.split(b"\n\n", 1)
                    for raw_line in event.splitlines():
                        if not raw_line:
                            continue
                        line = raw_line.decode("utf-8", errors="replace")
                        # Strip the SSE "data: " prefix when present; otherwise
                        # treat the whole line as the payload (NDJSON upstreams).
                        payload_str = line.split("data: ", 1)[1] if "data: " in line else line
                        if payload_str == "[DONE]":
                            continue
                        try:
                            data = json.loads(payload_str)
                        except ValueError:
                            continue  # not JSON — skip, never reuse stale data
                        if not isinstance(data, dict):
                            continue
                        delta: Dict[str, Any] = {}
                        choices = data.get("choices")
                        if isinstance(choices, list) and choices and isinstance(choices[0], dict):
                            delta = choices[0].get("delta") or {}
                        reasoning = delta.get("reasoning")
                        if reasoning:
                            if not in_reasoning:
                                in_reasoning = True
                                yield orjson.dumps({"response": "<think>\n"}) + b"\n"
                            yield orjson.dumps({"response": reasoning}) + b"\n"
                            continue
                        if "response" in data:
                            # Upstream already speaks our NDJSON format (providers 3-5).
                            yield orjson.dumps({"response": data["response"]}) + b"\n"
                            continue
                        if "content" in delta:
                            if in_reasoning:
                                in_reasoning = False
                                yield orjson.dumps({"response": "</think>\n\n"}) + b"\n"
                            yield orjson.dumps({"response": delta["content"]}) + b"\n"
class ChatService:
    """Orchestrates provider selection, template rendering, and upstream calls."""

    def __init__(self, client: Optional[AsyncUpstreamClient] = None):
        # Allow injection for tests; otherwise own a fresh upstream client.
        self.client = client or AsyncUpstreamClient()

    def _get_provider_config(self, provider_name: str) -> Dict[str, Any]:
        """Look up a provider entry, falling back to the default provider's."""
        fallback = PROVIDERS.get(Config.DEFAULT_PROVIDER, {})
        return PROVIDERS.get(provider_name.upper(), fallback)

    def build_request_for_provider(self, req: ChatRequest) -> Dict[str, Any]:
        """Render url, headers, and payload for the request's provider."""
        provider_cfg = self._get_provider_config(req.provider)
        values = build_values_from_request(req)
        if not values.get("model"):
            # No model on the request: use the provider default, then global.
            values["model"] = provider_cfg.get("DEFAULT_MODEL") or Config.DEFAULT_MODEL
        return {
            "url": apply_values_to_template(provider_cfg.get("BASE_URL", ""), values),
            "headers": self.client._prepare_headers(provider_cfg.get("HEADERS", {}), values),
            "payload": apply_values_to_template(provider_cfg.get("PAYLOAD", {}), values),
        }

    async def generate(self, req: ChatRequest) -> str:
        """Non-streaming completion; returns the assistant text."""
        prepared = self.build_request_for_provider(req)
        result = await self.client.post_json(prepared["url"], prepared["headers"], prepared["payload"])
        try:
            return result["choices"][0]["message"]["content"]
        except Exception:
            # Fall back to our own NDJSON shape, then to raw JSON text.
            if isinstance(result, dict) and "response" in result:
                return result["response"]
            return orjson.dumps(result).decode("utf-8")

    async def generate_stream(self, req: ChatRequest) -> AsyncGenerator[bytes, None]:
        """Streaming completion; passes NDJSON token frames straight through."""
        prepared = self.build_request_for_provider(req)
        async for frame in self.client.stream_post(prepared["url"], prepared["headers"], prepared["payload"]):
            yield frame
# FastAPI application plus one shared ChatService, so a single pooled HTTP
# client is reused across all requests.
app = FastAPI(title="High-speed Chat Proxy")
service = ChatService()
@app.on_event("shutdown")
async def shutdown_event():
    """Close the shared upstream HTTP client when the app stops.

    Without the ``@app.on_event`` registration FastAPI never invoked this
    handler, leaking pooled connections on shutdown.
    """
    try:
        await service.client.close()
    except Exception:
        # Best-effort cleanup — never fail the shutdown sequence.
        pass
# NOTE(review): the handler had no route decorator, so it was never reachable.
# The OpenAI-compatible path is assumed from the payload shape — confirm the
# original mount point.
@app.post("/v1/chat/completions")
async def completions(request: Request):
    """Chat-completion endpoint.

    Validates the body, then either streams NDJSON frames (prefixed with
    ``data: `` and terminated by ``data: [DONE]``) or returns a single
    ``{"response": ...}`` line for non-streaming requests.

    Raises:
        HTTPException(400) when api_key or messages is missing.
    """
    body = await request.json()
    req = ChatRequest.from_dict(body)
    if not req.api_key or not req.messages:
        raise HTTPException(status_code=400, detail="api_key and messages required")

    async def streamer():
        if req.stream:
            buf = bytearray()
            threshold = Config.STREAM_BATCH_BYTES
            async for chunk_bytes in service.generate_stream(req):
                if not chunk_bytes:
                    continue
                buf.extend(chunk_bytes)
                # threshold 0 means every chunk is flushed immediately.
                if len(buf) >= threshold:
                    yield b"data: " + bytes(buf)
                    buf.clear()
            if buf:
                yield b"data: " + bytes(buf)
            yield b"data: [DONE]\n\n"
        else:
            text = await service.generate(req)
            yield orjson.dumps({"response": text}) + b"\n"

    return StreamingResponse(streamer(), media_type="application/x-ndjson", headers={"Cache-Control": "no-cache"})
# NOTE(review): route path assumed from the handler name — the decorator was
# missing, leaving the catalog unreachable; confirm the original mount point.
@app.get("/models")
async def models():
    """Expose the static provider/model catalog built at import time."""
    return {"models": MODEL_NAMES}
@app.get("/")
async def root():
    """Health/landing endpoint; route registration was missing."""
    return {"service": "High-speed Chat Proxy", "status": "running"}