# Source: Hugging Face Space "AdarshJi/FALTU_ADARSH" (status: Sleeping; file size: 19,368 bytes)

import json
import logging
import re
from dataclasses import dataclass
from typing import List, Dict, Any, AsyncGenerator, Optional

import httpx
import orjson
from fastapi import FastAPI, Request, HTTPException
from fastapi.responses import StreamingResponse




def get_models():
    """Return the static catalog of providers and the models listed for each.

    Returns:
        A dict with two keys:

        * ``"Providers"`` - list of provider ids (digit strings).
        * ``"Models"`` - mapping of provider id to a list of
          ``{"id": ..., "owned_by": ...}`` entries.

    A fresh structure is built on every call. Each model id appears at most
    once per provider (earlier revisions listed four provider "5" models
    twice).
    """
    catalog = {
        "Providers": ["1", "2", "3", "4", "5"],
        "Models": {
            "1": [
                {"id": "openai/gpt-oss-120b", "owned_by": "OpenAI"},
                {"id": "moonshotai/kimi-k2-instruct", "owned_by": "Moonshot AI"},
                {"id": "canopylabs/orpheus-v1-english", "owned_by": "Canopy Labs"},
                {"id": "llama-3.1-8b-instant", "owned_by": "Meta"},
                {"id": "whisper-large-v3", "owned_by": "OpenAI"},
                {"id": "meta-llama/llama-4-scout-17b-16e-instruct", "owned_by": "Meta"},
                {"id": "allam-2-7b", "owned_by": "SDAIA"},
                {"id": "groq/compound", "owned_by": "Groq"},
                {"id": "canopylabs/orpheus-arabic-saudi", "owned_by": "Canopy Labs"},
                {"id": "llama-3.3-70b-versatile", "owned_by": "Meta"},
                {"id": "qwen/qwen3-32b", "owned_by": "Alibaba Cloud"},
                {"id": "meta-llama/llama-prompt-guard-2-22m", "owned_by": "Meta"},
                {"id": "groq/compound-mini", "owned_by": "Groq"},
                {"id": "meta-llama/llama-guard-4-12b", "owned_by": "Meta"},
                {"id": "openai/gpt-oss-20b", "owned_by": "OpenAI"},
                {"id": "openai/gpt-oss-safeguard-20b", "owned_by": "OpenAI"},
                {"id": "meta-llama/llama-4-maverick-17b-128e-instruct", "owned_by": "Meta"},
                {"id": "moonshotai/kimi-k2-instruct-0905", "owned_by": "Moonshot AI"},
            ],
            "2": [
                {"id": "aisingapore/gemma-sea-lion-v4-27b-it", "owned_by": "AI Singapore"},
                {"id": "defog/sqlcoder-7b-2", "owned_by": "Defog"},
                {"id": "ibm-granite/granite-4.0-h-micro", "owned_by": "IBM"},
                {"id": "meta/llama-3.1-8b-instruct", "owned_by": "Meta"},
                {"id": "microsoft/phi-2", "owned_by": "Microsoft"},
                {"id": "qwen/qwen3-30b-a3b-fp8", "owned_by": "Alibaba Cloud"},
                {"id": "qwen/qwq-32b", "owned_by": "Alibaba Cloud"},
            ],
            "3": [
                {"id": "zai-org/glm-4.6", "owned_by": "Zhipu AI"},
                {"id": "openai/gpt-5-nano-2025-08-07", "owned_by": "OpenAI"},
                {"id": "deepseek-ai/deepseek-v3.2-thinking", "owned_by": "DeepSeek AI"},
                {"id": "nvidia/nvidia-nemotron-3-nano-30b-a3b", "owned_by": "NVIDIA"},
                {"id": "nvidia/nvidia-nemotron-3-nano-30b-a3b-thinking", "owned_by": "NVIDIA"},
                {"id": "openai/gpt-5-mini-2025-08-07", "owned_by": "OpenAI"},
                {"id": "qwen/qwen3-vl-235b-a22b-thinking", "owned_by": "Alibaba Cloud"},
                {"id": "qwen/qwen3-vl-235b-a22b-instruct", "owned_by": "Alibaba Cloud"},
                {"id": "perplexity/sonar", "owned_by": "Perplexity"},
                {"id": "moonshotai/kimi-k2.5", "owned_by": "Moonshot AI"},
                {"id": "anthropic/claude-haiku-4-5-20251001", "owned_by": "Anthropic"},
                {"id": "google/gemini-2.5-flash-lite", "owned_by": "Google"},
                {"id": "moonshotai/kimi-k2-thinking", "owned_by": "Moonshot AI"},
                {"id": "mistralai/devstral-2-123b-instruct-2512", "owned_by": "Mistral AI"},
                {"id": "mistralai/mistral-large-3-675b-instruct-2512", "owned_by": "Mistral AI"},
                {"id": "openai/gpt-oss-safeguard-20b", "owned_by": "OpenAI"},
                {"id": "openai/gpt-oss-120b", "owned_by": "OpenAI"},
            ],
            "4": [
                {"id": "qwen3-4b-thinking-2507", "owned_by": "Alibaba Cloud"},
            ],
            "5": [
                {"id": "meta/llama-3.1-70b-instruct", "owned_by": "Meta"},
                {"id": "qwen/qwen2.5-coder-32b-instruct", "owned_by": "Alibaba Cloud"},
                {"id": "deepseek-ai/deepseek-r1-distill-qwen-32b", "owned_by": "DeepSeek AI"},
                {"id": "meta/llama-4-scout-17b-16e-instruct", "owned_by": "Meta"},
                {"id": "google/gemma-3-12b-it", "owned_by": "Google"},
                {"id": "mistralai/mistral-small-3.1-24b-instruct", "owned_by": "Mistral AI"},
                {"id": "meta/llama-3.3-70b-instruct-fp8-fast", "owned_by": "Meta"},
                {"id": "meta/llama-3.2-3b-instruct", "owned_by": "Meta"},
                {"id": "meta/llama-3.2-1b-instruct", "owned_by": "Meta"},
                {"id": "meta-llama/meta-llama-3-8b-instruct", "owned_by": "Meta"},
                {"id": "meta/llama-3-8b-instruct", "owned_by": "Meta"},
                {"id": "meta/llama-2-7b-chat-int8", "owned_by": "Meta"},
                {"id": "meta/llama-2-7b-chat-fp16", "owned_by": "Meta"},
                {"id": "meta/llama-3-8b-instruct-awq", "owned_by": "Meta"},
                {"id": "google/gemma-7b-it", "owned_by": "Google"},
                {"id": "google/gemma-2b-it-lora", "owned_by": "Google"},
                {"id": "mistral/mistral-7b-instruct-v0.2", "owned_by": "Mistral AI"},
                {"id": "mistral/mistral-7b-instruct-v0.2-lora", "owned_by": "Mistral AI"},
            ],
        },
    }

    return catalog



# Build the model catalog once at import time; the /v1/models route returns it.
try:
    MODEL_NAMES = get_models()
except Exception:
    # NOTE(review): this fallback does not have the {"Providers", "Models"}
    # shape that get_models() returns - confirm whether any client relies on
    # these placeholder keys before aligning the two.
    MODEL_NAMES = {"GROQ": "GROQ-FALLBACK", "LLMC": "LLMC-FALLBACK"}


class Config:
    """Module-wide constants for request defaults, the HTTP client and streaming."""

    # --- Defaults applied when a request leaves a field out ---
    DEFAULT_PROVIDER = "1"  # key into PROVIDERS
    DEFAULT_MODEL = "llama-3.3-70b-versatile"  # used when a provider entry has no DEFAULT_MODEL
    DEFAULT_TEMPERATURE = 0.7

    # --- Upstream httpx.AsyncClient settings ---
    TIMEOUT = 30.0
    MAX_CONNECTIONS = 200
    HTTP2 = True

    # --- Streaming ---
    CHUNK_SIZE = 1_000  # chunk_size passed to the upstream response's aiter_bytes()
    STREAM_BATCH_BYTES = 0  # flush threshold for the outgoing stream; 0 flushes every chunk

def _hf_space_provider(upstream_provider: str, default_model: str) -> Dict[str, Any]:
    """Build the entry for a provider reached through the shared hf.space ``/gen`` endpoint.

    The three such entries differ only in the ``provider`` value sent in the
    payload and in their default model.
    """
    return {
        "AUTH": False,
        "BASE_URL": "https://adarshji-md.hf.space/gen",
        "DEFAULT_MODEL": default_model,
        "PAYLOAD": {
            "api_key": "LOL",
            "provider": upstream_provider,
            "messages": "{messages}",
            "model": "{model}",
            "stream": "{stream}",
        },
    }


# Upstream provider table, keyed by provider id.  String values of the form
# "{name}" are placeholders filled in by apply_values_to_template().
PROVIDERS: Dict[str, Dict[str, Any]] = {
    "1": {
        "AUTH": True,
        "BASE_URL": "https://api.groq.com/openai/v1/chat/completions",
        "DEFAULT_MODEL": "qwen/qwen3-32b",
        "HEADERS": {
            "Authorization": "Bearer {API}",
            "Content-Type": "application/json",
        },
        "PAYLOAD": {
            "model": "{model}",
            "messages": "{messages}",
            "temperature": "{temperature}",
            "stop": None,
            "stream": "{stream}",
        },
    },
    "2": {
        "AUTH": False,
        "BASE_URL": "https://llmchat.in/inference/stream?model={model}",
        "DEFAULT_MODEL": "@cf/meta/llama-3.1-8b-instruct",
        "HEADERS": {
            "Content-Type": "application/json",
            "Accept": "*/*",
            "Origin": "https://llmchat.in",
            "Referer": "https://llmchat.in/",
        },
        "PAYLOAD": {
            "messages": "{messages}",
            "stream": "{stream}",
        },
    },
    "3": _hf_space_provider("1", "openai/gpt-oss-120b"),
    "4": _hf_space_provider("2", "qwen3-4b-thinking-2507"),
    "5": _hf_space_provider("3", "deepseek-ai/deepseek-r1-distill-qwen-32b"),
}

# Matches a string that is exactly one "{name}" placeholder.  Braces are
# excluded from the name so that "{a}-{b}" is not mistaken for a single
# placeholder called "a}-{b" (which would leave the template unformatted).
_placeholder_re = re.compile(r"\{([^{}]*)\}")


def apply_values_to_template(template: Any, values: Dict[str, Any]) -> Any:
    """Recursively substitute ``{name}`` placeholders in a template structure.

    Args:
        template: A string, dict, list, or any other value.  Dicts and lists
            are walked recursively; other non-string values are returned as-is.
        values: Mapping of placeholder name to replacement value.

    Returns:
        * For a string that is exactly one placeholder (ignoring surrounding
          whitespace): the raw value from ``values`` with its original type
          (so a list, float or bool is not stringified), or the template
          itself when the name is unknown.
        * For any other string: the result of ``str.format`` with the values,
          where non-primitive values are JSON-encoded first.  If formatting
          fails (unknown name, stray brace, ...) the template is returned
          unchanged.
        * For dicts/lists: a new container with every element processed.
    """
    if isinstance(template, str):
        m = _placeholder_re.fullmatch(template.strip())
        if m:
            return values.get(m.group(1), template)
        # Strings and JSON primitives format fine on their own; everything
        # else (lists, dicts, ...) is embedded as its JSON text.
        str_values = {
            k: (v if isinstance(v, (str, int, float, bool, type(None))) else orjson.dumps(v).decode("utf-8"))
            for k, v in values.items()
        }
        try:
            return template.format(**str_values)
        except (LookupError, ValueError, AttributeError, TypeError):
            # Best effort: a template we cannot format is passed through verbatim.
            return template
    if isinstance(template, dict):
        return {k: apply_values_to_template(v, values) for k, v in template.items()}
    if isinstance(template, list):
        return [apply_values_to_template(i, values) for i in template]
    return template

def build_values_from_request(req: "ChatRequest") -> Dict[str, Any]:
    """Collect the placeholder values a provider template may reference.

    The API key is exposed under both ``api_key`` and ``API``, and the message
    list under both ``messages`` and ``message``.  A falsy model (``None`` or
    an empty string) is reported as ``None``.
    """
    key = req.api_key
    conversation = req.messages
    chosen_model = req.model if req.model else None

    values: Dict[str, Any] = {}
    values["api_key"] = key
    values["API"] = key
    values["messages"] = conversation
    values["message"] = conversation
    values["model"] = chosen_model
    values["temperature"] = req.temperature
    values["stream"] = req.stream
    return values

@dataclass
class ChatRequest:
    """Normalised form of an incoming chat-completion request body."""

    # May be None when the client sent no key; the endpoint rejects that case.
    api_key: Optional[str]
    messages: List[Dict[str, Any]]
    model: Optional[str] = None
    provider: str = Config.DEFAULT_PROVIDER
    temperature: float = Config.DEFAULT_TEMPERATURE
    stream: bool = True

    @staticmethod
    def from_dict(payload: Dict[str, Any]) -> "ChatRequest":
        """Build a ChatRequest from a decoded JSON object.

        Accepted aliases:
            * api key:  ``api_key``, ``key``, ``apikey``
            * messages: ``messages``, ``message``, ``msgs``
            * model:    ``model_name``, ``model``

        Missing messages become an empty list and a single message dict is
        wrapped in a list.  ``provider`` falls back to
        ``Config.DEFAULT_PROVIDER``, ``temperature`` to
        ``Config.DEFAULT_TEMPERATURE`` and ``stream`` to ``True``.
        """
        api_key = payload.get("api_key") or payload.get("key") or payload.get("apikey")
        messages = payload.get("messages") or payload.get("message") or payload.get("msgs")
        model = payload.get("model_name") or payload.get("model")
        # Provider ids are digit strings, so JSON clients may well send a bare
        # number (``"provider": 1``).  Coerce to str before normalising instead
        # of failing with AttributeError on ``int.upper``.
        provider = str(payload.get("provider") or Config.DEFAULT_PROVIDER).strip().upper()
        temperature = payload.get("temperature", Config.DEFAULT_TEMPERATURE)
        stream = payload.get("stream", True)
        if messages is None:
            messages = []
        if isinstance(messages, dict):
            messages = [messages]
        return ChatRequest(
            api_key=api_key,
            messages=messages,
            model=model,
            provider=provider,
            temperature=temperature,
            stream=stream,
        )

class AsyncUpstreamClient:
    """Async HTTP client used to talk to the upstream chat providers.

    Wraps one ``httpx.AsyncClient`` configured from ``Config`` and knows how
    to turn an upstream server-sent-event stream into newline-delimited
    ``{"response": <text>}`` JSON frames.
    """

    # Markers wrapped around reasoning output in the forwarded stream.
    _THINK_OPEN = "<think>\n"
    _THINK_CLOSE = "</think>\n\n"

    def __init__(self):
        limits = httpx.Limits(max_connections=Config.MAX_CONNECTIONS)
        self._client = httpx.AsyncClient(timeout=Config.TIMEOUT, limits=limits, http2=Config.HTTP2)

    def _prepare_headers(self, headers_template: Dict[str, str], values: Dict[str, Any]) -> Dict[str, str]:
        """Fill the header template; headers that resolve to None are omitted."""
        headers = {}
        for k, v in headers_template.items():
            f = apply_values_to_template(v, values)
            if f is None:
                continue
            headers[k] = f if isinstance(f, str) else str(f)
        return headers

    async def close(self):
        """Close the underlying httpx client."""
        await self._client.aclose()

    async def post_json(self, url: str, headers: Dict[str, str], payload: Any) -> Dict[str, Any]:
        """POST ``payload`` as JSON and return the decoded JSON response.

        Raises:
            httpx.HTTPStatusError: if the upstream answers with a 4xx/5xx status.
        """
        resp = await self._client.post(url, headers=headers, json=payload)
        resp.raise_for_status()
        return resp.json()

    def _is_metadata_blob(self, obj: Dict[str, Any]) -> bool:
        """Return True when ``obj`` looks like a chunk without text content.

        That is: it carries id+object, ``x_groq``, ``tool_calls`` or a usage
        dict, or its first choice has no content in either ``delta`` or
        ``message``.  Not called by any code visible in this module.
        """
        if not isinstance(obj, dict):
            return False
        if ("id" in obj and "object" in obj) or "x_groq" in obj or "tool_calls" in obj or ("usage" in obj and isinstance(obj.get("usage"), dict)):
            return True
        if obj.get("choices") and isinstance(obj.get("choices"), list):
            try:
                c0 = obj["choices"][0]
                delta = c0.get("delta", {}) if isinstance(c0, dict) else {}
                content = delta.get("content") or (c0.get("message", {}) or {}).get("content")
                if not content:
                    return True
            except Exception:
                return False
        return False

    @staticmethod
    def _parse_sse_line(line: str) -> Optional[Dict[str, Any]]:
        """Decode one SSE line; return its JSON object or None if it carries none.

        Non-``data:`` lines (comments, ``event:``, ...), the ``[DONE]``
        sentinel, malformed JSON and non-object JSON all yield None.
        """
        if not line.startswith("data:"):
            return None
        # Take everything after the first "data:" so payload text that itself
        # contains "data: " is not truncated.
        body = line[len("data:"):].strip()
        if not body or body == "[DONE]":
            return None
        try:
            parsed = json.loads(body)
        except ValueError:
            logging.getLogger(__name__).warning(
                "dropping malformed upstream SSE payload (%d chars)", len(body)
            )
            return None
        return parsed if isinstance(parsed, dict) else None

    @staticmethod
    def _extract_token(data: Dict[str, Any]) -> tuple:
        """Classify a decoded chunk as ``(kind, text)``.

        ``kind`` is ``"reasoning"``, ``"response"``, ``"content"`` or None
        (nothing to forward).  Non-empty reasoning takes precedence, then a
        top-level ``response`` field, then the delta's ``content``.  Null
        values are never forwarded.
        """
        delta = None
        choices = data.get("choices")
        if isinstance(choices, list) and choices and isinstance(choices[0], dict):
            delta = choices[0].get("delta")
        if not isinstance(delta, dict):
            delta = {}

        if delta.get("reasoning"):
            return "reasoning", delta["reasoning"]
        if data.get("response") is not None:
            return "response", data["response"]
        if delta.get("content") is not None:
            return "content", delta["content"]
        return None, None

    @classmethod
    def _render_event(cls, event: bytes, in_reasoning: bool) -> tuple:
        """Convert one SSE event into the text fragments to forward.

        Args:
            event: Raw bytes of a single event (one or more lines).
            in_reasoning: Whether a ``<think>`` block is currently open.

        Returns:
            ``(fragments, in_reasoning)`` - the ordered list of texts to emit
            (including think markers) and the updated reasoning state.
        """
        fragments: List[Any] = []
        for raw in event.splitlines():
            if not raw:
                continue
            data = cls._parse_sse_line(raw.decode("utf-8", errors="replace"))
            if data is None:
                continue
            kind, text = cls._extract_token(data)
            if kind is None:
                continue
            if kind == "reasoning" and not in_reasoning:
                in_reasoning = True
                fragments.append(cls._THINK_OPEN)
            elif kind == "content" and in_reasoning:
                in_reasoning = False
                fragments.append(cls._THINK_CLOSE)
            fragments.append(text)
        return fragments, in_reasoning

    async def stream_post(self, url: str, headers: Dict[str, str], payload: Any) -> AsyncGenerator[bytes, None]:
        """POST ``payload`` and yield the upstream stream as NDJSON frames.

        Each yielded item is ``orjson.dumps({"response": text}) + b"\\n"``.
        Reasoning deltas are wrapped between ``<think>`` and ``</think>``
        frames; the closing marker is emitted before the first content delta
        or, failing that, when the stream ends.

        Raises:
            httpx.HTTPStatusError: if the upstream answers with a 4xx/5xx status.
        """
        async with self._client.stream("POST", url, headers=headers, json=payload) as resp:
            resp.raise_for_status()
            buf = b""
            in_reasoning = False
            async for chunk in resp.aiter_bytes(chunk_size=Config.CHUNK_SIZE):
                if not chunk:
                    continue
                # Normalise CRLF so "\r\n\r\n" event delimiters are recognised
                # too; running it over the whole buffer also covers a CR/LF
                # pair split across two chunks.
                buf = (buf + chunk).replace(b"\r\n", b"\n")
                while b"\n\n" in buf:
                    event, buf = buf.split(b"\n\n", 1)
                    fragments, in_reasoning = self._render_event(event, in_reasoning)
                    for fragment in fragments:
                        yield orjson.dumps({"response": fragment}) + b"\n"

            # The upstream may end without a final blank line; do not drop
            # whatever is still buffered.
            if buf.strip():
                fragments, in_reasoning = self._render_event(buf, in_reasoning)
                for fragment in fragments:
                    yield orjson.dumps({"response": fragment}) + b"\n"

            # Never leave a <think> block open.
            if in_reasoning:
                yield orjson.dumps({"response": self._THINK_CLOSE}) + b"\n"

class ChatService:
    """Translate a ChatRequest into an upstream call and return its output."""

    def __init__(self, client: Optional[AsyncUpstreamClient] = None):
        # A falsy client (the default None) means "create our own".
        self.client = client or AsyncUpstreamClient()

    def _get_provider_config(self, provider_name: str) -> Dict[str, Any]:
        """Look up a provider entry, falling back to the default provider's entry."""
        wanted = provider_name.upper()
        fallback = PROVIDERS.get(Config.DEFAULT_PROVIDER, {})
        return PROVIDERS.get(wanted, fallback)

    def build_request_for_provider(self, req: ChatRequest) -> Dict[str, Any]:
        """Render the provider's URL, headers and payload templates for ``req``.

        When the request names no model, the provider's ``DEFAULT_MODEL`` is
        used, then ``Config.DEFAULT_MODEL``.

        Returns:
            A dict with the keys ``url``, ``headers`` and ``payload``.
        """
        provider_cfg = self._get_provider_config(req.provider)
        values = build_values_from_request(req)
        if not values.get("model"):
            values["model"] = provider_cfg.get("DEFAULT_MODEL") or Config.DEFAULT_MODEL

        rendered: Dict[str, Any] = {}
        rendered["url"] = apply_values_to_template(provider_cfg.get("BASE_URL", ""), values)
        rendered["headers"] = self.client._prepare_headers(provider_cfg.get("HEADERS", {}), values)
        rendered["payload"] = apply_values_to_template(provider_cfg.get("PAYLOAD", {}), values)
        return rendered

    async def generate(self, req: ChatRequest) -> str:
        """Perform one non-streaming upstream call and return the answer text.

        Tries ``choices[0].message.content`` first, then a top-level
        ``response`` field, and finally returns the whole reply JSON-encoded.
        """
        call = self.build_request_for_provider(req)
        result = await self.client.post_json(call["url"], call["headers"], call["payload"])
        try:
            answer = result["choices"][0]["message"]["content"]
        except Exception:
            pass
        else:
            return answer
        if isinstance(result, dict) and "response" in result:
            return result["response"]
        return orjson.dumps(result).decode("utf-8")

    async def generate_stream(self, req: ChatRequest) -> AsyncGenerator[bytes, None]:
        """Yield the frames produced by the client's ``stream_post`` for ``req``."""
        call = self.build_request_for_provider(req)
        upstream = self.client.stream_post(call["url"], call["headers"], call["payload"])
        async for frame in upstream:
            yield frame

# ASGI application object; the route decorators below register handlers on it.
app = FastAPI(title="High-speed Chat Proxy")
# One service instance shared by every request.  Constructing it here also
# creates the upstream httpx.AsyncClient at import time (ChatService builds an
# AsyncUpstreamClient when no client is passed in).
service = ChatService()

@app.on_event("shutdown")
async def shutdown_event():
    """Close the shared upstream HTTP client when the application shuts down.

    Best effort: any error raised while closing is deliberately ignored.
    """
    try:
        upstream = service.client
        await upstream.close()
    except Exception:
        pass

@app.post("/v1/chat/completions")
async def completions(request: Request):
    """Proxy a chat-completion request to the selected upstream provider.

    The JSON body is normalised by ``ChatRequest.from_dict``.  With
    ``stream`` true the response is a sequence of
    ``data: {"response": <text>}\\n`` frames terminated by
    ``data: [DONE]\\n\\n``; otherwise it is a single
    ``{"response": <text>}\\n`` line.

    Raises:
        HTTPException: 400 when the body is not a JSON object, or when the
            api key or the messages are missing.
    """
    try:
        body = await request.json()
    except ValueError:
        # json.JSONDecodeError / UnicodeDecodeError: a client error, not a 500.
        raise HTTPException(status_code=400, detail="request body must be valid JSON") from None
    if not isinstance(body, dict):
        raise HTTPException(status_code=400, detail="request body must be a JSON object")

    req = ChatRequest.from_dict(body)
    if not req.api_key or not req.messages:
        raise HTTPException(status_code=400, detail="api_key and messages required")

    async def streamer():
        if req.stream:
            buf = bytearray()
            threshold = Config.STREAM_BATCH_BYTES
            async for chunk_bytes in service.generate_stream(req):
                if not chunk_bytes:
                    continue
                # Prefix every frame individually so that batching several
                # frames (threshold > 0) still gives each one its "data: " tag.
                buf += b"data: "
                buf += chunk_bytes
                if len(buf) >= threshold:
                    yield bytes(buf)
                    buf.clear()
            if buf:
                yield bytes(buf)
            yield b"data: [DONE]\n\n"
        else:
            text = await service.generate(req)
            yield orjson.dumps({"response": text}) + b"\n"

    # NOTE(review): the streaming branch emits SSE-style "data: " frames while
    # the declared media type is NDJSON - confirm what clients expect before
    # changing either side.
    return StreamingResponse(streamer(), media_type="application/x-ndjson", headers={"Cache-Control": "no-cache"})

@app.get("/v1/models")
async def models():
    """Return the model catalog built at import time, under the ``models`` key."""
    listing = {"models": MODEL_NAMES}
    return listing

@app.get("/")
async def root():
    """Return a small static payload naming the service and its status."""
    status = {
        "service": "High-speed Chat Proxy",
        "status": "running",
    }
    return status