# FALTU_ADARSH / server.py
# AdarshJi's picture
# Update server.py
# 5069f76 verified
import json
import logging
import re
from dataclasses import dataclass
from typing import List, Dict, Any, AsyncGenerator, Optional

import httpx
import orjson
from fastapi import FastAPI, Request, HTTPException
from fastapi.responses import StreamingResponse
def get_models():
    """Return the static model catalogue served by ``/v1/models``.

    Returns:
        A fresh dict with two keys:

        * ``"Providers"``: the provider ids, in declaration order.
        * ``"Models"``: provider id -> list of ``{"id", "owned_by"}`` dicts.

    Each provider's list is de-duplicated by model id (first occurrence
    wins, order preserved) so a repeated entry in the table below can never
    leak into the API response.
    """
    # (model id, owner) pairs per provider id.
    catalog = {
        "1": [
            ("openai/gpt-oss-120b", "OpenAI"),
            ("moonshotai/kimi-k2-instruct", "Moonshot AI"),
            ("canopylabs/orpheus-v1-english", "Canopy Labs"),
            ("llama-3.1-8b-instant", "Meta"),
            ("whisper-large-v3", "OpenAI"),
            ("meta-llama/llama-4-scout-17b-16e-instruct", "Meta"),
            ("allam-2-7b", "SDAIA"),
            ("groq/compound", "Groq"),
            ("canopylabs/orpheus-arabic-saudi", "Canopy Labs"),
            ("llama-3.3-70b-versatile", "Meta"),
            ("qwen/qwen3-32b", "Alibaba Cloud"),
            ("meta-llama/llama-prompt-guard-2-22m", "Meta"),
            ("groq/compound-mini", "Groq"),
            ("meta-llama/llama-guard-4-12b", "Meta"),
            ("openai/gpt-oss-20b", "OpenAI"),
            ("openai/gpt-oss-safeguard-20b", "OpenAI"),
            ("meta-llama/llama-4-maverick-17b-128e-instruct", "Meta"),
            ("moonshotai/kimi-k2-instruct-0905", "Moonshot AI"),
        ],
        "2": [
            ("aisingapore/gemma-sea-lion-v4-27b-it", "AI Singapore"),
            ("defog/sqlcoder-7b-2", "Defog"),
            ("ibm-granite/granite-4.0-h-micro", "IBM"),
            ("meta/llama-3.1-8b-instruct", "Meta"),
            ("microsoft/phi-2", "Microsoft"),
            ("qwen/qwen3-30b-a3b-fp8", "Alibaba Cloud"),
            ("qwen/qwq-32b", "Alibaba Cloud"),
        ],
        "3": [
            ("zai-org/glm-4.6", "Zhipu AI"),
            ("openai/gpt-5-nano-2025-08-07", "OpenAI"),
            ("deepseek-ai/deepseek-v3.2-thinking", "DeepSeek AI"),
            ("nvidia/nvidia-nemotron-3-nano-30b-a3b", "NVIDIA"),
            ("nvidia/nvidia-nemotron-3-nano-30b-a3b-thinking", "NVIDIA"),
            ("openai/gpt-5-mini-2025-08-07", "OpenAI"),
            ("qwen/qwen3-vl-235b-a22b-thinking", "Alibaba Cloud"),
            ("qwen/qwen3-vl-235b-a22b-instruct", "Alibaba Cloud"),
            ("perplexity/sonar", "Perplexity"),
            ("moonshotai/kimi-k2.5", "Moonshot AI"),
            ("anthropic/claude-haiku-4-5-20251001", "Anthropic"),
            ("google/gemini-2.5-flash-lite", "Google"),
            ("moonshotai/kimi-k2-thinking", "Moonshot AI"),
            ("mistralai/devstral-2-123b-instruct-2512", "Mistral AI"),
            ("mistralai/mistral-large-3-675b-instruct-2512", "Mistral AI"),
            ("openai/gpt-oss-safeguard-20b", "OpenAI"),
            ("openai/gpt-oss-120b", "OpenAI"),
        ],
        "4": [
            ("qwen3-4b-thinking-2507", "Alibaba Cloud"),
        ],
        "5": [
            ("meta/llama-3.1-70b-instruct", "Meta"),
            ("qwen/qwen2.5-coder-32b-instruct", "Alibaba Cloud"),
            ("deepseek-ai/deepseek-r1-distill-qwen-32b", "DeepSeek AI"),
            ("meta/llama-4-scout-17b-16e-instruct", "Meta"),
            ("google/gemma-3-12b-it", "Google"),
            ("mistralai/mistral-small-3.1-24b-instruct", "Mistral AI"),
            ("meta/llama-3.3-70b-instruct-fp8-fast", "Meta"),
            ("meta/llama-3.2-3b-instruct", "Meta"),
            ("meta/llama-3.2-1b-instruct", "Meta"),
            ("meta-llama/meta-llama-3-8b-instruct", "Meta"),
            ("meta/llama-3-8b-instruct", "Meta"),
            ("meta/llama-2-7b-chat-int8", "Meta"),
            ("meta/llama-2-7b-chat-fp16", "Meta"),
            ("meta/llama-3-8b-instruct-awq", "Meta"),
            ("google/gemma-7b-it", "Google"),
            ("google/gemma-2b-it-lora", "Google"),
            ("mistral/mistral-7b-instruct-v0.2", "Mistral AI"),
            ("mistral/mistral-7b-instruct-v0.2-lora", "Mistral AI"),
        ],
    }

    models = {}
    for provider, entries in catalog.items():
        seen = set()
        unique = []
        for model_id, owner in entries:
            # Guard against accidental repeats when the table is edited.
            if model_id in seen:
                continue
            seen.add(model_id)
            unique.append({"id": model_id, "owned_by": owner})
        models[provider] = unique

    return {"Providers": list(catalog), "Models": models}
# Build the catalogue once at import time; if get_models() raises for any
# reason, fall back to a small placeholder mapping so the module still loads.
try:
    _catalog = get_models()
except Exception:
    _catalog = {"GROQ": "GROQ-FALLBACK", "LLMC": "LLMC-FALLBACK"}
MODEL_NAMES = _catalog
class Config:
    """Static defaults for request handling and the upstream HTTP client."""

    # Request defaults applied when the caller omits the field.
    DEFAULT_PROVIDER: str = "1"
    DEFAULT_MODEL: str = "llama-3.3-70b-versatile"
    DEFAULT_TEMPERATURE: float = 0.7

    # Upstream HTTP client settings (byte chunk size for streamed reads,
    # connection pool cap, HTTP/2 toggle, timeout in seconds).
    CHUNK_SIZE: int = 1000
    MAX_CONNECTIONS: int = 200
    HTTP2: bool = True
    TIMEOUT: float = 30.0

    # Minimum number of buffered bytes before the streaming endpoint flushes;
    # 0 means every upstream chunk is flushed immediately.
    STREAM_BATCH_BYTES: int = 0
def _gateway_provider(upstream_id: str, default_model: str) -> Dict[str, Any]:
    """Build the config shared by the providers routed through the hf.space gateway."""
    return {
        "AUTH": False,
        "BASE_URL": "https://adarshji-md.hf.space/gen",
        "DEFAULT_MODEL": default_model,
        "PAYLOAD": {
            "api_key": "LOL",
            "provider": upstream_id,
            "messages": "{messages}",
            "model": "{model}",
            "stream": "{stream}",
        },
    }


# Provider id -> request template. Strings of the form "{name}" are
# placeholders filled from the incoming request before the upstream call.
# Entries without "HEADERS" are sent with no extra headers.
PROVIDERS: Dict[str, Dict[str, Any]] = {
    "1": {
        "AUTH": True,
        "BASE_URL": "https://api.groq.com/openai/v1/chat/completions",
        "DEFAULT_MODEL": "qwen/qwen3-32b",
        "HEADERS": {
            "Authorization": "Bearer {API}",
            "Content-Type": "application/json",
        },
        "PAYLOAD": {
            "model": "{model}",
            "messages": "{messages}",
            "temperature": "{temperature}",
            "stop": None,
            "stream": "{stream}",
        },
    },
    "2": {
        "AUTH": False,
        "BASE_URL": "https://llmchat.in/inference/stream?model={model}",
        "DEFAULT_MODEL": "@cf/meta/llama-3.1-8b-instruct",
        "HEADERS": {
            "Content-Type": "application/json",
            "Accept": "*/*",
            "Origin": "https://llmchat.in",
            "Referer": "https://llmchat.in/",
        },
        "PAYLOAD": {"messages": "{messages}", "stream": "{stream}"},
    },
    "3": _gateway_provider("1", "openai/gpt-oss-120b"),
    "4": _gateway_provider("2", "qwen3-4b-thinking-2507"),
    "5": _gateway_provider("3", "deepseek-ai/deepseek-r1-distill-qwen-32b"),
}
# Matches a string that consists of exactly ONE ``{name}`` placeholder.
# Braces are excluded from the name so that "{a}{b}" is NOT mistaken for a
# single placeholder called "a}{b".
_placeholder_re = re.compile(r"\{([^{}]*)\}")


def apply_values_to_template(template: Any, values: Dict[str, Any]) -> Any:
    """Recursively substitute ``{name}`` placeholders in *template*.

    Args:
        template: A str, dict, list, or any other object. Dicts and lists are
            walked recursively; other non-string objects are returned as-is.
        values: Placeholder name -> replacement value.

    Returns:
        * For a string that is exactly one placeholder (ignoring surrounding
          whitespace): the raw value from *values*, keeping its original type
          (list, bool, None, ...). Unknown names leave the string unchanged.
        * For any other string: the result of ``str.format`` with every value
          rendered as text (non-scalar values are JSON-encoded). If formatting
          fails, the string is returned unchanged.
        * For dicts/lists: a new container with each element substituted.
    """
    if isinstance(template, dict):
        return {k: apply_values_to_template(v, values) for k, v in template.items()}
    if isinstance(template, list):
        return [apply_values_to_template(item, values) for item in template]
    if not isinstance(template, str):
        return template

    whole = _placeholder_re.fullmatch(template.strip())
    if whole:
        return values.get(whole.group(1), template)

    # Nothing to format: skip serialising every value (str.format would return
    # the string unchanged anyway when it contains no braces at all).
    if "{" not in template and "}" not in template:
        return template

    str_values = {
        key: (
            val
            if isinstance(val, (str, int, float, bool, type(None)))
            else orjson.dumps(val).decode("utf-8")
        )
        for key, val in values.items()
    }
    try:
        return template.format(**str_values)
    except (LookupError, ValueError, AttributeError, TypeError):
        # Unknown field, stray brace, or unsupported format spec: treat the
        # template as a literal rather than failing the whole request.
        return template
def build_values_from_request(req: "ChatRequest") -> Dict[str, Any]:
    """Collect the placeholder values a provider template may reference.

    The API key and the message list are each exposed under two names so
    templates can use either spelling. An empty/falsy model is normalised to
    ``None``.
    """
    key = req.api_key
    conversation = req.messages
    chosen_model = req.model if req.model else None
    return dict(
        api_key=key,
        API=key,
        messages=conversation,
        message=conversation,
        model=chosen_model,
        temperature=req.temperature,
        stream=req.stream,
    )
@dataclass
class ChatRequest:
    """Normalised chat request, independent of the spelling the client used."""

    api_key: str
    messages: List[Dict[str, Any]]
    model: Optional[str] = None
    provider: str = Config.DEFAULT_PROVIDER
    temperature: float = Config.DEFAULT_TEMPERATURE
    stream: bool = True

    @staticmethod
    def from_dict(payload: Dict[str, Any]) -> "ChatRequest":
        """Build a request from a decoded JSON body.

        Accepted aliases: ``api_key``/``key``/``apikey``,
        ``messages``/``message``/``msgs``, ``model_name``/``model``.

        A missing message list becomes ``[]`` and a single message dict is
        wrapped in a list. The provider id is coerced to text before being
        upper-cased, so a JSON number such as ``"provider": 1`` is accepted
        instead of raising ``AttributeError``.
        """
        api_key = payload.get("api_key") or payload.get("key") or payload.get("apikey")
        messages = payload.get("messages") or payload.get("message") or payload.get("msgs")
        model = payload.get("model_name") or payload.get("model")

        # Provider ids are numeric-looking strings ("1".."5"); clients commonly
        # send them as JSON numbers, so stringify before normalising.
        raw_provider = payload.get("provider") or Config.DEFAULT_PROVIDER
        provider = str(raw_provider).strip().upper()

        temperature = payload.get("temperature", Config.DEFAULT_TEMPERATURE)
        stream = payload.get("stream", True)

        if messages is None:
            messages = []
        if isinstance(messages, dict):
            messages = [messages]

        return ChatRequest(
            api_key=api_key,
            messages=messages,
            model=model,
            provider=provider,
            temperature=temperature,
            stream=stream,
        )
class AsyncUpstreamClient:
    """Shared async HTTP client used to call upstream chat providers.

    ``post_json`` performs a plain request/response call. ``stream_post``
    reads a streamed response made of ``data: <json>`` lines and re-emits
    each text token as one NDJSON line of the form ``{"response": <token>}``.
    Reasoning tokens are wrapped in ``<think>`` / ``</think>`` markers.
    """

    _log = logging.getLogger(__name__)

    def __init__(self):
        limits = httpx.Limits(max_connections=Config.MAX_CONNECTIONS)
        self._client = httpx.AsyncClient(timeout=Config.TIMEOUT, limits=limits, http2=Config.HTTP2)

    def _prepare_headers(self, headers_template: Dict[str, str], values: Dict[str, Any]) -> Dict[str, str]:
        """Fill header placeholders; drop headers that resolve to ``None``."""
        headers = {}
        for name, template in headers_template.items():
            filled = apply_values_to_template(template, values)
            if filled is None:
                continue
            headers[name] = filled if isinstance(filled, str) else str(filled)
        return headers

    async def close(self):
        """Close the underlying HTTP client and its connection pool."""
        await self._client.aclose()

    async def post_json(self, url: str, headers: Dict[str, str], payload: Any) -> Dict[str, Any]:
        """POST *payload* as JSON and return the decoded JSON response.

        Raises:
            httpx.HTTPStatusError: if the upstream answers with a 4xx/5xx.
        """
        resp = await self._client.post(url, headers=headers, json=payload)
        resp.raise_for_status()
        return resp.json()

    def _is_metadata_blob(self, obj: Dict[str, Any]) -> bool:
        """Return True when *obj* looks like a chunk carrying no text content.

        NOTE(review): not called anywhere in the visible code.
        """
        if not isinstance(obj, dict):
            return False
        if (
            ("id" in obj and "object" in obj)
            or "x_groq" in obj
            or "tool_calls" in obj
            or ("usage" in obj and isinstance(obj.get("usage"), dict))
        ):
            return True
        if obj.get("choices") and isinstance(obj.get("choices"), list):
            try:
                c0 = obj["choices"][0]
                delta = c0.get("delta", {}) if isinstance(c0, dict) else {}
                content = delta.get("content") or (c0.get("message", {}) or {}).get("content")
                if not content:
                    return True
            except Exception:
                return False
        return False

    @staticmethod
    def _sse_payload(raw_line: bytes) -> Optional[str]:
        """Return the text after a leading ``data:`` field, or None for other lines.

        Only the prefix is stripped, so a payload that itself contains the
        text ``data: `` is returned intact.
        """
        text = raw_line.decode("utf-8", errors="replace").strip()
        if not text.startswith("data:"):
            return None
        return text[len("data:"):].strip()

    @staticmethod
    def _tokens_for(data: Any, thinking: bool) -> tuple:
        """Extract output tokens from one decoded chunk.

        Args:
            data: Decoded JSON chunk. Two shapes are recognised:
                ``{"choices": [{"delta": {"reasoning"|"content": ...}}]}`` and
                ``{"response": ...}``.
            thinking: True while a ``<think>`` block is currently open.

        Returns:
            ``(tokens, thinking)``: the tokens to emit in order (including
            any ``<think>`` / ``</think>`` markers) and the updated state.
        """
        if not isinstance(data, dict):
            return [], thinking

        delta: Dict[str, Any] = {}
        choices = data.get("choices")
        if isinstance(choices, list) and choices and isinstance(choices[0], dict):
            candidate = choices[0].get("delta")
            if isinstance(candidate, dict):
                delta = candidate

        tokens: List[Any] = []
        reasoning = delta.get("reasoning")
        if reasoning:
            if not thinking:
                tokens.append("<think>\n")
                thinking = True
            tokens.append(reasoning)

        # A top-level "response" field takes precedence over delta content.
        if data.get("response") not in (None, ""):
            tokens.append(data["response"])
            return tokens, thinking

        # A null/empty "reasoning" key must not hide the content that arrives
        # in the same chunk, so content is examined independently.
        content = delta.get("content")
        if content not in (None, ""):
            if thinking:
                tokens.append("</think>\n\n")
                thinking = False
            tokens.append(content)
        return tokens, thinking

    def _line_tokens(self, raw_line: bytes, thinking: bool) -> tuple:
        """Turn one raw stream line into ``(tokens, thinking)``.

        Non-``data:`` lines, empty payloads, the ``[DONE]`` sentinel and
        payloads that are not valid JSON all yield no tokens and never reuse
        state from an earlier line.
        """
        body = self._sse_payload(raw_line)
        if not body or body == "[DONE]":
            return [], thinking
        try:
            data = json.loads(body)
        except ValueError:
            self._log.debug("skipping non-JSON stream payload: %r", body[:200])
            return [], thinking
        return self._tokens_for(data, thinking)

    async def stream_post(self, url: str, headers: Dict[str, str], payload: Any) -> AsyncGenerator[bytes, None]:
        """POST *payload* and yield one NDJSON line per upstream token.

        Lines are processed as soon as they are complete (terminated by a
        newline), so upstreams that separate events with a single newline
        stream incrementally too. A final unterminated line is processed when
        the stream ends, and a still-open ``<think>`` block is closed.

        Raises:
            httpx.HTTPStatusError: if the upstream answers with a 4xx/5xx.
        """
        async with self._client.stream("POST", url, headers=headers, json=payload) as resp:
            resp.raise_for_status()
            pending = b""
            thinking = False
            async for chunk in resp.aiter_bytes(chunk_size=Config.CHUNK_SIZE):
                if not chunk:
                    continue
                pending += chunk
                # Everything before the last newline is complete; the
                # remainder waits for more bytes.
                *complete, pending = pending.split(b"\n")
                for raw_line in complete:
                    tokens, thinking = self._line_tokens(raw_line, thinking)
                    for token in tokens:
                        yield orjson.dumps({"response": token}) + b"\n"

            if pending.strip():
                tokens, thinking = self._line_tokens(pending, thinking)
                for token in tokens:
                    yield orjson.dumps({"response": token}) + b"\n"
            if thinking:
                # Upstream ended while still emitting reasoning.
                yield orjson.dumps({"response": "</think>\n\n"}) + b"\n"
class ChatService:
    """Translate a ChatRequest into an upstream call for the chosen provider."""

    def __init__(self, client: Optional[AsyncUpstreamClient] = None):
        # Create a private client when the caller does not supply one.
        self.client = client if client else AsyncUpstreamClient()

    def _get_provider_config(self, provider_name: str) -> Dict[str, Any]:
        """Look up a provider's template; unknown names use the default provider."""
        wanted = provider_name.upper()
        fallback = PROVIDERS.get(Config.DEFAULT_PROVIDER, {})
        return PROVIDERS.get(wanted, fallback)

    def build_request_for_provider(self, req: ChatRequest) -> Dict[str, Any]:
        """Return the ``url``, ``headers`` and ``payload`` for the upstream call."""
        provider_cfg = self._get_provider_config(req.provider)
        substitutions = build_values_from_request(req)
        if not substitutions.get("model"):
            # No model requested: provider default first, then global default.
            substitutions["model"] = provider_cfg.get("DEFAULT_MODEL") or Config.DEFAULT_MODEL

        target_url = apply_values_to_template(provider_cfg.get("BASE_URL", ""), substitutions)
        request_headers = self.client._prepare_headers(provider_cfg.get("HEADERS", {}), substitutions)
        body = apply_values_to_template(provider_cfg.get("PAYLOAD", {}), substitutions)
        return dict(url=target_url, headers=request_headers, payload=body)

    async def generate(self, req: ChatRequest) -> str:
        """Run a non-streaming completion and return the reply text.

        Tries the ``choices[0].message.content`` shape first, then a top-level
        ``"response"`` field, and finally falls back to the raw JSON text.
        """
        spec = self.build_request_for_provider(req)
        reply = await self.client.post_json(spec["url"], spec["headers"], spec["payload"])
        try:
            return reply["choices"][0]["message"]["content"]
        except Exception:
            pass
        if isinstance(reply, dict) and "response" in reply:
            return reply["response"]
        return orjson.dumps(reply).decode("utf-8")

    async def generate_stream(self, req: ChatRequest) -> AsyncGenerator[bytes, None]:
        """Yield the upstream client's streamed token lines unchanged."""
        spec = self.build_request_for_provider(req)
        upstream = self.client.stream_post(spec["url"], spec["headers"], spec["payload"])
        async for piece in upstream:
            yield piece
# ASGI application object; the route handlers in this module register on it.
app = FastAPI(title="High-speed Chat Proxy")
# Module-level service instance shared by every request handler.
service = ChatService()
@app.on_event("shutdown")
async def shutdown_event():
    """Close the shared upstream client when the application shuts down."""
    try:
        upstream = service.client
        await upstream.close()
    except Exception:
        # Best-effort cleanup: a failure here must not block shutdown.
        pass
@app.post("/v1/chat/completions")
async def completions(request: Request):
    """Proxy a chat completion to the selected upstream provider.

    The JSON body must be an object carrying an API key and at least one
    message (see ``ChatRequest.from_dict`` for accepted field names).

    Returns:
        A streaming response. With ``stream`` enabled each token is sent as a
        ``data: {"response": ...}`` line followed by a final ``data: [DONE]``;
        otherwise a single ``{"response": ...}`` line is sent.

    Raises:
        HTTPException: 400 when the body is not valid JSON, is not a JSON
            object, has malformed fields, or lacks the key or messages.
    """
    try:
        body = await request.json()
    except ValueError:
        # Covers JSON syntax errors and undecodable bytes.
        raise HTTPException(status_code=400, detail="request body must be valid JSON") from None
    if not isinstance(body, dict):
        raise HTTPException(status_code=400, detail="request body must be a JSON object")

    try:
        req = ChatRequest.from_dict(body)
    except (AttributeError, TypeError, ValueError):
        raise HTTPException(status_code=400, detail="malformed request fields") from None

    if not req.api_key or not req.messages:
        raise HTTPException(status_code=400, detail="api_key and messages required")

    async def streamer():
        if req.stream:
            buf = bytearray()
            threshold = Config.STREAM_BATCH_BYTES
            async for chunk_bytes in service.generate_stream(req):
                if not chunk_bytes:
                    continue
                # Prefix every token line individually so a batched flush
                # still contains one well-formed "data: ..." line per token.
                buf += b"data: "
                buf += chunk_bytes
                if len(buf) >= threshold:
                    yield bytes(buf)
                    buf.clear()
            if buf:
                yield bytes(buf)
            yield b"data: [DONE]\n\n"
        else:
            text = await service.generate(req)
            yield orjson.dumps({"response": text}) + b"\n"

    return StreamingResponse(
        streamer(),
        media_type="application/x-ndjson",
        headers={"Cache-Control": "no-cache"},
    )
@app.get("/v1/models")
async def models():
    """Return the model catalogue computed at import time."""
    listing = {"models": MODEL_NAMES}
    return listing
@app.get("/")
async def root():
    """Liveness endpoint reporting the service name and status."""
    return dict(service="High-speed Chat Proxy", status="running")