Spaces:

AdarshJi
/

FALTU_ADARSH

Sleeping

App Files Files Community

FALTU_ADARSH / server.py

AdarshJi

Update server.py

5069f76 verified 24 days ago

raw

history blame contribute delete

19.4 kB

	from dataclasses import dataclass
	from typing import List, Dict, Any, AsyncGenerator, Optional
	import re
	import orjson
	import httpx
	import json
	from fastapi import FastAPI, Request, HTTPException
	from fastapi.responses import StreamingResponse




	def get_models():
	    """Return the static catalogue of providers and the models each one lists.

	    Returns:
	        A dict with two keys: ``"Providers"`` (the provider ids, as strings,
	        in declaration order) and ``"Models"`` (provider id -> list of
	        ``{"id": ..., "owned_by": ...}`` dicts). A new structure is built on
	        every call, and a model id appears at most once per provider.
	    """
	    # (model id, owner) pairs per provider id, kept in listing order.
	    catalogue = {
	        "1": [
	            ("openai/gpt-oss-120b", "OpenAI"),
	            ("moonshotai/kimi-k2-instruct", "Moonshot AI"),
	            ("canopylabs/orpheus-v1-english", "Canopy Labs"),
	            ("llama-3.1-8b-instant", "Meta"),
	            ("whisper-large-v3", "OpenAI"),
	            ("meta-llama/llama-4-scout-17b-16e-instruct", "Meta"),
	            ("allam-2-7b", "SDAIA"),
	            ("groq/compound", "Groq"),
	            ("canopylabs/orpheus-arabic-saudi", "Canopy Labs"),
	            ("llama-3.3-70b-versatile", "Meta"),
	            ("qwen/qwen3-32b", "Alibaba Cloud"),
	            ("meta-llama/llama-prompt-guard-2-22m", "Meta"),
	            ("groq/compound-mini", "Groq"),
	            ("meta-llama/llama-guard-4-12b", "Meta"),
	            ("openai/gpt-oss-20b", "OpenAI"),
	            ("openai/gpt-oss-safeguard-20b", "OpenAI"),
	            ("meta-llama/llama-4-maverick-17b-128e-instruct", "Meta"),
	            ("moonshotai/kimi-k2-instruct-0905", "Moonshot AI"),
	        ],
	        "2": [
	            ("aisingapore/gemma-sea-lion-v4-27b-it", "AI Singapore"),
	            ("defog/sqlcoder-7b-2", "Defog"),
	            ("ibm-granite/granite-4.0-h-micro", "IBM"),
	            ("meta/llama-3.1-8b-instruct", "Meta"),
	            ("microsoft/phi-2", "Microsoft"),
	            ("qwen/qwen3-30b-a3b-fp8", "Alibaba Cloud"),
	            ("qwen/qwq-32b", "Alibaba Cloud"),
	        ],
	        "3": [
	            ("zai-org/glm-4.6", "Zhipu AI"),
	            ("openai/gpt-5-nano-2025-08-07", "OpenAI"),
	            ("deepseek-ai/deepseek-v3.2-thinking", "DeepSeek AI"),
	            ("nvidia/nvidia-nemotron-3-nano-30b-a3b", "NVIDIA"),
	            ("nvidia/nvidia-nemotron-3-nano-30b-a3b-thinking", "NVIDIA"),
	            ("openai/gpt-5-mini-2025-08-07", "OpenAI"),
	            ("qwen/qwen3-vl-235b-a22b-thinking", "Alibaba Cloud"),
	            ("qwen/qwen3-vl-235b-a22b-instruct", "Alibaba Cloud"),
	            ("perplexity/sonar", "Perplexity"),
	            ("moonshotai/kimi-k2.5", "Moonshot AI"),
	            ("anthropic/claude-haiku-4-5-20251001", "Anthropic"),
	            ("google/gemini-2.5-flash-lite", "Google"),
	            ("moonshotai/kimi-k2-thinking", "Moonshot AI"),
	            ("mistralai/devstral-2-123b-instruct-2512", "Mistral AI"),
	            ("mistralai/mistral-large-3-675b-instruct-2512", "Mistral AI"),
	            ("openai/gpt-oss-safeguard-20b", "OpenAI"),
	            ("openai/gpt-oss-120b", "OpenAI"),
	        ],
	        "4": [
	            ("qwen3-4b-thinking-2507", "Alibaba Cloud"),
	        ],
	        "5": [
	            ("meta/llama-3.1-70b-instruct", "Meta"),
	            ("qwen/qwen2.5-coder-32b-instruct", "Alibaba Cloud"),
	            ("deepseek-ai/deepseek-r1-distill-qwen-32b", "DeepSeek AI"),
	            ("meta/llama-4-scout-17b-16e-instruct", "Meta"),
	            ("google/gemma-3-12b-it", "Google"),
	            ("mistralai/mistral-small-3.1-24b-instruct", "Mistral AI"),
	            ("meta/llama-3.3-70b-instruct-fp8-fast", "Meta"),
	            ("meta/llama-3.2-3b-instruct", "Meta"),
	            ("meta/llama-3.2-1b-instruct", "Meta"),
	            ("meta-llama/meta-llama-3-8b-instruct", "Meta"),
	            ("meta/llama-3-8b-instruct", "Meta"),
	            ("meta/llama-2-7b-chat-int8", "Meta"),
	            ("meta/llama-2-7b-chat-fp16", "Meta"),
	            ("meta/llama-3-8b-instruct-awq", "Meta"),
	            ("google/gemma-7b-it", "Google"),
	            ("google/gemma-2b-it-lora", "Google"),
	            ("mistral/mistral-7b-instruct-v0.2", "Mistral AI"),
	            ("mistral/mistral-7b-instruct-v0.2-lora", "Mistral AI"),
	        ],
	    }

	    models = {}
	    for provider_id, entries in catalogue.items():
	        # dict() keeps first-seen order and collapses any repeated model id,
	        # so a copy-paste duplicate in the table can never reach clients.
	        unique = dict(entries)
	        models[provider_id] = [
	            {"id": model_id, "owned_by": owner} for model_id, owner in unique.items()
	        ]

	    return {"Providers": list(catalogue), "Models": models}



	# Build the model catalogue once at import time; the /v1/models route
	# returns this object as-is.
	try:
	    MODEL_NAMES = get_models()
	except Exception:
	    # Fall back to an empty catalogue with the SAME top-level keys that
	    # get_models() produces, so consumers reading "Providers"/"Models"
	    # keep working even if the catalogue could not be built.
	    MODEL_NAMES = {"Providers": [], "Models": {}}


	class Config:
	    """Process-wide tunables, read directly as class attributes."""

	    # Defaults applied when a request does not name a provider, model or
	    # temperature.
	    DEFAULT_PROVIDER: str = "1"
	    DEFAULT_MODEL: str = "llama-3.3-70b-versatile"
	    DEFAULT_TEMPERATURE: float = 0.7

	    # Settings handed to the shared httpx.AsyncClient and its byte iterator.
	    CHUNK_SIZE: int = 1000
	    MAX_CONNECTIONS: int = 200
	    HTTP2: bool = True
	    TIMEOUT: float = 30.0

	    # Minimum buffered size before the streaming endpoint flushes; 0 means
	    # every upstream record is forwarded as soon as it arrives.
	    STREAM_BATCH_BYTES: int = 0

	# Upstream provider table, keyed by provider id.
	#
	# Each entry may define:
	#   AUTH          - flag carried with the entry (not consulted by the code
	#                   visible in this file).
	#   BASE_URL      - request URL; may contain "{name}" placeholders.
	#   DEFAULT_MODEL - model used when the request does not name one.
	#   HEADERS       - optional header templates (entries without it send none
	#                   beyond the HTTP client's defaults).
	#   PAYLOAD       - JSON body template. A string that is exactly "{name}" is
	#                   replaced by the request value itself (type preserved);
	#                   other strings are formatted as text.
	PROVIDERS: Dict[str, Dict[str, Any]] = {
	    "1": {
	        "AUTH": True,
	        "BASE_URL": "https://api.groq.com/openai/v1/chat/completions",
	        "DEFAULT_MODEL": "qwen/qwen3-32b",
	        "HEADERS": {"Authorization": "Bearer {API}", "Content-Type": "application/json"},
	        "PAYLOAD": {
	            "model": "{model}",
	            "messages": "{messages}",
	            "temperature": "{temperature}",
	            "stop": None,
	            "stream": "{stream}",
	        },
	    },
	    "2": {
	        "AUTH": False,
	        "BASE_URL": "https://llmchat.in/inference/stream?model={model}",
	        "DEFAULT_MODEL": "@cf/meta/llama-3.1-8b-instruct",
	        "HEADERS": {
	            "Content-Type": "application/json",
	            # "/" is not a valid media range; "*/*" is the wildcard that
	            # means "accept any response type".
	            "Accept": "*/*",
	            "Origin": "https://llmchat.in",
	            "Referer": "https://llmchat.in/",
	        },
	        "PAYLOAD": {"messages": "{messages}", "stream": "{stream}"},
	    },
	    "3": {
	        "AUTH": False,
	        "BASE_URL": "https://adarshji-md.hf.space/gen",
	        "DEFAULT_MODEL": "openai/gpt-oss-120b",
	        "PAYLOAD": {
	            "api_key": "LOL",
	            "provider": "1",
	            "messages": "{messages}",
	            "model": "{model}",
	            "stream": "{stream}",
	        },
	    },
	    "4": {
	        "AUTH": False,
	        "BASE_URL": "https://adarshji-md.hf.space/gen",
	        "DEFAULT_MODEL": "qwen3-4b-thinking-2507",
	        "PAYLOAD": {
	            "api_key": "LOL",
	            "provider": "2",
	            "messages": "{messages}",
	            "model": "{model}",
	            "stream": "{stream}",
	        },
	    },
	    "5": {
	        "AUTH": False,
	        "BASE_URL": "https://adarshji-md.hf.space/gen",
	        "DEFAULT_MODEL": "deepseek-ai/deepseek-r1-distill-qwen-32b",
	        "PAYLOAD": {
	            "api_key": "LOL",
	            "provider": "3",
	            "messages": "{messages}",
	            "model": "{model}",
	            "stream": "{stream}",
	        },
	    },
	}

	# Matches a string that consists of exactly one "{name}" placeholder. Braces
	# are excluded from the name so that "{a}/{b}" is NOT mistaken for a single
	# placeholder called "a}/{b".
	_placeholder_re = re.compile(r"\{([^{}]*)\}")

	def apply_values_to_template(template: Any, values: Dict[str, Any]) -> Any:
	    """Recursively substitute ``{name}`` placeholders in a template.

	    Args:
	        template: A string, dict, list, or any other object. Dicts and lists
	            are processed element by element; other non-string objects are
	            returned unchanged.
	        values: Placeholder name -> value.

	    Returns:
	        * For a string that is exactly one placeholder (ignoring surrounding
	          whitespace): the raw value from ``values`` with its type
	          preserved, or the original string when the name is unknown.
	        * For any other string: the result of ``str.format`` with the
	          values, where non-scalar values are rendered as JSON text. If
	          formatting is not possible (unknown name, stray brace, ...), the
	          string is returned unchanged.
	        * For dicts/lists: a new container with every element substituted.
	    """
	    if isinstance(template, dict):
	        return {key: apply_values_to_template(item, values) for key, item in template.items()}
	    if isinstance(template, list):
	        return [apply_values_to_template(item, values) for item in template]
	    if not isinstance(template, str):
	        return template

	    whole = _placeholder_re.fullmatch(template.strip())
	    if whole:
	        return values.get(whole.group(1), template)

	    # Nothing to substitute: skip serialising the (possibly large) values.
	    if "{" not in template and "}" not in template:
	        return template

	    printable = {}
	    for name, value in values.items():
	        if isinstance(value, (str, int, float, bool, type(None))):
	            printable[name] = value
	        else:
	            # Containers are embedded as compact JSON text.
	            printable[name] = orjson.dumps(value).decode("utf-8")
	    try:
	        return template.format(**printable)
	    except (LookupError, ValueError, AttributeError, TypeError):
	        # Best effort by design: a template that cannot be formatted is
	        # passed through verbatim rather than failing the request.
	        return template

	def build_values_from_request(req: "ChatRequest") -> Dict[str, Any]:
	    """Collect the placeholder values a provider template can reference.

	    The API key and the message list are each exposed under two names
	    (``api_key``/``API`` and ``messages``/``message``). An empty or missing
	    model is reported as ``None``.
	    """
	    key = req.api_key
	    conversation = req.messages
	    chosen_model = req.model

	    values: Dict[str, Any] = {"api_key": key, "API": key}
	    values["messages"] = conversation
	    values["message"] = conversation
	    values["model"] = chosen_model if chosen_model else None
	    values["temperature"] = req.temperature
	    values["stream"] = req.stream
	    return values

	@dataclass
	class ChatRequest:
	    """A normalised chat request as accepted by the proxy endpoint."""

	    # May be None when the client sent no key; the endpoint rejects that case.
	    api_key: Optional[str]
	    messages: List[Dict[str, Any]]
	    model: Optional[str] = None
	    provider: str = Config.DEFAULT_PROVIDER
	    temperature: float = Config.DEFAULT_TEMPERATURE
	    stream: bool = True

	    @staticmethod
	    def from_dict(payload: Dict[str, Any]) -> "ChatRequest":
	        """Build a request from a decoded JSON object, accepting field aliases.

	        Aliases: ``api_key``/``key``/``apikey``; ``messages``/``message``/
	        ``msgs``; ``model_name``/``model``. The first non-empty alias wins.

	        Args:
	            payload: The decoded request body (must support ``.get``).

	        Returns:
	            A ``ChatRequest`` where a missing message list becomes ``[]``, a
	            single message dict is wrapped in a list, the provider id is an
	            upper-cased string, and a missing or null temperature falls back
	            to ``Config.DEFAULT_TEMPERATURE``.
	        """
	        api_key = payload.get("api_key") or payload.get("key") or payload.get("apikey")
	        messages = payload.get("messages") or payload.get("message") or payload.get("msgs")
	        model = payload.get("model_name") or payload.get("model")

	        # Clients may send the provider id as a JSON number (1) instead of a
	        # string ("1"); coerce before upper-casing so that does not crash.
	        raw_provider = payload.get("provider") or Config.DEFAULT_PROVIDER
	        provider = str(raw_provider).strip().upper()

	        # An explicit null must not override the default with None.
	        temperature = payload.get("temperature")
	        if temperature is None:
	            temperature = Config.DEFAULT_TEMPERATURE

	        stream = payload.get("stream", True)

	        if messages is None:
	            messages = []
	        if isinstance(messages, dict):
	            messages = [messages]

	        return ChatRequest(
	            api_key=api_key,
	            messages=messages,
	            model=model,
	            provider=provider,
	            temperature=temperature,
	            stream=stream,
	        )

	class AsyncUpstreamClient:
	    """Async HTTP client used to call upstream chat providers.

	    One shared ``httpx.AsyncClient`` is created per instance; call ``close``
	    to release it.
	    """

	    _THINK_OPEN = "<think>\n"
	    _THINK_CLOSE = "</think>\n\n"

	    def __init__(self):
	        limits = httpx.Limits(max_connections=Config.MAX_CONNECTIONS)
	        self._client = httpx.AsyncClient(timeout=Config.TIMEOUT, limits=limits, http2=Config.HTTP2)

	    def _prepare_headers(self, headers_template: Dict[str, str], values: Dict[str, Any]) -> Dict[str, str]:
	        """Fill header templates; headers resolving to None are omitted."""
	        headers = {}
	        for name, template in headers_template.items():
	            filled = apply_values_to_template(template, values)
	            if filled is None:
	                continue
	            headers[name] = filled if isinstance(filled, str) else str(filled)
	        return headers

	    async def close(self):
	        """Close the underlying HTTP client."""
	        await self._client.aclose()

	    async def post_json(self, url: str, headers: Dict[str, str], payload: Any) -> Dict[str, Any]:
	        """POST ``payload`` as JSON and return the decoded JSON response.

	        Raises:
	            httpx.HTTPStatusError: for 4xx/5xx responses.
	        """
	        resp = await self._client.post(url, headers=headers, json=payload)
	        resp.raise_for_status()
	        return resp.json()

	    def _is_metadata_blob(self, obj: Dict[str, Any]) -> bool:
	        """Return True when ``obj`` looks like a chunk carrying no text.

	        That is: it has ``id`` and ``object``, or ``x_groq``, or
	        ``tool_calls``, or a dict ``usage``; or its first choice has neither
	        delta content nor message content.
	        """
	        if not isinstance(obj, dict):
	            return False
	        if ("id" in obj and "object" in obj) or "x_groq" in obj or "tool_calls" in obj or ("usage" in obj and isinstance(obj.get("usage"), dict)):
	            return True
	        if obj.get("choices") and isinstance(obj.get("choices"), list):
	            try:
	                first = obj["choices"][0]
	                delta = first.get("delta", {}) if isinstance(first, dict) else {}
	                content = delta.get("content") or (first.get("message", {}) or {}).get("content")
	                if not content:
	                    return True
	            except (AttributeError, TypeError, LookupError):
	                # Unexpected shapes are treated as "not metadata".
	                return False
	        return False

	    @staticmethod
	    def _parse_sse_line(raw_line: bytes) -> Optional[Dict[str, Any]]:
	        """Decode one stream line into its JSON object, or None to skip it.

	        Only lines starting with ``data:`` are considered. Blank payloads,
	        the ``[DONE]`` sentinel, invalid JSON and non-object JSON all yield
	        None.
	        """
	        text = raw_line.decode("utf-8", errors="replace").strip()
	        if not text.startswith("data:"):
	            return None
	        # Strip only the leading field name: the payload itself may
	        # legitimately contain the text "data: ".
	        body = text[len("data:"):].strip()
	        if not body or body == "[DONE]":
	            return None
	        try:
	            parsed = json.loads(body)
	        except ValueError:
	            return None
	        return parsed if isinstance(parsed, dict) else None

	    @staticmethod
	    def _tokens_from_payload(data: Dict[str, Any], in_reasoning: bool) -> tuple:
	        """Turn one decoded chunk into output tokens.

	        Args:
	            data: Decoded chunk; either ``{"choices": [{"delta": {...}}]}``
	                or ``{"response": ...}``.
	            in_reasoning: Whether a ``<think>`` section is currently open.

	        Returns:
	            ``(tokens, in_reasoning)``: the tokens to emit, in order, and
	            the updated reasoning state. Reasoning text is wrapped in
	            ``<think>`` ... ``</think>``; the closing tag is emitted right
	            before the first content token that follows reasoning.
	        """
	        tokens: List[Any] = []

	        delta: Dict[str, Any] = {}
	        choices = data.get("choices")
	        if isinstance(choices, list) and choices and isinstance(choices[0], dict):
	            candidate = choices[0].get("delta")
	            if isinstance(candidate, dict):
	                delta = candidate

	        reasoning = delta.get("reasoning")
	        if reasoning:
	            if not in_reasoning:
	                tokens.append(AsyncUpstreamClient._THINK_OPEN)
	                in_reasoning = True
	            tokens.append(reasoning)

	        response = data.get("response")
	        if response is not None and response != "":
	            tokens.append(response)
	            return tokens, in_reasoning

	        # A null/empty "reasoning" key must not hide real content, and an
	        # empty content string must not close the think section early.
	        content = delta.get("content")
	        if content is not None and content != "":
	            if in_reasoning:
	                tokens.append(AsyncUpstreamClient._THINK_CLOSE)
	                in_reasoning = False
	            tokens.append(content)
	        return tokens, in_reasoning

	    @staticmethod
	    def _tokens_from_line(raw_line: bytes, in_reasoning: bool) -> tuple:
	        """Parse one raw line and return ``(tokens, in_reasoning)`` for it."""
	        data = AsyncUpstreamClient._parse_sse_line(raw_line)
	        if data is None:
	            return [], in_reasoning
	        return AsyncUpstreamClient._tokens_from_payload(data, in_reasoning)

	    async def stream_post(self, url: str, headers: Dict[str, str], payload: Any) -> AsyncGenerator[bytes, None]:
	        """POST ``payload`` and yield the reply as newline-delimited JSON.

	        Each yielded item is ``{"response": <token>}`` encoded as JSON and
	        followed by ``\\n``.

	        Lines are handled as soon as they are complete instead of waiting
	        for a blank-line separator, so upstreams that end records with a
	        single newline are forwarded without delay. A final line lacking a
	        trailing newline is still processed, and an open ``<think>`` section
	        is closed when the stream ends.

	        Raises:
	            httpx.HTTPStatusError: for 4xx/5xx responses.
	        """
	        in_reasoning = False
	        pending = b""
	        async with self._client.stream("POST", url, headers=headers, json=payload) as resp:
	            resp.raise_for_status()
	            async for chunk in resp.aiter_bytes(chunk_size=Config.CHUNK_SIZE):
	                if not chunk:
	                    continue
	                # Everything before the last newline is complete; the rest
	                # waits for the next chunk (this also keeps multi-byte
	                # characters intact, since only whole lines are decoded).
	                *lines, pending = (pending + chunk).split(b"\n")
	                for raw_line in lines:
	                    tokens, in_reasoning = self._tokens_from_line(raw_line, in_reasoning)
	                    for token in tokens:
	                        yield orjson.dumps({"response": token}) + b"\n"

	        trailing, in_reasoning = self._tokens_from_line(pending, in_reasoning)
	        if in_reasoning:
	            trailing.append(self._THINK_CLOSE)
	        for token in trailing:
	            yield orjson.dumps({"response": token}) + b"\n"

	class ChatService:
	    """Turns a ``ChatRequest`` into an upstream call for its provider."""

	    def __init__(self, client: Optional[AsyncUpstreamClient] = None):
	        # A fresh upstream client is created when none is supplied.
	        self.client = client if client else AsyncUpstreamClient()

	    def _get_provider_config(self, provider_name: str) -> Dict[str, Any]:
	        """Look up a provider entry, falling back to the default provider."""
	        wanted = provider_name.upper()
	        if wanted in PROVIDERS:
	            return PROVIDERS[wanted]
	        return PROVIDERS.get(Config.DEFAULT_PROVIDER, {})

	    def build_request_for_provider(self, req: ChatRequest) -> Dict[str, Any]:
	        """Render the provider's URL, headers and payload for ``req``.

	        When the request names no model, the provider's ``DEFAULT_MODEL`` is
	        used, then ``Config.DEFAULT_MODEL``.

	        Returns:
	            A dict with the keys ``url``, ``headers`` and ``payload``.
	        """
	        provider_cfg = self._get_provider_config(req.provider)
	        values = build_values_from_request(req)
	        if not values.get("model"):
	            values["model"] = provider_cfg.get("DEFAULT_MODEL") or Config.DEFAULT_MODEL
	        return {
	            "url": apply_values_to_template(provider_cfg.get("BASE_URL", ""), values),
	            "headers": self.client._prepare_headers(provider_cfg.get("HEADERS", {}), values),
	            "payload": apply_values_to_template(provider_cfg.get("PAYLOAD", {}), values),
	        }

	    async def generate(self, req: ChatRequest) -> str:
	        """Perform a non-streaming call and return the reply text.

	        The text is taken from ``choices[0].message.content``; failing that,
	        from a top-level ``response`` key; failing that, the whole upstream
	        result is returned as JSON text.
	        """
	        call = self.build_request_for_provider(req)
	        result = await self.client.post_json(call["url"], call["headers"], call["payload"])
	        try:
	            return result["choices"][0]["message"]["content"]
	        except Exception:
	            pass
	        if isinstance(result, dict) and "response" in result:
	            return result["response"]
	        return orjson.dumps(result).decode("utf-8")

	    async def generate_stream(self, req: ChatRequest) -> AsyncGenerator[bytes, None]:
	        """Perform a streaming call and relay each encoded token record."""
	        call = self.build_request_for_provider(req)
	        upstream = self.client.stream_post(call["url"], call["headers"], call["payload"])
	        async for record in upstream:
	            yield record

	# Application object and the single service instance shared by all routes.
	app = FastAPI(title="High-speed Chat Proxy")
	service = ChatService()

	@app.on_event("shutdown")
	async def shutdown_event():
	    """Close the shared upstream HTTP client when the app shuts down."""
	    try:
	        upstream = service.client
	        await upstream.close()
	    except Exception:
	        # Best effort: a failure while closing must not block shutdown.
	        return None

	@app.post("/v1/chat/completions")
	async def completions(request: Request):
	    """Proxy a chat request to the selected upstream provider.

	    The body must be a JSON object containing an API key and at least one
	    message (see ``ChatRequest.from_dict`` for accepted field names).

	    Returns:
	        A ``StreamingResponse`` with media type ``application/x-ndjson``.
	        With ``stream`` enabled, every record is sent as
	        ``data: {"response": ...}`` followed by a newline, and the stream
	        ends with ``data: [DONE]``. Otherwise a single
	        ``{"response": <text>}`` line is sent.

	    Raises:
	        HTTPException: 400 for a body that is not a JSON object or lacks
	            the key/messages; 502 when a non-streaming upstream call fails.
	    """
	    try:
	        body = await request.json()
	    except ValueError:
	        # Malformed JSON is a client error, not a server fault.
	        raise HTTPException(status_code=400, detail="request body must be valid JSON") from None
	    if not isinstance(body, dict):
	        raise HTTPException(status_code=400, detail="request body must be a JSON object")

	    req = ChatRequest.from_dict(body)
	    if not req.api_key or not req.messages:
	        raise HTTPException(status_code=400, detail="api_key and messages required")

	    response_headers = {"Cache-Control": "no-cache"}

	    if not req.stream:
	        # Call upstream BEFORE the response starts, so a failure can still
	        # be reported with a proper status code.
	        try:
	            text = await service.generate(req)
	        except (httpx.HTTPError, ValueError) as exc:
	            raise HTTPException(status_code=502, detail="upstream request failed") from exc

	        async def single():
	            yield orjson.dumps({"response": text}) + b"\n"

	        return StreamingResponse(single(), media_type="application/x-ndjson", headers=response_headers)

	    async def streamer():
	        threshold = Config.STREAM_BATCH_BYTES
	        buf = bytearray()
	        async for chunk_bytes in service.generate_stream(req):
	            if not chunk_bytes:
	                continue
	            # Prefix every record individually so that batching several
	            # records into one write keeps each line well-formed.
	            buf += b"data: "
	            buf += chunk_bytes
	            if len(buf) >= threshold:
	                yield bytes(buf)
	                buf.clear()
	        if buf:
	            yield bytes(buf)
	        yield b"data: [DONE]\n\n"

	    return StreamingResponse(streamer(), media_type="application/x-ndjson", headers=response_headers)

	@app.get("/v1/models")
	async def models():
	    """Return the model catalogue built at import time."""
	    catalogue = MODEL_NAMES
	    return {"models": catalogue}

	@app.get("/")
	async def root():
	    """Report the service name and a fixed "running" status."""
	    info = {"service": "High-speed Chat Proxy"}
	    info["status"] = "running"
	    return info