# app.py (commit 1bf613c) — Flask proxy exposing an OpenAI-style
# /v1/chat/completions API over several upstream LLM providers.
import json
from flask import Flask, request, jsonify, Response, stream_with_context
from dataclasses import dataclass
from typing import Any, Dict, List, Optional ,Tuple
import time
import uuid
from curl_cffi.requests import Session
def get_models():
    """Return the provider -> model catalogue served by /v1/models.

    Returns:
        dict with two keys:
            "Providers": list of provider ids ("1".."4"), matching PROVIDERS.
            "Models": mapping of provider id to a list of
                {"id": <model id>, "owned_by": <vendor>} entries.
    """
    return {
        "Providers": ["1", "2", "3", "4"],
        "Models": {
            # Provider 1: Groq-hosted models (served by GROQ()).
            "1": [
                {"id": "openai/gpt-oss-120b", "owned_by": "OpenAI"},
                {"id": "moonshotai/kimi-k2-instruct", "owned_by": "Moonshot AI"},
                {"id": "llama-3.1-8b-instant", "owned_by": "Meta"},
                {"id": "whisper-large-v3", "owned_by": "OpenAI"},
                {"id": "meta-llama/llama-4-scout-17b-16e-instruct", "owned_by": "Meta"},
                {"id": "allam-2-7b", "owned_by": "SDAIA"},
                {"id": "groq/compound", "owned_by": "Groq"},
                {"id": "llama-3.3-70b-versatile", "owned_by": "Meta"},
                {"id": "qwen/qwen3-32b", "owned_by": "Alibaba Cloud"},
                {"id": "meta-llama/llama-prompt-guard-2-22m", "owned_by": "Meta"},
                {"id": "groq/compound-mini", "owned_by": "Groq"},
                {"id": "meta-llama/llama-guard-4-12b", "owned_by": "Meta"},
                {"id": "openai/gpt-oss-20b", "owned_by": "OpenAI"},
                {"id": "openai/gpt-oss-safeguard-20b", "owned_by": "OpenAI"},
                {"id": "meta-llama/llama-4-maverick-17b-128e-instruct", "owned_by": "Meta"},
                {"id": "moonshotai/kimi-k2-instruct-0905", "owned_by": "Moonshot AI"},
            ],
            # Provider 2: hadadxyz-ai HF Space (served by Adarsh_Personal()).
            "2": [
                {"id": "zai-org/glm-4.6", "owned_by": "Zhipu AI"},
                {"id": "openai/gpt-5-nano-2025-08-07", "owned_by": "OpenAI"},
                {"id": "deepseek-ai/deepseek-v3.2-thinking", "owned_by": "DeepSeek AI"},
                {"id": "nvidia/nvidia-nemotron-3-nano-30b-a3b", "owned_by": "NVIDIA"},
                {"id": "nvidia/nvidia-nemotron-3-nano-30b-a3b-thinking", "owned_by": "NVIDIA"},
                {"id": "openai/gpt-5-mini-2025-08-07", "owned_by": "OpenAI"},
                {"id": "qwen/qwen3-vl-235b-a22b-thinking", "owned_by": "Alibaba Cloud"},
                {"id": "qwen/qwen3-vl-235b-a22b-instruct", "owned_by": "Alibaba Cloud"},
                {"id": "perplexity/sonar", "owned_by": "Perplexity"},
                {"id": "moonshotai/kimi-k2.5", "owned_by": "Moonshot AI"},
                {"id": "anthropic/claude-haiku-4-5-20251001", "owned_by": "Anthropic"},
                {"id": "google/gemini-2.5-flash-lite", "owned_by": "Google"},
                {"id": "moonshotai/kimi-k2-thinking", "owned_by": "Moonshot AI"},
                {"id": "mistralai/devstral-2-123b-instruct-2512", "owned_by": "Mistral AI"},
                {"id": "mistralai/mistral-large-3-675b-instruct-2512", "owned_by": "Mistral AI"},
                {"id": "openai/gpt-oss-safeguard-20b", "owned_by": "OpenAI"},
                {"id": "openai/gpt-oss-120b", "owned_by": "OpenAI"},
            ],
            # Provider 3: teichai qwen HF Space (served by QWEN()).
            "3": [
                {"id": "qwen3-4b-thinking-2507", "owned_by": "Alibaba Cloud"},
            ],
            # Provider 4: llmchat.in routing table (served by FREEGPT()).
            # BUG FIX: four entries were listed twice; duplicates removed.
            "4": [
                {"id": "meta/llama-3.1-70b-instruct", "owned_by": "Meta"},
                {"id": "qwen/qwen2.5-coder-32b-instruct", "owned_by": "Alibaba Cloud"},
                {"id": "deepseek-ai/deepseek-r1-distill-qwen-32b", "owned_by": "DeepSeek AI"},
                {"id": "meta/llama-4-scout-17b-16e-instruct", "owned_by": "Meta"},
                {"id": "google/gemma-3-12b-it", "owned_by": "Google"},
                {"id": "mistralai/mistral-small-3.1-24b-instruct", "owned_by": "Mistral AI"},
                {"id": "meta/llama-3.3-70b-instruct-fp8-fast", "owned_by": "Meta"},
                {"id": "meta/llama-3.2-3b-instruct", "owned_by": "Meta"},
                {"id": "meta/llama-3.2-1b-instruct", "owned_by": "Meta"},
                {"id": "meta-llama/meta-llama-3-8b-instruct", "owned_by": "Meta"},
                {"id": "meta/llama-3-8b-instruct", "owned_by": "Meta"},
                {"id": "meta/llama-2-7b-chat-int8", "owned_by": "Meta"},
                {"id": "meta/llama-2-7b-chat-fp16", "owned_by": "Meta"},
                {"id": "meta/llama-3-8b-instruct-awq", "owned_by": "Meta"},
                {"id": "google/gemma-7b-it", "owned_by": "Google"},
                {"id": "google/gemma-2b-it-lora", "owned_by": "Google"},
                {"id": "mistral/mistral-7b-instruct-v0.2", "owned_by": "Mistral AI"},
                {"id": "mistral/mistral-7b-instruct-v0.2-lora", "owned_by": "Mistral AI"},
            ],
        },
    }
# Provider "4" routing table used by FREEGPT(): each model id maps to an
# upstream tag (presumably "@cf" = Cloudflare Workers AI, "@hf" = Hugging
# Face — confirm against llmchat.in) plus the max_tokens ceiling to send
# upstream (None = no explicit limit known).
# BUG FIX: four entries were listed twice; duplicates removed (FREEGPT()
# only does a first-match lookup, so behavior is unchanged).
M3 = [
    {"tag": "@cf", "model": "meta/llama-3.1-70b-instruct", "max_tokens": 8192},
    {"tag": "@cf", "model": "qwen/qwen2.5-coder-32b-instruct", "max_tokens": 8192},
    {"tag": "@cf", "model": "deepseek-ai/deepseek-r1-distill-qwen-32b", "max_tokens": 40960},
    {"tag": "@cf", "model": "meta/llama-4-scout-17b-16e-instruct", "max_tokens": 40960},
    {"tag": "@cf", "model": "google/gemma-3-12b-it", "max_tokens": 40960},
    {"tag": "@cf", "model": "mistralai/mistral-small-3.1-24b-instruct", "max_tokens": 40960},
    {"tag": "@cf", "model": "meta/llama-3.3-70b-instruct-fp8-fast", "max_tokens": 8192},
    {"tag": "@cf", "model": "meta/llama-3.2-3b-instruct", "max_tokens": 40960},
    {"tag": "@cf", "model": "meta/llama-3.2-1b-instruct", "max_tokens": 40960},
    {"tag": "@hf", "model": "meta-llama/meta-llama-3-8b-instruct", "max_tokens": 4391},
    {"tag": "@cf", "model": "meta/llama-3-8b-instruct", "max_tokens": 4391},
    {"tag": "@cf", "model": "meta/llama-2-7b-chat-int8", "max_tokens": 4391},
    {"tag": "@cf", "model": "meta/llama-2-7b-chat-fp16", "max_tokens": None},
    {"tag": "@cf", "model": "meta/llama-3-8b-instruct-awq", "max_tokens": 4391},
    {"tag": "@hf", "model": "google/gemma-7b-it", "max_tokens": None},
    {"tag": "@cf", "model": "google/gemma-2b-it-lora", "max_tokens": 4391},
    {"tag": "@hf", "model": "mistral/mistral-7b-instruct-v0.2", "max_tokens": 8192},
    {"tag": "@cf", "model": "mistral/mistral-7b-instruct-v0.2-lora", "max_tokens": 8192},
]
def FREEGPT(
    RQ: Any,
    api_key: str,
    messages: List[Dict],
    model: str = "deepseek-ai/deepseek-r1-distill-qwen-32b",
    max_token: int = 40960,
    stream: bool = True,
    timeout: Optional[float] = None,
):
    """Stream completion text from llmchat.in (provider "4").

    Resolves `model` through the M3 routing table (falling back to
    "@cf/meta/llama-3.2-1b-instruct" for unknown ids) and yields the
    "response" field of each SSE data frame.  `api_key` is accepted for
    interface parity with the other providers but is not used here.
    Yields "AN ERROR OCCURRED" on a non-200 upstream status; network
    failures end the stream silently (best-effort, as before).
    """
    entry = next((item for item in M3 if item["model"] == model), None)
    routed = f"{entry['tag']}/{entry['model']}" if entry else "@cf/meta/llama-3.2-1b-instruct"
    url = f"https://llmchat.in/inference/stream?model={routed}"
    headers = {
        "Accept": "text/event-stream,*/*",
        "Content-Type": "application/json",
        "Origin": "https://llmchat.in",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144.0.0.0 Safari/537.36",
        "Cache-Control": "no-cache",
        "Accept-Encoding": "identity",
        "cf-ray": "9cba9edd9f909aaf-SIN",
    }
    payload = {"messages": messages, "stream": stream}
    # BUG FIX: the original set "max_tokens" twice in the dict literal (the
    # M3 lookup silently overrode the caller's value). Resolve it once:
    # prefer the per-model ceiling from M3, else the caller-supplied value.
    if entry is not None and entry.get("max_tokens") is not None:
        payload["max_tokens"] = entry["max_tokens"]
    elif max_token is not None:
        payload["max_tokens"] = max_token
    try:
        resp = RQ.post(url=url, json=payload, headers=headers, timeout=timeout, stream=stream)
    except Exception:
        return  # network error: end the generator without yielding anything
    if resp.status_code != 200:
        yield "AN ERROR OCCURRED"
        return
    for raw in resp.iter_lines():
        if not raw:
            continue
        # errors="replace" never raises, so one decode path suffices.
        line = raw.decode("utf-8", errors="replace").strip()
        if not line.startswith("data:"):
            continue
        # BUG FIX: split('data: ')[1] raised IndexError on "data:" frames
        # without a space after the colon.
        try:
            data = json.loads(line[5:].lstrip())
        except ValueError:
            continue
        chunk = data.get("response")
        if chunk:
            yield chunk
# Provider "3" model ids (served by the QWEN() backend).
M2 = ["qwen3-4b-thinking-2507"]
def QWEN(
    RQ: Any,
    api_key: str,
    messages: List[Dict],
    model: str = "NONE",
    max_token: int = 40960,
    stream: bool = True,
    timeout: Optional[float] = None,
):
    """Stream a qwen3-4b-thinking completion from the teichai HF Space
    (provider "3").

    Yields plain-text chunks; reasoning events are wrapped in a
    "<think>\\n...\\n</think>\\n\\n" envelope.  `api_key`, `model` and
    `max_token` are accepted for interface parity but unused here.
    """

    def _events(messages: List[Dict]):
        """POST to the Space and yield each parsed SSE JSON event."""
        api_url = "https://teichai-qwen3-4b-thinking-2507-claude-4-5-opus.hf.space/api/chat"
        payload = {
            "messages": messages,
            "searchEnabled": False,
        }
        headers = {"Accept": "*/*", "Content-Type": "application/json", "Origin": "https://teichai-qwen3-4b-thinking-2507-claude-4-5-opus.hf.space", "Referer": "https://teichai-qwen3-4b-thinking-2507-claude-4-5-opus.hf.space/", "User-Agent": "python-requests/2.x"}
        resp = RQ.post(api_url, headers=headers, json=payload, stream=stream, timeout=timeout)
        buffer: List[str] = []
        for raw in resp.iter_lines():
            if raw is None:
                continue
            line = raw.decode("utf-8", errors="replace").strip()
            if line:
                # An SSE event may span several "data:" lines; buffer until
                # the blank line that terminates the event.
                if line.startswith("data:"):
                    buffer.append(line[len("data:"):].lstrip())
                continue
            if not buffer:
                continue
            data_text = "".join(buffer)
            buffer = []
            if data_text == "[DONE]":
                break
            try:
                yield json.loads(data_text)
            except json.JSONDecodeError:
                continue

    in_think = False
    for event in _events(messages):
        # BUG FIX: the original indexed event["type"]/event["content"]
        # directly and crashed with KeyError on malformed events.
        if event.get("type") == "reasoning":
            if not in_think:
                in_think = True
                yield "<think>\n"
            yield event.get("content", "")
        else:
            if in_think:
                in_think = False
                yield "\n</think>\n\n"
            content = event.get("content")
            if content is not None:
                yield content
class CONV:
    """Converter between flat OpenAI-style messages ({"role", "content"})
    and the parts-based message format used by the Adarsh_Personal upstream
    ({"id", "role", "parts": [...], "metadata"})."""

    def __init__(self, default_system: str = ""):
        # System prompt inserted by alpaca_to_msg when no override is given.
        self.default_system = default_system

    @staticmethod
    def _make_id() -> str:
        """Return a short random message id (20 hex characters)."""
        return uuid.uuid4().hex[:20]

    def alpaca_to_msg(
        self,
        alpaca_obj: List[Dict[str, Any]],  # FIX: was annotated Dict but is iterated as a list
        insert_system: bool = True,
        system_override: Optional[str] = None,
        skip_empty: bool = True,
    ) -> Tuple[List[Dict[str, str]], float]:
        """Flatten parts-based messages into {"role", "content"} messages.

        Only parts with type == "text" are kept; multiple text parts within
        one message are joined with blank lines.  Unknown roles are demoted
        to "user".  Returns (messages, elapsed_seconds).
        """
        t0 = time.perf_counter()
        out: List[Dict[str, str]] = []
        sys_text = system_override if system_override is not None else self.default_system
        if insert_system and sys_text is not None:
            out.append({"role": "system", "content": sys_text})
        append = out.append  # micro-optimization: bind the method once
        for m in alpaca_obj:
            role = (m.get("role") or "").strip().lower()
            if role not in ("user", "assistant", "system"):
                role = "user"
            texts: List[str] = []
            for p in m.get("parts") or []:
                # Keep only non-empty textual parts, in order.
                if isinstance(p, dict) and p.get("type") == "text":
                    txt = p.get("text", "")
                    if isinstance(txt, str) and txt:
                        texts.append(txt.rstrip())  # trim trailing whitespace only
            if not texts and skip_empty:
                continue
            if texts:
                append({"role": role, "content": "\n\n".join(texts)})
            else:
                # skip_empty=False: keep an empty message to preserve the role.
                append({"role": role, "content": ""})
        return out, time.perf_counter() - t0

    def msg_to_alpaca(
        self,
        msg_list: List[Dict[str, Any]],
        include_step_start: bool = True,
        assistant_state_done: bool = True,
        preserve_ids: bool = False,
        skip_empty_text_parts: bool = False,
    ) -> Tuple[List[Dict[str, Any]], float]:  # FIX: was wrongly annotated Dict[str, List[...]]
        """Expand {"role", "content"} messages into parts-based messages.

        Assistant messages get a leading {"type": "step-start"} part when
        `include_step_start`, and their text part gets state="done" when
        `assistant_state_done`.  Non-dict entries become user messages via
        str().  Returns (messages, elapsed_seconds).
        """
        t0 = time.perf_counter()
        out_messages: List[Dict[str, Any]] = []
        for entry in msg_list:
            if not isinstance(entry, dict):
                role, content, entry_id = "user", str(entry), None
            else:
                role = (entry.get("role") or "user").strip().lower()
                content = entry.get("content", "")
                entry_id = entry.get("id") if preserve_ids else None
            if role not in ("user", "assistant"):
                role = "user"
            parts: List[Dict[str, Any]] = []
            if role == "assistant" and include_step_start:
                parts.append({"type": "step-start"})
            # Only emit the text part when content is a non-empty string
            # (or skip_empty_text_parts is False).
            if isinstance(content, str):
                if not skip_empty_text_parts or content.strip() != "":
                    text_part: Dict[str, Any] = {"type": "text", "text": content}
                    if role == "assistant" and assistant_state_done:
                        text_part["state"] = "done"
                    parts.append(text_part)
            out_messages.append({
                # Preserve a non-empty string id when asked to; otherwise mint one.
                "id": entry_id if (isinstance(entry_id, str) and entry_id != "") else self._make_id(),
                "role": role,
                "parts": parts,
                "metadata": {"custom": {}},
            })
        return out_messages, time.perf_counter() - t0
# Provider "2" model ids (served by Adarsh_Personal()).
# BUG FIX: the original was missing the commas after "kimi-k2-thinking" and
# "devstral-2-123b-instruct-2512", so Python's implicit string concatenation
# silently merged three ids into one bogus entry.
M1 = [
    "zai-org/glm-4.6",
    "openai/gpt-5-nano-2025-08-07",
    "deepseek-ai/deepseek-v3.2-thinking",
    "nvidia/nvidia-nemotron-3-nano-30b-a3b",
    "nvidia/nvidia-nemotron-3-nano-30b-a3b-thinking",
    "openai/gpt-5-mini-2025-08-07",
    "qwen/qwen3-vl-235b-a22b-thinking",
    "qwen/qwen3-vl-235b-a22b-instruct",
    "perplexity/sonar",
    "moonshotai/kimi-k2.5",
    "anthropic/claude-haiku-4-5-20251001",  # deprecating upstream
    "google/gemini-2.5-flash-lite",
    "moonshotai/kimi-k2-thinking",
    "mistralai/devstral-2-123b-instruct-2512",
    "mistralai/mistral-large-3-675b-instruct-2512",
    "openai/gpt-oss-safeguard-20b",
    "openai/gpt-oss-120b",
]
def Adarsh_Personal(
    RQ: Any,
    api_key: str,
    messages: List[Dict],
    model: str = "deepseek-ai/deepseek-r1-distill-qwen-32b",
    max_token: int = 40960,
    stream: bool = True,
    timeout: Optional[float] = None,
):
    """Stream text from the hadadxyz-ai HF Space (provider "2").

    Messages are converted to the Space's parts format via CONV, and
    "reasoning-delta" events are wrapped in "<think>\\n...\\n</think>\\n".
    `api_key` and `max_token` are accepted for interface parity but unused.
    Yields nothing on a non-200 upstream status (as before).
    """
    url = "https://hadadxyz-ai.hf.space/api/mz1a85y5n80zy5127hgsba5f3a9c2d1Np0x300vcgduqxb7ep084fygd016c9a2d16fa8b3c41gut432pvjctr75hhspjae25d6f7a8b9c0d1e2pjf43v16f3a4b5c6dd7e8fba2bdx9a0b6dv1c2d7e2b4c9f83d6a4f1bb6c152f9pe3c7a88qv5d91f3c2b765g134bp9a41ne4yx4b3vda8w074"
    new_msgs, _ = CONV().msg_to_alpaca(messages, include_step_start=True, assistant_state_done=True)
    payload = {
        "tools": {},
        "modelId": model,
        "sessionId": "sess_7ef524b9_mlfe4ped",
        "clientId": "7ef524b98a963b507ec9f4000fdea38c-mlfe4pea",
        "requestId": "req_7ef524b9_mlfg1cpq_jjxb7p",
        "clientIp": "122.161.52.54",
        "realIp": "122.161.52.54",
        "forwardedFor": "122.161.52.54",
        "userAgent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144.0.0.0 Safari/537.36",
        "id": "DEFAULT_THREAD_ID",
        "messages": new_msgs,
        "trigger": "submit-message",
        "metadata": {},
    }
    headers = {
        "Accept": "text/event-stream, */*",
        "Content-Type": "application/json",
        "Origin": "https://hadadxyz-ai.hf.space",
        "User-Agent": payload["userAgent"],
        "Cache-Control": "no-cache",
        "Accept-Encoding": "identity",
        "x-turnstile-token": "mlfe5357-zq9depfzhpb-e18cbvzrpid",
        "x-turnstile-verified": "true",
    }
    resp = RQ.post(url, json=payload, headers=headers, stream=stream, timeout=timeout)
    if resp.status_code != 200:
        return
    in_think = False
    for raw in resp.iter_lines():
        if not raw:
            continue
        line = raw.decode("utf-8", errors="replace").strip()
        if not line.startswith("data:"):
            continue
        # BUG FIX: split('data: ')[1] raised IndexError on "data:" frames
        # without a space after the colon.
        try:
            data = json.loads(line[5:].lstrip())
        except ValueError:
            continue
        dtype = data.get("type")
        if dtype == "reasoning-delta":
            if not in_think:
                in_think = True
                yield "<think>\n"
            delta = data.get("delta")
            if delta is not None:
                yield delta
        elif dtype == "text-delta":
            if in_think:
                in_think = False
                yield "\n</think>\n"
            delta = data.get("delta")
            if delta is not None:
                yield delta
import uuid
def GROQ(
    RQ: Any,
    api_key: str,
    messages: List[Dict],
    model: str = "deepseek-ai/deepseek-r1-distill-qwen-32b",
    max_token: int = 40960,
    stream: bool = True,
    timeout: Optional[float] = None,
):
    """Stream an OpenAI-style chat completion from the Groq API (provider "1").

    Reasoning deltas are wrapped in "<think>\\n...</think>\\n\\n"; content
    deltas are yielded as-is.  `max_token` is accepted for interface parity
    but not forwarded.  Yields nothing on a non-200 upstream status.
    """
    url = "https://api.groq.com/openai/v1/chat/completions"
    payload = {
        "model": model,
        "messages": messages,
        "temperature": 0.9,
        "stop": None,
        "stream": stream,
    }
    headers = {
        "Authorization": f"Bearer {api_key}", "Content-Type": "application/json"
    }
    resp = RQ.post(url, json=payload, headers=headers, stream=stream, timeout=timeout)
    if resp.status_code != 200:
        return
    in_think = False
    for raw in resp.iter_lines():
        if not raw:
            continue
        line = raw.decode("utf-8", errors="replace").strip()
        if not line.startswith("data:"):
            continue
        data_json = line[5:].lstrip()
        if data_json == "[DONE]":
            continue
        try:
            data = json.loads(data_json)
        except ValueError:
            # BUG FIX: the original fell through after a parse failure and
            # reused the previous iteration's `data` (NameError on the very
            # first frame). Skip unparseable frames instead.
            continue
        delta = data.get("choices", [{}])[0].get("delta", {})
        reasoning = delta.get("reasoning")
        if reasoning is not None:
            # Reasoning phase: open the <think> block on the first token.
            if reasoning:
                if not in_think:
                    in_think = True
                    yield "<think>\n"
                yield reasoning
        else:
            if in_think:
                in_think = False
                yield "</think>\n\n"
            content = delta.get("content")
            if content is not None:
                yield content
# ---------------------------------------------------------------------
# App & Session
# ---------------------------------------------------------------------
app = Flask(__name__)
# Shared curl_cffi session impersonating a Chrome browser for all upstreams.
RQ = Session(impersonate="chrome110")


class Config:
    """Service-wide defaults applied when a request omits a field."""

    DEFAULT_PROVIDER = "1"
    DEFAULT_MODEL = "llama-3.3-70b-versatile"
    DEFAULT_MAX_TOKENS = 512
    DEFAULT_TEMPERATURE = 0.7
    TIMEOUT = 30.0
    STREAM = True


# Provider id -> backend generator plus its (informational) model list.
PROVIDERS: Dict[str, Dict[str, Any]] = {
    "1": {"func": GROQ, "models": None},
    "2": {"func": Adarsh_Personal, "models": M1},
    "3": {"func": QWEN, "models": M2},
    "4": {"func": FREEGPT, "models": M3},
}
# ---------------------------------------------------------------------
# Request Schema
# ---------------------------------------------------------------------
@dataclass
class ChatRequest:
    """Normalized chat-completion request parsed from the JSON body."""

    api_key: str
    messages: List[Dict[str, Any]]
    model: str
    provider: str
    max_tokens: int
    temperature: float
    stream: bool

    @classmethod
    def from_dict(cls, payload: Dict[str, Any]) -> "ChatRequest":
        """Build a ChatRequest from a raw payload, accepting the legacy
        aliases (key/apikey, message/msgs, model_name) and filling defaults
        from Config."""
        api_key = payload.get("api_key") or payload.get("key") or payload.get("apikey")
        messages = payload.get("messages") or payload.get("message") or payload.get("msgs") or []
        if isinstance(messages, dict):
            # A single message object is accepted and wrapped in a list.
            messages = [messages]
        return cls(
            api_key=api_key,
            messages=messages,
            model=payload.get("model") or payload.get("model_name") or Config.DEFAULT_MODEL,
            provider=str(payload.get("provider", Config.DEFAULT_PROVIDER)),
            max_tokens=int(payload.get("max_tokens", Config.DEFAULT_MAX_TOKENS)),
            temperature=float(payload.get("temperature", Config.DEFAULT_TEMPERATURE)),
            stream=bool(payload.get("stream", Config.STREAM)),
        )
# ---------------------------------------------------------------------
# Streaming Generator
# ---------------------------------------------------------------------
def stream_chat(req: ChatRequest):
    """Yield SSE frames for the given request.

    Frames are `data: {"response": ...}` per chunk, a final `data: [DONE]`,
    and `data: {"error": ...}` on failure.
    """
    provider = PROVIDERS.get(req.provider)
    if not provider:
        # BUG FIX: the original yielded bare JSON here, unlike every other
        # frame — streaming clients could not parse it as an SSE event.
        yield f"data: {json.dumps({'error': 'Invalid provider'})}\n\n"
        return
    try:
        for chunk in provider["func"](
            RQ,
            req.api_key,
            req.messages,
            req.model,
            req.max_tokens,
            req.stream,
            Config.TIMEOUT,
        ):
            if chunk:
                yield f"data: {json.dumps({'response': chunk})}\n\n"
        yield "data: [DONE]\n\n"
    except Exception as e:
        yield f"data: {json.dumps({'error': str(e)})}\n\n"
# ---------------------------------------------------------------------
# Routes
# ---------------------------------------------------------------------
@app.route("/v1/chat/completions", methods=["POST"])
def generate():
    """Chat-completions endpoint.

    Streams SSE when `stream` is true; otherwise drains the stream and
    returns one aggregated JSON response.
    """
    payload = request.get_json(silent=True)
    if not payload:
        return jsonify({"error": "Invalid JSON body"}), 400
    req = ChatRequest.from_dict(payload)
    if not req.api_key or not req.messages:
        return jsonify({"error": "api_key and messages are required"}), 400
    if req.stream:
        return Response(
            stream_with_context(stream_chat(req)),
            content_type="text/event-stream",
            headers={
                "Cache-Control": "no-cache",
                "X-Accel-Buffering": "no",  # disable proxy buffering for SSE
            },
        )
    # Non-stream fallback: collect the response chunks from the SSE frames.
    final = []
    for part in stream_chat(req):
        if not part.startswith("data:"):
            continue
        body = part[5:].strip()
        if body == "[DONE]":
            # BUG FIX: the original json.loads'd the [DONE] frame and
            # crashed with JSONDecodeError (HTTP 500).
            continue
        try:
            data = json.loads(body)
        except ValueError:
            continue
        if "response" in data:
            final.append(data["response"])
    return jsonify({"response": "".join(final)})
@app.route("/v1/models", methods=["GET"])
def info():
    """Expose the static provider/model catalogue."""
    return jsonify({"models": get_models()})
# ---------------------------------------------------------------------
# Run
# ---------------------------------------------------------------------
if __name__ == "__main__":
    # Development server only; run behind a WSGI server in production.
    app.run()