"""OpenAI-style Flask proxy that fans chat requests out to several LLM backends.

Providers (see ``Config.PROVIDERS``):
    "1" -> GROQ            (api.groq.com, needs a real API key)
    "2" -> Adarsh_Personal (hadadxyz-ai.hf.space relay)
    "3" -> QWEN            (teichai qwen3-4b HF space)
    "4" -> FREEGPT         (llmchat.in Cloudflare/HF workers)

Each provider function is a generator yielding plain-text chunks; the Flask
layer re-wraps them as SSE ``data: {"response": ...}`` events.
"""

import json
import time
import uuid
from dataclasses import dataclass
from typing import Any, Dict, List, Optional, Tuple

from curl_cffi.requests import Session
from flask import Flask, Response, jsonify, request, stream_with_context


def get_models():
    """Return the static provider -> model catalogue served by /v1/models."""
    return {
        "Providers": ["1", "2", "3", "4"],
        "Models": {
            "1": [
                {"id": "openai/gpt-oss-120b", "owned_by": "OpenAI"},
                {"id": "moonshotai/kimi-k2-instruct", "owned_by": "Moonshot AI"},
                {"id": "llama-3.1-8b-instant", "owned_by": "Meta"},
                {"id": "whisper-large-v3", "owned_by": "OpenAI"},
                {"id": "meta-llama/llama-4-scout-17b-16e-instruct", "owned_by": "Meta"},
                {"id": "allam-2-7b", "owned_by": "SDAIA"},
                {"id": "groq/compound", "owned_by": "Groq"},
                {"id": "llama-3.3-70b-versatile", "owned_by": "Meta"},
                {"id": "qwen/qwen3-32b", "owned_by": "Alibaba Cloud"},
                {"id": "meta-llama/llama-prompt-guard-2-22m", "owned_by": "Meta"},
                {"id": "groq/compound-mini", "owned_by": "Groq"},
                {"id": "meta-llama/llama-guard-4-12b", "owned_by": "Meta"},
                {"id": "openai/gpt-oss-20b", "owned_by": "OpenAI"},
                {"id": "openai/gpt-oss-safeguard-20b", "owned_by": "OpenAI"},
                {"id": "meta-llama/llama-4-maverick-17b-128e-instruct", "owned_by": "Meta"},
                {"id": "moonshotai/kimi-k2-instruct-0905", "owned_by": "Moonshot AI"},
            ],
            "2": [
                {"id": "zai-org/glm-4.6", "owned_by": "Zhipu AI"},
                {"id": "openai/gpt-5-nano-2025-08-07", "owned_by": "OpenAI"},
                {"id": "deepseek-ai/deepseek-v3.2-thinking", "owned_by": "DeepSeek AI"},
                {"id": "nvidia/nvidia-nemotron-3-nano-30b-a3b", "owned_by": "NVIDIA"},
                {"id": "nvidia/nvidia-nemotron-3-nano-30b-a3b-thinking", "owned_by": "NVIDIA"},
                {"id": "openai/gpt-5-mini-2025-08-07", "owned_by": "OpenAI"},
                {"id": "qwen/qwen3-vl-235b-a22b-thinking", "owned_by": "Alibaba Cloud"},
                {"id": "qwen/qwen3-vl-235b-a22b-instruct", "owned_by": "Alibaba Cloud"},
                {"id": "perplexity/sonar", "owned_by": "Perplexity"},
                {"id": "moonshotai/kimi-k2.5", "owned_by": "Moonshot AI"},
                {"id": "anthropic/claude-haiku-4-5-20251001", "owned_by": "Anthropic"},
                {"id": "google/gemini-2.5-flash-lite", "owned_by": "Google"},
                {"id": "moonshotai/kimi-k2-thinking", "owned_by": "Moonshot AI"},
                {"id": "mistralai/devstral-2-123b-instruct-2512", "owned_by": "Mistral AI"},
                {"id": "mistralai/mistral-large-3-675b-instruct-2512", "owned_by": "Mistral AI"},
                {"id": "openai/gpt-oss-safeguard-20b", "owned_by": "OpenAI"},
                {"id": "openai/gpt-oss-120b", "owned_by": "OpenAI"},
            ],
            "3": [
                {"id": "qwen3-4b-thinking-2507", "owned_by": "Alibaba Cloud"},
            ],
            "4": [
                {"id": "meta/llama-3.1-70b-instruct", "owned_by": "Meta"},
                {"id": "qwen/qwen2.5-coder-32b-instruct", "owned_by": "Alibaba Cloud"},
                {"id": "deepseek-ai/deepseek-r1-distill-qwen-32b", "owned_by": "DeepSeek AI"},
                {"id": "meta/llama-4-scout-17b-16e-instruct", "owned_by": "Meta"},
                {"id": "google/gemma-3-12b-it", "owned_by": "Google"},
                {"id": "mistralai/mistral-small-3.1-24b-instruct", "owned_by": "Mistral AI"},
                {"id": "meta/llama-3.3-70b-instruct-fp8-fast", "owned_by": "Meta"},
                {"id": "meta/llama-3.2-3b-instruct", "owned_by": "Meta"},
                {"id": "meta/llama-3.2-1b-instruct", "owned_by": "Meta"},
                {"id": "meta-llama/meta-llama-3-8b-instruct", "owned_by": "Meta"},
                {"id": "meta/llama-3-8b-instruct", "owned_by": "Meta"},
                {"id": "meta/llama-2-7b-chat-int8", "owned_by": "Meta"},
                {"id": "meta/llama-2-7b-chat-fp16", "owned_by": "Meta"},
                {"id": "meta/llama-3-8b-instruct-awq", "owned_by": "Meta"},
                # NOTE(review): the four entries below duplicate earlier ids —
                # preserved as-is because /v1/models is purely informational.
                {"id": "meta-llama/meta-llama-3-8b-instruct", "owned_by": "Meta"},
                {"id": "meta/llama-3-8b-instruct", "owned_by": "Meta"},
                {"id": "meta/llama-2-7b-chat-int8", "owned_by": "Meta"},
                {"id": "meta/llama-3-8b-instruct-awq", "owned_by": "Meta"},
                {"id": "google/gemma-7b-it", "owned_by": "Google"},
                {"id": "google/gemma-2b-it-lora", "owned_by": "Google"},
                {"id": "mistral/mistral-7b-instruct-v0.2", "owned_by": "Mistral AI"},
                {"id": "mistral/mistral-7b-instruct-v0.2-lora", "owned_by": "Mistral AI"},
            ],
        },
    }


# FREEGPT (llmchat.in) routing table: backend tag, model id, and the hard
# max_tokens cap the backend accepts (None = no known cap).
M3 = [
    {"tag": "@cf", "model": "meta/llama-3.1-70b-instruct", "max_tokens": 8192},
    {"tag": "@cf", "model": "qwen/qwen2.5-coder-32b-instruct", "max_tokens": 8192},
    {"tag": "@cf", "model": "deepseek-ai/deepseek-r1-distill-qwen-32b", "max_tokens": 40960},
    {"tag": "@cf", "model": "meta/llama-4-scout-17b-16e-instruct", "max_tokens": 40960},
    {"tag": "@cf", "model": "google/gemma-3-12b-it", "max_tokens": 40960},
    {"tag": "@cf", "model": "mistralai/mistral-small-3.1-24b-instruct", "max_tokens": 40960},
    {"tag": "@cf", "model": "meta/llama-3.3-70b-instruct-fp8-fast", "max_tokens": 8192},
    {"tag": "@cf", "model": "meta/llama-3.2-3b-instruct", "max_tokens": 40960},
    {"tag": "@cf", "model": "meta/llama-3.2-1b-instruct", "max_tokens": 40960},
    {"tag": "@hf", "model": "meta-llama/meta-llama-3-8b-instruct", "max_tokens": 4391},
    {"tag": "@cf", "model": "meta/llama-3-8b-instruct", "max_tokens": 4391},
    {"tag": "@cf", "model": "meta/llama-2-7b-chat-int8", "max_tokens": 4391},
    {"tag": "@cf", "model": "meta/llama-2-7b-chat-fp16", "max_tokens": None},
    {"tag": "@cf", "model": "meta/llama-3-8b-instruct-awq", "max_tokens": 4391},
    # Duplicates preserved from the original table; lookups use the first match.
    {"tag": "@hf", "model": "meta-llama/meta-llama-3-8b-instruct", "max_tokens": 4391},
    {"tag": "@cf", "model": "meta/llama-3-8b-instruct", "max_tokens": 4391},
    {"tag": "@cf", "model": "meta/llama-2-7b-chat-int8", "max_tokens": 4391},
    {"tag": "@cf", "model": "meta/llama-3-8b-instruct-awq", "max_tokens": 4391},
    {"tag": "@hf", "model": "google/gemma-7b-it", "max_tokens": None},
    {"tag": "@cf", "model": "google/gemma-2b-it-lora", "max_tokens": 4391},
    {"tag": "@hf", "model": "mistral/mistral-7b-instruct-v0.2", "max_tokens": 8192},
    {"tag": "@cf", "model": "mistral/mistral-7b-instruct-v0.2-lora", "max_tokens": 8192},
]


def FREEGPT(
    RQ: Any,
    api_key: str,
    messages: List[Dict],
    model: str = "deepseek-ai/deepseek-r1-distill-qwen-32b",
    max_token: int = 40960,
    stream: bool = True,
    timeout: Optional[float] = None,
):
    """Stream completion text from llmchat.in.

    Yields plain-text chunks (the ``response`` field of each SSE event).
    ``api_key`` is unused by this backend but kept for a uniform provider
    signature. Unknown models fall back to ``@cf/meta/llama-3.2-1b-instruct``.
    """
    md = next(
        (item["tag"] + "/" + item["model"] for item in M3 if item["model"] == model),
        "@cf/meta/llama-3.2-1b-instruct",
    )
    URL = f"https://llmchat.in/inference/stream?model={md}"
    headers = {
        "Accept": "text/event-stream,*/*",
        "Content-Type": "application/json",
        "Origin": "https://llmchat.in",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144.0.0.0 Safari/537.36",
        "Cache-Control": "no-cache",
        "Accept-Encoding": "identity",
        "cf-ray": "9cba9edd9f909aaf-SIN",
    }
    # The backend's own cap (from M3) overrides the caller's max_token; the
    # original built the same precedence with two duplicated dict keys.
    table_cap = next(
        (item["max_tokens"] for item in M3
         if item["model"] == model and item["max_tokens"] is not None),
        None,
    )
    effective_max = table_cap if table_cap is not None else max_token
    payload = {"messages": messages, "stream": stream}
    if effective_max is not None:
        payload["max_tokens"] = effective_max

    try:
        RESP = RQ.post(url=URL, json=payload, headers=headers, timeout=timeout, stream=stream)
        print(RESP.status_code)
    except Exception:
        # Best-effort backend: network failure simply ends the stream.
        return
    if RESP.status_code == 200:
        for raw in RESP.iter_lines():
            if not raw:
                continue
            # errors="replace" never raises, so no fallback decode is needed.
            line = raw.decode("utf-8", errors="replace").strip()
            if not line.startswith("data:"):
                continue
            # Slice off the prefix instead of split('data: ')[1], which raised
            # IndexError on "data:x" and mis-split payloads containing "data: ".
            data_json = line[len("data:"):].lstrip()
            try:
                data = json.loads(data_json)
            except json.JSONDecodeError:
                continue
            if isinstance(data, dict) and "response" in data:
                yield data["response"]
    else:
        yield "AN ERROR OCCURRED"


# Models served by the QWEN provider (teichai HF space).
M2 = ["qwen3-4b-thinking-2507"]


def QWEN(
    RQ: Any,
    api_key: str,
    messages: List[Dict],
    model: str = "NONE",
    max_token: int = 40960,
    stream: bool = True,
    timeout: Optional[float] = None,
):
    """Stream from the teichai qwen3-4b HF space.

    Reasoning chunks are prefixed once with "\\n"; the switch back to answer
    text inserts "\\n\\n\\n". ``api_key``/``model``/``max_token`` are ignored by
    this backend (single fixed model) but kept for signature uniformity.
    """

    def _events(rq: Any, msgs: list, timeout: Optional[float] = None):
        """Yield parsed JSON objects from the space's SSE response."""
        API_URL = "https://teichai-qwen3-4b-thinking-2507-claude-4-5-opus.hf.space/api/chat"
        payload = {"messages": msgs, "searchEnabled": False}
        headers = {
            "Accept": "*/*",
            "Content-Type": "application/json",
            "Origin": "https://teichai-qwen3-4b-thinking-2507-claude-4-5-opus.hf.space",
            "Referer": "https://teichai-qwen3-4b-thinking-2507-claude-4-5-opus.hf.space/",
            "User-Agent": "python-requests/2.x",
        }
        resp = rq.post(API_URL, headers=headers, json=payload, stream=stream, timeout=timeout)
        buffer_lines = []
        for raw in resp.iter_lines():
            if raw is None:
                continue
            # NOTE: empty bytes (b"") must fall through — a blank line is the
            # SSE event terminator that flushes the buffer below.
            line = raw.decode("utf-8", errors="replace").strip()
            if line == "":
                if not buffer_lines:
                    continue
                # Joined with "" (not "\n") to match the original parser;
                # this space emits one fragment per data: line.
                data_text = "".join(buffer_lines)
                buffer_lines = []
                if data_text == "[DONE]":
                    break
                try:
                    yield json.loads(data_text)
                except json.JSONDecodeError:
                    pass
                continue
            if line.startswith("data:"):
                buffer_lines.append(line[len("data:"):].lstrip())

    in_reasoning = False
    for obj in _events(RQ, messages, timeout=timeout):
        if obj["type"] == "reasoning":
            if not in_reasoning:
                in_reasoning = True
                yield "\n"
            yield obj["content"]
        else:
            if in_reasoning:
                in_reasoning = False
                yield "\n\n\n"
            if "content" in obj:
                yield obj["content"]


class CONV:
    """Converters between OpenAI-style message lists and the "alpaca"/parts
    format used by the hadadxyz relay (role + typed ``parts`` + metadata)."""

    def __init__(self, default_system: str = ""):
        # System prompt injected by alpaca_to_msg when no override is given.
        self.default_system = default_system

    @staticmethod
    def _make_id() -> str:
        """Random 20-hex-char message id."""
        return uuid.uuid4().hex[:20]

    def alpaca_to_msg(
        self,
        alpaca_obj: Dict[str, Any],
        insert_system: bool = True,
        system_override: Optional[str] = None,
        skip_empty: bool = True,
    ) -> Tuple[List[Dict[str, str]], float]:
        """Convert parts-format messages to flat {role, content} dicts.

        Returns (messages, elapsed_seconds). Unknown roles become "user";
        only ``{"type": "text"}`` parts are kept, joined with blank lines.
        NOTE(review): despite the Dict annotation, ``alpaca_obj`` is iterated
        directly as a sequence of message dicts — confirm against callers.
        """
        t0 = time.perf_counter()
        out: List[Dict[str, str]] = []
        sys_text = system_override if system_override is not None else self.default_system
        if insert_system and sys_text is not None:
            out.append({"role": "system", "content": sys_text})
        append = out.append  # hoisted bound method (hot loop)
        for m in alpaca_obj:
            role = (m.get("role") or "").strip().lower()
            if role not in ("user", "assistant", "system"):
                role = "user"
            parts = m.get("parts") or []
            texts: List[str] = []
            for p in parts:
                # Only non-empty string text parts count; trailing whitespace trimmed.
                if isinstance(p, dict) and p.get("type") == "text":
                    txt = p.get("text", "")
                    if isinstance(txt, str) and txt:
                        texts.append(txt.rstrip())
            if not texts and skip_empty:
                continue
            if texts:
                append({"role": role, "content": "\n\n".join(texts)})
            else:
                # skip_empty=False: keep the role with empty content.
                append({"role": role, "content": ""})
        return out, time.perf_counter() - t0

    def msg_to_alpaca(
        self,
        msg_list: List[Dict[str, Any]],
        include_step_start: bool = True,
        assistant_state_done: bool = True,
        preserve_ids: bool = False,
        skip_empty_text_parts: bool = False,
    ) -> Tuple[Dict[str, List[Dict[str, Any]]], float]:
        """Convert flat {role, content} dicts to the relay's parts format.

        Returns (messages, elapsed_seconds). Non-dict entries are coerced to
        user messages; roles other than user/assistant become "user".
        """
        t0 = time.perf_counter()
        out_messages: List[Dict[str, Any]] = []
        append = out_messages.append
        for entry in msg_list:
            if not isinstance(entry, dict):
                role, content, entry_id = "user", str(entry), None
            else:
                role = (entry.get("role") or "user").strip().lower()
                content = entry.get("content", "")
                entry_id = entry.get("id") if preserve_ids else None
            if role not in ("user", "assistant"):
                role = "user"
            parts: List[Dict[str, Any]] = []
            if role == "assistant" and include_step_start:
                parts.append({"type": "step-start"})
            if isinstance(content, str):
                if not skip_empty_text_parts or content.strip() != "":
                    text_part: Dict[str, Any] = {"type": "text", "text": content}
                    if role == "assistant" and assistant_state_done:
                        text_part["state"] = "done"
                    parts.append(text_part)
            append({
                "id": entry_id if (entry_id is not None and isinstance(entry_id, str) and entry_id != "") else self._make_id(),
                "role": role,
                "parts": parts,
                "metadata": {"custom": {}},
            })
        return out_messages, time.perf_counter() - t0


# Models served by the Adarsh_Personal provider. FIX: the original list was
# missing two commas, so "moonshotai/kimi-k2-thinking" and
# "mistralai/devstral-2-123b-instruct-2512" were silently concatenated with
# their neighbours into two bogus ids.
M1 = [
    "zai-org/glm-4.6",
    "openai/gpt-5-nano-2025-08-07",
    "deepseek-ai/deepseek-v3.2-thinking",
    "nvidia/nvidia-nemotron-3-nano-30b-a3b",
    "nvidia/nvidia-nemotron-3-nano-30b-a3b-thinking",
    "openai/gpt-5-mini-2025-08-07",
    "qwen/qwen3-vl-235b-a22b-thinking",
    "qwen/qwen3-vl-235b-a22b-instruct",
    "perplexity/sonar",
    "moonshotai/kimi-k2.5",
    "anthropic/claude-haiku-4-5-20251001",  # deprecating model
    "google/gemini-2.5-flash-lite",
    "moonshotai/kimi-k2-thinking",
    "mistralai/devstral-2-123b-instruct-2512",  # good model
    "mistralai/mistral-large-3-675b-instruct-2512",
    "openai/gpt-oss-safeguard-20b",
    "openai/gpt-oss-120b",
]


def Adarsh_Personal(
    RQ: Any,
    api_key: str,
    messages: List[Dict],
    model: str = "deepseek-ai/deepseek-r1-distill-qwen-32b",
    max_token: int = 40960,
    stream: bool = True,
    timeout: Optional[float] = None,
):
    """Stream from the hadadxyz-ai HF space relay.

    Messages are converted to the relay's parts format first. Reasoning
    deltas are prefixed once with "\\n"; the switch to answer text inserts
    "\\n\\n". ``api_key``/``max_token`` are unused by this backend.
    """
    in_reasoning = False
    URL = "https://hadadxyz-ai.hf.space/api/mz1a85y5n80zy5127hgsba5f3a9c2d1Np0x300vcgduqxb7ep084fygd016c9a2d16fa8b3c41gut432pvjctr75hhspjae25d6f7a8b9c0d1e2pjf43v16f3a4b5c6dd7e8fba2bdx9a0b6dv1c2d7e2b4c9f83d6a4f1bb6c152f9pe3c7a88qv5d91f3c2b765g134bp9a41ne4yx4b3vda8w074"
    NEW_MSGS, _elapsed = CONV().msg_to_alpaca(
        messages, include_step_start=True, assistant_state_done=True
    )
    payload = {
        "tools": {},
        "modelId": model,
        # Hard-coded session/client identity expected by the relay.
        "sessionId": "sess_7ef524b9_mlfe4ped",
        "clientId": "7ef524b98a963b507ec9f4000fdea38c-mlfe4pea",
        "requestId": "req_7ef524b9_mlfg1cpq_jjxb7p",
        "clientIp": "122.161.52.54",
        "realIp": "122.161.52.54",
        "forwardedFor": "122.161.52.54",
        "userAgent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144.0.0.0 Safari/537.36",
        "id": "DEFAULT_THREAD_ID",
        "messages": NEW_MSGS,
        "trigger": "submit-message",
        "metadata": {},
    }
    headers = {
        "Accept": "text/event-stream, */*",
        "Content-Type": "application/json",
        "Origin": "https://hadadxyz-ai.hf.space",
        "User-Agent": payload["userAgent"],
        "Cache-Control": "no-cache",
        "Accept-Encoding": "identity",
        "x-turnstile-token": "mlfe5357-zq9depfzhpb-e18cbvzrpid",
        "x-turnstile-verified": "true",
    }
    RESP = RQ.post(URL, json=payload, headers=headers, stream=stream, timeout=timeout)
    if RESP.status_code != 200:
        return
    for raw in RESP.iter_lines():
        if not raw:
            continue
        line = raw.decode("utf-8", errors="replace").strip()
        if not line.startswith("data:"):
            continue
        data_json = line[len("data:"):].lstrip()
        try:
            data = json.loads(data_json)
        except json.JSONDecodeError:
            continue
        kind = data.get("type") if isinstance(data, dict) else None
        if kind == "reasoning-delta":
            if not in_reasoning:
                in_reasoning = True
                yield "\n"
            if "delta" in data:
                yield data["delta"]
        elif kind == "text-delta":
            if in_reasoning:
                in_reasoning = False
                yield "\n\n"
            if "delta" in data:
                yield data["delta"]


def GROQ(
    RQ: Any,
    api_key: str,
    messages: List[Dict],
    model: str = "deepseek-ai/deepseek-r1-distill-qwen-32b",
    max_token: int = 40960,
    stream: bool = True,
    timeout: Optional[float] = None,
):
    """Stream an OpenAI-compatible chat completion from api.groq.com.

    Requires a real ``api_key``. ``reasoning`` deltas are prefixed once with
    "\\n"; the switch to regular content inserts "\\n\\n".
    """
    in_reasoning = False
    URL = "https://api.groq.com/openai/v1/chat/completions"
    payload = {
        "model": model,
        "messages": messages,
        "temperature": 0.9,
        "stop": None,
        "stream": stream,
    }
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    RESP = RQ.post(URL, json=payload, headers=headers, stream=stream, timeout=timeout)
    if RESP.status_code != 200:
        return
    for raw in RESP.iter_lines():
        if not raw:
            continue
        line = raw.decode("utf-8", errors="replace").strip()
        if not line.startswith("data:"):
            continue
        data_json = line[len("data:"):].lstrip()
        # Check the sentinel BEFORE json.loads; the original parsed first and,
        # on failure, fell through to reuse a stale/unbound `data` object.
        if data_json == "[DONE]":
            continue
        try:
            data = json.loads(data_json)
        except json.JSONDecodeError:
            continue
        try:
            delta = data["choices"][0]["delta"]
        except (KeyError, IndexError, TypeError):
            continue
        reasoning = delta.get("reasoning")
        if reasoning:
            if not in_reasoning:
                in_reasoning = True
                yield "\n"
            yield reasoning
        else:
            if in_reasoning:
                in_reasoning = False
                yield "\n\n"
            content = delta.get("content")
            if content is not None:
                yield content


# ---------------------------------------------------------------------
# App & Session
# ---------------------------------------------------------------------
app = Flask(__name__)
RQ = Session(impersonate="chrome110")  # shared TLS-impersonating HTTP session


# ---------------------------------------------------------------------
# Config
# ---------------------------------------------------------------------
class Config:
    """Static service defaults and the provider dispatch table."""

    DEFAULT_PROVIDER = "1"
    DEFAULT_MODEL = "llama-3.3-70b-versatile"
    DEFAULT_MAX_TOKENS = 512
    DEFAULT_TEMPERATURE = 0.7
    TIMEOUT = 30.0
    STREAM = True

    # provider id -> generator function + its model list (None = unrestricted)
    PROVIDERS: Dict[str, Dict[str, Any]] = {
        "1": {"func": GROQ, "models": None},
        "2": {"func": Adarsh_Personal, "models": M1},
        "3": {"func": QWEN, "models": M2},
        "4": {"func": FREEGPT, "models": M3},
    }


# ---------------------------------------------------------------------
# Request Schema
# ---------------------------------------------------------------------
@dataclass
class ChatRequest:
    """Normalized /v1/chat/completions request body."""

    api_key: str
    messages: List[Dict[str, Any]]
    model: str
    provider: str
    max_tokens: int
    temperature: float
    stream: bool

    @classmethod
    def from_dict(cls, payload: Dict[str, Any]) -> "ChatRequest":
        """Build a ChatRequest from a loosely-shaped JSON payload.

        Accepts several aliases (key/apikey, message/msgs, model_name) and
        wraps a single message dict in a list.
        """
        api_key = payload.get("api_key") or payload.get("key") or payload.get("apikey")
        messages = payload.get("messages") or payload.get("message") or payload.get("msgs") or []
        model = payload.get("model") or payload.get("model_name") or Config.DEFAULT_MODEL
        provider = str(payload.get("provider", Config.DEFAULT_PROVIDER))
        if isinstance(messages, dict):
            messages = [messages]
        return cls(
            api_key=api_key,
            messages=messages,
            model=model,
            provider=provider,
            max_tokens=int(payload.get("max_tokens", Config.DEFAULT_MAX_TOKENS)),
            temperature=float(payload.get("temperature", Config.DEFAULT_TEMPERATURE)),
            stream=bool(payload.get("stream", Config.STREAM)),
        )


# ---------------------------------------------------------------------
# Streaming Generator
# ---------------------------------------------------------------------
def stream_chat(req: ChatRequest):
    """Dispatch to the selected provider and re-emit its chunks as SSE lines."""
    # FIX: the original referenced a bare `PROVIDERS`, which is only defined
    # as Config.PROVIDERS — a guaranteed NameError on the first request.
    provider = Config.PROVIDERS.get(req.provider)
    if not provider:
        yield json.dumps({"error": "Invalid provider"}) + "\n"
        return
    try:
        for chunk in provider["func"](
            RQ,
            req.api_key,
            req.messages,
            req.model,
            req.max_tokens,
            req.stream,
            Config.TIMEOUT,
        ):
            if not chunk:
                continue
            yield f"data: {json.dumps({'response': chunk})}\n\n"
        yield "data: [DONE]\n\n"
    except Exception as e:
        yield f"data: {json.dumps({'error': str(e)})}\n\n"


# ---------------------------------------------------------------------
# Routes
# ---------------------------------------------------------------------
@app.route("/v1/chat/completions", methods=["POST"])
def generate():
    """Chat endpoint: SSE stream by default, joined text when stream=false."""
    payload = request.get_json(silent=True)
    if not payload:
        return jsonify({"error": "Invalid JSON body"}), 400

    req = ChatRequest.from_dict(payload)
    if not req.api_key or not req.messages:
        return jsonify({"error": "api_key and messages are required"}), 400

    if req.stream:
        return Response(
            stream_with_context(stream_chat(req)),
            content_type="text/event-stream",
            headers={
                "Cache-Control": "no-cache",
                "X-Accel-Buffering": "no",  # disable proxy buffering for SSE
            },
        )

    # Non-stream fallback: drain the generator and join the text chunks.
    final = []
    for part in stream_chat(req):
        if not part.startswith("data:"):
            continue
        body = part[len("data:"):].strip()
        # FIX: the original fed the "[DONE]" sentinel (and any malformed
        # event) straight into json.loads, turning every non-stream request
        # into an HTTP 500.
        if body == "[DONE]":
            continue
        try:
            data = json.loads(body)
        except json.JSONDecodeError:
            continue
        if "response" in data:
            final.append(data["response"])
    return jsonify({"response": "".join(final)})


@app.route("/v1/models", methods=["GET"])
def info():
    """Model catalogue endpoint."""
    return jsonify({"models": get_models()})


# ---------------------------------------------------------------------
# Run
# ---------------------------------------------------------------------
if __name__ == "__main__":
    app.run()