import os
import json
from typing import Optional

import gradio as gr
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from openai import OpenAI

# ---------------------------
# 1. Setup Hugging Face Router client
# ---------------------------
# NOTE(review): HF_TOKEN may be None if the env var is unset; the OpenAI client
# will then fail on first request rather than at import time.
HF_TOKEN = os.environ.get("HF_TOKEN")

client = OpenAI(
    base_url="https://router.huggingface.co/v1",
    api_key=HF_TOKEN,
)

# ---------------------------
# 2. System Prompt
# ---------------------------
SYSTEM_PROMPT = (
    "Your name is YAH Assistant. "
    "If the user asks your name, always answer: 'My name is YAH Assistant.' "
    "Your tone is precise, formal, and concise. "
    # BUG FIX: the original fragment had no trailing space, so the prompt sent
    # to the model read "...introducing yourself.Avoid slang...".
    "Only mention your name when specifically asked or when introducing yourself. "
    "Avoid slang. Stay helpful and direct."
)

MODEL_ID = "Qwen/Qwen3-Next-80B-A3B-Instruct:novita"


# ---------------------------
# 3. Pydantic models for API
# ---------------------------
class ChatMessage(BaseModel):
    """A single OpenAI-style chat message (role + content)."""
    role: str
    content: str


class ChatRequest(BaseModel):
    """Body for POST /chat/: the new message plus prior (user, bot) turns."""
    message: str
    history: list[tuple[str, str]] = []


class ChatResponse(BaseModel):
    """Body returned by POST /chat/; `error` is populated only on failure."""
    response: str
    # BUG FIX: was `error: str = None` — an implicit-Optional annotation that
    # mistypes the field (and is rejected by Pydantic v2). Declare it optional.
    error: Optional[str] = None


# ---------------------------
# 4. Core chat function
# ---------------------------
def chat_fn(message, history):
    """Send `message` (with prior `history`) to the model and return its reply.

    Args:
        message: The new user message text.
        history: List of (user, assistant) pairs from earlier turns.

    Returns:
        The assistant's reply string; on any API/network failure an
        "Error: ..." string is returned in-band instead of raising.
    """
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]

    # Replay prior turns so the model sees the whole conversation.
    for user_msg, bot_msg in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": bot_msg})

    # Add new user message
    messages.append({"role": "user", "content": message})

    try:
        completion = client.chat.completions.create(
            model=MODEL_ID,
            messages=messages,
        )
        reply = completion.choices[0].message.content
    except Exception as e:
        # Report failures to the caller/UI instead of crashing the app.
        reply = f"Error: {str(e)}"

    return reply
# ---------------------------
# 5. API Endpoint Function
# ---------------------------
def chat_api_endpoint(chat_request: ChatRequest) -> ChatResponse:
    """Handle one API chat turn.

    Errors are reported in-band via ChatResponse.error (HTTP 200), matching
    the existing client contract — do not convert to HTTPException without
    coordinating with API consumers.
    """
    try:
        reply = chat_fn(chat_request.message, chat_request.history)
        return ChatResponse(response=reply)
    except Exception as e:
        return ChatResponse(response="", error=str(e))


# ---------------------------
# 6. FastAPI App Setup
# ---------------------------
app = FastAPI(title="YAH Assistant API")


# Add the API endpoint
@app.post("/chat/")
async def chat(request: ChatRequest):
    """POST /chat/ — forward the request to the shared chat handler."""
    return chat_api_endpoint(request)


# Health check endpoint
@app.get("/health")
async def health_check():
    """Liveness probe; also reports which model is configured."""
    return {"status": "healthy", "model": MODEL_ID}


# ---------------------------
# 7. Gradio UI (keeps the original interface)
# ---------------------------
with gr.Blocks(title="YAH Assistant") as demo:
    gr.Markdown(
        """
        ## YAH Assistant

        Large-model chat interface powered by Hugging Face Router.

        ### API Usage
        You can also interact with this assistant via API:

        - **Endpoint:** `POST /chat/`
        - **Body:**
        ```json
        {
            "message": "Your message here",
            "history": [["Hello", "Hi there!"], ["How are you?", "I'm good!"]]
        }
        ```
        """
    )
    chatbot = gr.Chatbot(height=500)
    msg = gr.Textbox(label="Message")

    def respond(message, chat_history):
        """Gradio callback: append the new (user, bot) turn, clear the textbox."""
        reply = chat_fn(message, chat_history)
        chat_history.append([message, reply])
        return "", chat_history

    msg.submit(respond, [msg, chatbot], [msg, chatbot])

# ---------------------------
# 8. Mount Gradio app to FastAPI
# ---------------------------
# Gradio serves the UI at "/" while the FastAPI routes above remain available.
app = gr.mount_gradio_app(app, demo, path="/")

# Launch instructions
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)