"""YellowLabsStudio Research Console.

A Gradio chat front-end for Hugging Face Inference providers, intended to run
as a Hugging Face Space. Requires an ``HF_TOKEN`` secret in the environment.
"""

import os
import time

import gradio as gr
from huggingface_hub import InferenceClient

APP_TITLE = "YellowLabsStudio Research Console"
APP_SUBTITLE = (
    "Independent home lab AI research. Efficient models, local GPU experimentation, and practical agent systems."
)

DEFAULT_SYSTEM_PROMPT = (
    "You are a research assistant in an independent home lab. "
    "Be practical, technical, and reproducible. "
    "When you make claims that could be uncertain, say you cannot confirm. "
    "Prefer clear steps, short sections, and actionable guidance."
)

# Models offered in the UI dropdown; the first entry is the default.
MODEL_PRESETS = [
    "moonshotai/Kimi-K2-Instruct-0905",
    "meta-llama/Llama-3.1-8B-Instruct",
    "mistralai/Mistral-7B-Instruct-v0.3",
]

# Canned prompts loadable into the input box via the "Load preset" button.
PROMPT_PRESETS = {
    "Home Lab Agent Workflow": "Design a simple tool using agent workflow that runs on local GPU. Include steps and tradeoffs.",
    "RAG Sanity Check": "Given a small set of notes, design a minimal RAG pipeline and an evaluation approach.",
    "Efficiency Test": "Explain how to reduce inference latency and memory use for a small language model on consumer GPU.",
    "Production Mindset": "Convert this experiment into a production ready service. Include observability, risks, and rollout steps.",
}


def _client() -> InferenceClient:
    """Build an InferenceClient authenticated with the HF_TOKEN secret.

    Raises:
        RuntimeError: if HF_TOKEN is missing from the environment, with a
            message telling the Space owner how to configure it.
    """
    token = os.environ.get("HF_TOKEN")
    if not token:
        raise RuntimeError(
            "Missing HF_TOKEN. Add it in Space Settings: Secrets, then restart the Space."
        )
    return InferenceClient(token=token)


def build_messages(system_prompt: str, chat_history, user_message: str):
    """Convert UI state into an OpenAI-style chat ``messages`` list.

    Args:
        system_prompt: Optional system instruction; skipped when blank.
        chat_history: List of ``(user_text, assistant_text)`` pairs as kept
            by the ``gr.Chatbot`` component. Either element may be falsy.
        user_message: The new user turn, appended last.

    Returns:
        A list of ``{"role": ..., "content": ...}`` dicts.
    """
    messages = []
    if system_prompt.strip():
        messages.append({"role": "system", "content": system_prompt.strip()})
    for user_text, assistant_text in chat_history:
        if user_text:
            messages.append({"role": "user", "content": user_text})
        if assistant_text:
            messages.append({"role": "assistant", "content": assistant_text})
    messages.append({"role": "user", "content": user_message})
    return messages


def chat(
    model: str,
    system_prompt: str,
    temperature: float,
    top_p: float,
    max_tokens: int,
    user_message: str,
    chat_history,
):
    """Run one chat turn and return updated ``(history, input_box_value)``.

    On success the assistant reply is suffixed with a "[run meta]" footer
    recording the sampling settings and wall-clock duration. On failure the
    error is surfaced in the chat instead of crashing the UI. The input box
    is always cleared by returning an empty string as the second value.
    """
    # Ignore empty submissions (e.g. pressing Run with a blank input).
    if not user_message or not user_message.strip():
        return chat_history, ""

    started = time.time()
    meta = {
        "model": model,
        "temperature": temperature,
        "top_p": top_p,
        "max_tokens": max_tokens,
    }
    try:
        client = _client()
        messages = build_messages(system_prompt, chat_history, user_message)
        output = client.chat.completions.create(
            model=model,
            messages=messages,
            temperature=temperature,
            top_p=top_p,
            max_tokens=max_tokens,
        )
        # content can be None for some providers; normalize to "".
        answer = output.choices[0].message.content or ""
        duration = round(time.time() - started, 3)
        meta_line = (
            f"\n\n[run meta]\nmodel: {meta['model']}\n"
            f"temperature: {meta['temperature']}\ntop_p: {meta['top_p']}\n"
            f"max_tokens: {meta['max_tokens']}\nseconds: {duration}\n"
        )
        chat_history = chat_history + [(user_message, answer + meta_line)]
        return chat_history, ""
    except Exception as e:
        # Boundary handler: report the failure in the chat rather than
        # letting the Gradio event crash.
        err = f"Run failed.\n{type(e).__name__}: {e}"
        chat_history = chat_history + [(user_message, err)]
        return chat_history, ""


with gr.Blocks(title=APP_TITLE) as demo:
    gr.Markdown(f"# {APP_TITLE}\n\n{APP_SUBTITLE}")

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("## Controls")
            model = gr.Dropdown(
                choices=MODEL_PRESETS,
                value=MODEL_PRESETS[0],
                label="Model",
            )
            system_prompt = gr.Textbox(
                value=DEFAULT_SYSTEM_PROMPT,
                label="System Prompt",
                lines=6,
            )
            temperature = gr.Slider(
                minimum=0.0,
                maximum=1.5,
                value=0.4,
                step=0.05,
                label="Temperature",
            )
            top_p = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.95,
                step=0.05,
                label="Top P",
            )
            max_tokens = gr.Slider(
                minimum=64,
                maximum=2048,
                value=512,
                step=64,
                label="Max Tokens",
            )
            gr.Markdown("## Prompt Presets")
            preset = gr.Dropdown(
                choices=list(PROMPT_PRESETS.keys()),
                value=next(iter(PROMPT_PRESETS)),
                label="Preset",
            )
            apply_preset = gr.Button("Load preset into input")

        with gr.Column(scale=2):
            gr.Markdown("## Lab Chat")
            chatbot = gr.Chatbot(height=520)
            user_message = gr.Textbox(
                label="Input",
                placeholder="Ask a research question, test an idea, or run an experiment prompt.",
                lines=3,
            )
            with gr.Row():
                send = gr.Button("Run")
                clear = gr.Button("Clear")

    def _load_preset(preset_name: str):
        """Return the preset text for *preset_name* ("" if unknown)."""
        return PROMPT_PRESETS.get(preset_name, "")

    apply_preset.click(_load_preset, inputs=preset, outputs=user_message)
    send.click(
        chat,
        inputs=[model, system_prompt, temperature, top_p, max_tokens, user_message, chatbot],
        outputs=[chatbot, user_message],
    )
    clear.click(lambda: [], outputs=chatbot)


if __name__ == "__main__":
    demo.launch()