"""
DataSense E2B — Hugging Face Space demo
Execution-grounded data agent (SFT v1) with bundled or uploaded CSVs/Excel.
"""

from __future__ import annotations

import html
import re
import spaces  # must be first — before any torch/CUDA import

from pathlib import Path

import gradio as gr
import pandas as pd

from agent import run_agent
from config import ADAPTER_MODEL, AGENT_MAX_STEPS, DATA_DIR
from examples import DEMO_DATASETS, DEMO_EXAMPLES

# Process-wide cache filled lazily by _load_model(); both stay None until the
# first successful load.
MODEL, TOKENIZER = None, None
# External links rendered in the header Markdown assembled by build_ui().
STORY_URL = "https://datasense-e2b.netlify.app/"
DEMO_VIDEO_URL = "https://youtu.be/ucFoCdMK7sE"
LINKEDIN_POST_URL = (
    "https://www.linkedin.com/posts/sanjaymalladi_buildsmall-huggingface-modal-share-7471993638814654464-47hY/"
)

# Stylesheet passed to `launch(css=...)` in the `__main__` guard at the bottom
# of this file. It declares the `--ds-*` palette variables on
# `.gradio-container`, styles the elements that build_ui() tags with an
# `elem_id` (ds-header, ds-panel, ds-results, ds-preview-box, run-btn), styles
# the `ds-*` classes emitted by the HTML helper functions below
# (progress bar, answer card, trace wrapper), and hides the page footer.
CUSTOM_CSS = """
@import url('https://fonts.googleapis.com/css2?family=IBM+Plex+Mono:wght@400;500&family=Newsreader:ital,opsz,wght@0,6..72,400;0,6..72,600;1,6..72,400&display=swap');

.gradio-container {
  --ds-bg: #0c1117;
  --ds-surface: #151c26;
  --ds-border: #2a3544;
  --ds-accent: #3ecfae;
  --ds-accent-dim: #1f6f5c;
  --ds-text: #e8eef5;
  --ds-muted: #8b9cb3;
  max-width: 1280px !important;
  font-family: 'Newsreader', Georgia, serif !important;
}
#ds-header {
  background: linear-gradient(135deg, #0f1a24 0%, #122a2a 55%, #0c1117 100%);
  border: 1px solid var(--ds-border);
  border-radius: 16px;
  padding: 1.5rem 1.75rem;
  margin-bottom: 1rem;
}
#ds-header h1 {
  font-family: 'Newsreader', Georgia, serif;
  font-size: 2rem;
  font-weight: 600;
  margin: 0 0 0.35rem 0;
  color: var(--ds-text);
}
#ds-badge {
  display: inline-block;
  font-family: 'IBM Plex Mono', monospace;
  font-size: 0.72rem;
  letter-spacing: 0.08em;
  text-transform: uppercase;
  color: var(--ds-accent);
  border: 1px solid var(--ds-accent-dim);
  border-radius: 999px;
  padding: 0.2rem 0.65rem;
  margin-bottom: 0.75rem;
}
#ds-panel, #ds-results {
  background: var(--ds-surface);
  border: 1px solid var(--ds-border);
  border-radius: 14px;
  padding: 1.1rem;
  min-height: 520px;
}
#ds-preview-box {
  margin-top: 0.5rem;
  border: 1px solid var(--ds-border);
  border-radius: 10px;
  overflow: hidden;
}
#ds-preview-box .label-wrap { padding: 0.5rem 0.75rem !important; }
#run-btn {
  background: linear-gradient(90deg, #1f6f5c, #3ecfae) !important;
  border: none !important;
  font-weight: 600 !important;
  letter-spacing: 0.02em;
}
#ds-progress-label {
  font-family: 'IBM Plex Mono', monospace;
  font-size: 0.82rem;
  color: var(--ds-accent);
  margin: 0 0 0.35rem 0;
}
.ds-progress-wrap {
  margin: 0 0 1rem 0;
  padding: 0.85rem 1rem;
  background: #0f1419;
  border: 1px solid #2a3544;
  border-radius: 10px;
}
.ds-progress-text {
  font-family: 'IBM Plex Mono', monospace;
  font-size: 0.82rem;
  color: #3ecfae;
  margin-bottom: 0.55rem;
  line-height: 1.4;
}
.ds-progress-text .ds-pct {
  color: #8b9cb3;
  font-size: 0.75rem;
}
.ds-progress-track {
  height: 6px;
  background: #1a2330;
  border-radius: 999px;
  overflow: hidden;
}
.ds-progress-fill {
  height: 100%;
  background: linear-gradient(90deg, #1f6f5c, #3ecfae);
  border-radius: 999px;
  transition: width 0.35s ease;
}
.ds-progress-wrap.ds-idle .ds-progress-track { display: none; }
#ds-results .tabitem { padding-top: 0.75rem !important; }
.ds-answer-card {
  background: linear-gradient(145deg, #122a2a 0%, #151c26 100%);
  border: 1px solid #2a3544;
  border-left: 4px solid #3ecfae;
  border-radius: 12px;
  padding: 1.5rem 1.75rem;
  margin: 0;
  min-height: 200px;
}
.ds-answer-label {
  font-family: 'IBM Plex Mono', monospace;
  font-size: 0.72rem;
  letter-spacing: 0.1em;
  text-transform: uppercase;
  color: #3ecfae;
  margin-bottom: 0.75rem;
}
.ds-answer-value {
  font-family: 'Newsreader', Georgia, serif;
  font-size: 1.35rem;
  font-weight: 400;
  color: #c5d0de;
  line-height: 1.55;
  word-break: break-word;
}
.ds-answer-value strong {
  color: #ffffff;
  font-weight: 700;
}
.ds-summary {
  margin-top: 1.25rem;
  padding-top: 1rem;
  border-top: 1px solid #2a3544;
  font-size: 1.05rem;
  color: #8b9cb3;
  line-height: 1.55;
}
.ds-answer-empty {
  color: #8b9cb3;
  font-style: italic;
  padding: 1rem 0;
}
.ds-trace-wrap { margin: 0; padding: 0; }
.ds-trace-wrap h3 { margin-top: 1rem; color: #3ecfae; font-size: 1rem; }
footer { visibility: hidden; }
"""


def build_theme() -> gr.Theme:
    """Return the Gradio theme used when the app is launched.

    Starts from ``gr.themes.Base`` with emerald/slate/gray hues and the
    Newsreader / IBM Plex Mono Google fonts, then pins background, border,
    text and primary-button colours. The light and ``*_dark`` variants of
    each colour are given the same value.
    """
    palette = gr.themes.colors
    base_theme = gr.themes.Base(
        primary_hue=palette.emerald,
        secondary_hue=palette.slate,
        neutral_hue=palette.gray,
        font=gr.themes.GoogleFont("Newsreader"),
        font_mono=gr.themes.GoogleFont("IBM Plex Mono"),
    )
    overrides = {
        "body_background_fill": "#0c1117",
        "body_background_fill_dark": "#0c1117",
        "block_background_fill": "#151c26",
        "block_background_fill_dark": "#151c26",
        "block_border_width": "1px",
        "block_border_color": "#2a3544",
        "block_border_color_dark": "#2a3544",
        "body_text_color": "#e8eef5",
        "body_text_color_dark": "#e8eef5",
        "button_primary_background_fill": "linear-gradient(90deg, #1f6f5c, #3ecfae)",
        "button_primary_background_fill_hover": "linear-gradient(90deg, #2a8a72, #4de0c0)",
    }
    return base_theme.set(**overrides)


def _load_model():
    """Return the cached ``(MODEL, TOKENIZER)`` pair, loading it on first use.

    The module-level ``MODEL`` and ``TOKENIZER`` globals act as the cache. If
    either is still ``None`` both are (re)assigned from
    ``model_loader.load_model_and_tokenizer()``.
    """
    global MODEL, TOKENIZER
    if MODEL is not None and TOKENIZER is not None:
        return MODEL, TOKENIZER

    # Imported here rather than at module top, so the import only happens
    # when a load is actually needed.
    from model_loader import load_model_and_tokenizer

    MODEL, TOKENIZER = load_model_and_tokenizer()
    return MODEL, TOKENIZER


def _resolve_data_path(data_mode: str, dataset_name: str, upload_file) -> Path | None:
    if data_mode == "Upload your file":
        if upload_file is None:
            return None
        path_str = upload_file[0] if isinstance(upload_file, list) else upload_file
        if not path_str:
            return None
        path = Path(path_str)
        return path if path.is_file() else None
    return DEMO_DATASETS.get(dataset_name)


def _load_preview(data_mode: str, dataset_name: str, upload_file):
    """Load the leading rows of the selected file for the preview table.

    Args:
        data_mode: Data-source radio value, forwarded to ``_resolve_data_path``.
        dataset_name: Bundled dataset name, forwarded to ``_resolve_data_path``.
        upload_file: Upload component value, forwarded to ``_resolve_data_path``.

    Returns:
        A ``(DataFrame, markdown_caption)`` pair. When no file is resolved or
        the file cannot be read, the frame is empty and the caption carries an
        italic hint or error message. Otherwise the frame holds at most
        100 rows and the caption shows file name, row count and column count.
    """
    # Only this many rows are read, so the row count in the caption describes
    # the preview, not necessarily the whole file.
    preview_limit = 100

    path = _resolve_data_path(data_mode, dataset_name, upload_file)
    if path is None:
        return pd.DataFrame(), "_Select or upload a file to preview._"

    try:
        if path.suffix.lower() in (".xlsx", ".xls"):
            df = pd.read_excel(path, nrows=preview_limit)
        else:
            df = pd.read_csv(path, nrows=preview_limit)
    except Exception as exc:
        # UI boundary: report any read/parse failure in the caption instead of
        # letting the event handler crash.
        return pd.DataFrame(), f"_Could not preview file: {exc}_"

    # A full preview means the file may continue past the cap; say "first N
    # rows" so the caption is not mistaken for the file's total row count.
    if len(df) >= preview_limit:
        rows_label = f"first {len(df)} rows"
    else:
        rows_label = f"{len(df)} rows"
    meta = f"**{path.name}** · {rows_label} · {len(df.columns)} columns"
    return df, meta


def _inline_markdown_to_html(text: str) -> str:
    """Render ``**bold**`` spans as ``<strong>`` and HTML-escape all other text.

    Any ``**`` left over after pairing (an unmatched marker) is removed from
    the result. Falsy input yields an empty string.
    """
    if not text:
        return ""
    # re.split with one capture group alternates plain text (even indexes)
    # and captured bold content (odd indexes).
    segments = re.split(r"\*\*(.+?)\*\*", text)
    rendered = [
        f"<strong>{html.escape(segment)}</strong>" if index % 2 else html.escape(segment)
        for index, segment in enumerate(segments)
    ]
    return "".join(rendered).replace("**", "")


def _format_answer_html(answer: str, summary: str = "") -> str:
    """Build the HTML answer card shown in the Answer tab.

    A falsy ``answer`` produces the "could not parse" placeholder. Otherwise
    the card contains a fixed label, the answer, and — when ``summary`` is
    non-empty — a summary paragraph. Both texts go through
    ``_inline_markdown_to_html``.
    """
    if not answer:
        return '<div class="ds-answer-empty">Could not parse an answer — check the execution trace tab.</div>'

    rendered_answer = _inline_markdown_to_html(answer)
    rendered_summary = (
        f'<p class="ds-summary">{_inline_markdown_to_html(summary)}</p>' if summary else ""
    )
    fragments = (
        '<div class="ds-answer-card">',
        '<div class="ds-answer-label">Verified answer</div>',
        f'<div class="ds-answer-value">{rendered_answer}</div>',
        rendered_summary,
        "</div>",
    )
    return "".join(fragments)


def _toggle_data_inputs(data_mode: str):
    """Return visibility updates for the two data inputs.

    The first update targets the bundled-dataset input and the second the
    upload input; exactly one of them is made visible, depending on whether
    ``data_mode`` equals ``"Upload your file"``.
    """
    show_upload = data_mode == "Upload your file"
    bundled_update = gr.update(visible=not show_upload)
    upload_update = gr.update(visible=show_upload)
    return bundled_update, upload_update


def _wrap_trace(trace: str) -> str:
    """Wrap trace markdown in a ``ds-trace-wrap`` div; falsy input gives ``""``.

    Blank lines are kept on both sides of the trace inside the div.
    """
    if trace:
        return '<div class="ds-trace-wrap">\n\n{}\n\n</div>'.format(trace)
    return ""


def _progress_html(pct: int | None, label: str) -> str:
    safe_label = html.escape(label)
    if pct is None:
        return f'<div class="ds-progress-wrap ds-idle"><div class="ds-progress-text">{safe_label}</div></div>'
    pct = max(0, min(100, pct))
    return (
        f'<div class="ds-progress-wrap">'
        f'<div class="ds-progress-text">{safe_label} <span class="ds-pct">{pct}%</span></div>'
        f'<div class="ds-progress-track"><div class="ds-progress-fill" style="width:{pct}%"></div></div>'
        f"</div>"
    )


def _progress_update(pct: int | None, label: str, trace: str = "", answer: str = ""):
    """Bundle one UI update as ``(progress_html, wrapped_trace, answer)``.

    ``pct`` and ``label`` are rendered by ``_progress_html``, ``trace`` is
    wrapped by ``_wrap_trace``, and ``answer`` is passed through unchanged.
    """
    bar_markup = _progress_html(pct, label)
    trace_markup = _wrap_trace(trace)
    return bar_markup, trace_markup, answer


# Idle-state progress markup (no bar), used as the initial value of the
# progress HTML component in build_ui().
IDLE_PROGRESS_HTML = _progress_html(None, "Ready — click Run DataSense")


@spaces.GPU(duration=180)
def run_task(
    data_mode: str,
    dataset_name: str,
    upload_file,
    task: str,
    max_steps: int,
    progress=gr.Progress(),
):
    """Run the agent on the selected dataset and stream UI updates.

    This is a generator; every yielded value is the 3-tuple produced by
    ``_progress_update``: progress HTML, wrapped trace markdown, answer HTML.

    Args:
        data_mode: Data-source radio value.
        dataset_name: Bundled dataset name.
        upload_file: Upload component value.
        task: The user's question; surrounding whitespace is stripped.
        max_steps: Step budget, converted with ``int()`` and passed to ``run_agent``.
        progress: Gradio progress tracker, also handed to ``run_agent``.

    Yields:
        An early message and nothing else when the question is blank or no
        dataset resolves. Otherwise loading/starting updates, one update per
        ``("progress", step, total, trace_md)`` event, and a final update for
        a ``("final", result)`` event. Events of any other kind are ignored.
        Any exception raised while loading or running is reported as an error
        card instead of propagating.
    """
    card_template = '<div class="ds-answer-empty">{}</div>'

    question = task.strip()
    if not question:
        yield _progress_update(
            None,
            "Enter a question to run DataSense",
            "",
            card_template.format("Enter a task question."),
        )
        return

    yield _progress_update(5, "Reading your dataset…")
    data_path = _resolve_data_path(data_mode, dataset_name, upload_file)
    if data_path is None:
        if data_mode == "Upload your file":
            msg = "⚠️ Upload a `.csv` or `.xlsx` file first."
        else:
            msg = f"⚠️ Dataset not found: {dataset_name}"
        yield _progress_update(0, "Dataset missing", "", card_template.format(html.escape(msg)))
        return

    try:
        yield _progress_update(12, "Loading DataSense on GPU…")
        progress(0.15, desc="Loading DataSense…")
        model, tokenizer = _load_model()

        yield _progress_update(22, "DataSense is starting…")
        events = run_agent(
            model,
            tokenizer,
            data_path,
            question,
            max_steps=int(max_steps),
            progress=progress,
            stream=True,
        )

        for event in events:
            kind = event[0]
            if kind == "progress":
                _, step, total, trace_md = event
                # Agent steps fill the 22%..95% range of the bar.
                pct = int(22 + (73 * step / max(total, 1)))
                status = f"DataSense · step {step}/{total}"
                yield _progress_update(pct, status, trace_md, "")
            elif kind == "final":
                result = event[1]
                answer_html = _format_answer_html(
                    result.get("answer", ""),
                    result.get("summary", ""),
                )
                yield _progress_update(100, "DataSense finished", result["steps_markdown"], answer_html)
    except Exception as exc:
        failure = "Error: " + html.escape(str(exc))
        yield _progress_update(0, "Error", "", card_template.format(failure))


@spaces.GPU(duration=300)
def preload_model():
    """Warm the module-level model/tokenizer cache by calling ``_load_model()``.

    The pair returned by ``_load_model()` ` is discarded; this function
    returns ``None``. It is invoked once at import time inside a
    ``try``/``except`` at the bottom of this module.

    NOTE(review): whether globals set inside a ``spaces.GPU``-decorated call
    remain visible to later ``run_task`` calls depends on how the ``spaces``
    runtime executes decorated functions — confirm.
    """
    _load_model()


def build_ui() -> gr.Blocks:
    """Assemble the Gradio Blocks app and wire its events.

    Layout: a header with title and links; a left "Configure" column (data
    source radio, bundled-dataset dropdown, hidden upload input, data preview,
    question box, step slider, run button, quick examples); and a right
    "Results" column (progress HTML plus trace and answer tabs).

    Returns:
        The constructed ``gr.Blocks`` instance (not launched).
    """
    dataset_choices = list(DEMO_DATASETS.keys())
    # Initial preview comes from the first bundled dataset.
    # NOTE(review): assumes DEMO_DATASETS is non-empty; [0] raises IndexError otherwise.
    default_df, default_meta = _load_preview("Bundled examples", dataset_choices[0], None)

    with gr.Blocks(title="DataSense E2B") as demo:
        # --- Header: badge, title, external links ---
        with gr.Column(elem_id="ds-header"):
            gr.HTML('<div id="ds-badge">Execution-verified · Gemma / DataBench</div>')
            gr.Markdown(
                """
# DataSense E2B
**Live inference** — Gemma-4 2B + SFT v1 writes Python, runs it on your data, reads real stdout/errors.
                """
            )
            gr.Markdown(
                f"📖 [Full project story]({STORY_URL}) · "
                f"🎬 [Demo video]({DEMO_VIDEO_URL}) · "
                f"[LinkedIn post]({LINKEDIN_POST_URL}) · "
                f"LoRA [`DataSense-Modal-E2B-SFT`](https://huggingface.co/{ADAPTER_MODEL})",
            )

        with gr.Row(equal_height=False):
            # --- Left column: inputs ---
            with gr.Column(scale=5, elem_id="ds-panel"):
                gr.Markdown("### Configure")
                # These two choice strings are compared literally in
                # _resolve_data_path, _toggle_data_inputs and run_task.
                data_mode = gr.Radio(
                    choices=["Bundled examples", "Upload your file"],
                    value="Bundled examples",
                    label="Data source",
                )
                dataset = gr.Dropdown(
                    choices=dataset_choices,
                    value=dataset_choices[0],
                    label="Demo dataset",
                )
                # Hidden until the radio switches to upload mode (see _toggle_data_inputs).
                upload = gr.File(
                    label="Your CSV or Excel file",
                    file_types=[".csv", ".xlsx", ".xls"],
                    type="filepath",
                    visible=False,
                )

                gr.Markdown("### Data preview")
                preview_meta = gr.Markdown(default_meta)
                with gr.Group(elem_id="ds-preview-box"):
                    preview_df = gr.Dataframe(
                        value=default_df,
                        interactive=False,
                        wrap=True,
                        max_height=280,
                    )

                task = gr.Textbox(
                    label="Question / task",
                    placeholder="e.g. Which product had the highest total revenue?",
                    lines=3,
                )
                max_steps = gr.Slider(
                    minimum=3,
                    maximum=12,
                    value=AGENT_MAX_STEPS,
                    step=1,
                    label="Max agent steps",
                )
                run_btn = gr.Button("▶ Run DataSense", variant="primary", elem_id="run-btn")

                # Each example fills the dataset dropdown and the question box;
                # the data-source radio is not among the example inputs.
                gr.Examples(
                    examples=DEMO_EXAMPLES,
                    inputs=[dataset, task],
                    label="Quick examples (bundled data)",
                )

            # --- Right column: streamed outputs ---
            with gr.Column(scale=7, elem_id="ds-results"):
                gr.Markdown("### Results")
                progress_out = gr.HTML(value=IDLE_PROGRESS_HTML)
                with gr.Tabs():
                    with gr.Tab("🔍 Execution trace", id="trace_tab"):
                        steps_out = gr.Markdown()
                    with gr.Tab("✅ Answer", id="answer_tab"):
                        answer_out = gr.HTML()

        # --- Event wiring ---
        # Any change to the data source, dataset or upload refreshes the preview;
        # switching the data source first swaps which input is visible.
        preview_inputs = [data_mode, dataset, upload]
        data_mode.change(_toggle_data_inputs, data_mode, [dataset, upload]).then(
            _load_preview, preview_inputs, [preview_df, preview_meta]
        )
        dataset.change(_load_preview, preview_inputs, [preview_df, preview_meta])
        upload.change(_load_preview, preview_inputs, [preview_df, preview_meta])

        # run_task yields its own progress markup into progress_out, so the
        # built-in progress display is set to "hidden".
        run_btn.click(
            fn=run_task,
            inputs=[data_mode, dataset, upload, task, max_steps],
            outputs=[progress_out, steps_out, answer_out],
            show_progress="hidden",
        )

        # Recompute the preview from the current input values on page load.
        demo.load(_load_preview, preview_inputs, [preview_df, preview_meta])

    return demo


# Best-effort warm-up at import time: a failure is only printed, and
# run_task() loads the model on demand through _load_model() instead.
try:
    preload_model()
except Exception as exc:
    print(f"Startup preload skipped (will load on first run): {exc}")

# Module-level Blocks instance, built at import time.
demo = build_ui()

if __name__ == "__main__":
    # Queue is configured with max_size=8; theme and stylesheet are supplied
    # to launch().
    # NOTE(review): passing theme/css to launch() presumably targets a Gradio
    # release whose launch() accepts them — confirm against the pinned version.
    demo.queue(max_size=8).launch(theme=build_theme(), css=CUSTOM_CSS)