import json
import os
import re
from dataclasses import dataclass
from typing import Dict, List, Tuple, Optional

import gradio as gr

# Lazy-loaded quantizer (imported on first use so the app can start even if
# the optional embedding dependencies are missing).
_quantizer = None
_quantizer_error = None


def _get_quantizer():
    """Lazy-load the embedding quantizer, caching the instance (or the error)."""
    global _quantizer, _quantizer_error
    if _quantizer is not None:
        return _quantizer
    if _quantizer_error is not None:
        return None
    try:
        from quantizer import EmbeddingQuantizer

        _quantizer = EmbeddingQuantizer(fallback_threshold=0.3)
        return _quantizer
    except Exception as e:
        _quantizer_error = str(e)
        return None


def _get_keyword_quantizer():
    """Fall back to the keyword quantizer when embeddings are unavailable."""
    try:
        from quantizer import KeywordQuantizer

        return KeywordQuantizer()
    except Exception:
        return None


# ---------------------------
# Loaded at build time
# ---------------------------
PAPER_TITLE = "Slipstream: Semantic Quantization for Efficient Multi-Agent Coordination"
PAPER_AUTHORS = "Anthony Maio"
PAPER_ABSTRACT = (
    "As multi-agent LLM systems scale, coordination bandwidth becomes a primary cost "
    "driver: every token spent on routing, intent framing, and redundant context is paid "
    "repeatedly across agents and turns. Current approaches waste 40–60% of compute on "
    "coordination overhead, with communication costs scaling O(n^2) as agent counts "
    "increase.\n\n"
    "This paper introduces Slipstream, a protocol that performs semantic quantization: "
    "mapping free-form messages onto a shared Universal Concept Reference (UCR) and "
    "transmitting compact mnemonic anchors that identify structured intents. Unlike "
    "syntactic compression (which fails due to BPE tokenizer fragmentation), Slipstream "
    "transmits natural-language mnemonics that tokenize efficiently across model "
    "architectures.\n\n"
    "Slipstream combines (1) a symbolic 4D semantic manifold (Action, Polarity, Domain, "
    "Urgency) with (2) a data-driven vector engine (embeddings + nearest-centroid "
    "retrieval), plus an evolutionary extension layer that learns new anchors from "
    "low-confidence traffic. Results show 82% token reduction (41.9 → 7.4 tokens on "
    "average) while maintaining semantic fidelity, making large-scale multi-agent "
    "deployments economically viable."
)
PAPER_TAGS = "semantic-quantization, multi-agent-systems, protocol-standards, token-efficiency"
DEFAULT_LLM_MODEL = None
DETECTED_MODELS = []
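
# Runtime configuration (all optional, read from environment / Space secrets):
#   HF_TOKEN or HUGGINGFACEHUB_API_TOKEN - enables Hugging Face Inference calls.
#   PAPER_LLM_MODEL                      - chat model id; without it the app falls
#                                          back to deterministic snippet retrieval.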

# ---------------------------
# Robust file loading
# ---------------------------
def _load_chunks(path: str = "paper_chunks.jsonl") -> List[str]:
    """Read one JSON record per line, keeping only non-empty "text" fields."""
    chunks: List[str] = []
    try:
        with open(path, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    rec = json.loads(line)
                    txt = (rec.get("text") or "").strip()
                    if txt:
                        chunks.append(txt)
                except Exception:
                    # Skip malformed lines rather than failing the whole load.
                    continue
    except FileNotFoundError:
        return []
    except Exception:
        return []
    return chunks


# ---------------------------
# Tiny BM25-ish retrieval (pure Python)
# ---------------------------
def _tokenize(s: str) -> List[str]:
    return re.findall(r"[A-Za-z0-9][A-Za-z0-9_-]{1,}", s.lower())


@dataclass
class Retriever:
    chunks: List[str]
    doc_tokens: List[List[str]]
    df: Dict[str, int]
    idf: Dict[str, float]

    @staticmethod
    def build(chunks: List[str]) -> "Retriever":
        doc_tokens = [_tokenize(c) for c in chunks]
        df: Dict[str, int] = {}
        for toks in doc_tokens:
            for t in set(toks):
                df[t] = df.get(t, 0) + 1
        n = max(1, len(doc_tokens))
        # BM25-style IDF: rare terms get more weight than common ones.
        idf = {t: float((n - d + 0.5) / (d + 0.5)) for t, d in df.items()}
        return Retriever(chunks=chunks, doc_tokens=doc_tokens, df=df, idf=idf)

    def topk(self, query: str, k: int = 4) -> List[Tuple[int, float]]:
        q = _tokenize(query)
        if not q:
            return []
        scores: List[Tuple[int, float]] = []
        qset = set(q)
        for i, toks in enumerate(self.doc_tokens):
            if not toks:
                continue
            overlap = qset.intersection(toks)
            if not overlap:
                continue
            score = sum(self.idf.get(t, 0.0) for t in overlap)
            # Mild length normalization so long chunks do not dominate.
            score = score / (1.0 + (len(toks) / 200.0))
            scores.append((i, score))
        scores.sort(key=lambda x: x[1], reverse=True)
        return scores[:k]


CHUNKS = _load_chunks()
RETRIEVER = Retriever.build(CHUNKS) if CHUNKS else None


def retrieve_context(query: str, k: int = 4, max_chars: int = 6000) -> str:
    if not RETRIEVER:
        return ""
    hits = RETRIEVER.topk(query, k=k)
    parts: List[str] = []
    for idx, _score in hits:
        txt = CHUNKS[idx].strip()
        if txt:
            parts.append(txt)
    ctx = "\n\n".join(parts).strip()
    return ctx[:max_chars]
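
# The scoring above, written out (a plain-Python simplification of BM25; the
# usual k1/b term-frequency saturation is deliberately omitted):
#
#     idf(t)      = (N - df(t) + 0.5) / (df(t) + 0.5)
#     score(d, q) = sum(idf(t) for t in q ∩ d) / (1 + |d| / 200)
#
# where N is the number of chunks, df(t) the number of chunks containing t,
# and |d| the token length of chunk d. A quick sanity check (toy chunks for
# illustration, not from the paper):
#
#     r = Retriever.build(["semantic quantization of intents",
#                          "token costs in multi-agent systems"])
#     r.topk("semantic quantization")  # -> [(0, ~1.96)]: only chunk 0 overlaps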

# ---------------------------
# HF Inference helpers (optional)
# ---------------------------
def _get_hf_client(model_id: str):
    try:
        from huggingface_hub import InferenceClient
    except Exception as e:
        raise RuntimeError("huggingface_hub is not installed. Add it to requirements.txt") from e
    token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACEHUB_API_TOKEN")
    if not token:
        raise RuntimeError("HF_TOKEN is not set as a Space secret.")
    return InferenceClient(model=model_id, token=token)


def _llm_chat(model_id: str, messages: List[dict], max_tokens: int = 512) -> str:
    client = _get_hf_client(model_id)
    out = client.chat_completion(messages=messages, max_tokens=max_tokens)
    return out.choices[0].message.content
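
# Minimal usage sketch (the model id below is illustrative, not a project
# default; any chat-capable Hub model id works, and HF_TOKEN must be set):
#
#     reply = _llm_chat(
#         "HuggingFaceH4/zephyr-7b-beta",
#         [{"role": "user", "content": "Summarize the abstract in one line."}],
#         max_tokens=64,
#     )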

# ---------------------------
# Chat with paper (RAG-lite)
# ---------------------------
def paper_chat(message: str, history: List[Tuple[str, str]]) -> str:
    message = (message or "").strip()
    if not message:
        return "Ask a question about the paper."
    ctx = retrieve_context(message, k=4, max_chars=6000)
    model_id = os.environ.get("PAPER_LLM_MODEL") or DEFAULT_LLM_MODEL
    if not model_id:
        # No LLM configured: degrade to deterministic snippet retrieval.
        if not ctx:
            return "No indexed context found. (paper_chunks.jsonl missing?)"
        return "Top matches in the paper:\n\n" + ctx[:1200]
    trimmed_history = history[-4:] if history else []
    messages: List[dict] = [
        {
            "role": "system",
            "content": (
                "You are a precise research assistant. Answer using ONLY the provided paper context. "
                "If the context is insufficient, say what is missing and point to what section would help."
            ),
        },
    ]
    if ctx:
        messages.append({"role": "system", "content": "PAPER CONTEXT:\n\n" + ctx})
    for u, a in trimmed_history:
        messages.append({"role": "user", "content": u})
        messages.append({"role": "assistant", "content": a})
    messages.append({"role": "user", "content": message})
    try:
        return _llm_chat(model_id, messages, max_tokens=512).strip()
    except Exception as e:
        if ctx:
            return f"(LLM unavailable: {e})\n\nTop matches in the paper:\n\n" + ctx[:1200]
        return f"LLM unavailable: {e}"


# ---------------------------
# Share Kit (generators)
# ---------------------------
def _fallback_tweet_thread(title: str, abstract: str) -> str:
    abs_one = re.sub(r"\s+", " ", abstract).strip()
    bullets = [
        f"1/ {title}",
        "2/ TL;DR: " + (abs_one[:220] + ("…" if len(abs_one) > 220 else "")),
        "3/ Key idea: (open the Space → Chat tab and ask for the method overview)",
        "4/ Try it: use the Share Kit tab to generate a talk outline / FAQ.",
        "5/ Links: add your paper + code links in the README.",
    ]
    return "\n\n".join(bullets)


def generate_share(kind: str) -> str:
    kind = (kind or "").strip().lower()
    base_title = PAPER_TITLE or "Paper"
    base_abs = PAPER_ABSTRACT or ""
    model_id = os.environ.get("PAPER_LLM_MODEL") or DEFAULT_LLM_MODEL
    if not model_id:
        # Deterministic fallbacks so this tab works without secrets.
        if kind == "tweet thread":
            return _fallback_tweet_thread(base_title, base_abs)
        if kind == "talk outline":
            return "\n".join([
                f"Title: {base_title}",
                "- Motivation",
                "- Problem setup",
                "- Method",
                "- Results",
                "- Limitations",
                "- Q&A",
            ])
        if kind == "faq":
            return "\n".join([
                "Q: What problem does this address?\nA: " + (base_abs[:220] + ("…" if len(base_abs) > 220 else "")),
                "",
                "Q: What is the main contribution?\nA: Ask in the Chat tab.",
                "",
                "Q: How do I reproduce it?\nA: Link code + add steps in README.",
            ])
        return "Select an item to generate."
    prompt = {
        "tweet thread": "Write a concise 6-tweet thread summarizing the paper for the ML community.",
        "talk outline": "Create a 10-minute talk outline with section headers and bullet points.",
        "faq": "Write an FAQ with 6 Q/A pairs focused on method, results, limitations, and usage.",
    }.get(kind, "Summarize the paper in 8 bullet points.")
    ctx = (PAPER_ABSTRACT or "").strip()
    messages = [
        {"role": "system", "content": "You are an expert technical writer for ML research audiences."},
        {"role": "user", "content": f"Paper title: {base_title}\nAuthors: {PAPER_AUTHORS}\n\nAbstract/context:\n{ctx}\n\nTask: {prompt}"},
    ]
    try:
        return _llm_chat(model_id, messages, max_tokens=600).strip()
    except Exception as e:
        if kind == "tweet thread":
            return _fallback_tweet_thread(base_title, base_abs) + f"\n\n(LLM unavailable: {e})"
        return f"LLM unavailable: {e}"


# ---------------------------
# Model Playground (chat)
# ---------------------------
def model_chat(model_id: str, message: str, history: List[Tuple[str, str]]) -> str:
    model_id = (model_id or "").strip()
    message = (message or "").strip()
    if not model_id:
        return "Provide a model id."
    if not message:
        return "Send a message."
    messages: List[dict] = [{"role": "system", "content": "You are a helpful assistant."}]
    for u, a in (history[-4:] if history else []):
        messages.append({"role": "user", "content": u})
        messages.append({"role": "assistant", "content": a})
    messages.append({"role": "user", "content": message})
    try:
        return _llm_chat(model_id, messages, max_tokens=512).strip()
    except Exception as e:
        return f"Model call failed: {e}"


# ---------------------------
# UI helpers
# ---------------------------
def quantize_intent(intent: str) -> Tuple[str, str, str]:
    """
    Quantize a natural-language intent to a UCR anchor.

    Returns: (primary_result_md, alternatives_md, wire_format)
    """
    intent = (intent or "").strip()
    if not intent:
        return "Enter an intent to quantize.", "", ""

    # Try the embedding quantizer first, fall back to keyword matching.
    quantizer = _get_quantizer()
    method = "embedding"
    if quantizer is None:
        quantizer = _get_keyword_quantizer()
        method = "keyword"
    if quantizer is None:
        return "Quantizer unavailable. Check logs.", "", ""

    try:
        result = quantizer.quantize(intent)
    except Exception as e:
        return f"Quantization error: {e}", "", ""

    # Confidence bucket (color is computed for potential styling; only the
    # label is rendered below).
    conf = result.confidence
    if conf >= 0.7:
        color = "green"
        conf_label = "High"
    elif conf >= 0.5:
        color = "orange"
        conf_label = "Medium"
    else:
        color = "red"
        conf_label = "Low"

    # Primary result
    primary_md = f"""
### {result.anchor.mnemonic}

**Confidence:** {conf:.0%} ({conf_label})

**Canonical meaning:** {result.anchor.canonical}

**Method:** {method} {'(fallback)' if result.is_fallback else ''}

**Coordinates:** `{result.anchor.coords}` (Action, Polarity, Domain, Urgency)
"""

    # Alternatives
    if result.alternatives:
        alt_lines = ["| Anchor | Similarity |", "|--------|------------|"]
        for alt_anchor, alt_score in result.alternatives[:3]:
            bar_len = int(alt_score * 10)
            bar = "█" * bar_len + "░" * (10 - bar_len)
            alt_lines.append(f"| {alt_anchor.mnemonic} | {bar} {alt_score:.0%} |")
        alternatives_md = "\n".join(alt_lines)
    else:
        alternatives_md = "*No alternatives*"

    # Wire format
    wire = f"SLIP v1 user agent {result.anchor.mnemonic}"
    if result.is_fallback:
        # Truncate long intents for the fallback payload.
        payload = intent[:100].replace('"', "'")
        wire = f'SLIP v1 user agent Fallback "{payload}"'

    return primary_md, alternatives_md, wire


EXAMPLE_INTENTS = [
    ("Review my code", "RequestReview"),
    ("Task complete!", "InformComplete"),
    ("System down!", "ObserveError"),
    ("Can you help?", "RequestHelp"),
    ("Looks good to me", "EvalApprove"),
]
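
# Example SLIP wire frames as built above (format: "SLIP <version> <from> <to>
# <anchor>"; anchors shown are the expected ones from EXAMPLE_INTENTS, and the
# exact mapping depends on the loaded UCR):
#
#     SLIP v1 user agent RequestReview
#     SLIP v1 user agent Fallback "some low-confidence free-form intent"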

def start_here(choice: str) -> str:
    choice = (choice or "").strip().lower()
    if choice == "quick summary":
        return f"### {PAPER_TITLE}\n\n**Authors:** {PAPER_AUTHORS}\n\n**Abstract:**\n\n{PAPER_ABSTRACT}"
    if choice == "how does it work?":
        return "Go to **Chat** and ask: *Give me a method overview with the key steps.*"
    if choice == "what are the limitations?":
        return "Go to **Chat** and ask: *List limitations and failure modes discussed in the paper.*"
    if choice == "generate a tweet thread":
        return generate_share("tweet thread")
    return "Pick an option."


def _load_gallery_items() -> List[Tuple[str, str]]:
    items: List[Tuple[str, str]] = []
    if os.path.isdir("assets/images"):
        for fn in sorted(os.listdir("assets/images"))[:48]:
            path = os.path.join("assets/images", fn)
            if os.path.isfile(path):
                items.append((path, fn))
    if not items and os.path.isdir("assets/pages"):
        for fn in sorted(os.listdir("assets/pages"))[:24]:
            path = os.path.join("assets/pages", fn)
            if os.path.isfile(path):
                items.append((path, fn))
    return items


CSS = '''
.paper-hero h1 { margin-bottom: 0.2rem; }
.paper-hero p { margin-top: 0.2rem; opacity: 0.9; }
.hint { opacity: 0.85; }
'''

with gr.Blocks(theme=gr.themes.Soft(), css=CSS) as demo:
    gr.Markdown(f"# {PAPER_TITLE}", elem_classes=["paper-hero"])
    if PAPER_AUTHORS:
        gr.Markdown(f"**Authors:** {PAPER_AUTHORS}", elem_classes=["paper-hero"])
    if PAPER_TAGS:
        gr.Markdown(f"**Tags:** {PAPER_TAGS}", elem_classes=["paper-hero"])

    with gr.Tabs():
        with gr.Tab("Start here"):
            gr.Markdown("Pick an interaction to explore the paper quickly.", elem_classes=["hint"])
            choice = gr.Radio(
                ["Quick summary", "How does it work?", "What are the limitations?", "Generate a tweet thread"],
                value="Quick summary",
                label="What do you want?",
            )
            out = gr.Markdown()
            choice.change(start_here, inputs=choice, outputs=out)
            demo.load(start_here, inputs=choice, outputs=out)

        with gr.Tab("Overview"):
            gr.Markdown("## Abstract")
            gr.Markdown(PAPER_ABSTRACT)
            gr.Markdown("---")
            gr.Markdown("### Text search (snippet)")
            q = gr.Textbox(label="Find a phrase", placeholder="e.g., scalable oversight", lines=1)
            snippet = gr.Textbox(label="Top matching context", lines=10)

            def _snippet(query: str) -> str:
                query = (query or "").strip()
                if not query:
                    return ""
                ctx = retrieve_context(query, k=4, max_chars=1600)
                return ctx or "No matches."

            q.change(_snippet, inputs=q, outputs=snippet)

        with gr.Tab("Gallery"):
            gr.Markdown("Extracted images / rendered page previews (if included at build time).", elem_classes=["hint"])
            gallery = gr.Gallery(label="Figures / pages", columns=2, rows=2, height=520)

            def _gallery():
                return _load_gallery_items()

            demo.load(_gallery, outputs=gallery)

        with gr.Tab("Chat"):
            gr.Markdown(
                "Ask questions. If you set `HF_TOKEN` + `PAPER_LLM_MODEL` as Space secrets, answers become generative; "
                "otherwise it returns top-matching snippets.",
                elem_classes=["hint"],
            )
            gr.ChatInterface(fn=paper_chat, title="Chat with the Paper")

        with gr.Tab("Share Kit"):
            gr.Markdown("Generate shareable assets. Works without secrets (deterministic fallback).", elem_classes=["hint"])
            kind = gr.Dropdown(["Tweet thread", "Talk outline", "FAQ"], value="Tweet thread", label="Generate")
            btn = gr.Button("Create")
            share_out = gr.Textbox(lines=14, label="Output")
            btn.click(generate_share, inputs=kind, outputs=share_out)

        with gr.Tab("Model Playground"):
            gr.Markdown("Chat with a referenced Hub model (if any) or provide your own. Requires `HF_TOKEN` secret.", elem_classes=["hint"])
            model_id = gr.Dropdown(
                choices=(DETECTED_MODELS if DETECTED_MODELS else []),
                value=(DETECTED_MODELS[0] if DETECTED_MODELS else None),
                label="Model id",
                allow_custom_value=True,
            )

            def _model_chat_fn(message: str, history: List[Tuple[str, str]], mid: str) -> str:
                return model_chat(mid, message, history)

            gr.ChatInterface(fn=_model_chat_fn, additional_inputs=[model_id], title="Model Playground")
        with gr.Tab("Live Quantizer"):
            gr.Markdown("""
## Think → Quantize → Transmit

Type a messy, natural-language intent and watch it get quantized to a UCR anchor.
This demonstrates the core Slipstream innovation: mapping free-form language onto
a shared semantic manifold.
""")
            with gr.Row():
                with gr.Column(scale=2):
                    intent_input = gr.Textbox(
                        label="Your intent (natural language)",
                        placeholder="Hey, I'm kinda stuck on this auth bug, can you take a look?",
                        lines=2,
                    )
                    quantize_btn = gr.Button("Quantize", variant="primary")
                    gr.Markdown("**Try these examples:**")
                    with gr.Row():
                        for ex_text, _ex_anchor in EXAMPLE_INTENTS:
                            ex_btn = gr.Button(ex_text, size="sm")
                            # Bind ex_text at definition time via a default arg.
                            ex_btn.click(lambda t=ex_text: t, outputs=intent_input)
                with gr.Column(scale=3):
                    primary_out = gr.Markdown(label="Result")
                    with gr.Accordion("Nearby Anchors", open=True):
                        alternatives_out = gr.Markdown()
                    wire_out = gr.Code(label="SLIP Wire Format", language=None)

            quantize_btn.click(
                quantize_intent,
                inputs=intent_input,
                outputs=[primary_out, alternatives_out, wire_out],
            )
            intent_input.submit(
                quantize_intent,
                inputs=intent_input,
                outputs=[primary_out, alternatives_out, wire_out],
            )

    gr.Markdown("---\nBuilt with Gradio on Hugging Face Spaces.")

if __name__ == "__main__":
    demo.launch()