"""
Sentinel Tiny Text Space — Interactive text generation with Sentinel transformer
"""
import gradio as gr
import torch
import torch.nn as nn
import numpy as np
from transformers import AutoTokenizer
import json

# ─── Sentinel Components ─────────────────────────────────────────────────────
class SentinelAct(nn.Module):
    """Sentinel activation: ``f(x) = x * sech(x / e)``.

    The hyperbolic secant is evaluated as the reciprocal of ``torch.cosh``
    applied to the input scaled by ``1 / e``. The module has no learnable
    parameters.
    """

    def __init__(self):
        super().__init__()
        # Constant input scale 1/e, stored as a plain Python float.
        self.inv_e = 1.0 / np.e

    def forward(self, x):
        """Apply the activation elementwise and return a tensor shaped like ``x``."""
        scaled = self.inv_e * x
        sech = 1.0 / torch.cosh(scaled)
        return x * sech

class SentinelAttn(nn.Module):
    """Multi-head self-attention that uses ``sech`` weights instead of softmax.

    Scaled dot-product scores are mapped through ``sech(s) = 1 / cosh(s)``,
    positions disallowed by ``mask`` get weight zero, and every query row is
    normalised by the sum of its weights (plus a small epsilon so an all-zero
    row does not divide by zero).

    Args:
        d: Model width, i.e. the size of the last input dimension.
        h: Number of attention heads; must divide ``d`` evenly.

    Raises:
        ValueError: If ``h`` is not positive or does not divide ``d``.
    """

    def __init__(self, d, h=4):
        super().__init__()
        # Validate up front: with a non-divisible width, ``d // h`` silently
        # truncates and the mistake only shows up later as a shape error in
        # ``forward``.
        if h <= 0 or d % h != 0:
            raise ValueError(
                f"model width d={d} must be a positive multiple of head count h={h}"
            )
        self.d, self.h, self.hd = d, h, d // h
        self.Wq = nn.Linear(d, d, bias=False)
        self.Wk = nn.Linear(d, d, bias=False)
        self.Wv = nn.Linear(d, d, bias=False)
        self.Wo = nn.Linear(d, d, bias=False)

    def forward(self, x, mask):
        """Attend over ``x`` and return a tensor of the same shape.

        Args:
            x: Input of shape ``(B, S, d)``.
            mask: Tensor broadcastable to ``(B, h, S, S)``; entries equal to 0
                mark key positions a query may not attend to.

        Returns:
            Tensor of shape ``(B, S, d)``.
        """
        B, S, _ = x.shape

        def split_heads(t):
            # (B, S, d) -> (B, h, S, hd)
            return t.view(B, S, self.h, self.hd).transpose(1, 2)

        Q = split_heads(self.Wq(x))
        K = split_heads(self.Wk(x))
        V = split_heads(self.Wv(x))

        scores = torch.matmul(Q, K.transpose(-2, -1)) / np.sqrt(self.hd)
        # Zero the blocked weights directly instead of routing them through a
        # -inf sentinel and a float equality test; the forward values are the
        # same and no infinities enter the graph.
        attn = (1.0 / torch.cosh(scores)).masked_fill(mask == 0, 0.0)
        attn = attn / (attn.sum(dim=-1, keepdim=True) + 1e-8)

        out = torch.matmul(attn, V)
        # (B, h, S, hd) -> (B, S, d)
        out = out.transpose(1, 2).contiguous().view(B, S, self.d)
        return self.Wo(out)

class TinyTrans(nn.Module):
    """Small decoder-only transformer built from Sentinel attention and activation.

    Token and learned position embeddings are summed, passed through ``l``
    residual blocks (LayerNorm is applied before each attention / feed-forward
    sub-layer and the sub-layer output is added back), normalised once more
    and projected to vocabulary logits.

    Args:
        v: Vocabulary size (rows of the token embedding and of the output head).
        d: Model width.
        h: Number of attention heads per block.
        l: Number of blocks.
        ff: Hidden width of each feed-forward sub-layer.
        s: Number of learned positions, i.e. the longest accepted sequence.
    """

    def __init__(self, v=50257, d=128, h=4, l=4, ff=256, s=128):
        super().__init__()
        self.tok = nn.Embedding(v, d)
        self.pos = nn.Embedding(s, d)
        self.layers = nn.ModuleList([nn.ModuleDict({
            'attn': SentinelAttn(d, h),
            'ffn': nn.Sequential(nn.Linear(d, ff), SentinelAct(), nn.Linear(ff, d)),
            'n1': nn.LayerNorm(d), 'n2': nn.LayerNorm(d),
        }) for _ in range(l)])
        self.norm = nn.LayerNorm(d)
        self.head = nn.Linear(d, v, bias=False)
        self.seq = s

    def forward(self, ids):
        """Return next-token logits for every position.

        Args:
            ids: Integer token ids of shape ``(B, S)`` with ``S <= self.seq``.

        Returns:
            Logits of shape ``(B, S, v)``.

        Raises:
            IndexError: If ``S`` exceeds the number of learned positions.
        """
        B, S = ids.shape
        # Check the length explicitly: otherwise the position lookup below
        # fails with an opaque out-of-range error from the embedding layer.
        if S > self.seq:
            raise IndexError(
                f"sequence length {S} exceeds the model's maximum of {self.seq} positions"
            )
        positions = torch.arange(S, device=ids.device).unsqueeze(0).expand(B, -1)
        x = self.tok(ids) + self.pos(positions)
        # Lower-triangular mask: position i may attend to positions <= i only.
        mask = torch.tril(torch.ones(S, S, device=ids.device)).view(1, 1, S, S)
        for layer in self.layers:
            x = x + layer['attn'](layer['n1'](x), mask)
            x = x + layer['ffn'](layer['n2'](x))
        x = self.norm(x)
        return self.head(x)

# ─── Load Model ──────────────────────────────────────────────────────────────
MODEL_URL = "https://huggingface.co/5dimension/sentinel-tiny-text/resolve/main/model.pt"
TOKENIZER = "gpt2"

# Reuse the end-of-sequence token as the padding token.
tokenizer = AutoTokenizer.from_pretrained(TOKENIZER)
tokenizer.pad_token = tokenizer.eos_token

# The vocabulary size comes from the tokenizer; the remaining dimensions are fixed here.
model = TinyTrans(v=tokenizer.vocab_size, d=128, h=4, l=4, ff=256, s=128)

# Best-effort weight loading: any failure (missing package, download error,
# incompatible checkpoint) leaves the randomly initialised model in place and
# is reported through `model_status` instead of aborting start-up.
try:
    import os
    from huggingface_hub import hf_hub_download
    model_path = hf_hub_download(repo_id="5dimension/sentinel-tiny-text", filename="model.pt")
    state_dict = torch.load(model_path, map_location="cpu", weights_only=True)
    model.load_state_dict(state_dict)
except Exception as e:
    model_status = f"⚠️ Using random weights: {str(e)[:100]}"
else:
    model_status = "✅ Model loaded from HF Hub"

# Inference only: keep the model on CPU and switch it to evaluation mode.
model.cpu()
model.eval()

# ─── Generation Function ────────────────────────────────────────────────────
def _sample_top_p(logits, temperature, top_p):
    """Draw one token id per row of ``logits`` using temperature and nucleus sampling.

    Args:
        logits: Float tensor of shape ``(batch, vocab)``.
        temperature: Softmax temperature; values ``<= 0`` select the arg-max.
        top_p: Cumulative-probability threshold of the nucleus.

    Returns:
        Integer tensor of shape ``(batch, 1)`` holding the sampled token ids.
    """
    if temperature <= 0:
        # Greedy limit; dividing by zero would turn the logits into inf/nan.
        return logits.argmax(dim=-1, keepdim=True)

    probs = torch.softmax(logits / temperature, dim=-1)
    sorted_probs, sorted_indices = torch.sort(probs, descending=True)
    # Mass that precedes each token in sorted order. Keeping tokens while that
    # mass is still below top_p includes the token that crosses the threshold,
    # so the nucleus is the smallest prefix whose total mass reaches top_p.
    mass_before = torch.cumsum(sorted_probs, dim=-1) - sorted_probs
    keep = mass_before < top_p
    keep[..., 0] = True  # always leave at least one candidate (e.g. top_p <= 0)
    filtered = sorted_probs.masked_fill(~keep, 0.0)
    filtered = filtered / filtered.sum(dim=-1, keepdim=True)
    choice = torch.multinomial(filtered, 1)
    # gather keeps the (batch, 1) shape required to concatenate onto the ids.
    return sorted_indices.gather(-1, choice)


def generate_text(prompt, max_tokens=50, temperature=0.8, top_p=0.9):
    """Continue ``prompt`` with tokens sampled from the module-level ``model``.

    Args:
        prompt: Text to continue. An empty prompt is seeded with the
            tokenizer's end-of-sequence token.
        max_tokens: Upper bound on the number of new tokens; converted with
            ``int`` so a float coming from a UI slider is accepted.
        temperature: Sampling temperature; ``<= 0`` means greedy decoding.
        top_p: Nucleus-sampling threshold.

    Returns:
        The decoded prompt plus continuation, with special tokens removed.
        Generation stops early once the sequence reaches ``model.seq`` tokens.
    """
    context = model.seq
    prompt_ids = tokenizer.encode(prompt or "")
    if not prompt_ids:
        # A zero-length sequence has no last position to predict from.
        # NOTE(review): seeding with EOS assumes the checkpoint can start
        # from it — confirm against the training setup.
        prompt_ids = [tokenizer.eos_token_id]
    ids = torch.tensor([prompt_ids], dtype=torch.long, device="cpu")

    with torch.no_grad():
        for _ in range(int(max_tokens)):
            # Feed at most `context` trailing tokens so an over-long prompt
            # cannot index past the model's position table.
            logits = model(ids[:, -context:])[:, -1, :]
            tok = _sample_top_p(logits, temperature, top_p)
            ids = torch.cat([ids, tok], dim=1)
            if ids.size(1) >= context:
                break
    return tokenizer.decode(ids[0], skip_special_tokens=True)

# ─── UI ────────────────────────────────────────────────────────────────────────
# The inline CSS caps the page width and styles the two header <div>s below.
with gr.Blocks(title="🦴 Sentinel Tiny Text", css="""
    .gradio-container { max-width: 800px; margin: 0 auto; }
    .title { text-align: center; font-size: 2em; font-weight: bold; color: #6b4c9a; }
    .subtitle { text-align: center; color: #888; margin-bottom: 1em; }
""") as demo:
    # Page header.
    gr.Markdown("""
    <div class="title">🦴 Sentinel Tiny Text</div>
    <div class="subtitle">13.4M parameter transformer with Sentinel activation σ(x) = x·sech(x/e)</div>
    """)

    # Outcome of the weight-loading step performed at import time.
    gr.Markdown(f"**Status**: {model_status}")

    # Inputs: prompt on the left (wider column), sampling controls on the right.
    with gr.Row():
        with gr.Column(scale=2):
            prompt = gr.Textbox(
                label="Prompt",
                placeholder="Once upon a time, a little cat...",
                value="Once upon a time",
                lines=2,
            )
        with gr.Column(scale=1):
            max_tokens = gr.Slider(
                minimum=10, maximum=100, value=50, step=5, label="Max Tokens"
            )
            temperature = gr.Slider(
                minimum=0.3, maximum=1.5, value=0.8, step=0.1, label="Temperature"
            )

    generate_btn = gr.Button(value="🚀 Generate", variant="primary")
    output = gr.Textbox(label="Generated Text", lines=8, interactive=False)

    # Static model card.
    with gr.Row():
        gr.Markdown("""
        ### About
        - **Activation**: Sentinel sech: σ(x) = x·sech(x/e)
        - **Attention**: Sentinel sech (no softmax)
        - **Architecture**: 4 layers, 128 hidden, 4 heads
        - **Dataset**: TinyStories (1K samples demo)
        - **Parameters**: 13.4M | **Quantized INT8**: [13 MB](https://huggingface.co/5dimension/sentinel-tiny-text-int8) | **INT4**: [6.4 MB](https://huggingface.co/5dimension/sentinel-tiny-text-int4)
        """)

    # top_p is not exposed in the UI, so generate_text uses its default for it.
    generate_btn.click(
        fn=generate_text,
        inputs=[prompt, max_tokens, temperature],
        outputs=output,
    )

demo.launch()