import gradio as gr
import torch
import numpy as np
import pandas as pd
import random
import os
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# --- 1. LOAD ARTIFACTS ---
# The demo ships with a single pre-packaged .pt file that bundles the EEG
# data, the trained projectors, the accuracy matrix, and the alias mapping.
PKG_PATH = "neuro_semantic_package.pt"
print("🚀 System Startup: Loading Artifacts...")

if not os.path.exists(PKG_PATH):
    # Fallback for local testing if file isn't in root
    POSSIBLE_PATHS = [
        "neuro_semantic_package.pt",
        "/content/drive/MyDrive/Brain2Text_Project/demo_research_v2/neuro_semantic_package.pt"
    ]
    for p in POSSIBLE_PATHS:
        if os.path.exists(p):
            PKG_PATH = p
            break

# Fail fast with an actionable message if the package is still missing.
if not os.path.exists(PKG_PATH):
    raise FileNotFoundError(f"CRITICAL: '{PKG_PATH}' missing. Please upload the .pt file.")

# Load the "Black Box" package
# map_location='cpu' ensures it runs on basic HF spaces without GPU if needed
# NOTE(review): weights_only=False unpickles arbitrary objects — only safe
# because the .pt file is a trusted, self-produced artifact. Never point this
# at an untrusted file.
PKG = torch.load(PKG_PATH, map_location="cpu", weights_only=False)

# Package schema (established by the key accesses below; exact per-subject
# layout of DATA — 'Text' list + 'X' array — is assumed from usage later in
# this file, TODO confirm against the packaging script):
DATA = PKG['data']
MODELS = PKG['models']  # The Projectors
MATRIX = PKG['matrix']  # Pre-calculated Accuracy Table
MAPPING = PKG['mapping_key']  # Secret Mapping

# Inverse mapping (Alias -> Real Sub)
ALIAS_TO_REAL = {v: k for k, v in MAPPING.items()}

# Load Decoder: frozen RoBERTa emotion classifier used as the shared
# decoding head for both the text path and the brain path.
print("🤖 Loading RoBERTa-GoEmotions...")
MODEL_NAME = "SamLowe/roberta-base-go_emotions"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
classifier = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
classifier.eval()  # inference mode: disable dropout etc.
id2label = classifier.config.id2label  # int index -> emotion name (28 classes)
# --- 2. LOGIC FUNCTIONS ---

def get_sentence_options(subject_name):
    """Return a Dropdown update listing the sentences recorded for *subject_name*.

    A random sentence is pre-selected as the default to encourage exploration.
    """
    choices = DATA[subject_name]['Text']
    default = random.choice(choices)
    return gr.Dropdown(choices=choices, value=default)


def get_warning_status(subject, projector_alias):
    """Checks for Data Leakage.

    Warns when the selected projector's training set included *subject*
    (a self-test would inflate results).
    """
    # Aliases look like "Projector A". FIX: the original did split(" ")[1]
    # unconditionally, which raises IndexError on a space-free alias.
    # NOTE(review): ALIAS_TO_REAL may be keyed by the short letter ("A") or
    # the full alias — try both; confirm against the packaging script.
    parts = projector_alias.split(" ")
    clean_alias = parts[1] if len(parts) > 1 else projector_alias
    source_subject = ALIAS_TO_REAL.get(clean_alias) or ALIAS_TO_REAL.get(projector_alias)
    if source_subject == subject:
        return (
            "⚠️ **WARNING: DATA LEAKAGE DETECTED**\n\n"
            f"The selected Projector ({projector_alias}) includes data from Subject {subject} in its training set.\n"
            "Results will be artificially high (Self-Test). For valid research verification, please select a different Projector."
        )
    return "✅ **VALID ZERO-SHOT CONFIGURATION**\n\nTarget Subject was NOT seen during Projector training."


def get_historical_accuracy(subject, projector_alias):
    """Retrieves pre-calculated accuracy for this (projector, subject) pair."""
    # FIX: narrowed the original bare `except:` — DataFrame.loc raises
    # KeyError for unknown labels; a bare except also swallowed
    # KeyboardInterrupt/SystemExit and hid real bugs.
    try:
        acc = MATRIX.loc[projector_alias, subject]
    except KeyError:
        return "**Historical Compatibility:** N/A"
    return f"**Historical Compatibility:** {acc}"


# Column names shared by the success and error rows of decode_neuro_semantics
# and by the result table in the UI.
_RESULT_COLUMNS = ("Sentence Stimulus", "Text Ground Truth (Top 2)",
                   "Brain Decoding (Top 3)", "Match")


def decode_neuro_semantics(subject, projector_alias, text):
    """Decode emotions from the EEG recording of *subject* reading *text*.

    Returns a single-row DataFrame comparing the text-derived ("ground
    truth") top-2 emotions with the brain-decoded top-3, plus a ✅/❌ match
    flag (match = brain Top-1 appears in text Top-2).
    """
    # 1. Fetch the EEG features for this exact sentence.
    try:
        idx = DATA[subject]['Text'].index(text)
    except ValueError:
        # FIX: the original returned a (DataFrame, str) tuple here while the
        # success path returned a bare DataFrame; run_batch_analysis appends
        # the result for pd.concat, which would crash on the tuple. Return a
        # well-formed error row with the same columns instead.
        return pd.DataFrame([{
            "Sentence Stimulus": text,
            "Text Ground Truth (Top 2)": "—",
            "Brain Decoding (Top 3)": "Error: Data point not found.",
            "Match": "❌",
        }])
    eeg_input = DATA[subject]['X'][idx].reshape(1, -1)

    # 2. Project (EEG -> Vector): the projector is a fitted sklearn-style
    #    regressor mapping EEG features into RoBERTa's embedding space
    #    (presumably 768-d — see the report text below; TODO confirm).
    proj_model = MODELS[projector_alias]
    predicted_vector = proj_model.predict(eeg_input)
    tensor_vec = torch.tensor(predicted_vector).float()

    # 3. Decode (Vector -> Emotions) through the frozen classification head.
    with torch.no_grad():
        # Brain Path: feed the projected vector straight into the head
        # (dense -> tanh -> out_proj), bypassing the transformer body.
        x = classifier.classifier.dense(tensor_vec.unsqueeze(1))
        x = torch.tanh(x)
        logits_b = classifier.classifier.out_proj(x)
        probs_brain = torch.sigmoid(logits_b).squeeze().numpy()

        # Text Path (Ground Truth): full model on the raw sentence.
        inputs = tokenizer(text, return_tensors="pt")
        logits_t = classifier(**inputs).logits
        probs_text = torch.sigmoid(logits_t).squeeze().numpy()

    # 4. Rank & Format.
    top3_b = np.argsort(probs_brain)[::-1][:3]
    top2_t = np.argsort(probs_text)[::-1][:2]

    # Check Match (Top-1 Brain vs Top-2 Text).
    brain_top1 = id2label[top3_b[0]]
    text_top2 = [id2label[i] for i in top2_t]
    match_icon = "✅" if brain_top1 in text_top2 else "❌"

    # Build the one-row result table with nicely formatted probabilities.
    brain_str = ", ".join(f"{id2label[i]} ({probs_brain[i]:.2f})" for i in top3_b)
    text_str = ", ".join(f"{id2label[i]} ({probs_text[i]:.2f})" for i in top2_t)

    return pd.DataFrame([{
        "Sentence Stimulus": text,
        "Text Ground Truth (Top 2)": text_str,
        "Brain Decoding (Top 3)": brain_str,
        "Match": match_icon,
    }])


def run_batch_analysis(subject, projector_alias):
    """Decode up to 5 random sentences for *subject* and report batch accuracy."""
    subject_data = DATA[subject]
    total_indices = list(range(len(subject_data['Text'])))
    # Sample up to 5 sentences (fewer if the subject has fewer recordings).
    selected_indices = random.sample(total_indices, min(5, len(total_indices)))

    results = [
        decode_neuro_semantics(subject, projector_alias, subject_data['Text'][idx])
        for idx in selected_indices
    ]
    # FIX: guard the empty case — pd.concat([]) raises ValueError.
    if not results:
        return pd.DataFrame(columns=list(_RESULT_COLUMNS)), "**Batch Accuracy:** N/A"
    final_df = pd.concat(results, ignore_index=True)

    # Calculate Batch Accuracy: share of rows whose brain Top-1 matched.
    acc = (final_df["Match"] == "✅").mean() * 100
    return final_df, f"**Batch Accuracy:** {acc:.1f}%"


# --- 3. UI LAYOUT ---

# Formatted Report Text
REPORT_TEXT = """
### 1. Abstract
This interface demonstrates a **Brain-Computer Interface (BCI)** capable of decoding high-level semantic information directly from non-invasive EEG signals. By aligning biological neural activity with the latent space of Large Language Models (LLMs), we show that it is possible to reconstruct the **emotional sentiment** of a sentence a user is reading, even if the model has **never seen that user's brain data before**.

### 2. The Dataset: ZuCo (Zurich Cognitive Language Processing Corpus)
This project utilizes the **ZuCo 2.0 dataset**, a benchmark for cognitive modeling.
* **Protocol:** Subjects read movie reviews naturally while their brain activity (EEG) and eye movements were recorded.
* **The Challenge:** Unlike synthetic tasks, natural reading involves rapid, complex cognitive processing, making signal decoding significantly harder.

### 3. Methodology: Latent Space Projection
Instead of training a simple classifier to predict "Positive" or "Negative" from brain waves, we employ a **Neuro-Semantic Projector**.
* **The Goal:** To learn a mapping function `f(EEG) → R^768` that transforms raw brain signals into the high-dimensional embedding space of **RoBERTa**.
* **The Mechanism:** The system projects the EEG signal into a vector. This vector is then fed into a frozen, pre-trained LLM (`roberta-base-go_emotions`) to generate a probability distribution over **28 distinct emotional states** (e.g., *Admiration, Annoyance, Gratitude, Remorse*).

### 4. Experimental Setup: Strict Zero-Shot Evaluation
To ensure scientific rigor, this demo adheres to a **Strict Leave-One-Group-Out** protocol.
* **Disjoint Training:** The "Projectors" available in this demo were trained on a subset of subjects and validated on **completely different subjects**.
* **No Calibration:** The model does not receive any calibration data from the target subject. It must rely on universal neural patterns shared across humans.

### 5. Interpretation of Results
The demo compares two probability distributions for every sentence:
1. **Text Ground Truth:** What the AI model thinks the sentence means based on the text alone.
2. **Brain Prediction:** What the AI model thinks the sentence means based **only** on the user's brain waves.

**Accuracy Metric:** A prediction is considered correct if the **Top-1 Emotion** predicted from the Brain Signal matches either the **#1 or #2 Emotion** predicted from the Text.
"""

with gr.Blocks(theme=gr.themes.Soft(), title="Neuro-Semantic Decoder") as demo:
    gr.Markdown("# 🧠 Neuro-Semantic Alignment: Zero-Shot Decoding")

    with gr.Tabs():
        # --- TAB 1: INTERACTIVE DEMO ---
        with gr.TabItem("🔮 Interactive Demo"):
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### ⚙️ Configuration")
                    # Selectors
                    sub_dropdown = gr.Dropdown(
                        choices=list(DATA.keys()), value="ZKB",
                        label="Select Target Subject (Data Source)"
                    )
                    proj_dropdown = gr.Dropdown(
                        choices=list(MODELS.keys()), value="Projector A",
                        label="Select Projector (Decoding Model)"
                    )
                    # Dynamic Info Boxes
                    warning_box = gr.Markdown("✅ **VALID ZERO-SHOT CONFIGURATION**\n\nTarget Subject was NOT seen during Projector training.")
                    history_box = gr.Markdown("**Historical Compatibility:** 40.0%")
                    btn = gr.Button("🔮 Run Batch Analysis (5 Samples)", variant="primary")

                with gr.Column(scale=2):
                    gr.Markdown("### 📊 Decoding Results")
                    # Output Table
                    result_table = gr.Dataframe(
                        headers=list(_RESULT_COLUMNS),
                        wrap=True,
                    )
                    batch_accuracy_box = gr.Markdown("**Batch Accuracy:** -")

            # Interactivity: both dropdowns refresh both info boxes.
            # (DRY: the original repeated these four wirings verbatim.)
            for dropdown in (sub_dropdown, proj_dropdown):
                dropdown.change(fn=get_warning_status,
                                inputs=[sub_dropdown, proj_dropdown],
                                outputs=warning_box)
                dropdown.change(fn=get_historical_accuracy,
                                inputs=[sub_dropdown, proj_dropdown],
                                outputs=history_box)

            # Run
            btn.click(
                fn=run_batch_analysis,
                inputs=[sub_dropdown, proj_dropdown],
                outputs=[result_table, batch_accuracy_box]
            )

        # --- TAB 2: REPORT ---
        with gr.TabItem("📘 Project Report"):
            gr.Markdown(REPORT_TEXT)

if __name__ == "__main__":
    demo.launch()