Spaces:

st192011
/

ZuCo-EEG-Lab

Sleeping

App Files Files Community

st192011 committed on Jan 15

Commit

5251234

verified ·

1 Parent(s): f235648

Create app.py

Browse files

Files changed (1) hide show

app.py +194 -0

app.py ADDED Viewed

	@@ -0,0 +1,194 @@

+import gradio as gr
+import torch
+import numpy as np
+import pandas as pd
+import random
+import os
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+# --- 1. LOAD ARTIFACTS (runs at import time; the sections below read the globals bound here) ---
+PKG_PATH = "neuro_semantic_package.pt"  # relative path, resolved against the process working directory
+print("🚀 System Startup: Loading Artifacts...")
+if not os.path.exists(PKG_PATH):
+    # Fail fast with an explicit message so the cause is visible in the web logs
+    raise FileNotFoundError(f"CRITICAL: '{PKG_PATH}' missing. Please upload the .pt file.")
+# Load the "Black Box" package; only the four keys read below are relied upon in this file
+PKG = torch.load(PKG_PATH, map_location="cpu", weights_only=False) # Load to CPU for HF Spaces. NOTE(review): weights_only=False permits full unpickling, so the package must come from a trusted source
+DATA = PKG['data']  # indexed as DATA[subject]['Text'] and DATA[subject]['X'] by the logic functions
+MODELS = PKG['models']       # The Projectors, keyed by the alias offered in the UI; each is called via .predict()
+MATRIX = PKG['matrix']       # Pre-calculated Accuracy Table, read as MATRIX.loc[projector_alias, subject]
+MAPPING = PKG['mapping_key'] # Secret Mapping (presumably real subject -> alias, given the inversion below)
+# Inverse mapping (Alias -> Real Sub); if two subjects shared an alias, the later entry would win
+ALIAS_TO_REAL = {v: k for k, v in MAPPING.items()}
+# Load Decoder: tokenizer and classification model come from the same checkpoint name
+print("🤖 Loading RoBERTa-GoEmotions...")
+MODEL_NAME = "SamLowe/roberta-base-go_emotions"
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+classifier = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
+classifier.eval()  # switch the model to evaluation mode before any inference
+id2label = classifier.config.id2label  # class index -> label name, used when formatting results
+# --- 2. LOGIC FUNCTIONS ---
+def get_sentence_options(subject_name):
+    # Return available sentences for the selected subject
+    choices = DATA[subject_name]['Text']
+    # Pick a random one as default to encourage exploration
+    default = random.choice(choices)
+    return gr.Dropdown(choices=choices, value=default)
+def get_warning_status(subject, projector_alias):
+    """Checks for Data Leakage"""
+    clean_alias = projector_alias.split(" ")[1]
+    source_subject = ALIAS_TO_REAL.get(clean_alias)
+    if source_subject == subject:
+        return (
+            "⚠️ **WARNING: DATA LEAKAGE DETECTED**\n\n"
+            f"The selected Projector ({projector_alias}) includes data from Subject {subject} in its training set.\n"
+            "Results will be artificially high (Self-Test). For valid research verification, please select a different Projector."
+        )
+    else:
+        return "✅ **VALID ZERO-SHOT CONFIGURATION**\n\nTarget Subject was NOT seen during Projector training."
+def get_historical_accuracy(subject, projector_alias):
+    """Retrieves pre-calculated accuracy"""
+    try:
+        acc = MATRIX.loc[projector_alias, subject]
+        return f"**Historical Compatibility:** {acc}"
+    except:
+        return "**Historical Compatibility:** N/A"
+def decode_neuro_semantics(subject, projector_alias, text):
+    # 1. Fetch Data
+    try:
+        idx = DATA[subject]['Text'].index(text)
+        eeg_input = DATA[subject]['X'][idx].reshape(1, -1)
+    except ValueError:
+        return pd.DataFrame(), "Error: Data point not found."
+    # 2. Project (EEG -> Vector)
+    proj_model = MODELS[projector_alias]
+    predicted_vector = proj_model.predict(eeg_input)
+    tensor_vec = torch.tensor(predicted_vector).float()
+    # 3. Decode (Vector -> Emotions)
+    with torch.no_grad():
+        # Brain Path
+        x = classifier.classifier.dense(tensor_vec.unsqueeze(1))
+        x = torch.tanh(x)
+        logits_b = classifier.classifier.out_proj(x)
+        probs_brain = torch.sigmoid(logits_b).squeeze().numpy()
+        # Text Path (Ground Truth)
+        inputs = tokenizer(text, return_tensors="pt")
+        logits_t = classifier(**inputs).logits
+        probs_text = torch.sigmoid(logits_t).squeeze().numpy()
+    # 4. Rank & Format
+    top3_b = np.argsort(probs_brain)[::-1][:3]
+    top2_t = np.argsort(probs_text)[::-1][:2]
+    # Check Match (Top-1 Brain vs Top-2 Text)
+    brain_top1 = id2label[top3_b[0]]
+    text_top2 = [id2label[i] for i in top2_t]
+    match_icon = "✅" if brain_top1 in text_top2 else "❌"
+    # Build Result Table for ONE sentence
+    # We display the probabilities nicely
+    brain_str = ", ".join([f"{id2label[i]} ({probs_brain[i]:.2f})" for i in top3_b])
+    text_str = ", ".join([f"{id2label[i]} ({probs_text[i]:.2f})" for i in top2_t])
+    df = pd.DataFrame([{
+        "Sentence Stimulus": text,
+        "Text Ground Truth (Top 2)": text_str,
+        "Brain Decoding (Top 3)": brain_str,
+        "Match": match_icon
+    }])
+    return df, f"**Prediction Status:** {match_icon}"
+def run_batch_analysis(subject, projector_alias):
+    # Runs 5 random samples for robust demo
+    subject_data = DATA[subject]
+    total_indices = list(range(len(subject_data['Text'])))
+    selected_indices = random.sample(total_indices, min(5, len(total_indices)))
+    results = []
+    for idx in selected_indices:
+        txt = subject_data['Text'][idx]
+        df, stat = decode_neuro_semantics(subject, projector_alias, txt)
+        results.append(df)
+    final_df = pd.concat(results)
+    # Calculate Batch Accuracy
+    acc = (final_df["Match"] == "✅").mean() * 100
+    return final_df, f"**Batch Accuracy:** {acc:.1f}%"
+# --- 3. UI LAYOUT ---
+INTRODUCTION = """
+### 🔬 Abstract & Methodology
+**Goal:** Zero-Shot decoding of emotional sentiment from raw EEG signals.
+**Methodology:**
+1.  **Input:** EEG signals from the ZuCo 2.0 dataset (Movie Reviews).
+2.  **Projection:** A Ridge Regression model maps EEG features ($f(EEG)$) to the **RoBERTa-GoEmotions** latent space ($\mathbb{R}^{768}$).
+3.  **Inference:** The projected vector is classified by the frozen RoBERTa head to recover the sentiment probability distribution.
+**Evaluation Metric:** A prediction is correct if the **Top-1 Brain Prediction** appears within the **Top-2 Text Predictions**.
+"""
+with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# 🧠 Neuro-Semantic Alignment: Zero-Shot Decoding")
+    with gr.Accordion("📘 Read Project Report (Abstract & Methodology)", open=False):
+        gr.Markdown(INTRODUCTION)
+    with gr.Row():
+        with gr.Column(scale=1):
+            gr.Markdown("### ⚙️ Configuration")
+            # Selectors
+            sub_dropdown = gr.Dropdown(choices=list(DATA.keys()), value="ZKB", label="Select Target Subject (Data Source)")
+            proj_dropdown = gr.Dropdown(choices=list(MODELS.keys()), value="Projector A", label="Select Projector (Decoding Model)")
+            # Dynamic Info Boxes
+            warning_box = gr.Markdown("✅ **VALID ZERO-SHOT CONFIGURATION**\n\nTarget Subject was NOT seen during Projector training.")
+            history_box = gr.Markdown("**Historical Compatibility:** 40.0%")
+            btn = gr.Button("🔮 Run Batch Analysis (5 Samples)", variant="primary")
+        with gr.Column(scale=2):
+            gr.Markdown("### 📊 Decoding Results")
+            # Output Table
+            result_table = gr.Dataframe(
+                headers=["Sentence Stimulus", "Text Ground Truth (Top 2)", "Brain Decoding (Top 3)", "Match"],
+                wrap=True
+            )
+            batch_accuracy_box = gr.Markdown("**Batch Accuracy:** -")
+    # Interactivity
+    sub_dropdown.change(fn=get_warning_status, inputs=[sub_dropdown, proj_dropdown], outputs=warning_box)
+    sub_dropdown.change(fn=get_historical_accuracy, inputs=[sub_dropdown, proj_dropdown], outputs=history_box)
+    proj_dropdown.change(fn=get_warning_status, inputs=[sub_dropdown, proj_dropdown], outputs=warning_box)
+    proj_dropdown.change(fn=get_historical_accuracy, inputs=[sub_dropdown, proj_dropdown], outputs=history_box)
+    # Run
+    btn.click(
+        fn=run_batch_analysis,
+        inputs=[sub_dropdown, proj_dropdown],
+        outputs=[result_table, batch_accuracy_box]
+    )
+if __name__ == "__main__":
+    demo.launch()