File size: 10,248 Bytes
5251234
 
 
 
 
 
 
 
4bcefdd
5251234
 
4bcefdd
5251234
4bcefdd
 
cbbd266
 
 
4bcefdd
cbbd266
 
 
4bcefdd
 
 
5251234
4bcefdd
 
 
5251234
4bcefdd
 
 
5251234
4bcefdd
 
 
 
5251234
 
 
 
 
 
 
4bcefdd
5251234
4bcefdd
 
 
 
 
 
5251234
4bcefdd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5251234
4bcefdd
5251234
 
 
 
 
4bcefdd
5251234
4bcefdd
 
 
 
 
 
4f85db7
4bcefdd
 
 
 
 
 
 
5251234
 
 
 
4bcefdd
 
5251234
 
4bcefdd
5251234
 
 
 
 
4bcefdd
 
 
5251234
 
4bcefdd
5251234
 
 
 
4bcefdd
 
 
5251234
4bcefdd
 
5251234
 
4bcefdd
 
5251234
4bcefdd
5251234
 
 
4bcefdd
 
5251234
4bcefdd
5251234
4bcefdd
 
 
 
 
5251234
4bcefdd
cbbd266
 
 
 
 
 
 
 
 
 
 
 
 
5251234
cbbd266
 
4bcefdd
 
5251234
cbbd266
 
 
 
 
 
5251234
 
cbbd266
5251234
 
cbbd266
4bcefdd
cbbd266
 
 
 
 
4bcefdd
 
 
cbbd266
4bcefdd
 
 
cbbd266
4bcefdd
cbbd266
 
4bcefdd
 
 
cbbd266
 
 
 
4bcefdd
cbbd266
4bcefdd
 
 
 
 
 
 
 
 
 
 
 
 
5251234
cbbd266
 
 
5251234
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
import gradio as gr
import torch
import numpy as np
import pandas as pd
import random
import os
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# --- 1. LOAD ARTIFACTS ---
# Serialized bundle holding the demo's data and models: per-subject EEG/text
# pairs, the trained projectors, the pre-computed accuracy matrix, and the
# subject<->alias mapping. Loaded once at startup (module import time).
PKG_PATH = "neuro_semantic_package.pt"

print("๐Ÿš€ System Startup: Loading Artifacts...")
if not os.path.exists(PKG_PATH):
    # Fallback for local testing if file isn't in root
    POSSIBLE_PATHS = [
        "neuro_semantic_package.pt",
        "/content/drive/MyDrive/Brain2Text_Project/demo_research_v2/neuro_semantic_package.pt"
    ]
    for p in POSSIBLE_PATHS:
        if os.path.exists(p):
            PKG_PATH = p
            break
    
    # Fail fast with a clear message rather than a confusing torch.load error.
    if not os.path.exists(PKG_PATH):
        raise FileNotFoundError(f"CRITICAL: '{PKG_PATH}' missing. Please upload the .pt file.")

# Load the "Black Box" package
# map_location='cpu' ensures it runs on basic HF spaces without GPU if needed
# NOTE(review): weights_only=False unpickles arbitrary Python objects — pickle
# can execute code on load, so only ever point PKG_PATH at a trusted file.
PKG = torch.load(PKG_PATH, map_location="cpu", weights_only=False) 
DATA = PKG['data']           # per-subject data; presumably {subject: {'Text': [...], 'X': array}} — TODO confirm against the packaging script
MODELS = PKG['models']       # The Projectors
MATRIX = PKG['matrix']       # Pre-calculated Accuracy Table
MAPPING = PKG['mapping_key'] # Secret Mapping

# Inverse mapping (Alias -> Real Sub)
ALIAS_TO_REAL = {v: k for k, v in MAPPING.items()}

# Load Decoder
# Frozen pre-trained classifier used both as the text ground-truth labeler and
# as the decoder head for brain-projected vectors (see decode_neuro_semantics).
print("๐Ÿค– Loading RoBERTa-GoEmotions...")
MODEL_NAME = "SamLowe/roberta-base-go_emotions"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
classifier = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
classifier.eval()
id2label = classifier.config.id2label

# --- 2. LOGIC FUNCTIONS ---

def get_sentence_options(subject_name):
    """Build the sentence dropdown for *subject_name* with a random default.

    Randomizing the preselected sentence nudges users to explore different
    stimuli instead of always seeing the first one.
    """
    sentences = DATA[subject_name]['Text']
    return gr.Dropdown(choices=sentences, value=random.choice(sentences))

def get_warning_status(subject, projector_alias):
    """Check for data leakage between the target subject and the projector.

    The projector label is expected to look like "Projector A"; the token
    after the first space is the alias key into ALIAS_TO_REAL. Returns a
    markdown warning string if the projector's training set included
    *subject*, otherwise a valid-configuration message.
    """
    # Fix: the original `split(" ")[1]` raised IndexError for any alias
    # without a space; fall back to the whole string in that case.
    parts = projector_alias.split(" ")
    clean_alias = parts[1] if len(parts) > 1 else parts[0]
    source_subject = ALIAS_TO_REAL.get(clean_alias)
    
    if source_subject == subject:
        return (
            "โš ๏ธ **WARNING: DATA LEAKAGE DETECTED**\n\n"
            f"The selected Projector ({projector_alias}) includes data from Subject {subject} in its training set.\n"
            "Results will be artificially high (Self-Test). For valid research verification, please select a different Projector."
        )
    else:
        return "โœ… **VALID ZERO-SHOT CONFIGURATION**\n\nTarget Subject was NOT seen during Projector training."

def get_historical_accuracy(subject, projector_alias):
    """Look up the pre-computed accuracy for (projector, subject) in MATRIX.

    Returns a markdown string, or "N/A" when the pair is absent from the
    accuracy table.
    """
    try:
        acc = MATRIX.loc[projector_alias, subject]
    except KeyError:
        # Fix: the original bare `except:` swallowed every exception
        # (including KeyboardInterrupt); only a missing row/column label
        # is expected here.
        return "**Historical Compatibility:** N/A"
    return f"**Historical Compatibility:** {acc}"

def decode_neuro_semantics(subject, projector_alias, text):
    """Decode one sentence's emotions from EEG and compare against text.

    Projects the subject's EEG for *text* into RoBERTa's embedding space,
    pushes the vector through the frozen classification head, and compares
    the top-3 brain-decoded emotions with the top-2 text-derived ones.

    Returns a one-row DataFrame (stimulus, text top-2, brain top-3, match
    icon). On a missing sentence it returns an EMPTY DataFrame — the fix:
    the original returned a (DataFrame, str) tuple on this path, which
    crashed pd.concat in run_batch_analysis.
    """
    # 1. Fetch Data
    try:
        idx = DATA[subject]['Text'].index(text)
        eeg_input = DATA[subject]['X'][idx].reshape(1, -1)
    except ValueError:
        # Sentence not in this subject's stimulus list; keep the return type
        # consistent with the success path so callers can concat safely.
        return pd.DataFrame()

    # 2. Project (EEG -> Vector)
    proj_model = MODELS[projector_alias]
    predicted_vector = proj_model.predict(eeg_input)
    tensor_vec = torch.tensor(predicted_vector).float()

    # 3. Decode (Vector -> Emotions)
    with torch.no_grad():
        # Brain Path: feed the projected vector straight into the frozen
        # classification head (dense -> tanh -> out_proj), bypassing the encoder.
        x = classifier.classifier.dense(tensor_vec.unsqueeze(1))
        x = torch.tanh(x)
        logits_b = classifier.classifier.out_proj(x)
        probs_brain = torch.sigmoid(logits_b).squeeze().numpy()
        
        # Text Path (Ground Truth): full model on the raw sentence.
        inputs = tokenizer(text, return_tensors="pt")
        logits_t = classifier(**inputs).logits
        probs_text = torch.sigmoid(logits_t).squeeze().numpy()

    # 4. Rank & Format
    top3_b = np.argsort(probs_brain)[::-1][:3]
    top2_t = np.argsort(probs_text)[::-1][:2]
    
    # Check Match (Top-1 Brain vs Top-2 Text)
    brain_top1 = id2label[top3_b[0]]
    text_top2 = [id2label[i] for i in top2_t]
    
    match_icon = "โœ…" if brain_top1 in text_top2 else "โŒ"

    # Build Result Table for ONE sentence
    # We display the probabilities nicely
    brain_str = ", ".join([f"{id2label[i]} ({probs_brain[i]:.2f})" for i in top3_b])
    text_str = ", ".join([f"{id2label[i]} ({probs_text[i]:.2f})" for i in top2_t])
    
    df = pd.DataFrame([{
        "Sentence Stimulus": text,
        "Text Ground Truth (Top 2)": text_str,
        "Brain Decoding (Top 3)": brain_str,
        "Match": match_icon
    }])
    
    return df

def run_batch_analysis(subject, projector_alias):
    """Decode up to five randomly chosen sentences and report batch accuracy.

    Returns (results DataFrame, markdown accuracy string).
    """
    texts = DATA[subject]['Text']
    # Sample without replacement; capped at 5 for a quick, robust demo run.
    chosen = random.sample(list(range(len(texts))), min(5, len(texts)))
    
    frames = [
        decode_neuro_semantics(subject, projector_alias, texts[i])
        for i in chosen
    ]
    final_df = pd.concat(frames)
    
    # Batch accuracy = share of rows whose brain top-1 matched the text top-2.
    accuracy = (final_df["Match"] == "โœ…").mean() * 100
    return final_df, f"**Batch Accuracy:** {accuracy:.1f}%"

# --- 3. UI LAYOUT ---

# Formatted Report Text — markdown rendered verbatim in the "Project Report" tab.
REPORT_TEXT = """
### 1. Abstract
This interface demonstrates a **Brain-Computer Interface (BCI)** capable of decoding high-level semantic information directly from non-invasive EEG signals. By aligning biological neural activity with the latent space of Large Language Models (LLMs), we show that it is possible to reconstruct the **emotional sentiment** of a sentence a user is reading, even if the model has **never seen that user's brain data before**.

### 2. The Dataset: ZuCo (Zurich Cognitive Language Processing Corpus)
This project utilizes the **ZuCo 2.0 dataset**, a benchmark for cognitive modeling.
*   **Protocol:** Subjects read movie reviews naturally while their brain activity (EEG) and eye movements were recorded.
*   **The Challenge:** Unlike synthetic tasks, natural reading involves rapid, complex cognitive processing, making signal decoding significantly harder.

### 3. Methodology: Latent Space Projection
Instead of training a simple classifier to predict "Positive" or "Negative" from brain waves, we employ a **Neuro-Semantic Projector**.
*   **The Goal:** To learn a mapping function `f(EEG) โ†’ R^768` that transforms raw brain signals into the high-dimensional embedding space of **RoBERTa**.
*   **The Mechanism:** The system projects the EEG signal into a vector. This vector is then fed into a frozen, pre-trained LLM (`roberta-base-go_emotions`) to generate a probability distribution over **28 distinct emotional states** (e.g., *Admiration, Annoyance, Gratitude, Remorse*).

### 4. Experimental Setup: Strict Zero-Shot Evaluation
To ensure scientific rigor, this demo adheres to a **Strict Leave-One-Group-Out** protocol.
*   **Disjoint Training:** The "Projectors" available in this demo were trained on a subset of subjects and validated on **completely different subjects**.
*   **No Calibration:** The model does not receive any calibration data from the target subject. It must rely on universal neural patterns shared across humans.

### 5. Interpretation of Results
The demo compares two probability distributions for every sentence:
1.  **Text Ground Truth:** What the AI model thinks the sentence means based on the text alone.
2.  **Brain Prediction:** What the AI model thinks the sentence means based **only** on the user's brain waves.

**Accuracy Metric:** A prediction is considered correct if the **Top-1 Emotion** predicted from the Brain Signal matches either the **#1 or #2 Emotion** predicted from the Text.
"""

# --- UI LAYOUT ---
# Declarative Gradio layout: component creation order inside the context
# manager defines on-screen order, so statements here must not be reordered.
# NOTE(review): the emoji literals below are mojibake (UTF-8 decoded as
# cp1252); they are runtime strings and are left byte-identical here.
with gr.Blocks(theme=gr.themes.Soft(), title="Neuro-Semantic Decoder") as demo:
    gr.Markdown("# ๐Ÿง  Neuro-Semantic Alignment: Zero-Shot Decoding")
    
    with gr.Tabs():
        # --- TAB 1: INTERACTIVE DEMO ---
        with gr.TabItem("๐Ÿ”ฎ Interactive Demo"):
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### โš™๏ธ Configuration")
                    
                    # Selectors — choices come from the loaded package artifacts.
                    sub_dropdown = gr.Dropdown(choices=list(DATA.keys()), value="ZKB", label="Select Target Subject (Data Source)")
                    proj_dropdown = gr.Dropdown(choices=list(MODELS.keys()), value="Projector A", label="Select Projector (Decoding Model)")
                    
                    # Dynamic Info Boxes — initial text mirrors what
                    # get_warning_status / get_historical_accuracy return.
                    warning_box = gr.Markdown("โœ… **VALID ZERO-SHOT CONFIGURATION**\n\nTarget Subject was NOT seen during Projector training.")
                    history_box = gr.Markdown("**Historical Compatibility:** 40.0%")
                    
                    btn = gr.Button("๐Ÿ”ฎ Run Batch Analysis (5 Samples)", variant="primary")
                    
                with gr.Column(scale=2):
                    gr.Markdown("### ๐Ÿ“Š Decoding Results")
                    
                    # Output Table — headers must match the keys produced by
                    # decode_neuro_semantics.
                    result_table = gr.Dataframe(
                        headers=["Sentence Stimulus", "Text Ground Truth (Top 2)", "Brain Decoding (Top 3)", "Match"],
                        wrap=True
                    )
                    batch_accuracy_box = gr.Markdown("**Batch Accuracy:** -")

            # Interactivity: changing either dropdown refreshes both info boxes.
            sub_dropdown.change(fn=get_warning_status, inputs=[sub_dropdown, proj_dropdown], outputs=warning_box)
            sub_dropdown.change(fn=get_historical_accuracy, inputs=[sub_dropdown, proj_dropdown], outputs=history_box)
            
            proj_dropdown.change(fn=get_warning_status, inputs=[sub_dropdown, proj_dropdown], outputs=warning_box)
            proj_dropdown.change(fn=get_historical_accuracy, inputs=[sub_dropdown, proj_dropdown], outputs=history_box)
            
            # Run
            btn.click(
                fn=run_batch_analysis,
                inputs=[sub_dropdown, proj_dropdown],
                outputs=[result_table, batch_accuracy_box]
            )

        # --- TAB 2: REPORT ---
        with gr.TabItem("๐Ÿ“˜ Project Report"):
            gr.Markdown(REPORT_TEXT)

# Launch only when executed as a script (HF Spaces also calls demo.launch()).
if __name__ == "__main__":
    demo.launch()