# similarity_analysis / debug_gui_numbers.py
# DanJChong's picture
# Upload folder using huggingface_hub
# 329d553 verified
"""
Debug GUI Numbers - Let's see what you're actually getting
"""
import pandas as pd
from data.data_loader import DataLoader
from visualization.plot_generator import PlotGenerator
from scipy.stats import pearsonr
# --- Setup -------------------------------------------------------------------
# Print the title banner, build the loader / plot-generator pair, and pull out
# the two brain-measure columns that the later sections correlate against.
for banner_line in ("="*80, "DEBUGGING GUI NUMBERS", "="*80):
    print(banner_line)

# Load data exactly like GUI does
print("\nLoading data...")
loader = DataLoader()
loader.load_csv('data/Final_similarity_matrix_standardized.csv')
# The plot generator is built before loader.data is read; keep this order in
# case constructing it touches the loader's data (not visible from here).
plot_gen = PlotGenerator(loader)
data = loader.data
print(f"\nData loaded: {len(data)} rows")

# Brain measures, looked up by column name.
early_brain, late_brain = (
    data[column]
    for column in ('hierarchy_early_visual_avg', 'hierarchy_late_semantic_avg')
)
# Ask the reader for the three facts needed to interpret the numbers printed
# below (model selection, brain measure, displayed correlation), then print
# the banner that introduces the scenario sections.
prompt_lines = (
    "\n" + "="*80,
    "WHAT NUMBERS ARE YOU SEEING?",
    "="*80,
    "\nPlease tell me:",
    "1. Which ML Model did you select?",
    " (e.g., 'AVERAGE - Neural Language (Captions)')",
    "\n2. Which brain measure?",
    " (e.g., 'Early Visual Average (7 ROIs)')",
    "\n3. What correlation value do you see for 'Brain and ML Model'?",
    "\n" + "="*80,
    "LET'S TEST DIFFERENT SCENARIOS",
    "="*80,
)
for prompt_line in prompt_lines:
    print(prompt_line)
# Scenario 1: Method 1 (average-then-correlate) - OLD WAY
# The per-model similarity columns are averaged row by row first, and that
# single averaged series is then correlated with each brain measure.
print("\n--- SCENARIO 1: Method 1 (Old Way - Average then Correlate) ---")

# Each entry of the 'captions_neural' category contributes its first element,
# which is used as a column name; only names present in the data are kept.
models = [entry[0] for entry in loader.model_categories['captions_neural']]
data_columns = data.columns
available_models = [name for name in models if name in data_columns]
print(f"Found {len(available_models)} language models")

# Row-wise mean across the available model columns.
avg_sims = data[available_models].mean(axis=1)

# Correlate the averaged series with the early and late brain measures.
corr_early_method1, corr_late_method1 = (
    avg_sims.corr(brain) for brain in (early_brain, late_brain)
)
print(f"Early: {corr_early_method1:.4f}")
print(f"Late: {corr_late_method1:.4f}")
# Scenario 2: Method 2 (correlate-then-average) - NEW WAY
# Delegates to the plot generator's compute_category_correlation_method2 for
# the 'captions_neural' category, once per brain measure.
print("\n--- SCENARIO 2: Method 2 (New Way - Correlate then Average) ---")
method2 = plot_gen.compute_category_correlation_method2
corr_early_method2 = method2('captions_neural', early_brain)
corr_late_method2 = method2('captions_neural', late_brain)
print(f"Early: {corr_early_method2:.4f}")
print(f"Late: {corr_late_method2:.4f}")
# Scenario 3: What if you're looking at a different brain measure?
# Repeats the Method 1 / Method 2 comparison against a few individual ROI
# columns, in case an individual ROI was selected instead of the average.
print("\n--- SCENARIO 3: Testing Individual ROI Measures ---")
test_rois = ['roi_voxel_pearson_V1v', 'roi_voxel_pearson_mfswords', 'roi_voxel_pearson_VWFA1']
print("\nIf you selected an INDIVIDUAL ROI (not the average):")
for roi in test_rois:
    # ROI columns that are absent from the data are skipped silently.
    if roi not in data.columns:
        continue
    roi_values = data[roi]
    corr_method1 = avg_sims.corr(roi_values)
    corr_method2 = plot_gen.compute_category_correlation_method2('captions_neural', roi_values)
    print(f" {roi}: Method1={corr_method1:.4f}, Method2={corr_method2:.4f}")
# Scenario 4: What if app.py is creating its own averages?
# Reports which of the 'avg_*' columns are present in the loaded data and, if
# 'avg_captions_neural' is one of them, correlates it with both brain measures.
print("\n--- SCENARIO 4: Check if avg columns exist ---")
avg_cols = ['avg_vision', 'avg_captions_neural', 'avg_captions_statistical', 'avg_tags_statistical']
column_names = data.columns
for col in avg_cols:
    exists = col in column_names
    print(f" {col}: {'EXISTS' if exists else 'NOT FOUND'}")

if 'avg_captions_neural' in column_names:
    print("\nThe avg_captions_neural column EXISTS in data!")
    print("This means Method 1 averaged similarities might be used.")
    # Correlate the stored average column directly with each brain measure.
    stored_average = data['avg_captions_neural']
    test_corr_early = stored_average.corr(early_brain)
    test_corr_late = stored_average.corr(late_brain)
    print(f" Correlation with early brain: {test_corr_early:.4f}")
    print(f" Correlation with late brain: {test_corr_late:.4f}")
# --- Simulate generate_scatter -----------------------------------------------
# Re-runs, for one hard-coded (model selection, brain measure) pair, the
# correlation that would be computed for the scatter view and prints the
# Method 1 and Method 2 values side by side so they can be compared with the
# number shown in the GUI.
print("\n" + "="*80)
print("TESTING: Simulate generate_scatter function")
print("="*80)

# Simulate what generate_scatter does
ml_model_selection = "avg_captions_neural"
brain_measure = "hierarchy_early_visual_avg"
print(f"\nSimulating: ml_model={ml_model_selection}, brain={brain_measure}")

# Selection key of each category average -> category key passed to
# compute_category_correlation_method2.
category_map = {
    'avg_vision': 'vision',
    'avg_captions_neural': 'captions_neural',
    'avg_captions_statistical': 'captions_statistical',
    'avg_tags_statistical': 'tags_statistical',
}

try:
    # get_model_data's result is unpacked as (model data, display name); the
    # data is used like a pandas Series below (.shape, .head(), .corr()).
    ml_data, ml_name = plot_gen.get_model_data(ml_model_selection)
    print(f"Model name returned: {ml_name}")
    print(f"Model data shape: {ml_data.shape}")
    print(f"Model data first 5 values: {ml_data.head().values}")

    # Check what correlation would be computed
    if ml_model_selection.startswith('avg_'):
        print("\nDETECTED: This is a category average (starts with 'avg_')")
        print("Should use Method 2...")
        category_key = category_map.get(ml_model_selection)
        if category_key is None:
            # Previously this case printed nothing at all, which made an
            # unmapped 'avg_*' selection look like a silent success.
            print(f"No category mapping for '{ml_model_selection}'; "
                  "cannot compare Method 1 and Method 2")
        else:
            print(f"Category key: {category_key}")
            # Method 2
            corr_method2 = plot_gen.compute_category_correlation_method2(category_key, data[brain_measure])
            print(f"Method 2 correlation: {corr_method2:.4f}")
            # Method 1 (for comparison)
            corr_method1 = ml_data.corr(data[brain_measure])
            print(f"Method 1 correlation: {corr_method1:.4f}")
            print("\nWhich one is the GUI showing you?")
            print(f"If you see {corr_method1:.3f}, it's using Method 1 (OLD)")
            print(f"If you see {corr_method2:.3f}, it's using Method 2 (NEW)")
    else:
        print("This is an individual model, not a category average")
        corr = ml_data.corr(data[brain_measure])
        print(f"Correlation: {corr:.4f}")
except Exception as e:
    # Top-level boundary of a debug script: report the failure with its full
    # traceback and let the remaining sections run.
    print(f"Error: {e}")
    import traceback
    traceback.print_exc()
# --- Summary -----------------------------------------------------------------
# Closing banner plus a fixed block of text telling the reader which value to
# expect for the "AVERAGE - Neural Language (Captions)" / early-visual pairing.
# The numbers in the text are literals, not values computed by this script.
summary_text = """
If you select:
Model: "AVERAGE - Neural Language (Captions)"
Brain: "Early Visual Average (7 ROIs)"
You should see:
Brain and ML Model: r = 0.208 (Method 2, matches bar chart)
If you see:
r = 0.324 --> GUI is still using Method 1 (old way, needs fixing)
r = 0.208 --> GUI is using Method 2 (new way, correct!)
Something else --> Tell me what you see!
TELL ME EXACTLY WHAT NUMBER YOU SEE and I'll help debug!
"""
for summary_line in ("\n" + "="*80, "SUMMARY: WHAT SHOULD YOU SEE?", "="*80, summary_text):
    print(summary_line)