Spaces:

DanJChong
/

similarity_analysis

Sleeping

App Files Files Community

similarity_analysis / debug_gui_numbers.py

DanJChong

Upload folder using huggingface_hub

329d553 verified 4 months ago

raw

history blame contribute delete

6.32 kB

	"""
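	Debug GUI Numbers - print the correlations the GUI might be showing.

	Loads the standardized similarity matrix, computes the brain / ML-model
	correlations with both averaging methods (average-then-correlate and
	correlate-then-average), and prints them so they can be compared with the
	value displayed in the GUI.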
	"""

	import pandas as pd
	from data.data_loader import DataLoader
	from visualization.plot_generator import PlotGenerator
	from scipy.stats import pearsonr

	# Banner: rule, title, rule - one line each.
	print("=" * 80, "DEBUGGING GUI NUMBERS", "=" * 80, sep="\n")

	# Load the CSV through the project's DataLoader and hand the loader to a
	# PlotGenerator (the original note says this mirrors what the GUI does).
	print("\nLoading data...")
	loader = DataLoader()
	loader.load_csv('data/Final_similarity_matrix_standardized.csv')
	plot_gen = PlotGenerator(loader)

	# The loaded table; every scenario below reads its columns from here.
	data = loader.data

	print(f"\nData loaded: {len(data)} rows")

	# Brain-measure columns used as correlation targets throughout the script.
	early_brain, late_brain = (
	    data['hierarchy_early_visual_avg'],
	    data['hierarchy_late_semantic_avg'],
	)

	# Questions for the person debugging, followed by the header of the scenario
	# section. Emitted one entry per line; entries starting with "\n" produce the
	# blank separator lines.
	rule = "=" * 80
	prompt_lines = (
	    "\n" + rule,
	    "WHAT NUMBERS ARE YOU SEEING?",
	    rule,
	    "\nPlease tell me:",
	    "1. Which ML Model did you select?",
	    " (e.g., 'AVERAGE - Neural Language (Captions)')",
	    "\n2. Which brain measure?",
	    " (e.g., 'Early Visual Average (7 ROIs)')",
	    "\n3. What correlation value do you see for 'Brain and ML Model'?",
	    "\n" + rule,
	    "LET'S TEST DIFFERENT SCENARIOS",
	    rule,
	)
	print(*prompt_lines, sep="\n")

	# Scenario 1: Method 1 (average-then-correlate) - the OLD way
	print("\n--- SCENARIO 1: Method 1 (Old Way - Average then Correlate) ---")

	# Model names are the first element of every 'captions_neural' entry in
	# loader.model_categories; keep only those that are columns of the data.
	models = [entry[0] for entry in loader.model_categories['captions_neural']]
	available_models = [name for name in models if name in data.columns]
	print(f"Found {len(available_models)} language models")

	# Row-wise mean over the available model columns.
	avg_sims = data[available_models].mean(axis=1)

	# Correlate the averaged column with each brain measure (early first, then late).
	corr_early_method1, corr_late_method1 = (
	    avg_sims.corr(brain) for brain in (early_brain, late_brain)
	)

	print(
	    f"Early: {corr_early_method1:.4f}",
	    f"Late: {corr_late_method1:.4f}",
	    sep="\n",
	)

	# Scenario 2: Method 2 (correlate-then-average) - the NEW way
	print("\n--- SCENARIO 2: Method 2 (New Way - Correlate then Average) ---")

	# Delegate to PlotGenerator for the 'captions_neural' category, once per
	# brain measure (early first, then late).
	corr_early_method2, corr_late_method2 = (
	    plot_gen.compute_category_correlation_method2('captions_neural', brain)
	    for brain in (early_brain, late_brain)
	)

	print(
	    f"Early: {corr_early_method2:.4f}",
	    f"Late: {corr_late_method2:.4f}",
	    sep="\n",
	)

	# Scenario 3: what if a different brain measure was selected?
	print("\n--- SCENARIO 3: Testing Individual ROI Measures ---")

	# Individual ROI columns to try, in case a single ROI rather than the
	# average was picked in the GUI.
	test_rois = ['roi_voxel_pearson_V1v', 'roi_voxel_pearson_mfswords', 'roi_voxel_pearson_VWFA1']

	print("\nIf you selected an INDIVIDUAL ROI (not the average):")
	for roi_name in test_rois:
	    if roi_name not in data.columns:
	        continue  # column absent from the data: nothing is printed for it
	    roi_values = data[roi_name]
	    # Method 1 reuses the averaged similarities from Scenario 1.
	    corr_method1 = avg_sims.corr(roi_values)
	    corr_method2 = plot_gen.compute_category_correlation_method2('captions_neural', roi_values)
	    print(f" {roi_name}: Method1={corr_method1:.4f}, Method2={corr_method2:.4f}")

	# Scenario 4: are there pre-computed average columns in the data?
	# (The original note asks whether app.py creates its own averages.)
	print("\n--- SCENARIO 4: Check if avg columns exist ---")

	avg_cols = ['avg_vision', 'avg_captions_neural', 'avg_captions_statistical', 'avg_tags_statistical']
	for column_name in avg_cols:
	    if column_name in data.columns:
	        status = 'EXISTS'
	    else:
	        status = 'NOT FOUND'
	    print(f" {column_name}: {status}")

	if 'avg_captions_neural' in data.columns:
	    print("\nThe avg_captions_neural column EXISTS in data!")
	    print("This means Method 1 averaged similarities might be used.")

	    # Correlate the stored average column with both brain measures.
	    stored_average = data['avg_captions_neural']
	    test_corr_early = stored_average.corr(early_brain)
	    test_corr_late = stored_average.corr(late_brain)
	    print(f" Correlation with early brain: {test_corr_early:.4f}")
	    print(f" Correlation with late brain: {test_corr_late:.4f}")

	print("\n" + "=" * 80)
	print("TESTING: Simulate generate_scatter function")
	print("=" * 80)

	# Inputs of the simulated request (per the original note, this section
	# mirrors what generate_scatter does).
	ml_model_selection = "avg_captions_neural"
	brain_measure = "hierarchy_early_visual_avg"

	print(f"\nSimulating: ml_model={ml_model_selection}, brain={brain_measure}")

	# The whole simulation sits in one try so that any failure is reported with
	# a traceback and the script still reaches the summary below.
	try:
	    # get_model_data returns the model's data together with a display name.
	    ml_data, ml_name = plot_gen.get_model_data(ml_model_selection)
	    print(f"Model name returned: {ml_name}")
	    print(f"Model data shape: {ml_data.shape}")
	    print(f"Model data first 5 values: {ml_data.head().values}")

	    if not ml_model_selection.startswith('avg_'):
	        print("This is an individual model, not a category average")
	        corr = ml_data.corr(data[brain_measure])
	        print(f"Correlation: {corr:.4f}")
	    else:
	        print("\nDETECTED: This is a category average (starts with 'avg_')")
	        print("Should use Method 2...")

	        # Selection key -> category key understood by
	        # compute_category_correlation_method2.
	        category_map = {
	            'avg_vision': 'vision',
	            'avg_captions_neural': 'captions_neural',
	            'avg_captions_statistical': 'captions_statistical',
	            'avg_tags_statistical': 'tags_statistical',
	        }

	        category_key = category_map.get(ml_model_selection)
	        if category_key is not None:
	            print(f"Category key: {category_key}")

	            brain_series = data[brain_measure]

	            # Method 2
	            corr_method2 = plot_gen.compute_category_correlation_method2(category_key, brain_series)
	            print(f"Method 2 correlation: {corr_method2:.4f}")

	            # Method 1 (for comparison)
	            corr_method1 = ml_data.corr(brain_series)
	            print(f"Method 1 correlation: {corr_method1:.4f}")

	            print("\nWhich one is the GUI showing you?")
	            print(f"If you see {corr_method1:.3f}, it's using Method 1 (OLD)")
	            print(f"If you see {corr_method2:.3f}, it's using Method 2 (NEW)")

	except Exception as exc:
	    print(f"Error: {exc}")
	    import traceback
	    traceback.print_exc()

	print("\n" + "="*80)
	print("SUMMARY: WHAT SHOULD YOU SEE?")
	print("="*80)

	# The expected values are interpolated from the results computed earlier in
	# this script instead of being hard-coded, so the summary cannot drift from
	# the data that was actually loaded:
	#   corr_early_method2 - Method 2 (correlate-then-average), early visual average
	#   corr_early_method1 - Method 1 (average-then-correlate), early visual average
	print(f"""
	If you select:
	Model: "AVERAGE - Neural Language (Captions)"
	Brain: "Early Visual Average (7 ROIs)"

	You should see:
	Brain and ML Model: r = {corr_early_method2:.3f} (Method 2, matches bar chart)

	If you see:
	r = {corr_early_method1:.3f} --> GUI is still using Method 1 (old way, needs fixing)
	r = {corr_early_method2:.3f} --> GUI is using Method 2 (new way, correct!)
	Something else --> Tell me what you see!

	TELL ME EXACTLY WHAT NUMBER YOU SEE and I'll help debug!
	""")