# similarity_analysis/create_example_summary_table.py
"""
Create Summary Table of Example Pairs (No Images Needed)
This creates a simple table/figure showing the best example pairs
without needing the actual images.
"""
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
# Announce the run, then load the exemplar-pair table produced by the
# hierarchy analysis (first CSV column is the pair index).
banner = "=" * 80
print(banner)
print("CREATING EXAMPLE SUMMARY TABLE")
print(banner)

exemplar_file = 'hierarchy_analysis/exemplar_image_pairs.csv'
print(f"\nLoading: {exemplar_file}")
exemplars = pd.read_csv(exemplar_file, index_col=0)
print(f"Loaded {len(exemplars)} exemplar pairs")
# Create summary figure
fig = plt.figure(figsize=(16, 12))
# Title
fig.suptitle('Exemplar Image Pairs Demonstrating Key Findings\n' +
'Use these pairs to show concrete examples to your professor',
fontsize=16, fontweight='bold', y=0.98)
# SECTION 1: Dissociation examples
dissociation = exemplars[exemplars['example_type'] == 'dissociation'].head(5)
ax1 = plt.subplot(3, 1, 1)
ax1.axis('off')
y_pos = 0.95
ax1.text(0.5, y_pos, 'FINDING 1: Semantic Dissociation', ha='center',
fontsize=14, fontweight='bold', transform=ax1.transAxes)
y_pos -= 0.08
ax1.text(0.5, y_pos, 'Language similarity HIGH, Vision similarity LOW → Language captures meaning, not just pixels',
ha='center', fontsize=11, style='italic', transform=ax1.transAxes)
y_pos -= 0.12
for idx, (pair_idx, row) in enumerate(dissociation.iterrows()):
# Header
ax1.text(0.02, y_pos, f"Example {idx+1}:", fontweight='bold',
fontsize=10, transform=ax1.transAxes)
# Image names
y_pos -= 0.05
ax1.text(0.02, y_pos, f" Images: {row['image_1'][:30]}... vs {row['image_2'][:30]}...",
fontsize=9, family='monospace', transform=ax1.transAxes)
# Scores
y_pos -= 0.04
lang_val = row['language_avg']
vis_val = row['vision_avg']
diff = lang_val - vis_val
human = row['human_judgement']
score_text = (f" Language: {lang_val:+.2f} | Vision: {vis_val:+.2f} | "
f"Diff: {diff:+.2f} | Human: {human:.2f}")
# Color code based on values
if lang_val > 0.5:
lang_color = '#27ae60' # Green for high
else:
lang_color = 'black'
if vis_val < -0.5:
vis_color = '#e74c3c' # Red for low
else:
vis_color = 'black'
ax1.text(0.02, y_pos, score_text, fontsize=9, transform=ax1.transAxes)
y_pos -= 0.07
if y_pos < 0.05:
break
# SECTION 2: Language-Human Alignment
alignment = exemplars[exemplars['example_type'] == 'language_human_alignment'].head(5)
ax2 = plt.subplot(3, 1, 2)
ax2.axis('off')
y_pos = 0.95
ax2.text(0.5, y_pos, 'FINDING 2: Language-Human Alignment', ha='center',
fontsize=14, fontweight='bold', transform=ax2.transAxes)
y_pos -= 0.08
ax2.text(0.5, y_pos, 'Language models predict human judgments better than vision models',
ha='center', fontsize=11, style='italic', transform=ax2.transAxes)
y_pos -= 0.12
for idx, (pair_idx, row) in enumerate(alignment.iterrows()):
# Header
ax2.text(0.02, y_pos, f"Example {idx+1}:", fontweight='bold',
fontsize=10, transform=ax2.transAxes)
# Image names
y_pos -= 0.05
ax2.text(0.02, y_pos, f" Images: {row['image_1'][:30]}... vs {row['image_2'][:30]}...",
fontsize=9, family='monospace', transform=ax2.transAxes)
# Alignment scores
y_pos -= 0.04
human = row['human_judgement']
lang = row['language_avg']
vis = row['vision_avg']
lang_dist = abs(human - lang)
vis_dist = abs(human - vis)
closer = vis_dist - lang_dist
score_text = (f" Human: {human:.2f} | Language: {lang:.2f} (dist: {lang_dist:.2f}) | "
f"Vision: {vis:.2f} (dist: {vis_dist:.2f}) → Language {closer:.2f} closer")
ax2.text(0.02, y_pos, score_text, fontsize=9, transform=ax2.transAxes)
y_pos -= 0.07
if y_pos < 0.05:
break
# SECTION 3: Brain Semantic Preference
brain_pref = exemplars[exemplars['example_type'] == 'brain_semantic_preference'].head(5)
ax3 = plt.subplot(3, 1, 3)
ax3.axis('off')
y_pos = 0.95
ax3.text(0.5, y_pos, 'FINDING 3: Brain Prioritizes Semantics', ha='center',
fontsize=14, fontweight='bold', transform=ax3.transAxes)
y_pos -= 0.08
ax3.text(0.5, y_pos, 'Late semantic brain regions respond more than early visual regions',
ha='center', fontsize=11, style='italic', transform=ax3.transAxes)
y_pos -= 0.12
for idx, (pair_idx, row) in enumerate(brain_pref.iterrows()):
# Header
ax3.text(0.02, y_pos, f"Example {idx+1}:", fontweight='bold',
fontsize=10, transform=ax3.transAxes)
# Image names
y_pos -= 0.05
ax3.text(0.02, y_pos, f" Images: {row['image_1'][:30]}... vs {row['image_2'][:30]}...",
fontsize=9, family='monospace', transform=ax3.transAxes)
# Brain responses
y_pos -= 0.04
early = row['hierarchy_early_visual_avg']
late = row['hierarchy_late_semantic_avg']
diff = late - early
score_text = (f" Early Visual: {early:+.3f} | Late Semantic: {late:+.3f} | "
f"Diff: {diff:+.3f} | Human: {row['human_judgement']:.2f}")
ax3.text(0.02, y_pos, score_text, fontsize=9, transform=ax3.transAxes)
y_pos -= 0.07
if y_pos < 0.05:
break
# Finalize the layout (leaving headroom for the suptitle) and write the PNG.
output_file = 'hierarchy_analysis/exemplar_pairs_summary.png'
plt.tight_layout(rect=[0, 0, 1, 0.96])
plt.savefig(output_file, dpi=300, bbox_inches='tight', facecolor='white')
print(f"\nSaved: {output_file}")
plt.close()
# Mirror the figure's content into a plain-text file for easy reference.
print("\nCreating text summary...")

rule = "=" * 80
dash = "-" * 80

with open('hierarchy_analysis/exemplar_pairs_summary.txt', 'w') as out:
    out.write(rule + "\n")
    out.write("EXEMPLAR IMAGE PAIRS - SUMMARY FOR PROFESSOR\n")
    out.write(rule + "\n\n")

    # Finding 1: language/vision dissociation examples.
    out.write("FINDING 1: SEMANTIC DISSOCIATION\n")
    out.write(dash + "\n")
    out.write("Language models show high similarity, vision models show low similarity\n")
    out.write("Proves: Language models capture semantic meaning, not just visual features\n\n")
    for n, (p_idx, pair) in enumerate(dissociation.iterrows(), start=1):
        out.write(f"Example {n}:\n")
        out.write(f" Pair Index: {p_idx}\n")
        out.write(f" Image 1: {pair['image_1']}\n")
        out.write(f" Image 2: {pair['image_2']}\n")
        out.write(f" Language Similarity: {pair['language_avg']:+.3f} (HIGH)\n")
        out.write(f" Vision Similarity: {pair['vision_avg']:+.3f} (LOW)\n")
        out.write(f" Difference: {pair['language_avg'] - pair['vision_avg']:+.3f}\n")
        out.write(f" Human Judgment: {pair['human_judgement']:.3f}\n")
        out.write(f" Early Brain: {pair['hierarchy_early_visual_avg']:+.3f}\n")
        out.write(f" Late Brain: {pair['hierarchy_late_semantic_avg']:+.3f}\n")
        out.write("\n")

    # Finding 2: language-human alignment examples.
    out.write("\n" + rule + "\n")
    out.write("FINDING 2: LANGUAGE-HUMAN ALIGNMENT\n")
    out.write(dash + "\n")
    out.write("Language models align with human judgments better than vision models\n")
    out.write("Proves: Humans judge similarity based on semantics, not just visual features\n\n")
    for n, (p_idx, pair) in enumerate(alignment.iterrows(), start=1):
        out.write(f"Example {n}:\n")
        out.write(f" Pair Index: {p_idx}\n")
        out.write(f" Image 1: {pair['image_1']}\n")
        out.write(f" Image 2: {pair['image_2']}\n")
        out.write(f" Human Judgment: {pair['human_judgement']:.3f}\n")
        out.write(f" Language Model: {pair['language_avg']:.3f} (distance: {abs(pair['human_judgement'] - pair['language_avg']):.3f})\n")
        out.write(f" Vision Model: {pair['vision_avg']:.3f} (distance: {abs(pair['human_judgement'] - pair['vision_avg']):.3f})\n")
        out.write(f" Language is {abs(pair['human_judgement'] - pair['vision_avg']) - abs(pair['human_judgement'] - pair['language_avg']):.3f} closer to human\n")
        out.write("\n")

    # Finding 3: brain semantic-preference examples.
    out.write("\n" + rule + "\n")
    out.write("FINDING 3: BRAIN PRIORITIZES SEMANTICS\n")
    out.write(dash + "\n")
    out.write("Late semantic brain regions respond more than early visual regions\n")
    out.write("Proves: Brain uses semantic content for similarity judgments\n\n")
    for n, (p_idx, pair) in enumerate(brain_pref.iterrows(), start=1):
        out.write(f"Example {n}:\n")
        out.write(f" Pair Index: {p_idx}\n")
        out.write(f" Image 1: {pair['image_1']}\n")
        out.write(f" Image 2: {pair['image_2']}\n")
        out.write(f" Early Visual (V1-hV4): {pair['hierarchy_early_visual_avg']:+.3f} (LOW)\n")
        out.write(f" Late Semantic (FFA, VWFA, etc): {pair['hierarchy_late_semantic_avg']:+.3f} (HIGH)\n")
        out.write(f" Difference: {pair['hierarchy_late_semantic_avg'] - pair['hierarchy_early_visual_avg']:+.3f}\n")
        out.write(f" Human Judgment: {pair['human_judgement']:.3f}\n")
        out.write("\n")

print("Saved: hierarchy_analysis/exemplar_pairs_summary.txt")
# Closing console report: banner plus usage instructions for the user.
closing_rule = "=" * 80
print("\n" + closing_rule)
print("DONE!")
print(closing_rule)
print("""
Created:
1. exemplar_pairs_summary.png - Visual summary (no images needed)
2. exemplar_pairs_summary.txt - Text file with all details
These show the BEST example pairs for each finding.
For your professor meeting:
1. Show the bar charts (main statistical findings)
2. Show exemplar_pairs_summary.png (specific examples)
3. Walk through 1-2 examples in detail:
- "Look at this pair: [image names]"
- "Language says similar (0.86), vision says different (-1.34)"
- "Humans agree with language (2.0)"
- "Brain semantic regions respond (0.22), visual regions don't (-0.22)"
- "This proves our point!"
If you have the actual images, run:
python create_image_examples_figure.py
to create figures WITH the images.
The pair indices are saved so you can look them up in your GUI!
""")