import pandas as pd print("šŸ“Š Preparing data for interpretability analysis...\n") # Use the parallel corpus (224 sentences with English translations) df = pd.read_csv('data/igala_english_parallel.csv') print(f"Total sentence pairs: {len(df)}") print(f"Columns: {list(df.columns)}") # Show first row to verify structure print(f"\nFirst sentence pair:") print(df.head(1)) # Select 50 diverse examples analysis_df = df.sample(n=min(50, len(df)), random_state=42).reset_index(drop=True) # Save analysis_df.to_csv('data/igala_probe_sentences.csv', index=False) print(f"\nāœ… Created analysis set: {len(analysis_df)} sentence pairs") print("\nSample:") print(analysis_df.head(3))