| |
"""
Build English test sets from MELD (Friends) dataset.

Extracts 8 scenario-based test sets from MELD MP4 clips,
converts to WAV, and concatenates into single audio files
that simulate real phone calls for E2E pipeline testing.

Usage:
    python scripts/build_meld_test_sets.py

Output:
    data/meld_test/
    ├── 01_angry_fight.wav
    ├── 02_happy_loving.wav
    ├── ...
    ├── 08_calm_daily.wav
    ├── ground_truth.json   # per-utterance emotion labels
    └── README.md           # test set descriptions
"""
|
|
| import csv |
| import json |
| import os |
| import subprocess |
| import sys |
| import tempfile |
| from collections import Counter |
| from pathlib import Path |
|
|
| |
# Two levels up from this file: the script's own directory, then its parent.
PROJECT_ROOT = Path(__file__).resolve().parent.parent
# Zip archive that the loader functions in this script read from.
ZIP_PATH = PROJECT_ROOT.joinpath("data", "english_test.zip")
# Destination directory for the generated WAVs and metadata files.
OUTPUT_DIR = PROJECT_ROOT.joinpath("data", "meld_test")
# Sample rate, in Hz, requested from ffmpeg for every generated WAV.
SAMPLE_RATE = 16_000
|
|
| |
# Scenario definitions, one per output WAV.
#   tag             -> output file stem (``<tag>.wav``) and ground-truth key
#   desc / scenario -> human-readable text copied into the metadata files
#   primary_emotion -> emotion the scenario is meant to exercise
#   split / dia_id  -> identify the source dialogue as ``<split>_<dia_id>``
TEST_SETS = [
    {
        "tag": "01_angry_fight",
        "desc": "Ross-Rachel breakup fight — anger dominant (S3E15)",
        "scenario": "Couple in a heated argument",
        "primary_emotion": "anger",
        "split": "train",
        "dia_id": "51",
    },
    {
        "tag": "02_happy_loving",
        "desc": "Monica-Chandler sweet moment — joy dominant (S5E14)",
        "scenario": "Couple being affectionate and playful",
        "primary_emotion": "joy",
        "split": "train",
        "dia_id": "1026",
    },
    {
        "tag": "03_sad_emotional",
        "desc": "Ross-Rachel emotional confession — sadness dominant (S3E25)",
        "scenario": "Emotional conversation with sadness and regret",
        "primary_emotion": "sadness",
        "split": "train",
        "dia_id": "312",
    },
    {
        "tag": "04_surprise_shock",
        "desc": "Ross-Rachel surprise revelations (S7E18)",
        "scenario": "Unexpected news and reactions",
        "primary_emotion": "surprise",
        "split": "train",
        "dia_id": "747",
    },
    {
        "tag": "05_fear_anxiety",
        "desc": "Monica-Chandler anxious situation — fear+mixed (S4E14)",
        "scenario": "Anxious and worried conversation",
        "primary_emotion": "fear",
        "split": "train",
        "dia_id": "109",
    },
    {
        "tag": "06_disgust_annoyance",
        "desc": "Family annoyance scene — disgust+anger (S6E9)",
        "scenario": "Annoyed and disgusted reactions",
        "primary_emotion": "disgust",
        "split": "train",
        "dia_id": "1025",
    },
    {
        "tag": "07_bittersweet",
        "desc": "Ross-Rachel bittersweet farewell — sadness+surprise (S5E5)",
        "scenario": "Mixed emotions: saying goodbye with conflicting feelings",
        "primary_emotion": "sadness",
        "split": "train",
        "dia_id": "676",
    },
    {
        "tag": "08_calm_daily",
        "desc": "Casual daily conversation — neutral baseline (S3E23)",
        "scenario": "Normal everyday chitchat (baseline)",
        "primary_emotion": "neutral",
        "split": "train",
        "dia_id": "450",
    },
]
|
|
|
|
def load_csv_from_zip(zip_path: Path) -> dict[str, list[dict]]:
    """Load all CSV data from zip, grouped by split_diaID.

    Args:
        zip_path: Archive holding the ``train``/``dev``/``test``
            ``*_sent_emo_cleaned_processed.csv`` files under
            ``JSON files/JSON files/CSV Processed/``.

    Returns:
        Mapping of ``"<split>_<Dialogue_ID>"`` (e.g. ``"train_51"``) to the
        CSV rows of that dialogue, in file order. A split whose CSV is
        absent from the archive, or has no ``Dialogue_ID`` column, is skipped
        with a printed warning instead of aborting the load.
    """
    import io
    import zipfile

    csv_dir = "JSON files/JSON files/CSV Processed"

    dialogues: dict[str, list[dict]] = {}
    with zipfile.ZipFile(zip_path, "r") as zf:
        members = set(zf.namelist())
        for split in ("train", "dev", "test"):
            csv_path = f"{csv_dir}/{split}_sent_emo_cleaned_processed.csv"

            # Check membership explicitly rather than catching KeyError around
            # the whole read, so an unrelated KeyError is never misreported as
            # "file not found".
            if csv_path not in members:
                print(f" Warning: {csv_path} not found in zip")
                continue

            with zf.open(csv_path) as raw:
                # newline="" lets the csv module handle line endings itself,
                # which keeps newlines embedded in quoted fields intact.
                text = io.TextIOWrapper(raw, encoding="utf-8", newline="")
                reader = csv.DictReader(text)
                if "Dialogue_ID" not in (reader.fieldnames or ()):
                    print(f" Warning: {csv_path} has no Dialogue_ID column, skipping")
                    continue
                for row in reader:
                    key = f"{split}_{row['Dialogue_ID']}"
                    dialogues.setdefault(key, []).append(row)
    return dialogues
|
|
|
|
| def find_mp4_path(split: str, dia_id: str, utt_id: str, available_files: set) -> str | None: |
| """Find MP4 file path for a specific utterance.""" |
| patterns = [ |
| f"MELD.Raw/MELD.Raw/{split}/{split}_splits/dia{dia_id}_utt{utt_id}.mp4", |
| f"MELD.Raw/MELD.Raw/{split}/{split}_splits_complete/dia{dia_id}_utt{utt_id}.mp4", |
| f"MELD.Raw/MELD.Raw/{split}/output_repeated_splits_{split}/final_videos_{split}dia{dia_id}_utt{utt_id}.mp4", |
| ] |
| for p in patterns: |
| if p in available_files: |
| return p |
| return None |
|
|
|
|
def get_mp4_list_from_zip(zip_path: Path) -> set:
    """Return the names of all archive members that end in ``.mp4``."""
    import zipfile

    with zipfile.ZipFile(zip_path, "r") as archive:
        member_names = archive.namelist()
    return set(filter(lambda name: name.endswith(".mp4"), member_names))
|
|
|
|
def extract_and_concat_wav(
    zip_path: Path, mp4_paths: list[str], output_wav: Path, sample_rate: int = 16000
) -> float:
    """Extract audio from MP4s in zip and concatenate into single WAV.

    Each clip is copied out of the archive into a temporary directory and
    converted by ``ffmpeg`` to mono signed 16-bit PCM at ``sample_rate``. The
    converted parts are then joined, in ``mp4_paths`` order, with ffmpeg's
    concat demuxer. A clip that ffmpeg fails on is skipped with a warning.

    Args:
        zip_path: Archive containing the clips.
        mp4_paths: Member names inside ``zip_path``, in playback order.
        output_wav: Destination file; its parent directory is created on
            demand once at least one clip has been converted.
        sample_rate: Output sample rate in Hz.

    Returns:
        Duration of ``output_wav`` in seconds as reported by ``ffprobe``, or
        ``0.0`` when no clip could be converted, the concat step failed, or
        ffprobe's output could not be parsed as a number.
    """
    import shutil
    import zipfile

    # Encoder settings shared by the per-clip conversion and the final concat.
    pcm_args = ["-ar", str(sample_rate), "-ac", "1", "-acodec", "pcm_s16le"]

    with tempfile.TemporaryDirectory() as tmp:
        work_dir = Path(tmp)
        wav_parts: list[Path] = []

        with zipfile.ZipFile(zip_path, "r") as zf:
            for i, mp4_path in enumerate(mp4_paths):
                mp4_local = work_dir / f"part_{i:03d}.mp4"
                wav_local = work_dir / f"part_{i:03d}.wav"

                # Stream the member to disk instead of loading the whole clip
                # into memory first.
                with zf.open(mp4_path) as src, open(mp4_local, "wb") as dst:
                    shutil.copyfileobj(src, dst)

                result = subprocess.run(
                    ["ffmpeg", "-y", "-i", str(mp4_local), *pcm_args, str(wav_local)],
                    capture_output=True,
                    text=True,
                )
                if result.returncode != 0:
                    print(f" Warning: ffmpeg failed for {mp4_path}: {result.stderr[:200]}")
                    continue

                if wav_local.exists() and wav_local.stat().st_size > 0:
                    wav_parts.append(wav_local)

        if not wav_parts:
            return 0.0

        # Input list for the concat demuxer. A single quote inside a quoted
        # path must be written as '\'' or ffmpeg misparses the entry.
        list_file = work_dir / "concat_list.txt"
        with open(list_file, "w", encoding="utf-8") as f:
            for part in wav_parts:
                escaped = str(part).replace("'", "'\\''")
                f.write(f"file '{escaped}'\n")

        output_wav.parent.mkdir(parents=True, exist_ok=True)
        result = subprocess.run(
            [
                "ffmpeg", "-y", "-f", "concat", "-safe", "0",
                "-i", str(list_file),
                *pcm_args,
                str(output_wav),
            ],
            capture_output=True,
            text=True,
        )
        if result.returncode != 0:
            print(f" Concat failed: {result.stderr[:300]}")
            return 0.0

        # Ask ffprobe for the container duration as a bare number.
        probe = subprocess.run(
            [
                "ffprobe", "-v", "quiet", "-show_entries", "format=duration",
                "-of", "default=noprint_wrappers=1:nokey=1", str(output_wav),
            ],
            capture_output=True,
            text=True,
        )
        try:
            return float(probe.stdout.strip())
        except ValueError:
            return 0.0
|
|
|
|
def _ground_truth_entry(ts: dict, utts: list[dict], duration: float) -> dict:
    """Build the ground_truth.json record for one test set from its CSV rows."""
    first = utts[0]
    return {
        "description": ts["desc"],
        "scenario": ts["scenario"],
        "primary_emotion": ts["primary_emotion"],
        "source": f"MELD Friends S{first['Season']}E{first['Episode']} Dialogue {ts['dia_id']}",
        "duration_sec": round(duration, 1),
        "emotion_distribution": dict(Counter(u["Emotion"] for u in utts)),
        "total_utterances": len(utts),
        "utterances": [
            {
                "speaker": u["Speaker"],
                "emotion": u["Emotion"],
                "sentiment": u["Sentiment"],
                "text": u["Utterance"],
            }
            for u in utts
        ],
    }


def _render_readme(labels: tuple, summary_lines: list[str], sample_rate: int) -> str:
    """Render README.md: label-alignment table, test-set table, source, usage."""
    lines = [
        "# MELD English Test Sets",
        "",
        "## Emotion Label Alignment",
        "",
        "| UsTwo Pipeline (EN) | MELD Label | Match |",
        "|---|---|---|",
        *(f"| {label} | {label} | ✅ Exact |" for label in labels),
        "",
        "**7/7 labels match exactly.** No mapping needed.",
        "",
        "## Test Sets",
        "",
        "| File | Scenario | Primary Emotion | Duration | Utterances | Emotion Distribution |",
        "|---|---|---|---|---|---|",
        "\n".join(summary_lines),
        "",
        "## Source",
        "- Dataset: MELD (Multimodal EmotionLines Dataset)",
        "- Source: Friends TV series",
        "- Paper: Poria et al., ACL 2019",
        "- Each WAV is a full dialogue concatenated from per-utterance MP4 clips",
        f"- Audio: {sample_rate / 1000:g}kHz mono PCM (matches pipeline input format)",
        "",
        "## Usage",
        "```bash",
        "# Run pipeline on a single test set",
        "python scripts/run_pipeline.py data/meld_test/01_angry_fight.wav",
        "",
        "# Evaluate all test sets",
        "python scripts/evaluate_meld_test.py",
        "```",
    ]
    return "\n".join(lines) + "\n"


def main():
    """Build every test set in TEST_SETS and write the WAVs plus metadata.

    Steps: load the annotation CSVs from ZIP_PATH, list the MP4 members,
    render one concatenated WAV per test set into OUTPUT_DIR, then write
    ``ground_truth.json`` and ``README.md`` there. Exits with status 1 if
    ZIP_PATH does not exist. A test set whose dialogue or clips cannot be
    found is reported and skipped.
    """
    banner = "=" * 60
    print(banner)
    print(" MELD English Test Set Builder")
    print(banner)

    if not ZIP_PATH.exists():
        print(f"Error: {ZIP_PATH} not found")
        sys.exit(1)

    print("\n[1/4] Loading CSV data from zip...")
    dialogues = load_csv_from_zip(ZIP_PATH)
    print(f" Loaded {len(dialogues)} dialogues")

    print("[2/4] Scanning MP4 files in zip...")
    mp4_files = get_mp4_list_from_zip(ZIP_PATH)
    print(f" Found {len(mp4_files)} MP4 files")

    print("[3/4] Building test sets...\n")
    ground_truth = {}
    summary_lines = []

    for ts in TEST_SETS:
        tag = ts["tag"]
        key = f"{ts['split']}_{ts['dia_id']}"
        utts = dialogues.get(key, [])

        if not utts:
            print(f" ❌ {tag}: dialogue {key} not found")
            continue

        print(f" 📦 {tag} — {ts['desc']}")
        print(f" {len(utts)} utterances", end="")

        # Keep each utterance paired with its clip, so the ground truth below
        # describes exactly the utterances that are present in the audio.
        voiced_utts = []
        mp4_paths = []
        for u in utts:
            p = find_mp4_path(ts["split"], ts["dia_id"], u["Utterance_ID"], mp4_files)
            if p:
                voiced_utts.append(u)
                mp4_paths.append(p)

        print(f", {len(mp4_paths)}/{len(utts)} MP4s found")

        if not mp4_paths:
            print(" ❌ No MP4 files found, skipping")
            continue

        output_wav = OUTPUT_DIR / f"{tag}.wav"
        duration = extract_and_concat_wav(ZIP_PATH, mp4_paths, output_wav, SAMPLE_RATE)
        if duration > 0:
            print(f" ✅ {output_wav.name} — {duration:.1f}s")
        else:
            # 0.0 is what extract_and_concat_wav returns on failure, so do not
            # report this as a success.
            print(f" ❌ {output_wav.name} — no audio duration reported; check ffmpeg output")

        entry = _ground_truth_entry(ts, voiced_utts, duration)
        ground_truth[tag] = entry

        summary_lines.append(
            f"| {tag} | {ts['scenario'][:40]} | {ts['primary_emotion']} | {duration:.1f}s "
            f"| {len(voiced_utts)} utts | {entry['emotion_distribution']} |"
        )

    print("\n[4/4] Saving metadata...")

    # extract_and_concat_wav only creates the directory when it writes a WAV;
    # make sure it exists even if every test set was skipped.
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    gt_path = OUTPUT_DIR / "ground_truth.json"
    with open(gt_path, "w", encoding="utf-8") as f:
        json.dump(ground_truth, f, indent=2, ensure_ascii=False)
    print(f" ✅ {gt_path}")

    # Labels the pipeline emits, in the order they appear in the README table.
    pipeline_labels = ("neutral", "joy", "sadness", "anger", "surprise", "fear", "disgust")
    our_labels = set(pipeline_labels)
    meld_labels = set()
    for gt in ground_truth.values():
        meld_labels.update(gt["emotion_distribution"])

    readme_path = OUTPUT_DIR / "README.md"
    with open(readme_path, "w", encoding="utf-8") as f:
        f.write(_render_readme(pipeline_labels, summary_lines, SAMPLE_RATE))
    print(f" ✅ {readme_path}")

    print("\n" + banner)
    print(" DONE")
    print(banner)
    total_files = len(list(OUTPUT_DIR.glob("*.wav")))
    print(f" {total_files} WAV files in {OUTPUT_DIR}")
    print(f" Ground truth: {gt_path}")
    print(f" Emotion alignment: {len(our_labels & meld_labels)}/{len(our_labels)} exact match")
|
|
|
|
# Run the builder only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
|