"""Diagnose mismatches between a TTS training manifest and its wav folder.

The script prints, in order:

1. whether the wav folder and the manifest (``train.txt``) exist,
2. how many ``.wav`` files the folder really contains,
3. the size and first lines of the manifest,
4. the filename the first manifest line points at and whether it exists,
5. files in the folder whose names resemble that expected filename.

Each manifest line is split on ``|`` and the first field is taken as the
audio filename, relative to the wav folder.

NOTE: an earlier revision (previously kept here as commented-out code)
additionally decoded the first 10 clips with ``soundfile`` and printed their
sample rate and duration; it pointed at ``.../data_train/wavs_new`` instead
of the folder configured below.
"""

import os

# Default locations; both can be overridden through ``main()`` arguments.
train_file = "/workspace/trainTTS/data/data_train/train.txt"
root_path = "/workspace/trainTTS/data/wavs_new"

# Upper bound on how many "similar filename" candidates get printed.
_MAX_CANDIDATES = 10


def _list_wav_files(wav_dir: str) -> list:
    """Return the names of the ``.wav`` entries in *wav_dir*, sorted.

    ``os.listdir`` returns entries in arbitrary order, so the result is
    sorted to keep the report reproducible between runs.
    """
    return sorted(name for name in os.listdir(wav_dir) if name.endswith(".wav"))


def _similar_names(expected: str, names: list, limit: int = _MAX_CANDIDATES) -> list:
    """Return up to *limit* entries of *names* that resemble *expected*.

    A name counts as similar when either string contains the other, which
    catches a missing/extra extension or prefix. An empty *expected* is a
    substring of everything, so it yields no candidates instead of all.
    """
    if not expected:
        return []
    matches = []
    for name in names:
        if expected in name or name in expected:
            matches.append(name)
            if len(matches) >= limit:
                break
    return matches


def main(manifest_path: str = train_file, wav_dir: str = root_path) -> int:
    """Print the diagnostic report and return a process exit status.

    Args:
        manifest_path: Path of the ``|``-separated manifest to inspect.
        wav_dir: Folder that should contain the audio files it references.

    Returns:
        0 when the report ran to completion; 1 when the wav folder or the
        manifest is missing, or the manifest has no lines.
    """
    # 1. Check whether the folder and the manifest exist.
    print(f"📁 Root path exists: {os.path.exists(wav_dir)}")
    print(f"📄 Train file exists: {os.path.exists(manifest_path)}")

    # 2. Count the files that are actually present in the folder.
    if not os.path.exists(wav_dir):
        print("❌ Root path does NOT exist!")
        return 1
    actual_files = _list_wav_files(wav_dir)
    print(f"🎵 Actual .wav files in folder: {len(actual_files)}")
    print(f" First 5: {actual_files[:5]}")

    # 3. Read train.txt (guarded: opening a missing file would raise).
    if not os.path.exists(manifest_path):
        print("❌ Train file does NOT exist!")
        return 1
    with open(manifest_path, "r", encoding="utf-8") as fh:
        lines = fh.readlines()
    print(f"\n📋 Train.txt has {len(lines)} lines")
    print(" First 3 lines:")
    for number, line in enumerate(lines[:3], start=1):
        print(f" {number}. {line.strip()[:100]}...")
    if not lines:
        # Nothing to derive an expected filename from.
        print("❌ Train.txt is empty!")
        return 1

    # 4. Check the format: the first field of the first line is the filename.
    expected_filename = lines[0].strip().split("|")[0]
    expected_path = os.path.join(wav_dir, expected_filename)
    print(f"\n🔍 Expected filename from train.txt: {expected_filename}")
    print(f" Full path would be: {expected_path}")
    # isfile, not exists: a blank filename joins to the folder itself,
    # which exists but is not the audio file we are looking for.
    print(f" File exists: {os.path.isfile(expected_path)}")

    # 5. Look for similarly named files across the whole folder listing.
    print("\n🔎 Searching for similar filenames...")
    for candidate in _similar_names(expected_filename, actual_files):
        print(f" Match candidate: {candidate}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())