File size: 2,396 Bytes
1b242be
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# import soundfile as sf
# import os

# # Read train.txt
# train_file = "/workspace/trainTTS/data/data_train/train.txt"
# root_path = "/workspace/trainTTS/data/data_train/wavs_new"

# with open(train_file, 'r', encoding='utf-8') as f:
#     lines = f.readlines()

# print(f"Total files: {len(lines)}")

# # Check the first 10 files
# for i, line in enumerate(lines[:10]):
#     parts = line.strip().split('|')
#     audio_path = os.path.join(root_path, parts[0])
    
#     print(f"\n{i+1}. Checking: {audio_path}")
#     print(f"   Exists: {os.path.exists(audio_path)}")
    
#     if os.path.exists(audio_path):
#         try:
#             data, sr = sf.read(audio_path)
#             print(f"   ✅ OK - SR: {sr}, Duration: {len(data)/sr:.2f}s")
#         except Exception as e:
#             print(f"   ❌ CORRUPT: {e}")
#     else:
#         print(f"   ❌ NOT FOUND")


import os

# Metadata file; further down each line is split on '|' and the first field
# is treated as an audio filename relative to root_path.
train_file = "/workspace/trainTTS/data/data_train/train.txt"
# Directory that is scanned for .wav files.
root_path = "/workspace/trainTTS/data/wavs_new"

# 1. Report whether both input paths exist.
print(f"📁 Root path exists: {os.path.exists(root_path)}")
print(f"📄 Train file exists: {os.path.exists(train_file)}")

# 2. Count the .wav files actually present in the folder.
if os.path.exists(root_path):
    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # preview below (and any later slice of this list) stable between runs.
    actual_files = sorted(f for f in os.listdir(root_path) if f.endswith('.wav'))
    print(f"🎵 Actual .wav files in folder: {len(actual_files)}")
    print(f"   First 5: {actual_files[:5]}")
else:
    print("❌ Root path does NOT exist!")
    # Exit with a non-zero status so a calling shell/pipeline sees the failure.
    # SystemExit is raised directly because the exit() helper is injected by
    # the site module and is not guaranteed to exist (e.g. under `python -S`).
    raise SystemExit(1)

# 3. Read train.txt.
try:
    with open(train_file, 'r', encoding='utf-8') as f:
        lines = f.readlines()
except (OSError, UnicodeDecodeError) as e:
    # Every later step needs the file contents, so a missing, unreadable or
    # non-UTF-8 train file is reported and ends the run with a non-zero status
    # instead of surfacing as a raw traceback.
    print(f"❌ Cannot read train file: {e}")
    raise SystemExit(1)

print(f"\n📋 Train.txt has {len(lines)} lines")
print("   First 3 lines:")
# Preview at most three lines, each truncated to 100 characters.
for i, line in enumerate(lines[:3], start=1):
    print(f"   {i}. {line.strip()[:100]}...")

# 4. Check the format of the first entry.
# Use the first non-blank line: indexing lines[0] would raise IndexError on an
# empty train.txt, and a blank leading line would produce an empty filename.
first_line = next((ln.strip() for ln in lines if ln.strip()), None)
if first_line is None:
    print("❌ Train.txt has no entries!")
    raise SystemExit(1)

# The first '|'-separated field is taken as the audio filename.
parts = first_line.split('|')
expected_filename = parts[0]
expected_path = os.path.join(root_path, expected_filename)

print(f"\n🔍 Expected filename from train.txt: {expected_filename}")
print(f"   Full path would be: {expected_path}")
print(f"   File exists: {os.path.exists(expected_path)}")

# 5. Look for files whose names resemble the expected one.
print("\n🔎 Searching for similar filenames...")
# Scan the whole listing rather than only its first 10 entries, since a real
# match could sit anywhere in it; only the number of candidates printed is
# capped. An empty expected name is skipped because "" is a substring of
# every filename and would flag everything as a match.
similar = [
    actual
    for actual in actual_files
    if expected_filename
    and (expected_filename in actual or actual in expected_filename)
]
for actual in similar[:10]:
    print(f"   Match candidate: {actual}")
if len(similar) > 10:
    print(f"   ... and {len(similar) - 10} more")
if not similar:
    # Say so explicitly; otherwise silence is indistinguishable from a crash.
    print("   No similar filenames found")