File size: 1,579 Bytes
e5762f6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import csv
import wave
import os
from tqdm import tqdm
def verify_wav_file(file_path) -> bool:
    """Return whether *file_path* can be opened as a WAV file.

    The check relies on the standard ``wave`` module, which reads and
    validates the file's header chunks while opening it. Only the header is
    inspected; the audio frames themselves are never read, so a file with a
    well-formed header but damaged sample data still passes.

    Args:
        file_path: Path of the file to check.

    Returns:
        True if ``wave.open`` succeeds, False if it raises. On failure the
        error message is printed to stdout.
    """
    try:
        # Opening is the whole check: wave parses the header in wave.open(),
        # and the getters (channels, sample width, ...) only return values
        # that were already parsed, so calling them adds no validation.
        with wave.open(file_path, 'rb'):
            pass
    except Exception as e:
        # Deliberately broad: this is a best-effort check used over many
        # files, and one unexpected failure should not abort the whole run.
        print(f"Error processing {file_path}: {str(e)}")
        return False
    return True

def main(csv_path="/home/austin/disk1/stts-zs_cleaning/data/filename.csv"):
    """Verify every WAV file listed in a '|'-delimited CSV and print a summary.

    The first field of each non-empty row is treated as the path of a WAV
    file. Each path is checked for existence and then passed to
    ``verify_wav_file``. A message is written for every missing or invalid
    file, followed by overall counts once all rows are processed.

    Args:
        csv_path: Path of the CSV file listing the WAV files. Defaults to
            the location this script was originally written for.
    """
    total_files = 0
    valid_files = 0

    # newline='' lets the csv module do its own newline handling, as the csv
    # documentation requires for files passed to csv.reader.
    with open(csv_path, 'r', newline='') as csv_file:
        csv_reader = csv.reader(csv_file, delimiter='|')
        for row in tqdm(csv_reader, desc="Verifying files", unit="file"):
            if not row:  # Skip empty rows without counting them
                continue

            wav_path = row[0]
            total_files += 1

            # tqdm.write prints the message without garbling the active
            # progress bar, which a bare print() would do.
            if not os.path.exists(wav_path):
                tqdm.write(f"File does not exist: {wav_path}")
                continue

            if verify_wav_file(wav_path):
                valid_files += 1
            else:
                tqdm.write(f"File is corrupted or invalid: {wav_path}")

    print("\nVerification completed.")
    print(f"Total files checked: {total_files}")
    print(f"Valid files: {valid_files}")
    # Missing files and files that failed verification are reported together.
    print(f"Invalid or missing files: {total_files - valid_files}")

# Run the verification only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()