| | import csv |
| | import wave |
| | import os |
| | from tqdm import tqdm |
def verify_wav_file(file_path: str) -> bool:
    """Return True if *file_path* can be opened as a WAV file.

    Only the ability to open the file is checked. ``wave.open`` reads the
    file's header on open and raises if it is missing or malformed; the
    audio frames themselves are never read, so a file with a sound header
    but damaged sample data still passes.

    Args:
        file_path: Path of the file to check.

    Returns:
        True when the file opens cleanly, False otherwise. Failures are
        reported on stdout instead of being raised.
    """
    try:
        # Opening is the whole check: the header getters only return values
        # that were already parsed during open, so calling them adds nothing.
        with wave.open(file_path, "rb"):
            return True
    except Exception as e:
        # Deliberately broad: this is a best-effort validity probe, and any
        # failure (bad header, truncated file, missing path, permissions)
        # simply means "not a usable WAV file" to the caller.
        print(f"Error processing {file_path}: {e}")
        return False
| |
|
def main(
    csv_path: str = "/home/austin/disk1/stts-zs_cleaning/data/filename.csv",
) -> None:
    """Verify every WAV file listed in a pipe-delimited CSV and print a summary.

    The first column of each non-empty row is taken as the path of a WAV
    file. Each path is counted, checked for existence, and then passed to
    ``verify_wav_file``. Problems are printed as they are found, followed by
    the totals at the end.

    Args:
        csv_path: Path of the ``|``-delimited listing to read. Defaults to
            the dataset listing this script was written for.
    """
    total_files = 0
    valid_files = 0

    # newline="" lets the csv module do its own line-ending handling, as its
    # documentation requires for file objects passed to csv.reader.
    with open(csv_path, "r", newline="") as csv_file:
        csv_reader = csv.reader(csv_file, delimiter="|")
        for row in tqdm(csv_reader, desc="Verifying files", unit="file"):
            if not row:
                # Blank lines are skipped and not counted.
                continue

            wav_path = row[0]
            total_files += 1

            if not os.path.exists(wav_path):
                print(f"File does not exist: {wav_path}")
            elif verify_wav_file(wav_path):
                valid_files += 1
            else:
                print(f"File is corrupted or invalid: {wav_path}")

    print("\nVerification completed.")
    print(f"Total files checked: {total_files}")
    print(f"Valid files: {valid_files}")
    print(f"Invalid or missing files: {total_files - valid_files}")
| |
|
# Run the verification only when executed as a script, not when imported.
if __name__ == "__main__":
    main()