"""Verify that every WAV file listed in a pipe-delimited CSV exists and opens.

The first column of each non-empty CSV row is treated as a path to a WAV file.
Each path is checked for existence and then opened with the standard-library
``wave`` module; a summary of the results is printed at the end.
"""

import csv
import os
import wave

try:
    from tqdm import tqdm
except ImportError:  # the progress bar is cosmetic; stay usable without it
    def tqdm(iterable, **_kwargs):
        """Fallback for a missing tqdm: iterate without a progress bar."""
        return iterable


# Default listing checked when main() is called without an argument.
DEFAULT_CSV_PATH = "/home/austin/disk1/stts-zs_cleaning/data/filename.csv"


def verify_wav_file(file_path: str) -> bool:
    """Return True if *file_path* can be opened as a WAV file.

    The file is opened with ``wave.open`` and its basic header properties
    (channel count, sample width, frame rate, frame count) are queried.
    Frame data is not read by this function, so a True result means the
    file is *likely* valid rather than fully decoded.

    Args:
        file_path: Path of the file to check.

    Returns:
        True when the file opens and its properties can be queried, False
        otherwise. On failure the error is printed to stdout.
    """
    try:
        with wave.open(file_path, "rb") as wav_file:
            # Query the basic properties; the values themselves are not needed.
            wav_file.getnchannels()
            wav_file.getsampwidth()
            wav_file.getframerate()
            wav_file.getnframes()
    except Exception as e:
        # Deliberately broad: this is a best-effort validator, so any failure
        # to open or parse the file is reported as "invalid", never raised.
        print(f"Error processing {file_path}: {e}")
        return False
    return True


def main(csv_path: str = DEFAULT_CSV_PATH) -> None:
    """Check every WAV path listed in *csv_path* and print a summary.

    The CSV is read with ``|`` as the delimiter. Empty rows are skipped; for
    every other row the first field is taken as the WAV path.

    Args:
        csv_path: Path of the pipe-delimited listing. Defaults to
            ``DEFAULT_CSV_PATH``.
    """
    total_files = 0
    valid_files = 0

    # newline="" is what the csv module documentation recommends so that
    # newlines embedded in quoted fields are handled by the reader itself.
    with open(csv_path, "r", newline="") as csv_file:
        csv_reader = csv.reader(csv_file, delimiter="|")
        for row in tqdm(csv_reader, desc="Verifying files", unit="file"):
            if not row:  # skip empty rows
                continue

            wav_path = row[0]
            total_files += 1

            if not os.path.exists(wav_path):
                print(f"File does not exist: {wav_path}")
                continue

            if verify_wav_file(wav_path):
                valid_files += 1
            else:
                print(f"File is corrupted or invalid: {wav_path}")

    print("\nVerification completed.")
    print(f"Total files checked: {total_files}")
    print(f"Valid files: {valid_files}")
    print(f"Invalid or missing files: {total_files - valid_files}")


if __name__ == "__main__":
    main()