saudi-msa-piper / scripts /create_training_file.py
ISTNetworks's picture
Upload Saudi Arabic Piper TTS model - Epoch 455
b51190f verified
import json
import os
from pathlib import Path
def create_training_file(
    json_dir="/root/piper_msa/Json_dic",
    audio_base_dir="/root/piper_msa/raw_audio",
    output_file="/root/piper_msa/training_data.csv",
    speaker_ids=range(1, 6),
):
    """Build a pipe-separated training manifest from per-speaker JSON files.

    For every speaker id ``N`` in *speaker_ids* the function reads
    ``<json_dir>/SPK<N>_phoneme_data.json``, walks its ``train_samples``
    list and, for each sample that has both an ``audio_file`` and a ``text``
    entry and whose audio exists under ``<audio_base_dir>/SPK<N>/``, emits
    one manifest line of the form ``/full/path/to/audio.wav|Text content``.

    Speakers whose JSON file or audio directory is missing are skipped with
    a warning, as are samples whose audio file cannot be found.

    Args:
        json_dir: Directory holding the ``SPK<N>_phoneme_data.json`` files
            (``str`` or path-like).
        audio_base_dir: Directory holding one ``SPK<N>`` sub-directory of
            audio files per speaker (``str`` or path-like).
        output_file: Destination of the manifest (``str`` or path-like).
            Missing parent directories are created.
        speaker_ids: Iterable of speaker numbers to process. Defaults to
            1 through 5.

    Returns:
        A ``(output_path, line_count)`` tuple: the manifest location as a
        ``pathlib.Path`` and the number of lines written to it.
    """
    json_dir = Path(json_dir)
    audio_base_dir = Path(audio_base_dir)
    output_file = Path(output_file)

    training_lines = []

    for spk_num in speaker_ids:
        json_file = json_dir / f"SPK{spk_num}_phoneme_data.json"
        audio_dir = audio_base_dir / f"SPK{spk_num}"

        if not json_file.exists():
            print(f"Warning: {json_file} not found, skipping...")
            continue

        if not audio_dir.exists():
            print(f"Warning: {audio_dir} not found, skipping...")
            continue

        with open(json_file, 'r', encoding='utf-8') as f:
            data = json.load(f)

        samples = data.get('train_samples', [])

        for sample in samples:
            audio_file = sample.get('audio_file')
            text = sample.get('text')
            if not (audio_file and text):
                continue

            # The manifest is line-oriented: a line break inside the
            # transcript would split one record into several, so fold any
            # line breaks into single spaces.
            text = " ".join(str(text).splitlines())
            if not text:
                continue

            # Add the extension only when it is really absent; comparing
            # case-insensitively keeps "clip.WAV" from becoming
            # "clip.WAV.wav".
            if not audio_file.lower().endswith('.wav'):
                audio_file = f"{audio_file}.wav"

            audio_path = audio_dir / audio_file
            if not audio_path.exists():
                print(f"Warning: Audio file not found: {audio_path}")
                continue

            # Format: /full/path/to/audio.wav|Text content
            training_lines.append(f"{audio_path}|{text}")

        # Reports how many samples the JSON listed, not how many were kept.
        print(f"Processed SPK{spk_num}: {len(samples)} samples")

    # Make sure the destination directory exists before writing.
    output_file.parent.mkdir(parents=True, exist_ok=True)
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write('\n'.join(training_lines))

    print(f"\nTraining file created: {output_file}")
    print(f"Total samples: {len(training_lines)}")

    return output_file, len(training_lines)
if __name__ == "__main__":
    # Script entry point: build the manifest with the default locations,
    # then print a one-line summary of the result.
    output_file, total_samples = create_training_file()
    summary = f"\nDone! Created {output_file} with {total_samples} training samples."
    print(summary)