| import torch | |
| from TTS.api import TTS | |
| import os | |
| from tqdm import tqdm | |
| import argparse | |
def build_arg_parser():
    """Return the command-line parser for the batch synthesis script."""
    parser = argparse.ArgumentParser(description="Text-to-Speech Synthesis")
    parser.add_argument('-t', '--text_file', type=str, required=True,
                        help='Path to text file containing text and audio reference files')
    parser.add_argument('-r', '--ref_dir', type=str, required=True,
                        help='Root directory containing reference audio files')
    parser.add_argument('-s', '--savedir', type=str, required=True,
                        help='Directory to store synthesized audio files')
    parser.add_argument('-d', '--device', type=str, required=True,
                        help='Device to use for synthesis (cpu, cuda or cuda:N)')
    parser.add_argument('-m', '--model_path', type=str,
                        default="/app/models/best_model_479919.pth",
                        help='Path to the model file')
    parser.add_argument('-c', '--config_path', type=str,
                        default="/app/models/config.json",
                        help='Path to the config file')
    return parser


def resolve_device(requested, cuda_available=None):
    """Map the user-supplied device string to the device actually used.

    Args:
        requested: Value of ``--device`` ("cpu", "cuda", "cuda:N" or anything
            else, which means "auto-detect").
        cuda_available: Override for CUDA availability; when ``None`` it is
            queried from ``torch.cuda.is_available()``.

    Returns:
        "cpu", "cuda" or "cuda:N".
    """
    if cuda_available is None:
        cuda_available = torch.cuda.is_available()

    wanted = requested.strip().lower()
    if wanted == "cpu":
        return "cpu"

    prefix, sep, index = wanted.partition(":")
    is_cuda = prefix == "cuda" and (not sep or (index.isascii() and index.isdigit()))
    if is_cuda:
        if cuda_available:
            # Keep an explicit device index instead of collapsing it to "cuda".
            return wanted
        print(f"Warning: CUDA requested ({wanted}) but not available. Falling back to cpu.")
        return "cpu"

    # Unrecognised value: auto-detect, preferring the GPU.
    return "cuda" if cuda_available else "cpu"


def parse_entry(line):
    """Split one manifest line into ``(idx, lang, text, ref_file)``.

    Returns ``None`` when the line does not have exactly four
    tab-separated fields.
    """
    parts = line.strip().split('\t')
    if len(parts) != 4:
        return None
    return tuple(parts)


def synthesize_entries(tts, lines, ref_dir, savedir):
    """Synthesize every valid manifest line and return the number skipped.

    Each output is written to ``savedir`` as
    ``{idx}_{lang}_{basename of ref_file}``. Lines that are malformed or whose
    reference audio is missing are reported and skipped so that one bad entry
    does not abort the rest of the batch.
    """
    skipped = 0
    for line_no, line in enumerate(tqdm(lines), start=1):
        entry = parse_entry(line)
        if entry is None:
            # tqdm.write keeps messages from breaking the progress bar.
            tqdm.write(f"Warning: Line {line_no} does not have 4 tab-separated parts. Skipping.")
            skipped += 1
            continue

        idx, lang, text, ref_file = entry
        ref_path = os.path.join(ref_dir, ref_file)
        if not os.path.isfile(ref_path):
            tqdm.write(f"Warning: Line {line_no} reference audio not found: {ref_path}. Skipping.")
            skipped += 1
            continue

        save_path = os.path.join(savedir, f"{idx}_{lang}_{os.path.basename(ref_file)}")
        tqdm.write(f"Synthesizing: {text[:30]}... using reference {ref_path}")
        tts.tts_to_file(text=text, speaker_wav=ref_path, language=lang, file_path=save_path)
        tqdm.write(f"Saved to: {save_path}")
    return skipped


def main(argv=None):
    """Run batch synthesis; ``argv`` defaults to the process arguments."""
    args = build_arg_parser().parse_args(argv)

    # Get device
    device = resolve_device(args.device)
    print(f"Using device: {device}")

    # Initialize TTS model
    print(f"Loading model from {args.model_path} with config {args.config_path}")
    tts = TTS(
        model_path=args.model_path,
        config_path=args.config_path,
        progress_bar=False,
    ).to(device)

    # Create output directory
    os.makedirs(args.savedir, exist_ok=True)
    print(f"Output directory: {args.savedir}")

    # Read the text file. The manifest holds Indic-script text, so decode it
    # as UTF-8 explicitly instead of relying on the locale default.
    print(f"Reading text file: {args.text_file}")
    with open(args.text_file, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    # Process each line
    print(f"Processing {len(lines)} entries...")
    skipped = synthesize_entries(tts, lines, args.ref_dir, args.savedir)
    if skipped:
        print(f"Skipped {skipped} of {len(lines)} entries.")
    print("Synthesis complete!")


if __name__ == "__main__":
    main()
| # import torch | |
| # from TTS.api import TTS | |
| # import os | |
| # from tqdm import tqdm | |
| # import argparse | |
| # # Get device | |
| # device = "cuda:3" if torch.cuda.is_available() else "cpu" | |
| # sentences_dict = { | |
| # "te": ["వడ్రంగి, క్షురక వృత్తులలో పెట్టుబడి ప్రధానమై ఇతరులు కూడా ఈ వృత్తిలో ప్రవేశించి వ్యాపారంగా మార్చేసార", | |
| # "నేను ఈ రోజు నాకు ఇష్టమైన పుస్తకాన్ని చదివాను మరియు తరువాత నా స్నేహితుడితో సినిమాకు వెళ్ళాను", | |
| # "ఈ వేసవి సెలవులలో నేను నా కుటుంబంతో కలిసి ఒక అందమైన బీచ్కి వెళ్ళాలని అనుకుంటున్నాను"], | |
| # "mr": ["जायकवाडी धरणातून तब्बल अडीच ते तीन लाख हेक्टर शेतीच्या सिंचनासाठी पाणी सोडलं जातं", | |
| # "मी आज माझ्या आवडत्या पुस्तकाचे वाचन केले आणि नंतर माझ्या मित्रासोबत चित्रपटाला गेलो", | |
| # "या उन्हाळी सुट्टीत मी माझ्या कुटुंबासोबत एक सुंदर समुद्रकिनाऱ्यावर जाण्याचा विचार करतो"], | |
| # "bho": ["बिहार के बक्सर जिला के बक्सर नगर निगम क्षेत्र में गंगा नदी पर बने बक्सर पुल का उद्घाटन आज प्रधानमंत्री नरेंद्र मोदी करेंगे", | |
| # "एन्ट्रापी कंप्यूटिंग में एन्ट्रोपी ऊ ऑपरेटिंग सिस्टम ह जे पे सरा क्रिप्टोग्राफिक फंक्शन सब काम करे लें", | |
| # "हमार मंडराये वाली जहाज़ सर्पमीनन से भरी है"], | |
| # } | |
| # tts = TTS( | |
| # model_path="/home1/jesuraj/speechlm/espnet/egs2/LIMMITS_25/speechlm1/downloads/yourtts_syspin_baseline-April-19-2025_10+55AM-0b13ea658/best_model_479919.pth", | |
| # config_path="/home1/jesuraj/speechlm/espnet/egs2/LIMMITS_25/speechlm1/downloads/yourtts_syspin_baseline-April-19-2025_10+55AM-0b13ea658/config.json", | |
| # progress_bar=False, | |
| # ).to(device) | |
| # parser = argparse.ArgumentParser(description="Text-to-Speech Synthesis") | |
| # parser.add_argument('-t', '--text_file', type=str, required=True, | |
| # help='Path to text file containing text and audio reference files') | |
| # parser.add_argument('-r', '--ref_dir', type=str, required=True, | |
| # help='Root directory containing reference audio files') | |
| # parser.add_argument('-s', '--savedir', type=str, required=True, | |
| # help='Directory to store synthesized audio files') | |
| # parser.add_argument('-d', '--device', type=str, required=True, | |
| # help='Device to use for synthesis (cpu or cuda)') | |
| # args = parser.parse_args() | |
| # os.makedirs(args.savedir, exist_ok=True) | |
| # # Read the text file | |
| # with open(args.text_file, 'r') as f: | |
| # lines = f.readlines() | |
| # for line in lines: | |
| # idx, lang, text, ref_file = line.strip().split('\t') | |
| # ref_file = os.path.join(args.ref_dir, ref_file) | |
| # save_path = os.path.join(args.savedir, f"{idx}_{lang}_{os.path.basename(ref_file)}") | |
| # tts.tts_to_file(text=text, speaker_wav=ref_file, language=lang, file_path=save_path) | |
| # # ref_files = [os.path.join("/home1/jesuraj/speechlm/espnet/egs2/LIMMITS_25/speechlm1/downloads/test_samples/", x) for x in os.listdir("/home1/jesuraj/speechlm/espnet/egs2/LIMMITS_25/speechlm1/downloads/test_samples/")] | |
| # # for ref_file in ref_files: | |
| # # for language_key in sentences_dict.keys(): | |
| # # for s_idx, sentence in enumerate(sentences_dict[language_key]): | |
| # # save_path = os.path.join("/home1/jesuraj/speechlm/espnet/egs2/LIMMITS_25/speechlm1/downloads/test_infers/", f"test_{language_key}_{s_idx}_{os.path.basename(ref_file)}") | |
| # # tts.tts_to_file(text=sentence, speaker_wav=ref_file, language=language_key, file_path=save_path) | |
| # # tts.tts_to_file(text="ಹಸ್ದೇವ್ ನದಿ, ರಿಹಂಡ್ ನದಿ ಮತ್ತು ಕನ್ಹರ್ ನದಿಗಳು ಸುರ್ಗುಜಾದ ಮುಖಜ ಭೂಮಿಯಲ್ಲಿ ಹರಿಯುತ್ತವೆ.", speaker_wav="/home1/jesuraj/speechlm/espnet/egs2/LIMMITS_25/speechlm1/downloads/syspin_data/Chhattisgarhi_Male/wavs/IISc_SYSPINProject_chha_m_AGRI_00001.wav", language="kn", file_path="test_kn.wav") | |