"""Batch text-to-speech synthesis driven by a tab-separated manifest.

Every manifest line must hold exactly four tab-separated, non-empty fields:
id, language code, text, and the reference audio path relative to
``--ref_dir``. For each valid line the text is synthesized with the reference
audio passed as ``speaker_wav`` and the result is written to
``SAVEDIR/{id}_{language}_{basename of the reference file}``.
"""
import argparse
import os
import re
from typing import Optional, Sequence, Tuple

DEFAULT_MODEL_PATH = "/app/models/best_model_479919.pth"
DEFAULT_CONFIG_PATH = "/app/models/config.json"
EXPECTED_FIELDS = 4

# Matches "cuda" or "cuda:N" where N is a device index.
_CUDA_DEVICE_RE = re.compile(r"cuda(:\d+)?")


def build_arg_parser() -> argparse.ArgumentParser:
    """Return the command-line parser for the synthesis script."""
    parser = argparse.ArgumentParser(description="Text-to-Speech Synthesis")
    parser.add_argument('-t', '--text_file', type=str, required=True,
                        help='Path to text file containing text and audio reference files')
    parser.add_argument('-r', '--ref_dir', type=str, required=True,
                        help='Root directory containing reference audio files')
    parser.add_argument('-s', '--savedir', type=str, required=True,
                        help='Directory to store synthesized audio files')
    parser.add_argument('-d', '--device', type=str, required=True,
                        help='Device to use for synthesis (cpu, cuda or cuda:N)')
    parser.add_argument('-m', '--model_path', type=str, default=DEFAULT_MODEL_PATH,
                        help='Path to the model file')
    parser.add_argument('-c', '--config_path', type=str, default=DEFAULT_CONFIG_PATH,
                        help='Path to the config file')
    return parser


def resolve_device(requested: str, cuda_available: Optional[bool] = None) -> str:
    """Map the user-supplied device string to a device that can be used.

    Args:
        requested: Value of ``--device``.
        cuda_available: Whether CUDA can be used. When ``None`` it is
            queried from ``torch.cuda.is_available()``.

    Returns:
        ``"cpu"`` when the CPU is requested or CUDA is unavailable; the
        requested string when it is ``"cuda"`` or ``"cuda:N"`` and CUDA is
        available; ``"cuda"`` for any other value when CUDA is available.
    """
    if requested == "cpu":
        return "cpu"
    if cuda_available is None:
        # Imported lazily: only needed when a non-CPU device is requested.
        import torch
        cuda_available = torch.cuda.is_available()
    if not cuda_available:
        # Fall back instead of failing later when the model is moved.
        return "cpu"
    if _CUDA_DEVICE_RE.fullmatch(requested):
        return requested
    return "cuda"


def parse_manifest_line(line: str) -> Optional[Tuple[str, str, str, str]]:
    """Split one manifest line into ``(idx, lang, text, ref_file)``.

    Only the line terminator is removed before splitting, so an empty first
    or last column cannot silently shift the remaining columns. Each field is
    then stripped of surrounding whitespace.

    Returns:
        The four fields, or ``None`` when the line does not contain exactly
        four non-empty tab-separated fields.
    """
    fields = [field.strip() for field in line.rstrip("\r\n").split("\t")]
    if len(fields) != EXPECTED_FIELDS or not all(fields):
        return None
    idx, lang, text, ref_file = fields
    return idx, lang, text, ref_file


def main(argv: Optional[Sequence[str]] = None) -> None:
    """Synthesize every valid manifest entry and save the audio files.

    Args:
        argv: Argument list to parse; defaults to ``sys.argv[1:]``.
    """
    args = build_arg_parser().parse_args(argv)

    # Get device
    device = resolve_device(args.device)
    if device != args.device:
        print(f"Warning: requested device '{args.device}' is unavailable or "
              f"unrecognized; using '{device}' instead.")
    print(f"Using device: {device}")

    # Heavy third-party imports are deferred until the arguments are known to
    # be valid, so --help and usage errors do not pay for loading them.
    from tqdm import tqdm
    from TTS.api import TTS

    # Initialize TTS model
    print(f"Loading model from {args.model_path} with config {args.config_path}")
    tts = TTS(
        model_path=args.model_path,
        config_path=args.config_path,
        progress_bar=False,
    ).to(device)

    # Create output directory
    os.makedirs(args.savedir, exist_ok=True)
    print(f"Output directory: {args.savedir}")

    # Read the text file; the encoding is explicit because the manifest holds
    # non-ASCII text and the platform default may not be UTF-8.
    print(f"Reading text file: {args.text_file}")
    with open(args.text_file, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    # Process each line. tqdm.write is used instead of print so messages do
    # not break the progress bar.
    print(f"Processing {len(lines)} entries...")
    for line_no, line in enumerate(tqdm(lines), start=1):
        entry = parse_manifest_line(line)
        if entry is None:
            tqdm.write(f"Warning: Line {line_no} does not have 4 non-empty "
                       f"tab-separated parts. Skipping.")
            continue

        idx, lang, text, ref_file = entry
        ref_path = os.path.join(args.ref_dir, ref_file)
        if not os.path.isfile(ref_path):
            # A single missing reference should not abort the whole batch.
            tqdm.write(f"Warning: Line {line_no} reference audio not found: "
                       f"{ref_path}. Skipping.")
            continue

        save_path = os.path.join(
            args.savedir, f"{idx}_{lang}_{os.path.basename(ref_file)}"
        )
        tqdm.write(f"Synthesizing: {text[:30]}... using reference {ref_path}")
        tts.tts_to_file(text=text, speaker_wav=ref_path, language=lang,
                        file_path=save_path)
        tqdm.write(f"Saved to: {save_path}")

    print("Synthesis complete!")


if __name__ == "__main__":
    main()