"""Interactive console front-end for a fine-tuned XTTS (Hindi/English) model."""

import os
import platform
import subprocess
import time

import torch
from TTS.api import TTS

from hing_bert_module import process_text


class InteractiveTTS:
    """Menu-driven text-to-speech session.

    Holds the loaded TTS model, the currently selected reference voice and
    the generation parameters, and exposes one method per menu action.
    """

    def __init__(self):
        """Set the default settings and load the model from ``self.model_dir``."""
        # Model configuration
        self.model_dir = "xtts_Hindi_FineTuned"
        self.reference_voices = self._build_reference_voices(self.model_dir)
        self.reference_audio = self.reference_voices["2"]  # Default to female voice

        # Default parameters
        self.params = {
            "temperature": 0.7,
            "top_k": 50,
            "top_p": 0.85,
            "speed": 1.0,
            "language": "en",
            "output_file": "output_tts.wav",
        }

        # Load the model
        print("Loading XTTS model... (This may take a moment)")
        self.use_cuda = torch.cuda.is_available()
        self.tts = TTS(
            model_path=self.model_dir,
            config_path=os.path.join(self.model_dir, "config.json"),
            gpu=self.use_cuda,
        )
        print("Model loaded successfully!")

    @staticmethod
    def _build_reference_voices(model_dir):
        """Return the menu-key -> reference-wav mapping for *model_dir*.

        Paths are joined with ``os.path.join`` so they use the separator of
        the running platform instead of hard-coded backslashes.
        """
        speakers_dir = os.path.join(model_dir, "speakers")
        return {
            "1": os.path.join(speakers_dir, "Reference_male.wav"),
            "2": os.path.join(speakers_dir, "Reference_female.wav"),
        }

    def clear_screen(self):
        """Clear the terminal using the platform's shell command."""
        os.system('cls' if os.name == 'nt' else 'clear')

    def play_audio(self, file_path):
        """Play *file_path* on a best-effort basis.

        Uses ``winsound`` on Windows, ``afplay`` on macOS and ``aplay``
        elsewhere. Any failure is reported together with the location of the
        saved file; nothing is raised to the caller.
        """
        try:
            system = platform.system()
            if system == 'Windows':
                import winsound  # Windows-only module, so imported lazily.
                winsound.PlaySound(file_path, winsound.SND_FILENAME)
            elif system == 'Darwin':
                # aplay is an ALSA tool and is not present on macOS.
                subprocess.run(['afplay', file_path], check=True)
            else:
                subprocess.run(['aplay', file_path], check=True)
        except Exception as e:
            # Playback is optional: tell the user where the file is instead
            # of aborting the session.
            print(f"Could not play audio: {e}")
            print(f"Audio saved to: {os.path.abspath(file_path)}")

    def show_menu(self):
        """Clear the screen and print the current settings and the options."""
        self.clear_screen()
        voice_label = 'Female' if 'female' in self.reference_audio.lower() else 'Male'
        language_label = 'English' if self.params['language'] == 'en' else 'Hindi'

        print("\n" + "="*50)
        print(" Interactive TTS System (Hindi/English)")
        print("="*50)
        print("\nCurrent Settings:")
        print(f"1. Voice: {voice_label}")
        print(f"2. Language: {language_label}")
        print(f"3. Speed: {self.params['speed']}x")
        print(f"4. Output file: {self.params['output_file']}")
        print("\nOptions:")
        print("T. Enter text to convert to speech")
        print("P. Change parameters")
        print("V. Change voice")
        print("L. Toggle language (English/Hindi)")
        print("S. Change speed")
        print("O. Change output file")
        print("Q. Quit")
        print("\n" + "="*50)

    def get_user_choice(self):
        """Prompt for a menu choice and return it stripped and lower-cased."""
        return input("\nEnter your choice: ").strip().lower()

    def get_text_input(self):
        """Read multi-line text until two consecutive empty lines are entered.

        Returns:
            The entered lines joined with newlines. The first of the two
            terminating empty lines is dropped, so the result carries no
            trailing blank line.
        """
        print("\nEnter your text (press Enter twice to finish):")
        lines = []
        while True:
            line = input()
            # A second consecutive empty line ends the input; the first one
            # was stored as a possible paragraph break and is removed again.
            if line == "" and lines and lines[-1] == "":
                lines.pop()
                break
            lines.append(line)
        return "\n".join(lines)

    def change_voice(self):
        """Let the user pick one of the configured reference voices."""
        print("\nAvailable voices:")
        print("1. Male voice")
        print("2. Female voice")
        choice = input("Select voice (1-2): ").strip()
        if choice in self.reference_voices:
            self.reference_audio = self.reference_voices[choice]
            print(f"Voice changed to {'male' if choice == '1' else 'female'}")
        else:
            print("Invalid choice. Keeping current voice.")

    def toggle_language(self):
        """Switch the synthesis language between ``'en'`` and ``'hi'``."""
        self.params['language'] = 'hi' if self.params['language'] == 'en' else 'en'
        print(f"Language set to: {'Hindi' if self.params['language'] == 'hi' else 'English'}")

    def change_speed(self):
        """Prompt for a speed factor and store it when it lies in [0.5, 2.0]."""
        try:
            speed = float(input("Enter speed (0.5-2.0, 1.0 = normal): "))
        except ValueError:
            print("Invalid input. Please enter a number.")
            return
        if 0.5 <= speed <= 2.0:
            self.params['speed'] = speed
            print(f"Speed set to: {speed}x")
        else:
            print("Speed must be between 0.5 and 2.0")

    def change_output_file(self):
        """Prompt for the output filename, appending ``.wav`` when missing.

        Empty input keeps the current filename; previously it produced a
        file named just ``.wav``.
        """
        new_file = input("Enter output filename (e.g., my_tts.wav): ").strip()
        if not new_file:
            print(f"Output will be saved to: {self.params['output_file']}")
            return
        if not new_file.lower().endswith('.wav'):
            new_file += '.wav'
        self.params['output_file'] = new_file
        print(f"Output will be saved to: {new_file}")

    def _update_numeric_param(self, key, prompt, caster, low, high,
                              range_message, type_message):
        """Prompt for one numeric parameter and store it if it is in range.

        Args:
            key: Entry of ``self.params`` to update.
            prompt: Text shown to the user.
            caster: ``int`` or ``float``; converts the raw input.
            low: Inclusive lower bound.
            high: Inclusive upper bound.
            range_message: Printed when the value is out of range.
            type_message: Printed when the input cannot be converted.
        """
        try:
            value = caster(input(prompt))
        except ValueError:
            print(type_message)
            return
        if low <= value <= high:
            self.params[key] = value
        else:
            print(range_message)

    def change_parameters(self):
        """Show the sampling parameters and let the user change one of them.

        Any selection other than 1-3 (including "4") changes nothing.
        """
        print("\nCurrent parameters:")
        print(f"1. Temperature (randomness): {self.params['temperature']}")
        print(f"2. Top-k: {self.params['top_k']}")
        print(f"3. Top-p: {self.params['top_p']}")
        print("4. Back to main menu")

        param_choice = input("\nSelect parameter to change (1-4): ").strip()
        if param_choice == "1":
            self._update_numeric_param(
                'temperature',
                "Enter temperature (0.1-1.0, higher = more random): ",
                float, 0.1, 1.0,
                "Value must be between 0.1 and 1.0",
                "Invalid input. Please enter a number.",
            )
        elif param_choice == "2":
            self._update_numeric_param(
                'top_k',
                "Enter top-k (10-100, lower = more focused): ",
                int, 10, 100,
                "Value must be between 10 and 100",
                "Invalid input. Please enter an integer.",
            )
        elif param_choice == "3":
            self._update_numeric_param(
                'top_p',
                "Enter top-p (0.5-1.0, lower = more focused): ",
                float, 0.5, 1.0,
                "Value must be between 0.5 and 1.0",
                "Invalid input. Please enter a number.",
            )

    def generate_speech(self, text):
        """Synthesize *text* to the configured output file.

        The text is first passed through ``process_text``; the value under
        its ``"final_text"`` key is what gets synthesized. After a successful
        run the user is offered playback of the result.

        Returns:
            True when the audio file was generated, False when *text* is
            blank or any step raised an exception (the error is printed).
        """
        if not text.strip():
            print("No text provided!")
            return False

        try:
            # Process the text
            processed_text = process_text(text)
            input_text = processed_text["final_text"]

            print("\nGenerating speech...")
            start_time = time.perf_counter()

            # Generate the speech
            self.tts.tts_to_file(
                text=input_text,
                file_path=self.params['output_file'],
                speaker_wav=self.reference_audio,
                language=self.params['language'],
                temperature=self.params['temperature'],
                top_k=self.params['top_k'],
                top_p=self.params['top_p'],
                speed=self.params['speed'],
                split_sentences=True,
            )

            total_duration = time.perf_counter() - start_time

            print("\n=== Generation Complete ===")
            print(f"Saved audio to: {os.path.abspath(self.params['output_file'])}")
            print(f"Processing time: {total_duration:.2f} seconds")

            # Play the generated audio
            play = input("\nPlay the generated audio? (y/n): ").strip().lower()
            if play == 'y':
                self.play_audio(self.params['output_file'])
            return True
        except Exception as e:
            # Top-level boundary of one generation request: report the error
            # and keep the interactive session alive.
            print(f"\nError during speech generation: {e}")
            return False

    def run(self):
        """Run the menu loop until the user chooses to quit."""
        # Settings actions share one code path: run the handler, then pause
        # so its feedback is readable before show_menu() clears the screen.
        settings_handlers = {
            'p': self.change_parameters,
            'v': self.change_voice,
            'l': self.toggle_language,
            's': self.change_speed,
            'o': self.change_output_file,
        }

        while True:
            self.show_menu()
            choice = self.get_user_choice()

            if choice == 't':
                text = self.get_text_input()
                if text.strip():
                    self.generate_speech(text)
                else:
                    print("No text entered!")
                input("\nPress Enter to continue...")
            elif choice in settings_handlers:
                settings_handlers[choice]()
                input("\nPress Enter to continue...")
            elif choice == 'q':
                print("\nThank you for using the Interactive TTS System. Goodbye!")
                break
            else:
                print("\nInvalid choice. Please try again.")
                input("Press Enter to continue...")


if __name__ == "__main__":
    tts_system = InteractiveTTS()
    tts_system.run()