# File size: 8,814 Bytes

# 8a02978

import os
import time
import torch
from TTS.api import TTS
from hing_bert_module import process_text
import platform
import subprocess

class InteractiveTTS:
    """Menu-driven console front end for a fine-tuned XTTS text-to-speech model.

    Attributes:
        model_dir: Directory holding the model files and ``config.json``.
        reference_voices: Maps the menu keys ``"1"`` (male) and ``"2"``
            (female) to reference speaker WAV paths.
        reference_audio: Reference WAV passed as ``speaker_wav`` on synthesis.
        params: Current settings; every entry except ``output_file`` is
            forwarded to ``tts_to_file``, and ``output_file`` is the target path.
        use_cuda: Result of ``torch.cuda.is_available()`` at start-up.
        tts: The loaded ``TTS`` instance.
    """

    def __init__(self):
        """Set the default settings and load the XTTS model from ``model_dir``."""
        # Model configuration
        self.model_dir = "xtts_Hindi_FineTuned"
        self.reference_voices = {
            "1": self._speaker_path(self.model_dir, "Reference_male.wav"),
            "2": self._speaker_path(self.model_dir, "Reference_female.wav"),
        }
        self.reference_audio = self.reference_voices["2"]  # Default to female voice

        # Default parameters
        self.params = {
            "temperature": 0.7,
            "top_k": 50,
            "top_p": 0.85,
            "speed": 1.0,
            "language": "en",
            "output_file": "output_tts.wav",
        }

        # Load the model
        print("Loading XTTS model... (This may take a moment)")
        self.use_cuda = torch.cuda.is_available()
        self.tts = TTS(
            model_path=self.model_dir,
            config_path=os.path.join(self.model_dir, "config.json"),
            gpu=self.use_cuda,
        )
        print("Model loaded successfully!")

    @staticmethod
    def _speaker_path(model_dir, filename):
        """Return the path of a reference WAV inside ``<model_dir>/speakers``.

        Built with ``os.path.join`` so the separator matches the running
        platform instead of being hard-coded to a backslash.
        """
        return os.path.join(model_dir, "speakers", filename)

    def _pause(self):
        """Wait for Enter so printed feedback survives the next screen clear."""
        input("\nPress Enter to continue...")

    def _read_bounded(self, prompt, cast, low, high, range_msg, invalid_msg):
        """Prompt for one number and validate it against an inclusive range.

        Args:
            prompt: Text shown to the user.
            cast: Callable converting the typed string (``int`` or ``float``).
            low: Smallest accepted value.
            high: Largest accepted value.
            range_msg: Printed when the value parses but is out of range.
            invalid_msg: Printed when ``cast`` raises ``ValueError``.

        Returns:
            The converted value, or ``None`` when it was rejected.
        """
        try:
            value = cast(input(prompt))
        except ValueError:
            print(invalid_msg)
            return None
        if low <= value <= high:
            return value
        print(range_msg)
        return None

    def clear_screen(self):
        """Clear the terminal using the platform's shell command."""
        os.system('cls' if os.name == 'nt' else 'clear')

    def play_audio(self, file_path):
        """Play ``file_path`` with a platform-specific player.

        Uses ``winsound`` on Windows, ``afplay`` on macOS and ``aplay``
        elsewhere. Playback is best-effort: any failure is reported together
        with the absolute location of the file, and nothing is raised.
        """
        try:
            system = platform.system()
            if system == 'Windows':
                import winsound
                winsound.PlaySound(file_path, winsound.SND_FILENAME)
            else:
                player = 'afplay' if system == 'Darwin' else 'aplay'
                subprocess.run([player, file_path], check=True)
        except Exception as e:
            print(f"Could not play audio: {e}")
            print(f"Audio saved to: {os.path.abspath(file_path)}")

    def show_menu(self):
        """Clear the screen and print the current settings and the options."""
        self.clear_screen()
        voice = 'Female' if 'female' in self.reference_audio.lower() else 'Male'
        language = 'English' if self.params['language'] == 'en' else 'Hindi'
        print("\n" + "="*50)
        print("  Interactive TTS System (Hindi/English)")
        print("="*50)
        print("\nCurrent Settings:")
        print(f"1. Voice: {voice}")
        print(f"2. Language: {language}")
        print(f"3. Speed: {self.params['speed']}x")
        print(f"4. Output file: {self.params['output_file']}")
        print("\nOptions:")
        print("T. Enter text to convert to speech")
        print("P. Change parameters")
        print("V. Change voice")
        print("L. Toggle language (English/Hindi)")
        print("S. Change speed")
        print("O. Change output file")
        print("Q. Quit")
        print("\n" + "="*50)

    def get_user_choice(self):
        """Return the user's menu choice, stripped and lower-cased."""
        return input("\nEnter your choice: ").strip().lower()

    def get_text_input(self):
        """Read multi-line text until two consecutive empty lines are entered.

        A single empty line is kept as part of the text. End of input
        (closed or piped stdin) also finishes the entry instead of raising.

        Returns:
            The collected lines joined with newlines.
        """
        print("\nEnter your text (press Enter twice to finish):")
        lines = []
        while True:
            try:
                line = input()
            except EOFError:
                # Input ended without the terminator; drop a dangling blank line.
                while lines and lines[-1] == "":
                    lines.pop()
                break
            if line == "" and lines and lines[-1] == "":
                lines.pop()
                break
            lines.append(line)
        return "\n".join(lines)

    def change_voice(self):
        """Let the user pick the male or female reference voice."""
        print("\nAvailable voices:")
        print("1. Male voice")
        print("2. Female voice")
        choice = input("Select voice (1-2): ").strip()
        if choice in self.reference_voices:
            self.reference_audio = self.reference_voices[choice]
            print(f"Voice changed to {'male' if choice == '1' else 'female'}")
        else:
            print("Invalid choice. Keeping current voice.")

    def toggle_language(self):
        """Switch ``params['language']`` between ``'en'`` and ``'hi'``."""
        self.params['language'] = 'hi' if self.params['language'] == 'en' else 'en'
        print(f"Language set to: {'Hindi' if self.params['language'] == 'hi' else 'English'}")

    def change_speed(self):
        """Prompt for a speed between 0.5 and 2.0 and store it when valid."""
        speed = self._read_bounded(
            "Enter speed (0.5-2.0, 1.0 = normal): ",
            float, 0.5, 2.0,
            "Speed must be between 0.5 and 2.0",
            "Invalid input. Please enter a number.",
        )
        if speed is not None:
            self.params['speed'] = speed
            print(f"Speed set to: {speed}x")

    def change_output_file(self):
        """Prompt for the output filename, appending ``.wav`` when missing.

        Empty input keeps the current filename rather than producing a file
        literally named ``.wav``.
        """
        new_file = input("Enter output filename (e.g., my_tts.wav): ").strip()
        if not new_file:
            print(f"No filename entered. Keeping: {self.params['output_file']}")
            return
        if not new_file.lower().endswith('.wav'):
            new_file += '.wav'
        self.params['output_file'] = new_file
        print(f"Output will be saved to: {new_file}")

    def change_parameters(self):
        """Show the sampling parameters and let the user change one of them."""
        print("\nCurrent parameters:")
        print(f"1. Temperature (randomness): {self.params['temperature']}")
        print(f"2. Top-k: {self.params['top_k']}")
        print(f"3. Top-p: {self.params['top_p']}")
        print("4. Back to main menu")

        # selection -> (params key, label, prompt, cast, low, high, range msg, invalid msg)
        specs = {
            "1": ("temperature", "Temperature",
                  "Enter temperature (0.1-1.0, higher = more random): ",
                  float, 0.1, 1.0,
                  "Value must be between 0.1 and 1.0",
                  "Invalid input. Please enter a number."),
            "2": ("top_k", "Top-k",
                  "Enter top-k (10-100, lower = more focused): ",
                  int, 10, 100,
                  "Value must be between 10 and 100",
                  "Invalid input. Please enter an integer."),
            "3": ("top_p", "Top-p",
                  "Enter top-p (0.5-1.0, lower = more focused): ",
                  float, 0.5, 1.0,
                  "Value must be between 0.5 and 1.0",
                  "Invalid input. Please enter a number."),
        }

        param_choice = input("\nSelect parameter to change (1-4): ").strip()
        if param_choice == "4":
            return
        if param_choice not in specs:
            print("Invalid choice. No parameter changed.")
            return

        key, label, prompt, cast, low, high, range_msg, invalid_msg = specs[param_choice]
        value = self._read_bounded(prompt, cast, low, high, range_msg, invalid_msg)
        if value is not None:
            self.params[key] = value
            print(f"{label} set to: {value}")

    def generate_speech(self, text):
        """Synthesize ``text`` to ``params['output_file']`` and offer playback.

        The text is first passed through ``process_text``; the ``"final_text"``
        entry of its result is what gets synthesized.

        Args:
            text: Raw text typed by the user.

        Returns:
            ``True`` when the audio file was generated, ``False`` when the
            text was blank or processing/synthesis raised.
        """
        if not text.strip():
            print("No text provided!")
            return False

        # Only the processing/synthesis step is guarded, so a problem at the
        # playback prompt is not reported as a generation failure.
        try:
            # Process the text
            processed_text = process_text(text)
            input_text = processed_text["final_text"]

            print("\nGenerating speech...")
            start_time = time.perf_counter()

            # Generate the speech
            self.tts.tts_to_file(
                text=input_text,
                file_path=self.params['output_file'],
                speaker_wav=self.reference_audio,
                language=self.params['language'],
                temperature=self.params['temperature'],
                top_k=self.params['top_k'],
                top_p=self.params['top_p'],
                speed=self.params['speed'],
                split_sentences=True,
            )

            total_duration = time.perf_counter() - start_time
        except Exception as e:
            print(f"\nError during speech generation: {str(e)}")
            return False

        print("\n=== Generation Complete ===")
        print(f"Saved audio to: {os.path.abspath(self.params['output_file'])}")
        print(f"Processing time: {total_duration:.2f} seconds")

        # Play the generated audio
        play = input("\nPlay the generated audio? (y/n): ").strip().lower()
        if play == 'y':
            self.play_audio(self.params['output_file'])

        return True

    def run(self):
        """Run the menu loop until the user quits or input ends.

        Every action is followed by a pause, because the next ``show_menu``
        call clears the screen and would otherwise erase the action's output.
        End of input or Ctrl-C leaves the loop cleanly.
        """
        settings_actions = {
            'p': self.change_parameters,
            'v': self.change_voice,
            'l': self.toggle_language,
            's': self.change_speed,
            'o': self.change_output_file,
        }

        while True:
            try:
                self.show_menu()
                choice = self.get_user_choice()

                if choice == 'q':
                    print("\nThank you for using the Interactive TTS System. Goodbye!")
                    break

                if choice == 't':
                    text = self.get_text_input()
                    if text.strip():
                        self.generate_speech(text)
                    else:
                        print("No text entered!")
                    self._pause()
                elif choice in settings_actions:
                    settings_actions[choice]()
                    self._pause()
                else:
                    print("\nInvalid choice. Please try again.")
                    input("Press Enter to continue...")
            except (EOFError, KeyboardInterrupt):
                print("\nInput closed. Exiting.")
                break

if __name__ == "__main__":
    # Script entry point: build the interactive session (its constructor
    # loads the model) and hand control to the menu loop.
    InteractiveTTS().run()