import os
import platform
import shutil
import subprocess
import time

import torch
from TTS.api import TTS

from hing_bert_module import process_text


class InteractiveTTS:
    """Menu-driven console front end for a fine-tuned XTTS model.

    The instance holds the currently selected reference voice clip and a
    ``params`` dict of synthesis settings. ``run()`` loops over a text menu
    that edits those settings and synthesizes the entered text to a WAV file.
    """

    # Reference clip file names, keyed by the menu digit that selects them.
    _VOICE_FILES = {"1": "Reference_male.wav", "2": "Reference_female.wav"}

    # Command-line players tried in order on non-Windows hosts. ``aplay``
    # stays first so hosts that already worked keep using the same player.
    _PLAYER_COMMANDS = (
        ("aplay",),
        ("paplay",),
        ("afplay",),
        ("ffplay", "-nodisp", "-autoexit", "-loglevel", "quiet"),
    )

    def __init__(self):
        """Set default settings and load the XTTS model from ``model_dir``."""
        self.model_dir = "xtts_Hindi_FineTuned"
        self.reference_voices = self._build_reference_voices(self.model_dir)
        self.reference_audio = self.reference_voices["2"]

        # Synthesis settings; every key except "output_file" is forwarded
        # to ``tts_to_file`` by ``generate_speech``.
        self.params = {
            "temperature": 0.7,
            "top_k": 50,
            "top_p": 0.85,
            "speed": 1.0,
            "language": "en",
            "output_file": "output_tts.wav",
        }

        print("Loading XTTS model... (This may take a moment)")
        self.use_cuda = torch.cuda.is_available()
        self.tts = TTS(
            model_path=self.model_dir,
            config_path=os.path.join(self.model_dir, "config.json"),
            gpu=self.use_cuda,
        )
        print("Model loaded successfully!")

    @classmethod
    def _build_reference_voices(cls, model_dir):
        """Return ``{menu digit: clip path}`` for the clips under *model_dir*.

        Paths are joined with ``os.path.join`` so they resolve on every
        platform, not only where ``\\`` is the path separator.
        """
        return {
            key: os.path.join(model_dir, "speakers", file_name)
            for key, file_name in cls._VOICE_FILES.items()
        }

    @classmethod
    def _find_player_command(cls):
        """Return the first installed player command as a list, or ``None``."""
        for command in cls._PLAYER_COMMANDS:
            if shutil.which(command[0]):
                return list(command)
        return None

    @staticmethod
    def _normalize_output_name(raw):
        """Return *raw* stripped and ending in ``.wav``; ``None`` if blank."""
        name = raw.strip()
        if not name:
            return None
        if not name.lower().endswith(".wav"):
            name += ".wav"
        return name

    @staticmethod
    def _prompt_number(prompt, cast, low, high, range_error, type_error):
        """Read one number from stdin and validate it.

        Args:
            prompt: Text shown to the user.
            cast: ``int`` or ``float``; converts the typed text.
            low, high: Inclusive bounds for an accepted value.
            range_error: Message printed when the value is out of bounds.
            type_error: Message printed when *cast* raises ``ValueError``.

        Returns:
            The converted value, or ``None`` when the input was rejected.
        """
        try:
            value = cast(input(prompt))
        except ValueError:
            print(type_error)
            return None
        if low <= value <= high:
            return value
        print(range_error)
        return None

    def clear_screen(self):
        """Clear the terminal using the platform's shell command."""
        os.system('cls' if os.name == 'nt' else 'clear')

    def play_audio(self, file_path):
        """Play *file_path*, blocking until playback ends.

        Windows uses ``winsound``; other platforms use the first available
        player from ``_PLAYER_COMMANDS``. Playback is best effort: any
        failure is reported and the saved file's location is printed instead
        of raising.
        """
        try:
            if platform.system() == 'Windows':
                # Imported lazily: the module only exists on Windows.
                import winsound

                winsound.PlaySound(file_path, winsound.SND_FILENAME)
            else:
                command = self._find_player_command()
                if command is None:
                    tried = ", ".join(c[0] for c in self._PLAYER_COMMANDS)
                    raise RuntimeError(
                        f"no audio player found (tried: {tried})"
                    )
                subprocess.run(command + [file_path], check=True)
        except Exception as e:
            print(f"Could not play audio: {e}")
            print(f"Audio saved to: {os.path.abspath(file_path)}")

    def show_menu(self):
        """Clear the screen and print the current settings and the options."""
        self.clear_screen()
        print("\n" + "=" * 50)
        print(" Interactive TTS System (Hindi/English)")
        print("=" * 50)
        print("\nCurrent Settings:")
        # The voice label is derived from the clip path's file name.
        print(f"1. Voice: {'Female' if 'female' in self.reference_audio.lower() else 'Male'}")
        print(f"2. Language: {'English' if self.params['language'] == 'en' else 'Hindi'}")
        print(f"3. Speed: {self.params['speed']}x")
        print(f"4. Output file: {self.params['output_file']}")
        print("\nOptions:")
        print("T. Enter text to convert to speech")
        print("P. Change parameters")
        print("V. Change voice")
        print("L. Toggle language (English/Hindi)")
        print("S. Change speed")
        print("O. Change output file")
        print("Q. Quit")
        print("\n" + "=" * 50)

    def get_user_choice(self):
        """Prompt for a menu choice and return it stripped and lower-cased."""
        return input("\nEnter your choice: ").strip().lower()

    def get_text_input(self):
        """Read multi-line text from stdin.

        Input ends at two consecutive empty lines or at end-of-file. A single
        empty line is kept, so paragraphs can be separated.

        Returns:
            The entered lines joined with newlines (may be empty).
        """
        print("\nEnter your text (press Enter twice to finish):")
        lines = []
        while True:
            try:
                line = input()
            except EOFError:
                # Closed stdin ends the text instead of crashing.
                break
            if line == "" and lines and lines[-1] == "":
                # Second blank in a row: drop the first one and stop.
                lines.pop()
                break
            lines.append(line)
        return "\n".join(lines)

    def change_voice(self):
        """Let the user pick the male or female reference clip."""
        print("\nAvailable voices:")
        print("1. Male voice")
        print("2. Female voice")
        choice = input("Select voice (1-2): ").strip()
        if choice in self.reference_voices:
            self.reference_audio = self.reference_voices[choice]
            print(f"Voice changed to {'male' if choice == '1' else 'female'}")
        else:
            print("Invalid choice. Keeping current voice.")

    def toggle_language(self):
        """Switch ``params['language']`` between ``'en'`` and ``'hi'``."""
        self.params['language'] = 'hi' if self.params['language'] == 'en' else 'en'
        print(f"Language set to: {'Hindi' if self.params['language'] == 'hi' else 'English'}")

    def change_speed(self):
        """Prompt for a speed in [0.5, 2.0] and store it when valid."""
        speed = self._prompt_number(
            "Enter speed (0.5-2.0, 1.0 = normal): ",
            float,
            0.5,
            2.0,
            "Speed must be between 0.5 and 2.0",
            "Invalid input. Please enter a number.",
        )
        if speed is not None:
            self.params['speed'] = speed
            print(f"Speed set to: {speed}x")

    def change_output_file(self):
        """Prompt for the output filename, appending ``.wav`` when missing.

        An empty entry keeps the current file instead of producing a file
        literally named ``.wav``.
        """
        entered = input("Enter output filename (e.g., my_tts.wav): ")
        new_file = self._normalize_output_name(entered)
        if new_file is None:
            print(f"No filename entered. Keeping: {self.params['output_file']}")
            return
        self.params['output_file'] = new_file
        print(f"Output will be saved to: {new_file}")

    def change_parameters(self):
        """Show the sampling parameters and let the user change one of them.

        Any selection other than 1-3 (including 4, "back") changes nothing.
        """
        print("\nCurrent parameters:")
        print(f"1. Temperature (randomness): {self.params['temperature']}")
        print(f"2. Top-k: {self.params['top_k']}")
        print(f"3. Top-p: {self.params['top_p']}")
        print("4. Back to main menu")

        param_choice = input("\nSelect parameter to change (1-4): ").strip()

        # menu digit -> (params key, cast, low, high, prompt, range msg, type msg)
        editable = {
            "1": (
                "temperature", float, 0.1, 1.0,
                "Enter temperature (0.1-1.0, higher = more random): ",
                "Value must be between 0.1 and 1.0",
                "Invalid input. Please enter a number.",
            ),
            "2": (
                "top_k", int, 10, 100,
                "Enter top-k (10-100, lower = more focused): ",
                "Value must be between 10 and 100",
                "Invalid input. Please enter an integer.",
            ),
            "3": (
                "top_p", float, 0.5, 1.0,
                "Enter top-p (0.5-1.0, lower = more focused): ",
                "Value must be between 0.5 and 1.0",
                "Invalid input. Please enter a number.",
            ),
        }
        if param_choice not in editable:
            return

        key, cast, low, high, prompt, range_error, type_error = editable[param_choice]
        value = self._prompt_number(prompt, cast, low, high, range_error, type_error)
        if value is not None:
            self.params[key] = value

    def generate_speech(self, text):
        """Synthesize *text* to ``params['output_file']``.

        The text is first passed through ``process_text`` and its
        ``"final_text"`` entry is what gets synthesized. After a successful
        run the user is offered playback of the result.

        Returns:
            ``True`` when the audio file was generated; ``False`` when *text*
            is blank or any step raised (the error is printed, not raised).
        """
        if not text.strip():
            print("No text provided!")
            return False

        try:
            processed_text = process_text(text)
            input_text = processed_text["final_text"]

            print("\nGenerating speech...")
            start_time = time.perf_counter()

            self.tts.tts_to_file(
                text=input_text,
                file_path=self.params['output_file'],
                speaker_wav=self.reference_audio,
                language=self.params['language'],
                temperature=self.params['temperature'],
                top_k=self.params['top_k'],
                top_p=self.params['top_p'],
                speed=self.params['speed'],
                split_sentences=True,
            )

            total_duration = time.perf_counter() - start_time

            print("\n=== Generation Complete ===")
            print(f"Saved audio to: {os.path.abspath(self.params['output_file'])}")
            print(f"Processing time: {total_duration:.2f} seconds")

            # Strip so an answer such as "y " still counts as yes.
            play = input("\nPlay the generated audio? (y/n): ").strip().lower()
            if play == 'y':
                self.play_audio(self.params['output_file'])

            return True

        except Exception as e:
            # UI boundary: report the failure and return to the menu.
            print(f"\nError during speech generation: {str(e)}")
            return False

    def run(self):
        """Run the menu loop until the user quits or stdin is closed."""
        settings_actions = {
            'p': self.change_parameters,
            'v': self.change_voice,
            'l': self.toggle_language,
            's': self.change_speed,
            'o': self.change_output_file,
        }
        farewell = "\nThank you for using the Interactive TTS System. Goodbye!"

        while True:
            try:
                self.show_menu()
                choice = self.get_user_choice()

                if choice == 'q':
                    print(farewell)
                    break

                if choice == 't':
                    text = self.get_text_input()
                    if text.strip():
                        self.generate_speech(text)
                    else:
                        print("No text entered!")
                    input("\nPress Enter to continue...")
                elif choice in settings_actions:
                    settings_actions[choice]()
                    # Pause so the action's message is readable before the
                    # next show_menu() call clears the screen.
                    input("\nPress Enter to continue...")
                else:
                    print("\nInvalid choice. Please try again.")
                    input("Press Enter to continue...")
            except EOFError:
                # stdin was closed: nothing more can be read, so exit.
                print(farewell)
                break


def main():
    """Build the interactive TTS system and run its menu loop.

    Ctrl+C at any point (including while the model is loading) ends the
    program with a short message instead of a traceback.
    """
    try:
        tts_system = InteractiveTTS()
        tts_system.run()
    except KeyboardInterrupt:
        print("\nInterrupted. Goodbye!")


if __name__ == "__main__":
    main()