# tts_project/imp_scripts/test_inference.py
# Uploaded by PraveenSharma08
# Initial project upload: Hindi/English Text-to-Speech pipeline (revision 8a02978)
import os
import time
import torch
from TTS.api import TTS
from hing_bert_module import process_text
import platform
import subprocess
class InteractiveTTS:
    """Interactive console front-end for a fine-tuned XTTS Hindi/English model.

    The instance keeps the current settings (reference voice, language, speed,
    sampling parameters, output file) and exposes a text menu through
    :meth:`run`.  Input text is passed through ``process_text`` (imported from
    ``hing_bert_module``) and the ``"final_text"`` entry of its result is
    synthesised with the ``TTS`` object imported from ``TTS.api``.
    """

    @staticmethod
    def _speaker_path(model_dir, filename):
        """Return the path of a reference clip inside ``<model_dir>/speakers``.

        Built with ``os.path.join`` so the separator matches the host OS
        instead of being hard-coded to the Windows backslash.
        """
        return os.path.join(model_dir, "speakers", filename)

    @staticmethod
    def _player_command(system_name, file_path):
        """Return the argv of the external audio player for a non-Windows OS.

        Args:
            system_name: Value of ``platform.system()``.
            file_path: Path of the WAV file to play.

        Returns:
            ``["afplay", file_path]`` on macOS ("Darwin"), otherwise
            ``["aplay", file_path]``.
        """
        player = "afplay" if system_name == "Darwin" else "aplay"
        return [player, file_path]

    def __init__(self):
        """Set the default settings and load the XTTS model.

        Loading happens eagerly, so constructing the object can take a while
        and requires the model directory to be present.
        """
        # Model configuration
        self.model_dir = "xtts_Hindi_FineTuned"
        self.reference_voices = {
            "1": self._speaker_path(self.model_dir, "Reference_male.wav"),
            "2": self._speaker_path(self.model_dir, "Reference_female.wav"),
        }
        self.reference_audio = self.reference_voices["2"]  # Default to female voice

        # Default parameters
        self.params = {
            "temperature": 0.7,
            "top_k": 50,
            "top_p": 0.85,
            "speed": 1.0,
            "language": "en",
            "output_file": "output_tts.wav",
        }

        # Load the model
        print("Loading XTTS model... (This may take a moment)")
        self.use_cuda = torch.cuda.is_available()
        self.tts = TTS(
            model_path=self.model_dir,
            config_path=os.path.join(self.model_dir, "config.json"),
            gpu=self.use_cuda,
        )
        print("Model loaded successfully!")

    def clear_screen(self):
        """Clear the terminal using the platform's shell command."""
        os.system('cls' if os.name == 'nt' else 'clear')

    def play_audio(self, file_path):
        """Play ``file_path`` through the system audio player (best effort).

        Uses ``winsound`` on Windows and an external player elsewhere.  Any
        failure is reported on stdout together with the absolute path of the
        file; nothing is raised.
        """
        try:
            system_name = platform.system()
            if system_name == 'Windows':
                # winsound only exists on Windows, hence the local import.
                import winsound
                winsound.PlaySound(file_path, winsound.SND_FILENAME)
            else:
                subprocess.run(
                    self._player_command(system_name, file_path), check=True
                )
        except Exception as e:
            print(f"Could not play audio: {e}")
            print(f"Audio saved to: {os.path.abspath(file_path)}")

    def show_menu(self):
        """Clear the screen and print the current settings and the options."""
        self.clear_screen()
        separator = "=" * 50
        voice = 'Female' if 'female' in self.reference_audio.lower() else 'Male'
        language = 'English' if self.params['language'] == 'en' else 'Hindi'

        print("\n" + separator)
        print(" Interactive TTS System (Hindi/English)")
        print(separator)
        print("\nCurrent Settings:")
        print(f"1. Voice: {voice}")
        print(f"2. Language: {language}")
        print(f"3. Speed: {self.params['speed']}x")
        print(f"4. Output file: {self.params['output_file']}")
        print("\nOptions:")
        print("T. Enter text to convert to speech")
        print("P. Change parameters")
        print("V. Change voice")
        print("L. Toggle language (English/Hindi)")
        print("S. Change speed")
        print("O. Change output file")
        print("Q. Quit")
        print("\n" + separator)

    def get_user_choice(self):
        """Prompt for a menu choice and return it stripped and lower-cased."""
        return input("\nEnter your choice: ").strip().lower()

    def get_text_input(self):
        """Read multi-line text from stdin until two consecutive empty lines.

        A single empty line is kept as a blank line inside the text; the
        second consecutive empty line ends the input and both are dropped.
        End-of-file also ends the input.

        Returns:
            The collected lines joined with newlines.
        """
        print("\nEnter your text (press Enter twice to finish):")
        lines = []
        while True:
            try:
                line = input()
            except EOFError:
                # stdin was closed: treat it as the end of the text.
                break
            if line == "" and lines and lines[-1] == "":
                # Second empty line in a row: drop the first one and stop.
                lines.pop()
                break
            lines.append(line)
        return "\n".join(lines)

    def change_voice(self):
        """Let the user pick the male or female reference voice."""
        print("\nAvailable voices:")
        print("1. Male voice")
        print("2. Female voice")
        choice = input("Select voice (1-2): ").strip()
        if choice in self.reference_voices:
            self.reference_audio = self.reference_voices[choice]
            print(f"Voice changed to {'male' if choice == '1' else 'female'}")
        else:
            print("Invalid choice. Keeping current voice.")

    def toggle_language(self):
        """Switch the synthesis language between English and Hindi."""
        self.params['language'] = 'hi' if self.params['language'] == 'en' else 'en'
        print(f"Language set to: {'Hindi' if self.params['language'] == 'hi' else 'English'}")

    def change_speed(self):
        """Prompt for a speed factor and store it if it lies in [0.5, 2.0]."""
        try:
            speed = float(input("Enter speed (0.5-2.0, 1.0 = normal): "))
        except ValueError:
            print("Invalid input. Please enter a number.")
            return
        if 0.5 <= speed <= 2.0:
            self.params['speed'] = speed
            print(f"Speed set to: {speed}x")
        else:
            print("Speed must be between 0.5 and 2.0")

    def change_output_file(self):
        """Prompt for the output filename, appending ``.wav`` when missing.

        Empty input keeps the current filename instead of producing a file
        literally named ``.wav``.
        """
        new_file = input("Enter output filename (e.g., my_tts.wav): ").strip()
        if not new_file:
            print("No filename entered. Keeping current output file.")
            return
        if not new_file.lower().endswith('.wav'):
            new_file += '.wav'
        self.params['output_file'] = new_file
        print(f"Output will be saved to: {new_file}")

    def _update_numeric_param(self, key, label, prompt, cast, low, high, kind):
        """Prompt for one numeric setting and store it when it is in range.

        Args:
            key: Entry of ``self.params`` to update.
            label: Human-readable name used in the confirmation message.
            prompt: Text shown to the user.
            cast: ``int`` or ``float``; converts the raw input.
            low: Smallest accepted value (inclusive).
            high: Largest accepted value (inclusive).
            kind: Noun phrase for the error message ("a number", "an integer").
        """
        try:
            value = cast(input(prompt))
        except ValueError:
            print(f"Invalid input. Please enter {kind}.")
            return
        if low <= value <= high:
            self.params[key] = value
            print(f"{label} set to: {value}")
        else:
            print(f"Value must be between {low} and {high}")

    def change_parameters(self):
        """Show the sampling parameters and let the user change one of them."""
        print("\nCurrent parameters:")
        print(f"1. Temperature (randomness): {self.params['temperature']}")
        print(f"2. Top-k: {self.params['top_k']}")
        print(f"3. Top-p: {self.params['top_p']}")
        print("4. Back to main menu")
        param_choice = input("\nSelect parameter to change (1-4): ").strip()

        # choice -> (params key, label, prompt, converter, min, max, error noun)
        editable = {
            "1": ("temperature", "Temperature",
                  "Enter temperature (0.1-1.0, higher = more random): ",
                  float, 0.1, 1.0, "a number"),
            "2": ("top_k", "Top-k",
                  "Enter top-k (10-100, lower = more focused): ",
                  int, 10, 100, "an integer"),
            "3": ("top_p", "Top-p",
                  "Enter top-p (0.5-1.0, lower = more focused): ",
                  float, 0.5, 1.0, "a number"),
        }
        if param_choice in editable:
            self._update_numeric_param(*editable[param_choice])
        elif param_choice != "4":
            print("Invalid choice. No parameter changed.")

    def generate_speech(self, text):
        """Synthesise ``text`` to the configured output file.

        The text is first passed through ``process_text``; the ``"final_text"``
        entry of its result is what gets synthesised.  After generation the
        user is offered playback.

        Args:
            text: Raw user text.

        Returns:
            ``True`` when the audio file was generated, ``False`` when the text
            was blank or any step raised (the error is printed, not raised).
        """
        if not text.strip():
            print("No text provided!")
            return False

        output_file = self.params['output_file']
        try:
            # Process the text
            processed_text = process_text(text)
            input_text = processed_text["final_text"]

            # Make sure a user-supplied sub-directory exists before writing.
            output_dir = os.path.dirname(output_file)
            if output_dir:
                os.makedirs(output_dir, exist_ok=True)

            print("\nGenerating speech...")
            start_time = time.perf_counter()

            # Generate the speech
            self.tts.tts_to_file(
                text=input_text,
                file_path=output_file,
                speaker_wav=self.reference_audio,
                language=self.params['language'],
                temperature=self.params['temperature'],
                top_k=self.params['top_k'],
                top_p=self.params['top_p'],
                speed=self.params['speed'],
                split_sentences=True,
            )
            total_duration = time.perf_counter() - start_time

            print("\n=== Generation Complete ===")
            print(f"Saved audio to: {os.path.abspath(output_file)}")
            print(f"Processing time: {total_duration:.2f} seconds")

            # Play the generated audio
            play = input("\nPlay the generated audio? (y/n): ").strip().lower()
            if play == 'y':
                self.play_audio(output_file)
            return True
        except Exception as e:
            # UI boundary: report the failure and return to the menu.
            print(f"\nError during speech generation: {str(e)}")
            return False

    def run(self):
        """Run the menu loop until the user chooses to quit."""
        setting_actions = {
            'p': self.change_parameters,
            'v': self.change_voice,
            'l': self.toggle_language,
            's': self.change_speed,
            'o': self.change_output_file,
        }
        while True:
            self.show_menu()
            choice = self.get_user_choice()

            if choice == 'q':
                print("\nThank you for using the Interactive TTS System. Goodbye!")
                break

            if choice == 't':
                text = self.get_text_input()
                if text.strip():
                    self.generate_speech(text)
                else:
                    print("No text entered!")
            elif choice in setting_actions:
                setting_actions[choice]()
            else:
                print("\nInvalid choice. Please try again.")
                input("Press Enter to continue...")
                continue

            # show_menu() clears the screen, so pause here or the messages
            # printed by the action above would vanish before being read.
            input("\nPress Enter to continue...")
if __name__ == "__main__":
    # Script entry point: build the interactive system (which loads the model
    # in its constructor) and hand control to its menu loop.
    try:
        tts_system = InteractiveTTS()
        tts_system.run()
    except (KeyboardInterrupt, EOFError):
        # Ctrl-C or a closed stdin at any prompt ends the session; exit
        # quietly instead of showing a traceback.
        print("\nSession ended. Goodbye!")