tts_project / imp_scripts /test_inference.py

Initial project upload: Hindi/English Text-to-Speech pipeline

8a02978 3 months ago

8.81 kB

	import os
	import time
	import torch
	from TTS.api import TTS
	from hing_bert_module import process_text
	import platform
	import subprocess

	class InteractiveTTS:
	    """Menu-driven console front end for a fine-tuned XTTS model.

	    Constructing an instance loads the model from ``model_dir``. ``run`` then
	    loops over a text menu that lets the user adjust sampling parameters,
	    pick a reference voice, and synthesize typed text into a WAV file.
	    """

	    def __init__(self):
	        """Initialise default settings and load the XTTS model.

	        ``torch.cuda.is_available()`` decides the ``gpu`` flag passed to
	        ``TTS``; the model and its ``config.json`` are read from
	        ``self.model_dir``.
	        """
	        # Model configuration
	        self.model_dir = "xtts_Hindi_FineTuned"
	        self.reference_voices = self._reference_voice_paths(self.model_dir)
	        self.reference_audio = self.reference_voices["2"]  # Default to female voice

	        # Default parameters
	        self.params = {
	            "temperature": 0.7,
	            "top_k": 50,
	            "top_p": 0.85,
	            "speed": 1.0,
	            "language": "en",
	            "output_file": "output_tts.wav",
	        }

	        # Load the model
	        print("Loading XTTS model... (This may take a moment)")
	        self.use_cuda = torch.cuda.is_available()
	        self.tts = TTS(
	            model_path=self.model_dir,
	            config_path=os.path.join(self.model_dir, "config.json"),
	            gpu=self.use_cuda,
	        )
	        print("Model loaded successfully!")

	    @staticmethod
	    def _reference_voice_paths(model_dir):
	        """Return the menu-key -> reference WAV path mapping under *model_dir*.

	        Paths are assembled with ``os.path.join`` so they use the separator
	        of the running platform; a literal backslash path only resolves on
	        Windows.
	        """
	        speakers_dir = os.path.join(model_dir, "speakers")
	        return {
	            "1": os.path.join(speakers_dir, "Reference_male.wav"),
	            "2": os.path.join(speakers_dir, "Reference_female.wav"),
	        }

	    @staticmethod
	    def _normalize_output_name(name):
	        """Return *name* stripped and ending in ``.wav``, or ``None`` if blank.

	        The extension check is case-insensitive, so ``Clip.WAV`` is kept
	        unchanged.
	        """
	        name = name.strip()
	        if not name:
	            return None
	        if not name.lower().endswith('.wav'):
	            name += '.wav'
	        return name

	    @staticmethod
	    def _prompt_number(prompt, cast, low, high, range_error, type_error):
	        """Ask for a number and return it, or ``None`` if it is rejected.

	        Args:
	            prompt: Text shown to the user.
	            cast: ``int`` or ``float``; a ``ValueError`` from it prints
	                *type_error*.
	            low, high: Inclusive bounds; a value outside them prints
	                *range_error*.
	        """
	        try:
	            value = cast(input(prompt))
	        except ValueError:
	            print(type_error)
	            return None
	        if low <= value <= high:
	            return value
	        print(range_error)
	        return None

	    def clear_screen(self):
	        """Clear the terminal using ``cls`` on Windows and ``clear`` elsewhere."""
	        os.system('cls' if os.name == 'nt' else 'clear')

	    def play_audio(self, file_path):
	        """Play *file_path* with the platform's audio player.

	        Uses ``winsound`` on Windows, ``afplay`` on macOS and ``aplay`` on any
	        other system. Playback is best-effort: any failure is reported
	        together with the absolute path of the file instead of being raised.
	        """
	        system = platform.system()
	        try:
	            if system == 'Windows':
	                import winsound
	                winsound.PlaySound(file_path, winsound.SND_FILENAME)
	            else:
	                # macOS ships afplay; aplay (ALSA) is the Linux default.
	                player = 'afplay' if system == 'Darwin' else 'aplay'
	                subprocess.run([player, file_path], check=True)
	        except Exception as e:
	            # Deliberately broad: a missing player or bad file must not
	            # abort the interactive session.
	            print(f"Could not play audio: {e}")
	            print(f"Audio saved to: {os.path.abspath(file_path)}")

	    def show_menu(self):
	        """Clear the screen and print the current settings and menu options."""
	        self.clear_screen()
	        voice = 'Female' if 'female' in self.reference_audio.lower() else 'Male'
	        language = 'English' if self.params['language'] == 'en' else 'Hindi'
	        rule = "=" * 50

	        print("\n" + rule)
	        print(" Interactive TTS System (Hindi/English)")
	        print(rule)
	        print("\nCurrent Settings:")
	        print(f"1. Voice: {voice}")
	        print(f"2. Language: {language}")
	        print(f"3. Speed: {self.params['speed']}x")
	        print(f"4. Output file: {self.params['output_file']}")
	        print("\nOptions:")
	        print("T. Enter text to convert to speech")
	        print("P. Change parameters")
	        print("V. Change voice")
	        print("L. Toggle language (English/Hindi)")
	        print("S. Change speed")
	        print("O. Change output file")
	        print("Q. Quit")
	        print("\n" + rule)

	    def get_user_choice(self):
	        """Prompt for a menu choice and return it stripped and lower-cased."""
	        return input("\nEnter your choice: ").strip().lower()

	    def get_text_input(self):
	        """Read multi-line text until two consecutive empty lines are entered.

	        Returns the collected lines joined with newlines. The first empty
	        line of the terminating pair is dropped; single empty lines inside
	        the text are kept. A closed stdin (``EOFError``) also ends the input.
	        """
	        print("\nEnter your text (press Enter twice to finish):")
	        lines = []
	        while True:
	            try:
	                line = input()
	            except EOFError:
	                break
	            if line == "" and lines and lines[-1] == "":
	                # Second blank in a row: discard the first and stop.
	                lines.pop()
	                break
	            lines.append(line)
	        return "\n".join(lines)

	    def change_voice(self):
	        """Let the user pick the male (1) or female (2) reference voice."""
	        print("\nAvailable voices:")
	        print("1. Male voice")
	        print("2. Female voice")
	        choice = input("Select voice (1-2): ").strip()
	        if choice in ["1", "2"]:
	            self.reference_audio = self.reference_voices[choice]
	            print(f"Voice changed to {'male' if choice == '1' else 'female'}")
	        else:
	            print("Invalid choice. Keeping current voice.")

	    def toggle_language(self):
	        """Switch ``params['language']`` between ``'en'`` and ``'hi'``."""
	        self.params['language'] = 'hi' if self.params['language'] == 'en' else 'en'
	        print(f"Language set to: {'Hindi' if self.params['language'] == 'hi' else 'English'}")

	    def change_speed(self):
	        """Prompt for a playback speed in [0.5, 2.0] and store it if valid."""
	        speed = self._prompt_number(
	            "Enter speed (0.5-2.0, 1.0 = normal): ",
	            float,
	            0.5,
	            2.0,
	            "Speed must be between 0.5 and 2.0",
	            "Invalid input. Please enter a number.",
	        )
	        if speed is not None:
	            self.params['speed'] = speed
	            print(f"Speed set to: {speed}x")

	    def change_output_file(self):
	        """Prompt for the output filename, appending ``.wav`` when missing.

	        A blank answer keeps the current file rather than producing a file
	        literally named ``.wav``.
	        """
	        entered = input("Enter output filename (e.g., my_tts.wav): ")
	        new_file = self._normalize_output_name(entered)
	        if new_file is None:
	            print("No filename entered. Keeping current output file.")
	            return
	        self.params['output_file'] = new_file
	        print(f"Output will be saved to: {new_file}")

	    def change_parameters(self):
	        """Show the sampling parameters and let the user change one of them.

	        Choice 4, or any unrecognised choice, returns without changes.
	        """
	        print("\nCurrent parameters:")
	        print(f"1. Temperature (randomness): {self.params['temperature']}")
	        print(f"2. Top-k: {self.params['top_k']}")
	        print(f"3. Top-p: {self.params['top_p']}")
	        print("4. Back to main menu")

	        param_choice = input("\nSelect parameter to change (1-4): ").strip()

	        # choice -> (params key, prompt, cast, low, high, range msg, type msg)
	        specs = {
	            "1": (
	                'temperature',
	                "Enter temperature (0.1-1.0, higher = more random): ",
	                float, 0.1, 1.0,
	                "Value must be between 0.1 and 1.0",
	                "Invalid input. Please enter a number.",
	            ),
	            "2": (
	                'top_k',
	                "Enter top-k (10-100, lower = more focused): ",
	                int, 10, 100,
	                "Value must be between 10 and 100",
	                "Invalid input. Please enter an integer.",
	            ),
	            "3": (
	                'top_p',
	                "Enter top-p (0.5-1.0, lower = more focused): ",
	                float, 0.5, 1.0,
	                "Value must be between 0.5 and 1.0",
	                "Invalid input. Please enter a number.",
	            ),
	        }
	        spec = specs.get(param_choice)
	        if spec is None:
	            return

	        key, prompt, cast, low, high, range_error, type_error = spec
	        value = self._prompt_number(prompt, cast, low, high, range_error, type_error)
	        if value is not None:
	            self.params[key] = value

	    def generate_speech(self, text):
	        """Synthesize *text* to ``params['output_file']`` and offer playback.

	        The text is first passed through ``process_text`` and its
	        ``"final_text"`` entry is what gets synthesized.

	        Returns:
	            ``True`` on success, ``False`` when *text* is blank or any
	            exception is raised during processing or synthesis.
	        """
	        if not text.strip():
	            print("No text provided!")
	            return False

	        try:
	            # Process the text
	            processed_text = process_text(text)
	            input_text = processed_text["final_text"]

	            print("\nGenerating speech...")
	            start_time = time.perf_counter()

	            # Generate the speech
	            self.tts.tts_to_file(
	                text=input_text,
	                file_path=self.params['output_file'],
	                speaker_wav=self.reference_audio,
	                language=self.params['language'],
	                temperature=self.params['temperature'],
	                top_k=self.params['top_k'],
	                top_p=self.params['top_p'],
	                speed=self.params['speed'],
	                split_sentences=True,
	            )

	            total_duration = time.perf_counter() - start_time

	            print("\n=== Generation Complete ===")
	            print(f"Saved audio to: {os.path.abspath(self.params['output_file'])}")
	            print(f"Processing time: {total_duration:.2f} seconds")

	            # Play the generated audio
	            play = input("\nPlay the generated audio? (y/n): ").strip().lower()
	            if play == 'y':
	                self.play_audio(self.params['output_file'])

	            return True

	        except Exception as e:
	            # Top-level boundary of one request: report and keep the menu alive.
	            print(f"\nError during speech generation: {str(e)}")
	            return False

	    def run(self):
	        """Run the menu loop until the user quits.

	        Every action is followed by a "Press Enter" pause because
	        ``show_menu`` clears the screen, which would otherwise wipe the
	        action's feedback before it can be read. A closed stdin or Ctrl+C
	        ends the loop the same way as choosing Q.
	        """
	        actions = {
	            'p': self.change_parameters,
	            'v': self.change_voice,
	            'l': self.toggle_language,
	            's': self.change_speed,
	            'o': self.change_output_file,
	        }

	        try:
	            while True:
	                self.show_menu()
	                choice = self.get_user_choice()

	                if choice == 'q':
	                    break

	                if choice == 't':
	                    text = self.get_text_input()
	                    if text.strip():
	                        self.generate_speech(text)
	                    else:
	                        print("No text entered!")
	                    input("\nPress Enter to continue...")
	                elif choice in actions:
	                    actions[choice]()
	                    input("\nPress Enter to continue...")
	                else:
	                    print("\nInvalid choice. Please try again.")
	                    input("Press Enter to continue...")
	        except (EOFError, KeyboardInterrupt):
	            # Finish the interrupted prompt line before the farewell.
	            print()

	        print("\nThank you for using the Interactive TTS System. Goodbye!")

	# Script entry point: construct the interactive session (which loads the
	# model) and hand control to its menu loop.
	if __name__ == "__main__":
	    InteractiveTTS().run()