Spaces:

Somalitts
/

8aad

Running

App Files Files Community

8aad / app.py

Somalitts

Update app.py

43ec985 verified 5 months ago

raw

history blame

3.55 kB

	import gradio as gr
	import torch
	import torchaudio
	import re
	import os
	from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
	from speechbrain.pretrained import EncoderClassifier
	import numpy as np

	# --- Configuration ---
	# Use the GPU when torch reports one is available; otherwise fall back to CPU.
	if torch.cuda.is_available():
	    device = "cuda"
	else:
	    device = "cpu"

	# --- MAKE SURE YOU HAVE UPLOADED THESE FILES ---
	# They must be in the Hugging Face Space, in the same folder as "app.py".
	VOICE_SAMPLE_FILES = ["1.wav"]

	# Directory to store speaker embedding files; created up front if missing.
	EMBEDDING_DIR = "speaker_embeddings"
	os.makedirs(EMBEDDING_DIR, exist_ok=True)

	# --- Load Models ---
	# Everything is loaded once, at module import, and moved to `device`.
	try:
	    print("Loading models... This may take a moment.")
	    # Text processor from the base SpeechT5 TTS checkpoint.
	    processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
	    # TTS model loaded from the "Somalitts/8aad" checkpoint.
	    model = SpeechT5ForTextToSpeech.from_pretrained("Somalitts/8aad").to(device)
	    # Vocoder loaded from the "microsoft/speecht5_hifigan" checkpoint.
	    vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan").to(device)
	    # Speaker encoder used by get_speaker_embedding(); its files are kept
	    # under pretrained_models/spkrec-xvect-voxceleb.
	    speaker_model = EncoderClassifier.from_hparams(
	        source="speechbrain/spkrec-xvect-voxceleb",
	        run_opts={"device": device},
	        savedir=os.path.join("pretrained_models", "spkrec-xvect-voxceleb"),
	    )
	    print("Models loaded successfully.")
	except Exception as e:
	    # Chain the original exception so the real cause (network, missing
	    # checkpoint, out of memory, ...) stays visible in the traceback.
	    raise gr.Error(f"Error loading models: {e}. Check your internet connection.") from e

	# In-memory cache: wav file path -> speaker embedding.
	speaker_embeddings_cache = {}


	def get_speaker_embedding(wav_file_path):
	    """Return the speaker embedding for ``wav_file_path``, computing it at most once.

	    Lookup order:
	      1. the in-memory ``speaker_embeddings_cache``;
	      2. a ``<basename>.pt`` file inside ``EMBEDDING_DIR``;
	      3. a fresh computation from the audio using ``speaker_model``, whose
	         result is saved to ``EMBEDDING_DIR`` and to the in-memory cache.

	    Args:
	        wav_file_path: Path of the voice sample to embed.

	    Returns:
	        The embedding, loaded onto / moved to ``device``.

	    Raises:
	        FileNotFoundError: No cached embedding exists and the wav file is missing.
	        gr.Error: Loading or encoding the audio failed; the original
	            exception is attached as ``__cause__``.
	    """
	    if wav_file_path in speaker_embeddings_cache:
	        return speaker_embeddings_cache[wav_file_path]

	    # NOTE: the on-disk cache is keyed by basename only, so two samples with
	    # the same file name in different folders share one cached embedding.
	    embedding_path = os.path.join(EMBEDDING_DIR, f"{os.path.basename(wav_file_path)}.pt")
	    if os.path.exists(embedding_path):
	        embedding = torch.load(embedding_path, map_location=device)
	        speaker_embeddings_cache[wav_file_path] = embedding
	        return embedding

	    if not os.path.exists(wav_file_path):
	        # This is the error that was being hit; once the sample files are
	        # uploaded, execution gets past this point.
	        raise FileNotFoundError(f"Lama helin faylka codka: {wav_file_path}")

	    try:
	        audio, sr = torchaudio.load(wav_file_path)
	        # Resample anything that is not already 16 kHz.
	        if sr != 16000:
	            audio = torchaudio.functional.resample(audio, sr, 16000)
	        # More than one row along dim 0 (presumably channels): average them
	        # into a single row.
	        if audio.shape[0] > 1:
	            audio = torch.mean(audio, dim=0, keepdim=True)
	        with torch.no_grad():
	            embedding = speaker_model.encode_batch(audio.to(device))
	            embedding = torch.nn.functional.normalize(embedding, dim=2).squeeze()
	        # Persist a CPU copy so later runs can skip the encoder.
	        torch.save(embedding.cpu(), embedding_path)
	        # Move once and reuse the same tensor for the cache and the return value.
	        embedding = embedding.to(device)
	        speaker_embeddings_cache[wav_file_path] = embedding
	        return embedding
	    except Exception as e:
	        raise gr.Error(f"Could not process audio file {wav_file_path}. Error: {e}") from e

	# ... (The rest of the code is correct) ...

	# --- Main Text-to-Speech Function ---
	def text_to_speech(text, voice_choice):
	    """Placeholder for the synthesis routine; currently does nothing.

	    The real body was elided in this revision ("same as before"). Until it
	    is pasted back in, the function ignores its arguments and returns None.

	    Args:
	        text: Presumably the text to synthesize - TODO confirm against the
	            previous version.
	        voice_choice: Presumably one of the voice sample files - TODO confirm.

	    Returns:
	        None.
	    """
	    # TODO: put the rest of the code here (as in the previous version).
	    return None

	# --- Gradio Interface ---
	# NOTE(review): the original call body was an elided placeholder (a bare
	# `pass` inside the parentheses, which is a syntax error). The arguments
	# below are only the minimal wiring implied by
	# text_to_speech(text, voice_choice): a text box, a voice picker and an audio
	# output. Restore the title, labels and examples from the previous version.
	iface = gr.Interface(
	    fn=text_to_speech,
	    inputs=[
	        gr.Textbox(),
	        gr.Dropdown(choices=VOICE_SAMPLE_FILES),
	    ],
	    outputs=gr.Audio(),
	)

	# --- Launch the web interface ---
	if __name__ == "__main__":
	    # Fail fast, before any model work, if a voice sample was not uploaded.
	    print("Hubinta faylasha codadka...")
	    for f in VOICE_SAMPLE_FILES:
	        if not os.path.exists(f):
	            # This is where the error starts.
	            raise FileNotFoundError(f"Mid ka mid ah faylasha lama helin: '{f}'. Fadlan hubi inaad soo gelisay Hugging Face Spaces.")

	    # Compute (or load) every speaker embedding up front so the caches are
	    # populated before the interface opens.
	    print("Diyaarinta astaamaha codadka...")
	    for voice_file in VOICE_SAMPLE_FILES:
	        get_speaker_embedding(voice_file)
	    print("Dhammaan codadka waa diyaar. Waxaa la furayaa interface-ka.")

	    # NOTE(review): share=True requests a public share link; confirm it is
	    # wanted when the app is hosted on Hugging Face Spaces.
	    iface.launch(share=True)