EM-TTS-Test

Sleeping

App Files Files Community

EM-TTS-Test / app.py

skmiller

Update app.py

98f795e verified over 1 year ago

raw

history blame contribute delete

6.83 kB

	import gradio as gr
	import tempfile
	import os
	from gtts import gTTS
	from deep_translator import GoogleTranslator
	from groq import Groq
	import logging
	from sentence_transformers import SentenceTransformer
	import numpy as np

	# Log line layout: timestamp | level | message.
	# Plain "|" separators are used; a backslash-escaped pipe is an invalid string
	# escape in Python and would also put literal backslashes into every log line.
	LOG_FORMAT = '%(asctime)s | %(levelname)s | %(message)s'
	logging.basicConfig(level=logging.INFO, format=LOG_FORMAT)

	# Groq API client; the key is taken from the GROQ_API_KEY environment variable
	groq_client = Groq(api_key=os.getenv("GROQ_API_KEY"))

	# Sentence-embedding model used for both indexed chunks and queries
	sentence_model = SentenceTransformer('all-MiniLM-L6-v2')

	# In-memory index: indexed_embeddings[i] is the embedding of indexed_texts[i]
	indexed_texts, indexed_embeddings = [], []

	# Display name -> translation target code.
	# Insertion order is the order shown in the translation dropdown.
	translation_languages = dict(
	    English="en",
	    Spanish="es",
	    Arabic="ar",
	    Amharic="am",
	    Hindi="hi",
	    Kannada="kn",
	    Marathi="mr",
	    Russian="ru",
	    Telugu="te",
	    Tamil="ta",
	    Vietnamese="vi",
	)

	# Languages offered for Google TTS output: display name -> {"code": <language code>}.
	# Insertion order is the order shown in the audio-language dropdown.
	audio_language_dict = {
	    language_name: {"code": language_code}
	    for language_name, language_code in (
	        ("English", "en"),
	        ("Spanish", "es"),
	        ("Amharic", "am"),
	        ("Arabic", "ar"),
	        ("Hindi", "hi"),
	        ("Kannada", "kn"),
	        ("Marathi", "mr"),
	        ("Russian", "ru"),
	        ("Telugu", "te"),
	        ("Tamil", "ta"),
	        ("Vietnamese", "vi"),
	    )
	}

	def index_text(text: str) -> str:
	    """Split ``text`` into chunks, embed them, and append them to the in-memory index.

	    The text is split on ". "; each piece is stripped and blank pieces are
	    dropped so whitespace-only fragments never reach the index. All remaining
	    chunks are embedded in a single ``sentence_model.encode`` call.

	    Args:
	        text: The text to index.

	    Returns:
	        A status message with the total number of indexed chunks, or an
	        "Error indexing text: ..." message if anything fails. Exceptions are
	        logged and reported through the return value, never raised.
	    """
	    try:
	        stripped = (piece.strip() for piece in text.split('. '))
	        chunks = [piece for piece in stripped if piece]
	        if chunks:
	            # Encode the whole batch first; the index is only touched once
	            # every embedding exists, so the two lists stay aligned.
	            embeddings = sentence_model.encode(chunks)
	            indexed_texts.extend(chunks)
	            indexed_embeddings.extend(embeddings)
	        return f"Text indexed successfully. Total indexed chunks: {len(indexed_texts)}"
	    except Exception as e:
	        logging.error(f"Error indexing text: {str(e)}")
	        return f"Error indexing text: {str(e)}"

	def clear_index() -> str:
	    """Empty the in-memory text index and return a confirmation message."""
	    # Empty both lists in place so the module-level objects themselves are reset.
	    for store in (indexed_texts, indexed_embeddings):
	        del store[:]
	    return "Index cleared successfully. Ready for new indexing."

	def find_most_similar(query: str, top_k: int = 3) -> list:
	    """Return the indexed chunks most similar to ``query``, best match first.

	    Similarity is the dot product between the query embedding and each
	    indexed embedding.

	    Args:
	        query: Text to compare against the index.
	        top_k: Maximum number of chunks to return. Values of zero or less
	            yield an empty list.

	    Returns:
	        Up to ``top_k`` chunks ordered from most to least similar, or
	        ``["No indexed text available."]`` when the index is empty.
	    """
	    if not indexed_texts:
	        return ["No indexed text available."]
	    if top_k <= 0:
	        # A slice of [-0:] would select every chunk rather than none.
	        return []
	    query_embedding = sentence_model.encode([query])[0]
	    # One matrix-vector product scores every indexed chunk at once.
	    similarities = np.dot(np.asarray(indexed_embeddings), query_embedding)
	    top_indices = np.argsort(similarities)[-top_k:][::-1]
	    return [indexed_texts[i] for i in top_indices]

	def chat_with_context(question: str, model: str) -> str:
	    """Ask a Groq chat model ``question`` using the indexed text as context.

	    Up to three of the most similar indexed chunks are joined into a context
	    string and sent with the question as a single user message.

	    Returns the model's reply, a request to index text first when the index
	    is empty, or an "Error in chat: ..." message if the request fails.
	    """
	    if not indexed_texts:
	        return "Please index some text first."

	    context = " ".join(find_most_similar(question, top_k=3))

	    try:
	        user_message = {
	            "role": "user",
	            "content": f"Context: {context}\n\nQuestion: {question}\n\nAnswer:",
	        }
	        response = groq_client.chat.completions.create(
	            messages=[user_message],
	            model=model,
	            max_tokens=500,  # cap the length of the reply
	        )
	        return response.choices[0].message.content
	    except Exception as exc:
	        error_text = f"Error in chat: {str(exc)}"
	        logging.error(error_text)
	        return error_text

	def translate_text(text, target_lang_code):
	    """Translate ``text`` into ``target_lang_code`` with source-language auto-detection.

	    Returns the translation, or a "Translation Error: ..." message if the
	    translator raises.
	    """
	    try:
	        return GoogleTranslator(source='auto', target=target_lang_code).translate(text)
	    except Exception as exc:
	        return f"Translation Error: {str(exc)}"

	# Google TTS function
	def google_tts(text, lang):
	    """Synthesise ``text`` as speech with gTTS and save it to a temporary MP3 file.

	    Args:
	        text: The text to speak.
	        lang: Language code passed to gTTS.

	    Returns:
	        ``(path, message)`` on success, where ``path`` is the generated MP3
	        file, or ``(None, error message)`` on failure. Exceptions are logged
	        and reported through the return value, never raised.
	    """
	    audio_path = None
	    try:
	        tts = gTTS(text=text, lang=lang, slow=False)
	        # Reserve a unique file name, then close the handle before gTTS opens
	        # the path itself, so the file is never held open twice.
	        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
	            audio_path = temp_audio.name
	        tts.save(audio_path)
	        return audio_path, f"Speech generated with Google TTS using {lang} language"
	    except Exception as e:
	        logging.error(f"Error in Google TTS: {str(e)}")
	        if audio_path is not None:
	            # The file was created with delete=False; remove it so a failed
	            # synthesis does not leave an orphaned temp file behind.
	            try:
	                os.remove(audio_path)
	            except OSError:
	                pass  # best-effort cleanup; the original error is what matters
	        return None, f"Error in Google TTS: {str(e)}"

	# Gradio UI: translate the entered text, then turn the translation into speech.
	# The indexing and chat features are currently disabled; their components and
	# event wiring are kept below as comments.
	with gr.Blocks() as iface:
	    gr.Markdown("# EM Text Translator and Speech Generator")

	    with gr.Row():
	        text_input = gr.Textbox(label="Enter text for translation and speech generation", lines=3)

	    with gr.Row():
	        translation_lang_dropdown = gr.Dropdown(list(translation_languages.keys()), label="Select Translation Language", value="English")
	        convert_button = gr.Button("Convert")

	    translated_text = gr.Textbox(label="Translated Text")

	    # with gr.Row():
	    #     index_button = gr.Button("Index")
	    #     clear_index_button = gr.Button("Clear Index")

	    # index_status = gr.Textbox(label="Indexing Status")

	    # Not wired to any event while the chat feature is disabled.
	    use_chat = gr.Checkbox(label="", value=False)

	    # chat_group = gr.Group(visible=False)
	    # with chat_group:
	    #     chat_input = gr.Textbox(label="Ask a question about the indexed text")
	    #     chat_model = gr.Dropdown(
	    #         choices=["llama3-70b-8192", "mixtral-8x7b-32768", "gemma-7b-it"],
	    #         label="Select Chat Model",
	    #         value="llama3-70b-8192"
	    #     )
	    #     chat_button = gr.Button("Ask")
	    #
	    #     chat_output = gr.Textbox(label="Answer", interactive=False)

	    with gr.Group() as tts_options:
	        audio_lang_dropdown = gr.Dropdown(list(audio_language_dict.keys()), label="Select Audio Language", value="English")

	    generate_button = gr.Button("Generate Speech")
	    audio_output = gr.Audio(label="Generated Speech")
	    message_output = gr.Textbox(label="Message")

	    # def update_chat_visibility(use_chat):
	    #     return gr.update(visible=use_chat)

	    def convert_text(text, translation_lang):
	        """Translate ``text`` into the language selected in the dropdown."""
	        target_code = translation_languages[translation_lang]
	        return translate_text(text, target_code)

	    def generate_speech(text, audio_lang, use_chat=False, chat_output=None):
	        """Generate speech for ``text`` in the selected audio language.

	        ``use_chat`` and ``chat_output`` are optional because the click
	        handler below passes only the text and the language while the chat
	        feature is disabled. When both are truthy, the chat answer is spoken
	        instead of ``text``.

	        Returns ``(audio file path or None, status message)``.
	        """
	        if use_chat and chat_output:
	            text = chat_output
	        logging.info(f"Generating speech: lang={audio_lang}")
	        try:
	            return google_tts(text, audio_language_dict[audio_lang]["code"])
	        except Exception as e:
	            logging.error(f"Error generating speech: {str(e)}")
	            return None, f"Error generating speech: {str(e)}"

	    convert_button.click(convert_text, inputs=[text_input, translation_lang_dropdown], outputs=translated_text)
	    # index_button.click(index_text, inputs=[translated_text], outputs=[index_status])
	    # clear_index_button.click(clear_index, outputs=[index_status])
	    # use_chat.change(update_chat_visibility, inputs=[use_chat], outputs=[chat_group])
	    # chat_button.click(chat_with_context, inputs=[chat_input, chat_model], outputs=[chat_output])

	    generate_button.click(
	        generate_speech,
	        # inputs=[translated_text, audio_lang_dropdown, use_chat, chat_output],
	        inputs=[translated_text, audio_lang_dropdown],
	        outputs=[audio_output, message_output]
	    )

	iface.launch()