Spaces:

Matheusmatos2916
/

PronunAI

Sleeping

PronunAI / app.py

Matheus Matos Rocha

teste

83d2714 10 months ago

15.1 kB

	import difflib
	import html
	import logging
	import os
	import re
	import tempfile
	import time

	import gradio as gr
	import speech_recognition as sr
	from dotenv import load_dotenv
	from langchain_groq.chat_models import ChatGroq

	# Load environment variables at import time (python-dotenv), before
	# PronunciaPratica.__init__ reads GROQ_API_KEY through os.getenv.
	load_dotenv()

	class PronunciaPratica:
	    """Pronunciation practice helper for a single language.

	    Combines a ``speech_recognition`` recognizer (speech to text) with a Groq
	    chat model (phrase generation and feedback text).
	    """

	    # Language code -> language name interpolated into the (Portuguese) prompt.
	    _LANGUAGE_NAMES = {
	        'pt-BR': 'português',
	        'en-US': 'inglês',
	        'es-ES': 'espanhol',
	        'fr-FR': 'francês',
	        'it-IT': 'italiano',
	        'de-DE': 'alemão',
	    }

	    # Phrase handed out when the chat model cannot provide one.
	    _DEFAULT_PHRASES = {
	        'pt-BR': "O sol está brilhando hoje.",
	        'en-US': "The sun is shining today.",
	        'es-ES': "El sol está brillando hoy.",
	        'fr-FR': "Le soleil brille aujourd'hui.",
	        'it-IT': "Il sole splende oggi.",
	        'de-DE': "Die Sonne scheint heute.",
	    }

	    # Feedback prompt used when the practice language is Portuguese.
	    _FEEDBACK_PROMPT_PT = """
	Analise a pronúncia do usuário e forneça feedback específico:

	Frase original: "{frase_original}"
	Frase falada: "{frase_falada}"
	Similaridade: {similaridade:.2f}%
	Palavras possivelmente problemáticas: {palavras_prob}

	Ofereça dicas específicas para melhorar a pronúncia, focando nos erros mais comuns.
	Seja breve e construtivo, máximo de 3 linhas.
	"""

	    # Feedback prompt used for English and for every other language.
	    _FEEDBACK_PROMPT_EN = """
	Analyze the user's pronunciation and provide specific feedback:

	Original phrase: "{frase_original}"
	Spoken phrase: "{frase_falada}"
	Similarity: {similaridade:.2f}%
	Potentially problematic words: {palavras_prob}

	Offer specific tips to improve pronunciation, focusing on the most common errors.
	Be brief and constructive, maximum of 3 lines.
	"""

	    # Canned feedback for when the chat model is unavailable:
	    # (excellent, good, lead-in for the word list, retry).
	    _FALLBACK_FEEDBACK_PT = (
	        "Excelente pronúncia! Continue praticando.",
	        "Boa pronúncia, mas pode melhorar.",
	        "Preste atenção em:",
	        "Tente novamente, focando na pronúncia clara de cada palavra.",
	    )
	    _FALLBACK_FEEDBACK_EN = (
	        "Excellent pronunciation! Keep practicing.",
	        "Good pronunciation, but can be improved.",
	        "Pay attention to:",
	        "Try again, focusing on clear pronunciation of each word.",
	    )

	    def __init__(self, idioma='pt-BR'):
	        """
	        Initializes the pronunciation practice application.

	        Args:
	            idioma (str): Language code for speech recognition (e.g., 'pt-BR', 'en-US')

	        Raises:
	            ValueError: If the GROQ_API_KEY environment variable is unset or empty.
	        """
	        self.idioma = idioma
	        self.recognizer = sr.Recognizer()

	        # Configure the Groq client using an environment variable
	        api_key = os.getenv("GROQ_API_KEY")
	        if not api_key:
	            raise ValueError("⚠️ Groq API key not found. Set the GROQ_API_KEY environment variable.")

	        self.chat = ChatGroq(model="llama-3.1-8b-instant", api_key=api_key)

	    def gerar_frase(self):
	        """Generate a phrase for pronunciation practice.

	        Asks the chat model for a short phrase in the configured language. If
	        the request fails or the reply is empty, a built-in phrase for the
	        language is returned instead.

	        Returns:
	            str: The phrase to practise. It never contains error details: the
	            caller scores the user's speech against this exact text, so any
	            diagnostic suffix would be counted as mispronounced words.
	        """
	        # Unknown language codes fall back to Portuguese, like the default phrase.
	        language_name = self._LANGUAGE_NAMES.get(self.idioma, 'português')

	        prompt = f"""
	Forneça uma frase curta em {language_name} para treinar pronúncia.
	A frase deve ter entre 5 e 10 palavras.
	Responda APENAS com a frase em {language_name}, sem explicações.
	É MUITO IMPORTANTE que a frase seja apenas em {language_name} e não em qualquer outro idioma.
	"""

	        try:
	            resposta = self.chat.invoke([{
	                "role": "system",
	                "content": prompt
	            }])
	            frase = resposta.content.strip()
	        except Exception as exc:
	            # Best effort: keep the app usable offline, but record the cause.
	            logging.getLogger(__name__).warning(
	                "Phrase generation failed, using default phrase: %s", exc
	            )
	            frase = ""

	        return frase or self._DEFAULT_PHRASES.get(self.idioma, 'O sol está brilhando hoje.')

	    def reconhecer_audio(self, audio_path):
	        """Convert recorded audio to text.

	        Args:
	            audio_path: Path of the audio file to transcribe.

	        Returns:
	            str: The recognized text, or a message starting with ``"Erro: "``
	            when the audio could not be read or recognized.
	        """
	        try:
	            # Load the audio file
	            with sr.AudioFile(audio_path) as source:
	                audio_data = self.recognizer.record(source)

	            # Recognize the text
	            return self.recognizer.recognize_google(audio_data, language=self.idioma)
	        except sr.UnknownValueError:
	            return "Erro: Não foi possível entender o áudio."
	        except sr.RequestError as e:
	            return f"Erro: Problema na requisição do serviço de reconhecimento. {e}"
	        except Exception as e:
	            return f"Erro: {e}"

	    @staticmethod
	    def _normalizar(texto):
	        """Lower-case *texto* and drop everything except word characters and whitespace."""
	        return re.sub(r'[^\w\s]', '', texto.lower())

	    def avaliar_pronuncia(self, frase_original, frase_falada):
	        """
	        Evaluate the similarity between the original phrase and the spoken phrase.

	        Args:
	            frase_original (str): The phrase the user was asked to say.
	            frase_falada (str): The text recognized from the user's speech.

	        Returns:
	            tuple: (similarity percentage in 0-100, list of words of the
	            original phrase that do not appear in the spoken phrase, in their
	            original order)
	        """
	        original_norm = self._normalizar(frase_original)
	        falada_norm = self._normalizar(frase_falada)

	        # Character-level similarity of the normalized phrases.
	        similaridade = difflib.SequenceMatcher(None, original_norm, falada_norm).ratio() * 100

	        # A set makes each membership test O(1) instead of scanning a list.
	        palavras_faladas = set(falada_norm.split())
	        palavras_incorretas = [
	            palavra for palavra in original_norm.split()
	            if palavra not in palavras_faladas
	        ]

	        return similaridade, palavras_incorretas

	    def obter_feedback(self, similaridade, palavras_incorretas, frase_original, frase_falada):
	        """Generate detailed feedback on pronunciation.

	        The chat model is asked in Portuguese when the practice language code
	        starts with ``'pt'`` and in English otherwise. If the request fails, a
	        canned message chosen from the similarity score is returned.

	        Args:
	            similaridade (float): Similarity percentage (0-100).
	            palavras_incorretas (list): Words of the original phrase missing from the speech.
	            frase_original (str): The phrase the user was asked to say.
	            frase_falada (str): The text recognized from the user's speech.

	        Returns:
	            str: Feedback text.
	        """
	        em_portugues = self.idioma.startswith('pt')
	        if em_portugues:
	            prompt_template, nenhuma = self._FEEDBACK_PROMPT_PT, 'Nenhuma'
	        else:
	            # English also serves as the default for every other language.
	            prompt_template, nenhuma = self._FEEDBACK_PROMPT_EN, 'None'

	        prompt = prompt_template.format(
	            frase_original=frase_original,
	            frase_falada=frase_falada,
	            similaridade=similaridade,
	            palavras_prob=', '.join(palavras_incorretas) if palavras_incorretas else nenhuma
	        )

	        try:
	            resposta = self.chat.invoke([{"role": "system", "content": prompt}])
	            return resposta.content.strip()
	        except Exception as exc:
	            logging.getLogger(__name__).warning(
	                "Feedback generation failed, using canned feedback: %s", exc
	            )
	            return self._feedback_padrao(similaridade, palavras_incorretas, em_portugues)

	    @classmethod
	    def _feedback_padrao(cls, similaridade, palavras_incorretas, em_portugues):
	        """Pick the canned feedback message for *similaridade* in the right language."""
	        textos = cls._FALLBACK_FEEDBACK_PT if em_portugues else cls._FALLBACK_FEEDBACK_EN
	        excelente, boa, atencao, tente = textos

	        if similaridade > 90:
	            return excelente
	        if similaridade > 70:
	            # Only list words when there are some; avoids a dangling "...: ".
	            if palavras_incorretas:
	                return f"{boa} {atencao} {', '.join(palavras_incorretas)}"
	            return boa
	        return tente

	# Function to map language dropdown to language code
	def get_language_code(language_name):
	    """Translate a dropdown label into its speech-recognition language code.

	    Labels that are not in the table map to "pt-BR".
	    """
	    codes_by_label = dict((
	        ("Português (Brasil)", "pt-BR"),
	        ("Inglês (EUA)", "en-US"),
	        ("Espanhol", "es-ES"),
	        ("Francês", "fr-FR"),
	        ("Italiano", "it-IT"),
	        ("Alemão", "de-DE"),
	    ))
	    if language_name in codes_by_label:
	        return codes_by_label[language_name]
	    return "pt-BR"

	# Language code the shared instance below was built for.
	current_language_code = "pt-BR"

	# Shared PronunciaPratica instance; stays None until a handler creates it.
	app_instance = None

	# Functions for Gradio interface
	def gerar_nova_frase(language_name):
	    """Return a new practice phrase for the language selected in the dropdown.

	    The module-level PronunciaPratica instance is reused while the requested
	    language code matches the one it was built for; otherwise it is replaced.
	    """
	    global app_instance, current_language_code

	    requested_code = get_language_code(language_name)
	    instance_matches = app_instance is not None and current_language_code == requested_code

	    # Rebuild the shared instance on first use or after a language switch.
	    if not instance_matches:
	        app_instance = PronunciaPratica(idioma=requested_code)
	        current_language_code = requested_code

	    return app_instance.gerar_frase()

	def process_audio(audio_path, frase_atual, language_name, historico):
	    """Score one recording against the current phrase and update the history.

	    Args:
	        audio_path: Path of the recorded audio file, or None when there is no recording.
	        frase_atual (str): The phrase the user was asked to say.
	        language_name (str): Dropdown label of the practice language.
	        historico (list): Previous history entries, newest first.

	    Returns:
	        tuple: (recognized text or status message, score text, score number,
	        feedback text, updated history list, history rendered as HTML).
	        When there is no audio or recognition fails, the score fields are
	        empty/zero and the history is returned unchanged, but it is still
	        rendered so the history panel is not blanked.
	    """
	    global app_instance, current_language_code

	    if audio_path is None:
	        return "Nenhum áudio gravado", "", 0, "", historico, _render_history_html(historico)

	    # Make sure we have an app instance with the current language
	    language_code = get_language_code(language_name)
	    if app_instance is None or current_language_code != language_code:
	        app_instance = PronunciaPratica(idioma=language_code)
	        current_language_code = language_code

	    # Recognize the speech
	    texto_falado = app_instance.reconhecer_audio(audio_path)

	    # Recognizer failures are reported as "Erro: ..." strings. Matching the
	    # colon too keeps real speech such as "Errou..." from being treated as one.
	    if texto_falado.startswith("Erro:"):
	        return texto_falado, "", 0, "", historico, _render_history_html(historico)

	    # Evaluate pronunciation
	    similaridade, palavras_incorretas = app_instance.avaliar_pronuncia(frase_atual, texto_falado)

	    # Get detailed feedback
	    feedback = app_instance.obter_feedback(similaridade, palavras_incorretas, frase_atual, texto_falado)

	    nova_entrada = {
	        "frase": frase_atual,
	        "falado": texto_falado,
	        "similaridade": f"{similaridade:.1f}%",
	        "feedback": feedback,
	        "timestamp": time.strftime("%H:%M:%S")
	    }

	    # Newest entry first; keep only the five most recent attempts.
	    historico = ([nova_entrada] + list(historico or []))[:5]

	    return (
	        texto_falado,
	        f"{similaridade:.1f}%",
	        similaridade,
	        feedback,
	        historico,
	        _render_history_html(historico),
	    )

	def _render_history_html(historico):
	    """Render history entries as HTML blocks, escaping every interpolated value."""
	    blocos = []
	    for item in historico or []:
	        # Phrase, speech and feedback are free text (user / model output), so
	        # they must not be interpreted as markup by the HTML component.
	        campos = {chave: html.escape(str(valor)) for chave, valor in item.items()}
	        blocos.append(f"""
	<div class="history-entry">
	<b>{campos['timestamp']}</b> - Precisão: {campos['similaridade']}<br>
	<b>Original:</b> "{campos['frase']}"<br>
	<b>Sua fala:</b> "{campos['falado']}"<br>
	<b>Feedback:</b> {campos['feedback']}
	</div>
	""")
	    return "".join(blocos)

	def initialize_app_and_get_phrase(language_name="Português (Brasil)"):
	    """Create the shared app instance for *language_name* and return a first phrase.

	    Always builds a new PronunciaPratica, replacing any existing shared
	    instance, and records the language code it was built for.
	    """
	    global app_instance, current_language_code

	    code = get_language_code(language_name)
	    fresh_instance = PronunciaPratica(idioma=code)

	    # Publish the instance and its language only after construction succeeded.
	    app_instance = fresh_instance
	    current_language_code = code

	    return fresh_instance.gerar_frase()

	def main():
	"""Build the Gradio interface, wire its event handlers and launch the server.

	Generates the first practice phrase, declares the page CSS, lays out the
	components inside a gr.Blocks context, connects the button, dropdown and
	audio events to the module-level handlers, and calls demo.launch().
	"""
	# NOTE(review): the indentation of this function was lost in this copy of
	# the file. The code lines below are left untouched; the nesting of the
	# `with` blocks must be restored from the original source.

	# Get initial phrase (will also initialize app_instance)
	initial_phrase = initialize_app_and_get_phrase()

	# CSS for styling; the class names are attached to components through
	# elem_classes (phrase-box) and used by the history HTML (history-entry).
	css = """
	.gradio-container {
	font-family: 'Helvetica Neue', Arial, sans-serif;
	}
	.phrase-box {
	background-color: #828181;
	padding: 20px;
	border-radius: 10px;
	border-left: 5px solid #1e3d59;
	font-size: 22px;
	margin: 20px 0;
	}
	.spoken-box {
	background-color: #e6f7ff;
	padding: 15px;
	border-radius: 10px;
	border-left: 5px solid #0074cc;
	}
	.score-display {
	font-size: 36px;
	font-weight: bold;
	text-align: center;
	}
	.feedback-box {
	background-color: #e8f4ea;
	padding: 15px;
	border-radius: 10px;
	border-left: 5px solid #4CAF50;
	}
	.history-entry {
	margin-bottom: 15px;
	padding: 10px;
	border-left: 3px solid #4CAF50;
	background-color: #2a2a2a;
	color: white;
	border-radius: 8px;
	}
	"""

	with gr.Blocks(css=css) as demo:
	# Page title and subtitle
	gr.Markdown("# 🎤 Pronúncia Prática")
	gr.Markdown("### Melhore sua pronúncia com feedback em tempo real")

	# Language selector and "new phrase" button
	with gr.Row():
	with gr.Column(scale=2):
	idioma_dropdown = gr.Dropdown(
	choices=["Português (Brasil)", "Inglês (EUA)", "Espanhol", "Francês", "Italiano", "Alemão"],
	value="Português (Brasil)",
	label="Selecione o idioma para praticar:"
	)

	with gr.Column(scale=1):
	gerar_frase_btn = gr.Button("🔄 Gerar nova frase")

	# Phrase the user has to read aloud; starts with the phrase generated above
	frase_atual = gr.Textbox(
	value=initial_phrase,
	label="Frase para praticar:",
	elem_classes=["phrase-box"]
	)

	# Microphone recording, handed to the handler as a file path
	with gr.Row():
	audio_input = gr.Audio(
	sources=["microphone"],
	type="filepath",
	label="🎙️ Grave sua voz"
	)

	# Text recognized from the recording
	with gr.Row():
	texto_reconhecido = gr.Textbox(label="Você disse:")

	# Score as text and as a read-only 0-100 slider
	with gr.Row():
	score_text = gr.Textbox(label="Pontuação:")
	score_progress = gr.Slider(minimum=0, maximum=100, label="", interactive=False)

	feedback_box = gr.Textbox(label="Feedback:")

	# Hidden state for history (starts as an empty list)
	historico_state = gr.State([])

	with gr.Accordion("📜 Histórico de Prática", open=False):
	history_display = gr.HTML()

	# Event handlers
	# Button click: generate a new phrase for the selected language
	gerar_frase_btn.click(
	fn=gerar_nova_frase,
	inputs=[idioma_dropdown],
	outputs=[frase_atual]
	)

	# Changing the language also generates a new phrase
	idioma_dropdown.change(
	fn=gerar_nova_frase,
	inputs=[idioma_dropdown],
	outputs=[frase_atual]
	)

	# Any change of the audio component runs process_audio; the order of
	# `outputs` must match the six values process_audio returns.
	audio_input.change(
	fn=process_audio,
	inputs=[audio_input, frase_atual, idioma_dropdown, historico_state],
	outputs=[texto_reconhecido, score_text, score_progress, feedback_box, historico_state, history_display]
	)

	# Instructions in the footer
	gr.Markdown("""
	### 📝 Como usar:
	1. Selecione o idioma que deseja praticar
	2. Clique em 'Gerar nova frase' para mudar a frase (opcional)
	3. Leia a frase em voz alta
	4. Clique no botão de microfone e fale a frase
	5. Veja o feedback e sua pontuação

	---
	Desenvolvido com ❤️ usando Gradio e IA
	""")

	# Launch the app with share=True to create a shareable link;
	# server_name="0.0.0.0" binds the server to all network interfaces.
	demo.launch(share=True, server_name="0.0.0.0")

	# Start the app only when this file is run as a script, not when imported.
	if __name__ == "__main__":
	    main()