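"""Streamlit app that scores a user's Azaan (call to prayer) recitation.

Pipeline: transcribe the uploaded audio and a reference recording with
Google Speech Recognition, compare the romanized transcripts, then compare
Wav2Vec2 audio embeddings and ask a Groq-hosted LLM for pronunciation feedback.
"""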
import os
from io import BytesIO

import streamlit as st
import torch
import torch.nn.functional as F
import librosa
import speech_recognition as sr
from pydub import AudioSegment
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from transformers import Wav2Vec2FeatureExtractor, Wav2Vec2Model
from groq import Groq

# Load the pretrained Wav2Vec2 feature extractor and model once at import time
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained("facebook/wav2vec2-base-960h")
model = Wav2Vec2Model.from_pretrained("facebook/wav2vec2-base-960h")

# Initialize the Groq client; read the API key from the environment
# instead of hard-coding a secret in the source
groq_client = Groq(api_key=os.environ["GROQ_API_KEY"])
# Function to transcribe audio into text
def transcribe_audio(audio_file):
    recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(audio_file) as source:
            audio_data = recognizer.record(source)  # Read the entire audio file
            text = recognizer.recognize_google(audio_data, language='ar-SA')  # Arabic transcription
            return text
    except sr.UnknownValueError:  # Speech was unintelligible
        return None
    except sr.RequestError:  # Recognition service was unreachable
        return None
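# Note: sr.AudioFile only reads WAV/AIFF/FLAC streams, so transcribing an
# uploaded mp3/m4a may fail at this step; the embedding path below converts
# formats with pydub, but this transcription step does not.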
# Function to convert Arabic text to Romanized text
def romanize_arabic(text):
    romanized_mapping = {
        "الله": "Allahu",
        "اكبر": "akbar",
        "اشهد": "Ashhadu",
        "ان": "an",
        "لا": "la",
        "اله": "ilaha",
        "الا": "illa",
        "محمد": "Muhammad",
        "رسول": "Rasul",
        "حي": "Hayya",
        "على": "'ala",
        "الصلاه": "as-salah",
        "الفلاح": "al-falah",
    }
    words = text.split()
    romanized_text = ' '.join(romanized_mapping.get(word, word) for word in words)
    return romanized_text
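# Example of the mapping in use (hypothetical transcript fragment):
#   romanize_arabic("الله اكبر")  ->  "Allahu akbar"
# Words outside the mapping pass through unchanged.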
# Function to convert an audio file into embeddings
def get_audio_embedding(file_input):
    # Convert Streamlit file input to BytesIO if it's not a string path
    if not isinstance(file_input, str):
        file_input.seek(0)  # Rewind: an earlier read (e.g. st.audio) may have consumed the stream
        file_input = BytesIO(file_input.read())
    try:
        # Read the audio with pydub (supports mp3, m4a, etc.) and convert it to WAV
        audio = AudioSegment.from_file(file_input)
        wav_io = BytesIO()
        audio.export(wav_io, format="wav")
        wav_io.seek(0)  # Move back to the start of the BytesIO object
        # Load the converted WAV with librosa, resampled to the 16 kHz rate Wav2Vec2 expects
        audio_data, sample_rate = librosa.load(wav_io, sr=16000)
    except Exception as e:
        raise ValueError(f"Failed to process the audio file: {str(e)}")
    # Convert the waveform into a single utterance-level embedding (mean over time)
    inputs = feature_extractor(audio_data, sampling_rate=sample_rate, return_tensors="pt", padding=True)
    with torch.no_grad():
        embeddings = model(**inputs).last_hidden_state.mean(dim=1)
    return embeddings
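# Example usage (assumes the reference file from main() exists locally):
#   emb = get_audio_embedding("Hafiz muqeem.wav")
#   emb.shape  ->  torch.Size([1, 768]) for wav2vec2-base-960h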
# Function to calculate cosine similarity for embeddings
def compare_embeddings(embedding_1, embedding_2):
    similarity = F.cosine_similarity(embedding_1, embedding_2, dim=1)
    return similarity.item()
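# Sanity check with toy tensors (illustrative values, not real embeddings):
#   a = torch.tensor([[1.0, 0.0]]); b = torch.tensor([[0.0, 1.0]])
#   compare_embeddings(a, a)  ->  1.0   (same direction)
#   compare_embeddings(a, b)  ->  0.0   (orthogonal)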
# Function to calculate text similarity using Cosine Similarity
def compare_text_similarity(text1, text2):
    counts = CountVectorizer().fit_transform([text1, text2])  # bag-of-words count vectors
    cosine_sim = cosine_similarity(counts.toarray())
    return cosine_sim[0][1]  # off-diagonal entry: similarity between the two texts
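# Example on romanized transcripts (identical word counts give 1.0, disjoint words 0.0):
#   compare_text_similarity("Allahu akbar", "Allahu akbar")  ->  1.0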
# LLM feedback function using Groq
def generate_llm_feedback(similarity_score):
    feedback_prompt = f"""
    A user has just pronounced part of the Azaan, and the similarity score between their pronunciation and the reference Azaan is {similarity_score:.2f}.
    Based on this score:
    - If the score is above 0.9, the pronunciation is excellent.
    - If the score is between 0.7 and 0.9, the pronunciation is good but may need slight improvement.
    - If the score is below 0.7, the pronunciation requires significant improvement.
    Provide detailed feedback for the user about their pronunciation, considering their score of {similarity_score:.2f}.
    """
    chat_completion = groq_client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": feedback_prompt,
            }
        ],
        model="llama3-8b-8192",
    )
    return chat_completion.choices[0].message.content
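# Example: generate_llm_feedback(0.85) sends a single user message to the
# llama3-8b-8192 model and returns its free-text critique as a string.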
# Custom CSS for styling
st.markdown(
    """
    <style>
    .main {
        background-color: #f5f5f5;
        font-family: 'Arial', sans-serif;
    }
    .title {
        text-align: center;
        color: #2a9d8f;
    }
    .subtitle {
        text-align: center;
        color: #264653;
    }
    .footer {
        text-align: center;
        font-size: 0.8em;
        color: #555;
    }
    .feedback {
        background-color: #e9c6c6;
        border-radius: 10px;
        padding: 20px;
        margin: 10px;
        box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
    }
    </style>
    """,
    unsafe_allow_html=True
)
# Streamlit UI
def main():
    st.title("🔔 Azaan Pronunciation Evaluation")
    st.markdown("<h3 class='subtitle'>Welcome to the Azaan Pronunciation Evaluation!</h3>", unsafe_allow_html=True)
    st.subheader("Upload Your Audio")
    uploaded_file = st.file_uploader("Choose an audio file...", type=["wav", "mp3", "m4a"])
    if uploaded_file is not None:
        st.audio(uploaded_file, format='audio/wav')
        # Step 1: Transcribe the expert audio and the user audio
        expert_audio_path = "Hafiz muqeem.wav"  # Change this to the correct path
        st.write("🎤 Step 1: Checking if the words match...")
        uploaded_file.seek(0)  # Rewind after st.audio consumed the stream
        user_text = transcribe_audio(uploaded_file)
        expert_text = transcribe_audio(expert_audio_path)
        if user_text and expert_text:
            st.write("✅ Transcription successful!")
            st.write(f"**Expert Azaan Text:** {expert_text}")
            st.write(f"**Your Azaan Text:** {user_text}")
            # Step 2: Romanize and compare the transcripts
            user_romanized = romanize_arabic(user_text)
            expert_romanized = romanize_arabic(expert_text)
            text_similarity = compare_text_similarity(user_romanized, expert_romanized)
            st.write(f"📝 Text Similarity Score: {text_similarity:.2f}")
            if text_similarity >= 0.1:
                st.success("✅ Great! Your words match well enough. Now, let's evaluate your pronunciation.")
                # Step 3: Evaluate pronunciation similarity via Wav2Vec2 embeddings
                expert_embedding = get_audio_embedding(expert_audio_path)
                user_embedding = get_audio_embedding(uploaded_file)
                pronunciation_similarity = compare_embeddings(expert_embedding, user_embedding)
                st.write(f"🔊 Pronunciation Similarity Score: {pronunciation_similarity:.2f}")
                # Step 4: Get LLM feedback on the score
                feedback = generate_llm_feedback(pronunciation_similarity)
                st.markdown(f"<div class='feedback'>{feedback}</div>", unsafe_allow_html=True)
            else:
                st.warning("⚠️ Your words do not match sufficiently. Please try again.")
        else:
            st.error("❌ There was an error transcribing one or both audio files.")
    st.markdown("<div class='footer'>© 2024 Azaan Pronunciation Evaluation Tool</div>", unsafe_allow_html=True)

if __name__ == "__main__":
    main()