import streamlit as st from google.oauth2 import service_account from google.cloud import speech import io import torch import numpy as np from transformers import Wav2Vec2Processor from transformers.models.wav2vec2 import Wav2Vec2Model import librosa from groq import Groq # Initialize Google Speech-to-Text, Hugging Face model, and Groq LLM client_file = "gcp_api.json" # Replace with your actual service account file path credentials = service_account.Credentials.from_service_account_file(client_file) speech_client = speech.SpeechClient(credentials=credentials) processor = Wav2Vec2Processor.from_pretrained("models/wav2vec2-base") model = Wav2Vec2Model.from_pretrained("models/wav2vec2-base") ideal_embedding = torch.tensor(np.load("ideal_azan_embedding.npy")) groq_client = Groq(api_key="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") # Define the ideal Azan text (first part only) and its English meaning ideal_text = "اللّٰهُ أَكْبَرُ، اللّٰهُ أَكْبَرُ" ideal_text_meaning = "Allah is the Greatest, Allah is the Greatest" # Function to extract embedding of the uploaded audio def get_audio_embedding(audio_file_path): audio_input, _ = librosa.load(audio_file_path, sr=16000) inputs = processor(audio_input, sampling_rate=16000, return_tensors="pt", padding=True) with torch.no_grad(): embedding = model(inputs.input_values).last_hidden_state.mean(dim=1).squeeze() return embedding # Function to calculate similarity score between user and ideal embeddings def calculate_similarity(embedding1, embedding2): similarity = torch.nn.functional.cosine_similarity(embedding1, embedding2, dim=0) return similarity.item() * 100 # Function to generate feedback with Groq LLM def generate_feedback_with_llm(user_transcription, ideal_text, similarity_score): messages = [ { "role": "system", "content": ( "You are an expert muezzin trainer providing detailed, supportive feedback on a student's Azan transcription accuracy. " "The student has attempted to recite the phrase, and their transcription is compared to the ideal Azan phrase in Arabic. " "Evaluate how closely their recitation matches the ideal Azan text based on articulation, tone, rhythm, and accuracy. " "Emphasize strengths, point out specific areas where improvements can be made, and give clear, practical tips to improve pronunciation. " "Encourage the student with positivity, helping them refine their pronunciation and accuracy until it aligns closely with the ideal." ) }, { "role": "user", "content": ( f"The student's transcription of their recitation is: '{user_transcription}'. " f"The ideal phrase for comparison is: '{ideal_text}'. Their similarity score is {similarity_score:.2f}%. " "Please provide feedback highlighting strengths, improvement areas, and actionable tips for better alignment with the ideal." ) } ] completion = groq_client.chat.completions.create( model="llama3-70b-8192", messages=messages, temperature=0.7, max_tokens=1000 ) return completion.choices[0].message.content # Function to transcribe audio, validate with the ideal text, and provide feedback def transcribe_and_validate(audio_file_path, ideal_text): with io.open(audio_file_path, 'rb') as f: audio_content = f.read() audio = speech.RecognitionAudio(content=audio_content) config = speech.RecognitionConfig( encoding=speech.RecognitionConfig.AudioEncoding.MP3, sample_rate_hertz=48000, language_code="ar" ) response = speech_client.recognize(config=config, audio=audio) transcription = " ".join(result.alternatives[0].transcript for result in response.results) # Refined prompt for validation with LLM content = f""" You are an expert in validating the Azaan (the call to prayer). Below is the correct structure of the Azaan. Compare the transcription provided with this structure to determine if it contains all essential phrases in the correct order. Validation Guidelines: - Validate the Azaan as "VALIDATED" if it contains all essential phrases in the correct sequence, even if there are minor spelling, diacritic, or punctuation differences. - Specifically, ignore small differences such as: - Missing or extra diacritics (e.g., "ا" vs. "أ" or "حي على الصلاه" vs. "حي على الصلاة"). - Minor spelling variations, such as: - "لا اله الا الله" vs. "لا إله إلا الله". - "حي على الصلاه" vs. "حي على الصلاة". - "حي على الفلاح" vs. "حي على الفلاح". - "أشهد" vs "شهاده" - Punctuation or slight variations in commonly understood words and phrases. - Invalidate the Azaan as "INVALIDATED" only if: - Essential phrases are missing. - Extra, unrelated phrases that are not part of the Azaan are added. - Major incorrect words or substitutions that change the meaning of an essential phrase are present. Correct Azaan Structure: "{ideal_text}" Transcribed Azaan: "{transcription}" Conclude with "Validation Status: VALIDATED" if the Azaan matches the correct structure, or "Validation Status: INVALIDATED" if it does not, and list any specific issues if found. Only list issues if they involve missing phrases, extra phrases, or significant meaning changes. """ # Send request to Groq LLM for validation feedback completion = groq_client.chat.completions.create( model="llama3-70b-8192", messages=[{"role": "user", "content": content}], temperature=0, max_tokens=512, ) feedback = completion.choices[0].message.content return transcription, feedback # Streamlit layout st.set_page_config(page_title="Azan Pronunciation Trainer", layout="centered", initial_sidebar_state="expanded") # Display ideal text and its meaning for reference st.markdown("