syed7 committed on
Commit
d061a6b
·
verified ·
1 Parent(s): ddd800d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +165 -165
app.py CHANGED
@@ -1,165 +1,165 @@
1
- import streamlit as st
2
- from google.oauth2 import service_account
3
- from google.cloud import speech
4
- import io
5
- import torch
6
- import numpy as np
7
- from transformers import Wav2Vec2Processor
8
- from transformers.models.wav2vec2 import Wav2Vec2Model
9
- import librosa
10
- from groq import Groq
11
-
12
- # Initialize Google Speech-to-Text, Hugging Face model, and Groq LLM
13
- client_file = "gcp_api.json" # Replace with your actual service account file path
14
- credentials = service_account.Credentials.from_service_account_file(client_file)
15
- speech_client = speech.SpeechClient(credentials=credentials)
16
-
17
- processor = Wav2Vec2Processor.from_pretrained("models/wav2vec2-base")
18
- model = Wav2Vec2Model.from_pretrained("models/wav2vec2-base")
19
-
20
- ideal_embedding = torch.tensor(np.load("ideal_azan_embedding.npy"))
21
-
22
- groq_client = Groq(api_key="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")
23
-
24
- # Define the ideal Azan text (first part only) and its English meaning
25
- ideal_text = "ุงู„ู„ู‘ูฐู‡ู ุฃูŽูƒู’ุจูŽุฑูุŒ ุงู„ู„ู‘ูฐู‡ู ุฃูŽูƒู’ุจูŽุฑู"
26
- ideal_text_meaning = "Allah is the Greatest, Allah is the Greatest"
27
-
28
- # Function to extract embedding of the uploaded audio
29
- def get_audio_embedding(audio_file_path):
30
- audio_input, _ = librosa.load(audio_file_path, sr=16000)
31
- inputs = processor(audio_input, sampling_rate=16000, return_tensors="pt", padding=True)
32
- with torch.no_grad():
33
- embedding = model(inputs.input_values).last_hidden_state.mean(dim=1).squeeze()
34
- return embedding
35
-
36
- # Function to calculate similarity score between user and ideal embeddings
37
- def calculate_similarity(embedding1, embedding2):
38
- similarity = torch.nn.functional.cosine_similarity(embedding1, embedding2, dim=0)
39
- return similarity.item() * 100
40
-
41
- # Function to generate feedback with Groq LLM
42
- def generate_feedback_with_llm(user_transcription, ideal_text, similarity_score):
43
- messages = [
44
- {
45
- "role": "system",
46
- "content": (
47
- "You are an expert muezzin trainer providing detailed, supportive feedback on a student's Azan transcription accuracy. "
48
- "The student has attempted to recite the phrase, and their transcription is compared to the ideal Azan phrase in Arabic. "
49
- "Evaluate how closely their recitation matches the ideal Azan text based on articulation, tone, rhythm, and accuracy. "
50
- "Emphasize strengths, point out specific areas where improvements can be made, and give clear, practical tips to improve pronunciation. "
51
- "Encourage the student with positivity, helping them refine their pronunciation and accuracy until it aligns closely with the ideal."
52
- )
53
- },
54
- {
55
- "role": "user",
56
- "content": (
57
- f"The student's transcription of their recitation is: '{user_transcription}'. "
58
- f"The ideal phrase for comparison is: '{ideal_text}'. Their similarity score is {similarity_score:.2f}%. "
59
- "Please provide feedback highlighting strengths, improvement areas, and actionable tips for better alignment with the ideal."
60
- )
61
- }
62
- ]
63
- completion = groq_client.chat.completions.create(
64
- model="llama3-70b-8192",
65
- messages=messages,
66
- temperature=0.7,
67
- max_tokens=1000
68
- )
69
- return completion.choices[0].message.content
70
-
71
- # Function to transcribe audio, validate with the ideal text, and provide feedback
72
- def transcribe_and_validate(audio_file_path, ideal_text):
73
- with io.open(audio_file_path, 'rb') as f:
74
- audio_content = f.read()
75
- audio = speech.RecognitionAudio(content=audio_content)
76
- config = speech.RecognitionConfig(
77
- encoding=speech.RecognitionConfig.AudioEncoding.MP3,
78
- sample_rate_hertz=48000,
79
- language_code="ar"
80
- )
81
- response = speech_client.recognize(config=config, audio=audio)
82
- transcription = " ".join(result.alternatives[0].transcript for result in response.results)
83
-
84
- # Refined prompt for validation with LLM
85
- content = f"""
86
- You are an expert in validating the Azaan (the call to prayer). Below is the correct structure of the Azaan.
87
- Compare the transcription provided with this structure to determine if it contains all essential phrases in the correct order.
88
-
89
- Validation Guidelines:
90
- - Validate the Azaan as "VALIDATED" if it contains all essential phrases in the correct sequence, even if there are minor spelling, diacritic, or punctuation differences.
91
- - Specifically, ignore small differences such as:
92
- - Missing or extra diacritics (e.g., "ุง" vs. "ุฃ" or "ุญูŠ ุนู„ู‰ ุงู„ุตู„ุงู‡" vs. "ุญูŠ ุนู„ู‰ ุงู„ุตู„ุงุฉ").
93
- - Minor spelling variations, such as:
94
- - "ู„ุง ุงู„ู‡ ุงู„ุง ุงู„ู„ู‡" vs. "ู„ุง ุฅู„ู‡ ุฅู„ุง ุงู„ู„ู‡".
95
- - "ุญูŠ ุนู„ู‰ ุงู„ุตู„ุงู‡" vs. "ุญูŠ ุนู„ู‰ ุงู„ุตู„ุงุฉ".
96
- - "ุญูŠ ุนู„ู‰ ุงู„ูู„ุง๏ฟฝ๏ฟฝ" vs. "ุญูŠ ุนู„ู‰ ุงู„ูู„ุงุญ".
97
- - "ุฃุดู‡ุฏ" vs "ุดู‡ุงุฏู‡"
98
- - Punctuation or slight variations in commonly understood words and phrases.
99
- - Invalidate the Azaan as "INVALIDATED" only if:
100
- - Essential phrases are missing.
101
- - Extra, unrelated phrases that are not part of the Azaan are added.
102
- - Major incorrect words or substitutions that change the meaning of an essential phrase are present.
103
-
104
- Correct Azaan Structure:
105
- "{ideal_text}"
106
-
107
- Transcribed Azaan:
108
- "{transcription}"
109
-
110
- Conclude with "Validation Status: VALIDATED" if the Azaan matches the correct structure, or "Validation Status: INVALIDATED" if it does not, and list any specific issues if found. Only list issues if they involve missing phrases, extra phrases, or significant meaning changes.
111
- """
112
-
113
- # Send request to Groq LLM for validation feedback
114
- completion = groq_client.chat.completions.create(
115
- model="llama3-70b-8192",
116
- messages=[{"role": "user", "content": content}],
117
- temperature=0,
118
- max_tokens=512,
119
- )
120
- feedback = completion.choices[0].message.content
121
-
122
- return transcription, feedback
123
-
124
- # Streamlit layout
125
- st.set_page_config(page_title="Azan Pronunciation Trainer", layout="centered", initial_sidebar_state="expanded")
126
-
127
- # Display ideal text and its meaning for reference
128
- st.markdown("<div style='font-size: 30px; color: #4CAF50; text-align: center;'>Azan Pronunciation Trainer</div>", unsafe_allow_html=True)
129
- st.markdown(f"<div style='font-size: 20px; text-align: center;'>Phrase to Practice: {ideal_text}</div>", unsafe_allow_html=True)
130
- st.markdown(f"<div style='font-size: 18px; text-align: center; color: #555;'>Meaning: {ideal_text_meaning}</div>", unsafe_allow_html=True)
131
-
132
- # Placeholder for expert audio playback
133
- st.audio(r"C:\Users\USER\Downloads\wav2vec\expert_azan_audio.mp3", format="audio/mp3") # Replace with actual path
134
-
135
- # Upload audio file for pronunciation assessment
136
- st.markdown("<div style='font-size: 18px; text-align: center;'>Upload your Azan recitation audio (MP3 format):</div>", unsafe_allow_html=True)
137
- audio_file = st.file_uploader("Choose an audio file", type=["mp3"])
138
-
139
- if audio_file is not None:
140
- with st.spinner("Analyzing your pronunciation..."):
141
- audio_path = "uploaded_audio.mp3"
142
- with open(audio_path, "wb") as f:
143
- f.write(audio_file.read())
144
-
145
- # Transcribe and validate transcription with the ideal text
146
- transcription, validation_feedback = transcribe_and_validate(audio_path, ideal_text)
147
-
148
- # Check if validation is successful
149
- if "Validation Status: VALIDATED" in validation_feedback:
150
- # Perform similarity check if validated
151
- user_embedding = get_audio_embedding(audio_path)
152
- similarity_score = calculate_similarity(user_embedding, ideal_embedding)
153
-
154
- st.markdown(f"<div style='font-size: 18px; color: #333; padding: 10px; border: 2px solid #4CAF50; border-radius: 10px; background-color: #f9f9f9;'><b>Similarity Score:</b> {similarity_score:.2f}%</div>", unsafe_allow_html=True)
155
-
156
- # Provide feedback based on similarity score
157
- if similarity_score >= 90:
158
- st.markdown("<div style='color: green; font-weight: bold;'>Excellent work! Your pronunciation is reverent and accurate. You may proceed to the next phrase.</div>", unsafe_allow_html=True)
159
- else:
160
- llm_feedback = generate_feedback_with_llm(transcription, ideal_text, similarity_score)
161
- st.markdown(f"<div style='font-size: 18px; color: #333; padding: 10px; border: 2px solid #4CAF50; border-radius: 10px; background-color: #f9f9f9;'><b>Feedback:</b><br>{llm_feedback}</div>", unsafe_allow_html=True)
162
- else:
163
- # Inform user to re-record if validation failed
164
- st.markdown("<div style='color: red; font-weight: bold;'>The transcription does not match the ideal Azan phrase. Please record your recitation again.</div>", unsafe_allow_html=True)
165
- st.markdown(f"<div style='font-size: 18px; color: #333; padding: 10px; border: 2px solid #4CAF50; border-radius: 10px; background-color: #f9f9f9;'><b>Feedback:</b><br>{validation_feedback}</div>", unsafe_allow_html=True)
 
1
+ import streamlit as st
2
+ from google.oauth2 import service_account
3
+ from google.cloud import speech
4
+ import io
5
+ import torch
6
+ import numpy as np
7
+ from transformers import Wav2Vec2Processor
8
+ from transformers.models.wav2vec2 import Wav2Vec2Model
9
+ import librosa
10
+ from groq import Groq
11
+
12
import os

# --- Service initialization: Google Speech-to-Text, Wav2Vec2, and Groq LLM ---

# Google Cloud Speech-to-Text client, authenticated with a service-account
# key file shipped alongside the app.
client_file = "gcp_api.json"  # Replace with your actual service account file path
credentials = service_account.Credentials.from_service_account_file(client_file)
speech_client = speech.SpeechClient(credentials=credentials)

# Local Wav2Vec2 checkpoint used to embed audio for similarity scoring.
processor = Wav2Vec2Processor.from_pretrained("models/wav2vec2-base")
model = Wav2Vec2Model.from_pretrained("models/wav2vec2-base")

# Precomputed reference embedding of an expert ("ideal") Azan recitation.
ideal_embedding = torch.tensor(np.load("ideal_azan_embedding.npy"))

# SECURITY: this Groq API key was previously hard-coded (and therefore
# committed to source control) — it must be treated as compromised and
# rotated. Prefer supplying the key via the GROQ_API_KEY environment
# variable; the literal fallback only preserves backward compatibility
# until the key is rotated and removed.
groq_client = Groq(
    api_key=os.environ.get(
        "GROQ_API_KEY",
        "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",
    )
)

# The ideal Azan text (first part only) and its English meaning.
ideal_text = "ุงู„ู„ู‘ูฐู‡ู ุฃูŽูƒู’ุจูŽุฑูุŒ ุงู„ู„ู‘ูฐู‡ู ุฃูŽูƒู’ุจูŽุฑู"
ideal_text_meaning = "Allah is the Greatest, Allah is the Greatest"
27
+
28
def get_audio_embedding(audio_file_path):
    """Return a single fixed-size embedding vector for an audio file.

    The audio is resampled to 16 kHz, run through the Wav2Vec2 model, and
    the per-frame hidden states are mean-pooled over the time dimension.
    """
    # librosa resamples to the 16 kHz rate the Wav2Vec2 processor expects.
    waveform, _sr = librosa.load(audio_file_path, sr=16000)
    features = processor(
        waveform,
        sampling_rate=16000,
        return_tensors="pt",
        padding=True,
    )
    # Inference only — no gradients needed.
    with torch.no_grad():
        hidden = model(features.input_values).last_hidden_state
    # Mean-pool across time and drop the batch axis.
    return hidden.mean(dim=1).squeeze()
35
+
36
def calculate_similarity(embedding1, embedding2):
    """Cosine similarity between two 1-D embeddings, scaled to a percentage.

    Note: cosine similarity can be negative, so the result lies in
    [-100, 100]; similar recitations score close to 100.
    """
    cos = torch.nn.functional.cosine_similarity(embedding1, embedding2, dim=0)
    return cos.item() * 100
40
+
41
def generate_feedback_with_llm(user_transcription, ideal_text, similarity_score):
    """Ask the Groq LLM for supportive coaching feedback on a recitation.

    Builds a trainer-persona system prompt plus a user prompt containing the
    student's transcription, the ideal phrase, and the similarity score, then
    returns the LLM's feedback text.
    """
    system_prompt = (
        "You are an expert muezzin trainer providing detailed, supportive feedback on a student's Azan transcription accuracy. "
        "The student has attempted to recite the phrase, and their transcription is compared to the ideal Azan phrase in Arabic. "
        "Evaluate how closely their recitation matches the ideal Azan text based on articulation, tone, rhythm, and accuracy. "
        "Emphasize strengths, point out specific areas where improvements can be made, and give clear, practical tips to improve pronunciation. "
        "Encourage the student with positivity, helping them refine their pronunciation and accuracy until it aligns closely with the ideal."
    )
    user_prompt = (
        f"The student's transcription of their recitation is: '{user_transcription}'. "
        f"The ideal phrase for comparison is: '{ideal_text}'. Their similarity score is {similarity_score:.2f}%. "
        "Please provide feedback highlighting strengths, improvement areas, and actionable tips for better alignment with the ideal."
    )
    response = groq_client.chat.completions.create(
        model="llama3-70b-8192",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        temperature=0.7,
        max_tokens=1000,
    )
    return response.choices[0].message.content
70
+
71
def transcribe_and_validate(audio_file_path, ideal_text):
    """Transcribe an Azan recording and validate it against the ideal text.

    Runs Google Speech-to-Text on the MP3 file (Arabic, 48 kHz), then asks
    the Groq LLM whether the transcription contains every essential phrase,
    tolerating minor spelling/diacritic/punctuation differences.

    Returns:
        (transcription, feedback): the joined transcript and the LLM's
        verdict text, which contains "Validation Status: VALIDATED" or
        "Validation Status: INVALIDATED".
    """
    # Read the raw MP3 bytes for the Speech-to-Text request.
    with io.open(audio_file_path, 'rb') as f:
        audio_content = f.read()

    # NOTE(review): MP3 decoding in the Speech API may require the
    # v1p1beta1 client — confirm against the installed google-cloud-speech.
    recognition_audio = speech.RecognitionAudio(content=audio_content)
    recognition_config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.MP3,
        sample_rate_hertz=48000,
        language_code="ar",
    )
    stt_response = speech_client.recognize(config=recognition_config, audio=recognition_audio)
    transcription = " ".join(
        result.alternatives[0].transcript for result in stt_response.results
    )

    # Validation prompt: tolerant of minor orthographic variation, strict
    # about missing/extra phrases or meaning changes.
    content = f"""
    You are an expert in validating the Azaan (the call to prayer). Below is the correct structure of the Azaan.
    Compare the transcription provided with this structure to determine if it contains all essential phrases in the correct order.

    Validation Guidelines:
    - Validate the Azaan as "VALIDATED" if it contains all essential phrases in the correct sequence, even if there are minor spelling, diacritic, or punctuation differences.
    - Specifically, ignore small differences such as:
    - Missing or extra diacritics (e.g., "ุง" vs. "ุฃ" or "ุญูŠ ุนู„ู‰ ุงู„ุตู„ุงู‡" vs. "ุญูŠ ุนู„ู‰ ุงู„ุตู„ุงุฉ").
    - Minor spelling variations, such as:
    - "ู„ุง ุงู„ู‡ ุงู„ุง ุงู„ู„ู‡" vs. "ู„ุง ุฅู„ู‡ ุฅู„ุง ุงู„ู„ู‡".
    - "ุญูŠ ุนู„ู‰ ุงู„ุตู„ุงู‡" vs. "ุญูŠ ุนู„ู‰ ุงู„ุตู„ุงุฉ".
    - "ุญูŠ ุนู„ู‰ ุงู„ูู„ุงุญ" vs. "ุญูŠ ุนู„ู‰ ุงู„ูู„ุงุญ".
    - "ุฃุดู‡ุฏ" vs "ุดู‡ุงุฏู‡"
    - Punctuation or slight variations in commonly understood words and phrases.
    - Invalidate the Azaan as "INVALIDATED" only if:
    - Essential phrases are missing.
    - Extra, unrelated phrases that are not part of the Azaan are added.
    - Major incorrect words or substitutions that change the meaning of an essential phrase are present.

    Correct Azaan Structure:
    "{ideal_text}"

    Transcribed Azaan:
    "{transcription}"

    Conclude with "Validation Status: VALIDATED" if the Azaan matches the correct structure, or "Validation Status: INVALIDATED" if it does not, and list any specific issues if found. Only list issues if they involve missing phrases, extra phrases, or significant meaning changes.
    """

    # Deterministic validation call (temperature=0) for a stable verdict.
    verdict = groq_client.chat.completions.create(
        model="llama3-70b-8192",
        messages=[{"role": "user", "content": content}],
        temperature=0,
        max_tokens=512,
    )
    feedback = verdict.choices[0].message.content

    return transcription, feedback
123
+
124
# ---- Streamlit page layout ----
st.set_page_config(page_title="Azan Pronunciation Trainer", layout="centered", initial_sidebar_state="expanded")

# Title, the phrase being practiced, and its English meaning.
st.markdown("<div style='font-size: 30px; color: #4CAF50; text-align: center;'>Azan Pronunciation Trainer</div>", unsafe_allow_html=True)
st.markdown(f"<div style='font-size: 20px; text-align: center;'>Phrase to Practice: {ideal_text}</div>", unsafe_allow_html=True)
st.markdown(f"<div style='font-size: 18px; text-align: center; color: #555;'>Meaning: {ideal_text_meaning}</div>", unsafe_allow_html=True)

# Expert reference recording for the student to imitate.
st.audio("expert_azan_audio.mp3", format="audio/mp3")  # Replace with actual path

# Upload widget for the student's own recitation (MP3 only).
st.markdown("<div style='font-size: 18px; text-align: center;'>Upload your Azan recitation audio (MP3 format):</div>", unsafe_allow_html=True)
audio_file = st.file_uploader("Choose an audio file", type=["mp3"])

# Reusable inline CSS for the green-bordered result "cards" below.
_card = "font-size: 18px; color: #333; padding: 10px; border: 2px solid #4CAF50; border-radius: 10px; background-color: #f9f9f9;"

if audio_file is not None:
    with st.spinner("Analyzing your pronunciation..."):
        # Persist the upload so librosa / Speech-to-Text can read it from disk.
        audio_path = "uploaded_audio.mp3"
        with open(audio_path, "wb") as f:
            f.write(audio_file.read())

        # Step 1: transcribe and let the LLM validate the phrase structure.
        transcription, validation_feedback = transcribe_and_validate(audio_path, ideal_text)

        if "Validation Status: VALIDATED" in validation_feedback:
            # Step 2: score acoustic similarity against the expert embedding.
            user_embedding = get_audio_embedding(audio_path)
            similarity_score = calculate_similarity(user_embedding, ideal_embedding)

            st.markdown(f"<div style='{_card}'><b>Similarity Score:</b> {similarity_score:.2f}%</div>", unsafe_allow_html=True)

            if similarity_score >= 90:
                st.markdown("<div style='color: green; font-weight: bold;'>Excellent work! Your pronunciation is reverent and accurate. You may proceed to the next phrase.</div>", unsafe_allow_html=True)
            else:
                # Below threshold: ask the LLM for targeted coaching feedback.
                llm_feedback = generate_feedback_with_llm(transcription, ideal_text, similarity_score)
                st.markdown(f"<div style='{_card}'><b>Feedback:</b><br>{llm_feedback}</div>", unsafe_allow_html=True)
        else:
            # Validation failed: ask the student to re-record and show why.
            st.markdown("<div style='color: red; font-weight: bold;'>The transcription does not match the ideal Azan phrase. Please record your recitation again.</div>", unsafe_allow_html=True)
            st.markdown(f"<div style='{_card}'><b>Feedback:</b><br>{validation_feedback}</div>", unsafe_allow_html=True)