syed7 committed on
Commit
d93b225
·
verified ·
1 Parent(s): 98a6bc7

Upload 4 files

Browse files
Files changed (4) hide show
  1. app.py.py +778 -0
  2. ideal_embedding_part_1.npy +3 -0
  3. main8.py +783 -0
  4. qari_part_1.mp3 +0 -0
app.py.py ADDED
@@ -0,0 +1,778 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from google.oauth2 import service_account
3
+ from google.cloud import speech
4
+ import io
5
+ import torch
6
+ import numpy as np
7
+ from transformers import Wav2Vec2Processor
8
+ from transformers.models.wav2vec2 import Wav2Vec2Model
9
+ import librosa
10
+ from groq import Groq
11
+ import sounddevice as sd
12
+ import scipy.io.wavfile as wav
13
+ import os
14
+ from datetime import datetime
15
+ from pydub import AudioSegment
16
+ from pathlib import Path
17
+ from openai import OpenAI
18
+ import json
19
+ import plotly.graph_objects as go
20
+
21
+ # Enhanced UI Styles
22
+ CUSTOM_CSS = """
23
+ <style>
24
+ @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap');
25
+
26
+ /* Base styles */
27
+ :root {
28
+ --primary-color: #2563eb;
29
+ --secondary-color: #1d4ed8;
30
+ --success-color: #059669;
31
+ --warning-color: #d97706;
32
+ --danger-color: #dc2626;
33
+ --text-primary: #111827;
34
+ --text-secondary: #4b5563;
35
+ --bg-primary: #ffffff;
36
+ --bg-secondary: #f3f4f6;
37
+ }
38
+
39
+ .stApp {
40
+ font-family: 'Inter', sans-serif;
41
+ color: var(--text-primary);
42
+ background: var(--bg-secondary);
43
+ }
44
+
45
+ /* Header styles */
46
+ .app-header {
47
+ background: linear-gradient(135deg, var(--primary-color), var(--secondary-color));
48
+ padding: 2rem 1rem;
49
+ text-align: center;
50
+ border-radius: 0 0 1.5rem 1.5rem;
51
+ margin-bottom: 2rem;
52
+ box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
53
+ }
54
+
55
+ .app-title {
56
+ color: white;
57
+ font-size: 2.5rem;
58
+ font-weight: 700;
59
+ margin-bottom: 0.5rem;
60
+ text-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
61
+ }
62
+
63
+ .app-subtitle {
64
+ color: rgba(255, 255, 255, 0.9);
65
+ font-size: 1.2rem;
66
+ font-weight: 500;
67
+ direction: rtl;
68
+ }
69
+
70
+ /* Card styles */
71
+ .card {
72
+ background: var(--bg-primary);
73
+ border-radius: 1rem;
74
+ padding: 1.5rem;
75
+ margin-bottom: 1.5rem;
76
+ box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
77
+ transition: transform 0.2s ease;
78
+ }
79
+
80
+ .card:hover {
81
+ transform: translateY(-2px);
82
+ }
83
+
84
+ .card-header {
85
+ display: flex;
86
+ align-items: center;
87
+ gap: 0.75rem;
88
+ margin-bottom: 1rem;
89
+ padding-bottom: 0.75rem;
90
+ border-bottom: 1px solid var(--bg-secondary);
91
+ }
92
+
93
+ .card-title {
94
+ font-size: 1.25rem;
95
+ font-weight: 600;
96
+ color: var(--text-primary);
97
+ margin: 0;
98
+ }
99
+
100
+ /* Button styles */
101
+ .button-container {
102
+ display: flex;
103
+ gap: 1rem;
104
+ margin-bottom: 1rem;
105
+ }
106
+
107
+ .button-primary {
108
+ background-color: var(--primary-color);
109
+ color: white;
110
+ padding: 0.75rem 1.5rem;
111
+ border-radius: 0.5rem;
112
+ border: none;
113
+ font-weight: 500;
114
+ cursor: pointer;
115
+ transition: background-color 0.2s ease;
116
+ text-align: center;
117
+ display: inline-flex;
118
+ align-items: center;
119
+ justify-content: center;
120
+ gap: 0.5rem;
121
+ }
122
+
123
+ .button-primary:hover {
124
+ background-color: var(--secondary-color);
125
+ }
126
+
127
+ .button-danger {
128
+ background-color: var(--danger-color);
129
+ color: white;
130
+ padding: 0.75rem 1.5rem;
131
+ border-radius: 0.5rem;
132
+ border: none;
133
+ font-weight: 500;
134
+ cursor: pointer;
135
+ transition: background-color 0.2s ease;
136
+ }
137
+
138
+ /* Progress indicator */
139
+ .score-container {
140
+ text-align: center;
141
+ padding: 1.5rem;
142
+ background: var(--bg-secondary);
143
+ border-radius: 1rem;
144
+ margin-bottom: 1.5rem;
145
+ }
146
+
147
+ .score-value {
148
+ font-size: 3rem;
149
+ font-weight: 700;
150
+ color: var(--primary-color);
151
+ }
152
+
153
+ .score-label {
154
+ color: var(--text-secondary);
155
+ font-size: 1.1rem;
156
+ margin-top: 0.5rem;
157
+ }
158
+
159
+ /* Feedback section */
160
+ .feedback-section {
161
+ background: var(--bg-secondary);
162
+ border-radius: 1rem;
163
+ padding: 1.5rem;
164
+ margin-top: 1.5rem;
165
+ }
166
+
167
+ .feedback-item {
168
+ background: white;
169
+ border-radius: 0.5rem;
170
+ padding: 1rem;
171
+ margin-bottom: 1rem;
172
+ box-shadow: 0 2px 4px rgba(0, 0, 0, 0.05);
173
+ }
174
+
175
+ /* Status messages */
176
+ .success-msg {
177
+ background-color: var(--success-color);
178
+ color: white;
179
+ padding: 1rem;
180
+ border-radius: 0.5rem;
181
+ text-align: center;
182
+ margin-bottom: 1rem;
183
+ animation: slideIn 0.3s ease;
184
+ }
185
+
186
+ .error-msg {
187
+ background-color: var(--danger-color);
188
+ color: white;
189
+ padding: 1rem;
190
+ border-radius: 0.5rem;
191
+ text-align: center;
192
+ margin-bottom: 1rem;
193
+ animation: slideIn 0.3s ease;
194
+ }
195
+
196
+ /* Animations */
197
+ @keyframes slideIn {
198
+ from { transform: translateY(-10px); opacity: 0; }
199
+ to { transform: translateY(0); opacity: 1; }
200
+ }
201
+
202
+ /* Responsive adjustments */
203
+ @media (max-width: 768px) {
204
+ .app-title {
205
+ font-size: 2rem;
206
+ }
207
+
208
+ .card {
209
+ padding: 1rem;
210
+ }
211
+
212
+ .button-container {
213
+ flex-direction: column;
214
+ }
215
+
216
+ .score-value {
217
+ font-size: 2.5rem;
218
+ }
219
+ }
220
+ </style>
221
+ """
222
+
223
+
224
+ class AzanTrainerApp:
225
def __init__(self):
    """Prepare the trainer by running every setup stage once, in order.

    The stages are: API clients, configuration constants, working
    directories, and finally the ML models.
    """
    stage_names = (
        "setup_api_clients",
        "setup_configs",
        "setup_directories",
        "initialize_models",
    )
    for stage_name in stage_names:
        # Look up and run one stage at a time so a failure stops the sequence
        # exactly where a plain sequence of calls would.
        getattr(self, stage_name)()
230
+
231
def setup_api_clients(self):
    """Create the OpenAI, Groq and Google Speech clients.

    The API keys are read from the ``OpenAI_api_key`` and ``Groq_api_key``
    environment variables. The previous code referenced module-level names
    that are never defined in this file, which raised ``NameError`` as soon
    as the app was constructed.

    Sets ``self.openai_client``, ``self.groq_client`` and
    ``self.speech_client``.
    """
    openai_key = os.environ.get("OpenAI_api_key")
    groq_key = os.environ.get("Groq_api_key")

    # A missing variable yields None; the client constructors decide how to
    # handle that (NOTE(review): confirm they fail loudly on a missing key).
    self.openai_client = OpenAI(api_key=openai_key)
    self.groq_client = Groq(api_key=groq_key)
    self.speech_client = self.init_google_speech()
236
+
237
def init_google_speech(self):
    """Build a Google Speech client from the bundled service-account file.

    Returns:
        A ``speech.SpeechClient`` authenticated with the credentials loaded
        from ``sa_speecch_demo.json`` (path relative to the working directory).
    """
    key_file = "sa_speecch_demo.json"
    creds = service_account.Credentials.from_service_account_file(key_file)
    client = speech.SpeechClient(credentials=creds)
    return client
243
+
244
def setup_configs(self):
    """Set the recording parameters and the reference phrase.

    Attributes set on ``self``:
        SAMPLE_RATE: sample rate used for recording and for the saved WAV.
        DURATION: recording length in seconds.
        AUDIO_GAIN: multiplier applied after peak normalisation.
        IDEAL_TEXT: the Arabic phrase the user is asked to recite.
        IDEAL_TEXT_MEANING: English translation shown under the phrase.
    """
    self.SAMPLE_RATE = 48000
    self.DURATION = 6
    self.AUDIO_GAIN = 1.50
    # The literal had been corrupted into mojibake (UTF-8 bytes decoded with
    # a single-byte code page); this is the restored Arabic text.
    self.IDEAL_TEXT = "اللّٰهُ أَكْبَرُ، اللّٰهُ أَكْبَرُ"
    self.IDEAL_TEXT_MEANING = "Allah is the Greatest, Allah is the Greatest"
251
+
252
def setup_directories(self):
    """Ensure the output folders for recordings and spoken feedback exist."""
    # Both paths are relative to the current working directory; existing
    # folders are left untouched.
    os.makedirs('recordings', exist_ok=True)
    os.makedirs('feedback_audio', exist_ok=True)
256
+
257
def initialize_models(self):
    """Load the Wav2Vec2 processor/model and the stored reference embedding.

    Sets ``self.processor``, ``self.model`` and ``self.ideal_embedding``
    (the latter is the contents of ``ideal_embedding_part_1.npy`` as a tensor).
    """
    checkpoint = "models/wav2vec2-base"
    self.processor = Wav2Vec2Processor.from_pretrained(checkpoint)
    self.model = Wav2Vec2Model.from_pretrained(checkpoint)

    reference_vector = np.load("ideal_embedding_part_1.npy")
    self.ideal_embedding = torch.tensor(reference_vector)
262
+
263
def create_waveform_visualization(self, audio_path, reference_path):
    """Plot the user's waveform and the expert's waveform on one figure.

    Args:
        audio_path: path of the user's recording.
        reference_path: path of the expert recording.

    Returns:
        A Plotly ``Figure`` with one amplitude-vs-time trace per recording.
    """
    # (file, legend label, line colour) -- drawn in this order.
    tracks = (
        (audio_path, 'Your Recording', '#1E88E5'),
        (reference_path, 'Expert Recording', '#4CAF50'),
    )

    fig = go.Figure()
    for path, label, colour in tracks:
        samples, rate = librosa.load(path)
        # Convert sample indices to seconds for the x axis.
        seconds = np.arange(len(samples)) / rate
        trace = go.Scatter(
            x=seconds,
            y=samples,
            name=label,
            line=dict(color=colour),
        )
        fig.add_trace(trace)

    fig.update_layout(
        title='Waveform Comparison',
        xaxis_title='Time (s)',
        yaxis_title='Amplitude',
        template='plotly_white',
        height=400,
    )
    return fig
296
+
297
def record_audio(self):
    """Capture one mono clip from the microphone and post-process it.

    Returns:
        The enhanced audio array, or ``None`` if recording failed (the error
        is shown in the Streamlit UI).
    """
    try:
        frame_count = int(self.DURATION * self.SAMPLE_RATE)
        captured = sd.rec(
            frame_count,
            samplerate=self.SAMPLE_RATE,
            channels=1,
            dtype=np.float32,
        )
        # Wait for the capture to finish before touching the buffer.
        sd.wait()
        enhanced = self.enhance_audio(captured)
    except Exception as e:
        st.error(f"Recording error: {str(e)}")
        return None
    return enhanced
311
+
312
def enhance_audio(self, audio_data):
    """Peak-normalise the clip, apply the configured gain and gate noise.

    Args:
        audio_data: NumPy array of samples; it is not modified.

    Returns:
        A new array scaled by ``self.AUDIO_GAIN`` after peak normalisation,
        with every sample whose magnitude is below 0.01 set to zero.
    """
    noise_floor = 0.01

    peak = np.max(np.abs(audio_data))
    # The tiny epsilon keeps an all-silent clip from dividing by zero.
    scale = peak + 1e-10
    normalised = audio_data / scale
    boosted = normalised * self.AUDIO_GAIN

    too_quiet = np.abs(boosted) < noise_floor
    boosted[too_quiet] = 0
    return boosted
319
+
320
def save_audio(self, audio_data):
    """Write the clip as a 16-bit WAV under ``recordings/``.

    Args:
        audio_data: float samples; values are scaled by 32767 and clipped to
            the int16 range before writing.

    Returns:
        The path of the written file, ``recordings/audio_<timestamp>.wav``.
    """
    stamp = f"{datetime.now():%Y%m%d_%H%M%S}"
    target = "recordings/audio_" + stamp + ".wav"

    scaled = audio_data * 32767
    pcm = np.clip(scaled, -32768, 32767).astype(np.int16)

    wav.write(target, self.SAMPLE_RATE, pcm)
    return target
327
+
328
def analyze_recording(self, audio_path):
    """Transcribe a recording, score it against the reference, get feedback.

    Args:
        audio_path: path of the WAV file to analyse.

    Returns:
        ``(transcription, similarity_score, feedback)`` on success, or
        ``(None, None, None)`` if any step failed (the error is shown in the
        Streamlit UI).
    """
    mp3_path = None
    try:
        # Convert to MP3 for the Google Speech API. splitext swaps only the
        # final extension; str.replace could leave the path unchanged, in
        # which case the export and the cleanup would hit the original file.
        mp3_path = os.path.splitext(audio_path)[0] + '.mp3'
        AudioSegment.from_wav(audio_path).export(mp3_path, format="mp3")

        # Transcribe audio
        with open(mp3_path, 'rb') as f:
            content = f.read()

        audio = speech.RecognitionAudio(content=content)
        config = speech.RecognitionConfig(
            encoding=speech.RecognitionConfig.AudioEncoding.MP3,
            sample_rate_hertz=self.SAMPLE_RATE,
            language_code="ar"
        )

        response = self.speech_client.recognize(config=config, audio=audio)
        transcription = " ".join(result.alternatives[0].transcript
                                 for result in response.results)

        # Calculate similarity against the stored reference embedding
        user_embedding = self.get_audio_embedding(audio_path)
        similarity_score = self.calculate_similarity(user_embedding, self.ideal_embedding)

        # Generate feedback
        feedback = self.generate_feedback(transcription, similarity_score)

        return transcription, similarity_score, feedback

    except Exception as e:
        st.error(f"Analysis error: {str(e)}")
        return None, None, None

    finally:
        # Remove the temporary MP3 on failure as well as on success, so it
        # no longer piles up in recordings/.
        if mp3_path and mp3_path != audio_path and os.path.exists(mp3_path):
            os.remove(mp3_path)
365
+
366
def get_audio_embedding(self, audio_path):
    """Turn an audio file into a single embedding vector.

    The file is loaded at 16 kHz, passed through the Wav2Vec2 processor and
    model, and the model's last hidden state is averaged over dimension 1.

    Returns:
        The averaged hidden state with size-1 dimensions squeezed out.
    """
    target_rate = 16000
    waveform, _ = librosa.load(audio_path, sr=target_rate)

    model_inputs = self.processor(
        waveform,
        sampling_rate=target_rate,
        return_tensors="pt",
        padding=True,
    )

    # Inference only: no gradients are needed.
    with torch.no_grad():
        hidden_states = self.model(model_inputs.input_values).last_hidden_state
        pooled = hidden_states.mean(dim=1)
        embedding = pooled.squeeze()
    return embedding
374
+
375
def calculate_similarity(self, embedding1, embedding2):
    """Return the cosine similarity of two embeddings as a percentage.

    Args:
        embedding1: first embedding tensor.
        embedding2: second embedding tensor, compared along dimension 0.

    Returns:
        A Python float: the cosine similarity multiplied by 100.
    """
    cosine = torch.nn.functional.cosine_similarity(
        embedding1,
        embedding2,
        dim=0,
    )
    percent = cosine.item() * 100
    return percent
379
+
380
def generate_feedback(self, transcription, similarity_score):
    """Ask the Groq-hosted LLM for Roman Urdu feedback on a recitation.

    Args:
        transcription: text recognised from the user's recording.
        similarity_score: similarity percentage, formatted to two decimals
            in the prompt.

    Returns:
        The text content of the first completion choice.
    """
    prompt = f"""
    Is Azan ki tilawat ka jaiza len aur natural Roman Urdu main feedback den:

    Tilawat: {transcription}
    Mutabiqat Score: {similarity_score:.2f}%

    Feedback ko in 3 hisson main takseem karen:

    1. Talaffuz (Pronunciation):
    - Har lafz ka talaffuz kaisa hai
    - Huroof ki tartib theek hai ya nahi
    - Allah ke lafz ka talaffuz kaisa hai
    - Mukammal Azan ki tarteeb kaisi hai

    2. Waqt aur Lehja (Timing):
    - Har hissay ka sahi dohrao
    - Waqfay ki durustagi
    - Aawaz ka utaar chadhao

    3. Behtar Karne Ke Liye Mashwaray:
    - Kahan ghaltiyan hain
    - Kya behtar karna hai
    - Kis cheez par zyada mehnat ki zaroorat hai

    Note: Feedback zabaan-e-urdu main likhen, lekin English huroof istimal karen.
    Lehja mohtaram aur madadgaar hona chahiye.
    """

    # Single-turn chat request; the prompt is the only message.
    request = dict(
        model="llama3-70b-8192",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.7,
        max_tokens=1000,
    )
    completion = self.groq_client.chat.completions.create(**request)

    first_choice = completion.choices[0]
    return first_choice.message.content
418
+
419
def generate_audio_feedback(self, feedback_text):
    """Synthesise the feedback text to speech and save it as an MP3.

    Args:
        feedback_text: the text to be spoken.

    Returns:
        Path of the MP3 written under ``feedback_audio/``, or ``None`` if
        synthesis failed (the error is shown in the Streamlit UI).
    """
    try:
        stamp = f"{datetime.now():%Y%m%d_%H%M%S}"
        target = "feedback_audio/feedback_" + stamp + ".mp3"

        speech_response = self.openai_client.audio.speech.create(
            model="tts-1",
            voice="alloy",
            input=feedback_text,
        )
        speech_response.stream_to_file(target)
    except Exception as e:
        st.error(f"Error generating audio feedback: {str(e)}")
        return None
    return target
437
+
438
def run(self):
    """Render the Streamlit page and drive the record -> analyse workflow.

    The page is drawn top to bottom: page config and the Persian/Masjid-inspired
    theme CSS, the header with the target phrase, the expert video and the
    reference audio, the recording controls, the analysis results (only when
    ``st.session_state['audio_file']`` holds a recording) and a
    "How to Use" expander.

    Fixes relative to the previous version:
      * results are shown whenever analysis succeeded, even if the
        transcription is empty or the score is 0.0 (``all([...])`` treated
        those as failures and silently displayed nothing);
      * "Clear Recording" no longer raises when the stored path is already
        ``None``;
      * the LLM feedback is HTML-escaped before being placed in raw markup,
        and its line breaks are kept as ``<br>``;
      * emoji that had been corrupted into mojibake are restored.
    """
    import html  # local import; used only to escape the LLM feedback text

    def open_card(icon, title):
        """Emit the opening markup of a titled card."""
        st.markdown(f"""
        <div class="card">
        <div class="card-header">
        <span style="font-size: 2rem;">{icon}</span>
        <h2 class="card-title">{title}</h2>
        </div>
        """, unsafe_allow_html=True)

    def close_card():
        """Emit the closing tag matching open_card."""
        st.markdown("</div>", unsafe_allow_html=True)

    st.set_page_config(
        page_title="Azan Pronunciation Trainer",
        layout="wide",
        initial_sidebar_state="collapsed"
    )

    # Custom CSS with Persian/Masjid-inspired theme
    st.markdown("""
    <style>
    /* Global Styles */
    @import url('https://fonts.googleapis.com/css2?family=Amiri:wght@400;700&display=swap');

    :root {
        --primary-color: #1F4C6B;
        --secondary-color: #C3934B;
        --accent-color: #E6B17E;
        --background-color: #F7F3E9;
        --text-color: #2C3E50;
        --card-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
    }

    .stApp {
        background-color: var(--background-color);
        font-family: 'Amiri', serif;
    }

    /* Header Styles */
    .app-header {
        background: linear-gradient(135deg, var(--primary-color), #2C3E50);
        color: white;
        padding: 2rem;
        border-radius: 15px;
        text-align: center;
        margin-bottom: 2rem;
        box-shadow: var(--card-shadow);
    }

    .app-title {
        font-size: 2.5rem;
        margin-bottom: 0.5rem;
        font-weight: 700;
        background: linear-gradient(45deg, var(--accent-color), #FFD700);
        -webkit-background-clip: text;
        -webkit-text-fill-color: transparent;
    }

    .app-subtitle {
        font-size: 1.2rem;
        opacity: 0.9;
        margin: 0.5rem 0;
    }

    .arabic-text {
        font-family: 'Amiri', serif;
        font-size: 2rem;
        direction: rtl;
        margin: 1rem 0;
        color: var(--secondary-color);
    }

    /* Card Styles */
    .card {
        background: white;
        border-radius: 15px;
        padding: 1.5rem;
        margin-bottom: 1.5rem;
        box-shadow: var(--card-shadow);
        border: 1px solid rgba(195, 147, 75, 0.2);
        transition: transform 0.2s ease;
    }

    .card:hover {
        transform: translateY(-2px);
    }

    .card-header {
        display: flex;
        align-items: center;
        margin-bottom: 1rem;
        border-bottom: 2px solid var(--accent-color);
        padding-bottom: 0.5rem;
    }

    .card-title {
        font-size: 1.3rem;
        margin: 0 0 0 0.5rem;
        color: var(--primary-color);
    }

    /* Button Styles */
    .stButton button {
        background: linear-gradient(45deg, var(--primary-color), var(--secondary-color));
        color: white;
        border: none;
        padding: 0.75rem 1.5rem;
        border-radius: 25px;
        font-weight: bold;
        transition: all 0.3s ease;
        width: 100%;
        margin: 0.5rem 0;
    }

    .stButton button:hover {
        transform: translateY(-2px);
        box-shadow: 0 4px 12px rgba(31, 76, 107, 0.2);
    }

    /* Score Display */
    .score-container {
        background: linear-gradient(135deg, var(--primary-color), var(--secondary-color));
        color: white;
        padding: 2rem;
        border-radius: 15px;
        text-align: center;
        margin: 1.5rem 0;
    }

    .score-value {
        font-size: 3rem;
        font-weight: bold;
        margin-bottom: 0.5rem;
    }

    .score-label {
        font-size: 1.2rem;
        opacity: 0.9;
    }

    /* Feedback Styles */
    .feedback-item {
        background-color: rgba(195, 147, 75, 0.1);
        padding: 1rem;
        border-radius: 10px;
        margin: 1rem 0;
        border-left: 4px solid var(--secondary-color);
    }

    /* Help Section Styling */
    .help-container {
        background: white;
        padding: 1.5rem;
        border-radius: 15px;
        margin-top: 1rem;
    }

    .help-item {
        display: flex;
        align-items: center;
        margin-bottom: 1rem;
        padding: 0.5rem;
        border-radius: 8px;
        background-color: rgba(31, 76, 107, 0.05);
    }

    .help-number {
        background-color: var(--primary-color);
        color: white;
        width: 24px;
        height: 24px;
        border-radius: 50%;
        display: flex;
        align-items: center;
        justify-content: center;
        margin-right: 1rem;
        font-size: 0.9rem;
    }
    </style>
    """, unsafe_allow_html=True)

    # Enhanced Header with Arabic Styling
    st.markdown(f"""
    <div class="app-header">
    <h1 class="app-title">Azan Pronunciation Trainer</h1>
    <p class="app-subtitle">Perfect Your Recitation</p>
    <div class="arabic-text">{self.IDEAL_TEXT}</div>
    <p class="app-subtitle">{self.IDEAL_TEXT_MEANING}</p>
    </div>
    """, unsafe_allow_html=True)

    # Expert demonstration card
    open_card("📹", "Expert Demonstration")
    st.video("qari part-1.mp4")
    close_card()

    # Expert audio card
    open_card("🎵", "Reference Audio")
    st.audio("qari_part_1.mp3")
    close_card()

    # Recording controls card
    open_card("🎙️", "Recording Controls")

    col1, col2 = st.columns(2)

    with col1:
        if st.button("Start Recording", help="Click to start recording (6 seconds)", key="start_rec"):
            with st.spinner("Recording in progress..."):
                audio_data = self.record_audio()
                if audio_data is not None:
                    audio_path = self.save_audio(audio_data)
                    st.session_state['audio_file'] = audio_path
                    st.markdown("""
                    <div class="feedback-item" style="background-color: rgba(46, 204, 113, 0.1); border-left-color: #2ecc71;">
                    Recording completed successfully! ✅
                    </div>
                    """, unsafe_allow_html=True)

    with col2:
        if st.button("Clear Recording", key="clear_rec"):
            if 'audio_file' in st.session_state:
                stored_path = st.session_state['audio_file']
                # The stored value is None after a previous clear; guard so
                # os.path.exists is never called with None.
                if stored_path and os.path.exists(stored_path):
                    os.remove(stored_path)
                st.session_state['audio_file'] = None
                st.markdown("""
                <div class="feedback-item" style="background-color: rgba(231, 76, 60, 0.1); border-left-color: #e74c3c;">
                Recording cleared! 🗑️
                </div>
                """, unsafe_allow_html=True)

    close_card()

    # Analysis section
    if 'audio_file' in st.session_state and st.session_state['audio_file']:
        open_card("🎵", "Your Recording")

        st.audio(st.session_state['audio_file'])

        if st.button("Analyze Recording", key="analyze"):
            with st.spinner("Analyzing your recitation..."):
                transcription, similarity, feedback = self.analyze_recording(
                    st.session_state['audio_file']
                )

                # analyze_recording reports failure as (None, None, None);
                # an empty transcription or a 0.0 score is still a result.
                analysis_ok = all(
                    part is not None
                    for part in (transcription, similarity, feedback)
                )

                if analysis_ok:
                    # Enhanced similarity score display
                    st.markdown(f"""
                    <div class="score-container">
                    <div class="score-value">{similarity:.1f}%</div>
                    <div class="score-label">Similarity Score</div>
                    </div>
                    """, unsafe_allow_html=True)

                    # Waveform visualization
                    fig = self.create_waveform_visualization(
                        st.session_state['audio_file'],
                        "qari_part_1.mp3"
                    )
                    st.plotly_chart(fig, use_container_width=True)

                    # Feedback display. The text comes from an LLM, so it is
                    # escaped before going into raw HTML; newlines become
                    # <br> so blank lines cannot end the HTML block early.
                    safe_feedback = html.escape(feedback).replace("\n", "<br>")
                    st.markdown(
                        '<div class="card">'
                        '<div class="card-header">'
                        '<span style="font-size: 2rem;">📝</span>'
                        '<h2 class="card-title">Detailed Feedback</h2>'
                        '</div>'
                        f'<div class="feedback-item">{safe_feedback}</div>'
                        '</div>',
                        unsafe_allow_html=True,
                    )

                    # Audio feedback (the unescaped text is what gets spoken)
                    audio_feedback_path = self.generate_audio_feedback(feedback)
                    if audio_feedback_path:
                        open_card("🔊", "Audio Feedback")
                        st.audio(audio_feedback_path)
                        close_card()

        close_card()

    # Enhanced help section with numbered steps
    with st.expander("❓ How to Use"):
        st.markdown("""
        <div class="help-container">
        <div class="help-item">
        <div class="help-number">1</div>
        <div>Watch the expert demonstration video carefully</div>
        </div>
        <div class="help-item">
        <div class="help-number">2</div>
        <div>Listen to the reference audio to understand proper pronunciation</div>
        </div>
        <div class="help-item">
        <div class="help-number">3</div>
        <div>Click 'Start Recording' and recite the phrase (6 seconds)</div>
        </div>
        <div class="help-item">
        <div class="help-number">4</div>
        <div>Wait for the recording to complete</div>
        </div>
        <div class="help-item">
        <div class="help-number">5</div>
        <div>Click 'Analyze Recording' to get detailed feedback</div>
        </div>
        <div class="help-item">
        <div class="help-number">6</div>
        <div>Review your score and feedback to improve</div>
        </div>
        <div class="help-item">
        <div class="help-number">7</div>
        <div>Practice until you achieve 90% or higher similarity</div>
        </div>
        </div>
        """, unsafe_allow_html=True)
775
+
776
+ if __name__ == "__main__":
777
+ app = AzanTrainerApp()
778
+ app.run()
ideal_embedding_part_1.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d9ccaf726d2a0038435d0254359f5144a30af97cf81de5f309e7dc3f519fc67
3
+ size 3200
main8.py ADDED
@@ -0,0 +1,783 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from google.oauth2 import service_account
3
+ from google.cloud import speech
4
+ import io
5
+ import torch
6
+ import numpy as np
7
+ from transformers import Wav2Vec2Processor
8
+ from transformers.models.wav2vec2 import Wav2Vec2Model
9
+ import librosa
10
+ from groq import Groq
11
+ import sounddevice as sd
12
+ import scipy.io.wavfile as wav
13
+ import os
14
+ from datetime import datetime
15
+ from pydub import AudioSegment
16
+ from pathlib import Path
17
+ from openai import OpenAI
18
+ import json
19
+ import plotly.graph_objects as go
20
+ import os
21
+
22
+ OpenAI_api_key =os.environ.get('OpenAI_api_key')
23
+
24
+ Groq_api_key = os.environ.get('Groq_api_key')
25
+
26
+ google_creds = os.environ.get('google_creds')
27
+
28
+ # Enhanced UI Styles
29
+ CUSTOM_CSS = """
30
+ <style>
31
+ @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap');
32
+
33
+ /* Base styles */
34
+ :root {
35
+ --primary-color: #2563eb;
36
+ --secondary-color: #1d4ed8;
37
+ --success-color: #059669;
38
+ --warning-color: #d97706;
39
+ --danger-color: #dc2626;
40
+ --text-primary: #111827;
41
+ --text-secondary: #4b5563;
42
+ --bg-primary: #ffffff;
43
+ --bg-secondary: #f3f4f6;
44
+ }
45
+
46
+ .stApp {
47
+ font-family: 'Inter', sans-serif;
48
+ color: var(--text-primary);
49
+ background: var(--bg-secondary);
50
+ }
51
+
52
+ /* Header styles */
53
+ .app-header {
54
+ background: linear-gradient(135deg, var(--primary-color), var(--secondary-color));
55
+ padding: 2rem 1rem;
56
+ text-align: center;
57
+ border-radius: 0 0 1.5rem 1.5rem;
58
+ margin-bottom: 2rem;
59
+ box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
60
+ }
61
+
62
+ .app-title {
63
+ color: white;
64
+ font-size: 2.5rem;
65
+ font-weight: 700;
66
+ margin-bottom: 0.5rem;
67
+ text-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
68
+ }
69
+
70
+ .app-subtitle {
71
+ color: rgba(255, 255, 255, 0.9);
72
+ font-size: 1.2rem;
73
+ font-weight: 500;
74
+ direction: rtl;
75
+ }
76
+
77
+ /* Card styles */
78
+ .card {
79
+ background: var(--bg-primary);
80
+ border-radius: 1rem;
81
+ padding: 1.5rem;
82
+ margin-bottom: 1.5rem;
83
+ box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
84
+ transition: transform 0.2s ease;
85
+ }
86
+
87
+ .card:hover {
88
+ transform: translateY(-2px);
89
+ }
90
+
91
+ .card-header {
92
+ display: flex;
93
+ align-items: center;
94
+ gap: 0.75rem;
95
+ margin-bottom: 1rem;
96
+ padding-bottom: 0.75rem;
97
+ border-bottom: 1px solid var(--bg-secondary);
98
+ }
99
+
100
+ .card-title {
101
+ font-size: 1.25rem;
102
+ font-weight: 600;
103
+ color: var(--text-primary);
104
+ margin: 0;
105
+ }
106
+
107
+ /* Button styles */
108
+ .button-container {
109
+ display: flex;
110
+ gap: 1rem;
111
+ margin-bottom: 1rem;
112
+ }
113
+
114
+ .button-primary {
115
+ background-color: var(--primary-color);
116
+ color: white;
117
+ padding: 0.75rem 1.5rem;
118
+ border-radius: 0.5rem;
119
+ border: none;
120
+ font-weight: 500;
121
+ cursor: pointer;
122
+ transition: background-color 0.2s ease;
123
+ text-align: center;
124
+ display: inline-flex;
125
+ align-items: center;
126
+ justify-content: center;
127
+ gap: 0.5rem;
128
+ }
129
+
130
+ .button-primary:hover {
131
+ background-color: var(--secondary-color);
132
+ }
133
+
134
+ .button-danger {
135
+ background-color: var(--danger-color);
136
+ color: white;
137
+ padding: 0.75rem 1.5rem;
138
+ border-radius: 0.5rem;
139
+ border: none;
140
+ font-weight: 500;
141
+ cursor: pointer;
142
+ transition: background-color 0.2s ease;
143
+ }
144
+
145
+ /* Progress indicator */
146
+ .score-container {
147
+ text-align: center;
148
+ padding: 1.5rem;
149
+ background: var(--bg-secondary);
150
+ border-radius: 1rem;
151
+ margin-bottom: 1.5rem;
152
+ }
153
+
154
+ .score-value {
155
+ font-size: 3rem;
156
+ font-weight: 700;
157
+ color: var(--primary-color);
158
+ }
159
+
160
+ .score-label {
161
+ color: var(--text-secondary);
162
+ font-size: 1.1rem;
163
+ margin-top: 0.5rem;
164
+ }
165
+
166
+ /* Feedback section */
167
+ .feedback-section {
168
+ background: var(--bg-secondary);
169
+ border-radius: 1rem;
170
+ padding: 1.5rem;
171
+ margin-top: 1.5rem;
172
+ }
173
+
174
+ .feedback-item {
175
+ background: white;
176
+ border-radius: 0.5rem;
177
+ padding: 1rem;
178
+ margin-bottom: 1rem;
179
+ box-shadow: 0 2px 4px rgba(0, 0, 0, 0.05);
180
+ }
181
+
182
+ /* Status messages */
183
+ .success-msg {
184
+ background-color: var(--success-color);
185
+ color: white;
186
+ padding: 1rem;
187
+ border-radius: 0.5rem;
188
+ text-align: center;
189
+ margin-bottom: 1rem;
190
+ animation: slideIn 0.3s ease;
191
+ }
192
+
193
+ .error-msg {
194
+ background-color: var(--danger-color);
195
+ color: white;
196
+ padding: 1rem;
197
+ border-radius: 0.5rem;
198
+ text-align: center;
199
+ margin-bottom: 1rem;
200
+ animation: slideIn 0.3s ease;
201
+ }
202
+
203
+ /* Animations */
204
+ @keyframes slideIn {
205
+ from { transform: translateY(-10px); opacity: 0; }
206
+ to { transform: translateY(0); opacity: 1; }
207
+ }
208
+
209
+ /* Responsive adjustments */
210
+ @media (max-width: 768px) {
211
+ .app-title {
212
+ font-size: 2rem;
213
+ }
214
+
215
+ .card {
216
+ padding: 1rem;
217
+ }
218
+
219
+ .button-container {
220
+ flex-direction: column;
221
+ }
222
+
223
+ .score-value {
224
+ font-size: 2.5rem;
225
+ }
226
+ }
227
+ </style>
228
+ """
229
+
230
+
231
+ class AzanTrainerApp:
232
def __init__(self):
    """Prepare the trainer by running every setup stage once, in order.

    The stages are: API clients, configuration constants, working
    directories, and finally the ML models.
    """
    stage_names = (
        "setup_api_clients",
        "setup_configs",
        "setup_directories",
        "initialize_models",
    )
    for stage_name in stage_names:
        # Look up and run one stage at a time so a failure stops the sequence
        # exactly where a plain sequence of calls would.
        getattr(self, stage_name)()
237
+
238
def setup_api_clients(self):
    """Create the OpenAI, Groq and Google Speech clients.

    The OpenAI and Groq keys come from the module-level ``OpenAI_api_key``
    and ``Groq_api_key`` values read from the environment at import time.
    Sets ``self.openai_client``, ``self.groq_client`` and
    ``self.speech_client``.
    """
    openai_client = OpenAI(api_key=OpenAI_api_key)
    self.openai_client = openai_client

    groq_client = Groq(api_key=Groq_api_key)
    self.groq_client = groq_client

    self.speech_client = self.init_google_speech()
243
+
244
def init_google_speech(self):
    """Build a Google Speech client from the ``google_creds`` setting.

    ``google_creds`` (read from the environment at module level) may be
    either a path to a service-account JSON file or the JSON document
    itself, which is how secrets are commonly injected on hosted platforms.

    Returns:
        A ``speech.SpeechClient`` using the loaded credentials.

    Raises:
        RuntimeError: if ``google_creds`` is unset or empty. Previously this
            surfaced as an opaque ``TypeError`` from opening ``None``.
    """
    if not google_creds:
        raise RuntimeError(
            "Environment variable 'google_creds' is not set; it must hold "
            "the service-account JSON or a path to it."
        )

    if google_creds.lstrip().startswith("{"):
        # The variable holds the JSON document itself, not a path.
        credentials = service_account.Credentials.from_service_account_info(
            json.loads(google_creds)
        )
    else:
        credentials = service_account.Credentials.from_service_account_file(
            google_creds
        )
    return speech.SpeechClient(credentials=credentials)
248
+
249
+ def setup_configs(self):
250
+ """Set up configuration variables"""
251
+ self.SAMPLE_RATE = 48000
252
+ self.DURATION = 6
253
+ self.AUDIO_GAIN = 1.50
254
+ self.IDEAL_TEXT = "اللّٰهُ Ψ£ΩŽΩƒΩ’Ψ¨ΩŽΨ±ΩΨŒ اللّٰهُ Ψ£ΩŽΩƒΩ’Ψ¨ΩŽΨ±Ω"
255
+ self.IDEAL_TEXT_MEANING = "Allah is the Greatest, Allah is the Greatest"
256
+
257
+ def setup_directories(self):
258
+ """Create necessary directories"""
259
+ for dir_name in ['recordings', 'feedback_audio']:
260
+ os.makedirs(dir_name, exist_ok=True)
261
+
262
+ def initialize_models(self):
263
+ """Initialize ML models"""
264
+ self.processor = Wav2Vec2Processor.from_pretrained("models/wav2vec2-base")
265
+ self.model = Wav2Vec2Model.from_pretrained("models/wav2vec2-base")
266
+ self.ideal_embedding = torch.tensor(np.load("ideal_embedding_part_1.npy"))
267
+
268
+ def create_waveform_visualization(self, audio_path, reference_path):
269
+ """Create waveform visualization using Plotly"""
270
+ fig = go.Figure()
271
+
272
+ # Process user audio
273
+ y_user, sr_user = librosa.load(audio_path)
274
+ times_user = np.arange(len(y_user)) / sr_user
275
+ fig.add_trace(go.Scatter(
276
+ x=times_user,
277
+ y=y_user,
278
+ name='Your Recording',
279
+ line=dict(color='#1E88E5')
280
+ ))
281
+
282
+ # Process reference audio
283
+ y_ref, sr_ref = librosa.load(reference_path)
284
+ times_ref = np.arange(len(y_ref)) / sr_ref
285
+ fig.add_trace(go.Scatter(
286
+ x=times_ref,
287
+ y=y_ref,
288
+ name='Expert Recording',
289
+ line=dict(color='#4CAF50')
290
+ ))
291
+
292
+ fig.update_layout(
293
+ title='Waveform Comparison',
294
+ xaxis_title='Time (s)',
295
+ yaxis_title='Amplitude',
296
+ template='plotly_white',
297
+ height=400
298
+ )
299
+
300
+ return fig
301
+
302
+ def record_audio(self):
303
+ """Record audio from user"""
304
+ try:
305
+ audio_data = sd.rec(
306
+ int(self.DURATION * self.SAMPLE_RATE),
307
+ samplerate=self.SAMPLE_RATE,
308
+ channels=1,
309
+ dtype=np.float32
310
+ )
311
+ sd.wait()
312
+ return self.enhance_audio(audio_data)
313
+ except Exception as e:
314
+ st.error(f"Recording error: {str(e)}")
315
+ return None
316
+
317
+ def enhance_audio(self, audio_data):
318
+ """Enhance audio quality"""
319
+ audio_data = audio_data / (np.max(np.abs(audio_data)) + 1e-10)
320
+ audio_data = audio_data * self.AUDIO_GAIN
321
+ noise_threshold = 0.01
322
+ audio_data[np.abs(audio_data) < noise_threshold] = 0
323
+ return audio_data
324
+
325
+ def save_audio(self, audio_data):
326
+ """Save audio to file"""
327
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
328
+ filename = f"recordings/audio_{timestamp}.wav"
329
+ audio_data = np.clip(audio_data * 32767, -32768, 32767).astype(np.int16)
330
+ wav.write(filename, self.SAMPLE_RATE, audio_data)
331
+ return filename
332
+
333
+ def analyze_recording(self, audio_path):
334
+ """Analyze the recording"""
335
+ try:
336
+ # Convert to MP3 for Google Speech API
337
+ mp3_path = audio_path.replace('.wav', '.mp3')
338
+ AudioSegment.from_wav(audio_path).export(mp3_path, format="mp3")
339
+
340
+ # Transcribe audio
341
+ with open(mp3_path, 'rb') as f:
342
+ content = f.read()
343
+
344
+ audio = speech.RecognitionAudio(content=content)
345
+ config = speech.RecognitionConfig(
346
+ encoding=speech.RecognitionConfig.AudioEncoding.MP3,
347
+ sample_rate_hertz=self.SAMPLE_RATE,
348
+ language_code="ar"
349
+ )
350
+
351
+ response = self.speech_client.recognize(config=config, audio=audio)
352
+ transcription = " ".join(result.alternatives[0].transcript
353
+ for result in response.results)
354
+
355
+ # Calculate similarity
356
+ user_embedding = self.get_audio_embedding(audio_path)
357
+ similarity_score = self.calculate_similarity(user_embedding, self.ideal_embedding)
358
+
359
+ # Generate feedback
360
+ feedback = self.generate_feedback(transcription, similarity_score)
361
+
362
+ # Clean up
363
+ os.remove(mp3_path)
364
+
365
+ return transcription, similarity_score, feedback
366
+
367
+ except Exception as e:
368
+ st.error(f"Analysis error: {str(e)}")
369
+ return None, None, None
370
+
371
+ def get_audio_embedding(self, audio_path):
372
+ """Generate audio embedding"""
373
+ audio_input, _ = librosa.load(audio_path, sr=16000)
374
+ inputs = self.processor(audio_input, sampling_rate=16000,
375
+ return_tensors="pt", padding=True)
376
+ with torch.no_grad():
377
+ embedding = self.model(inputs.input_values).last_hidden_state.mean(dim=1).squeeze()
378
+ return embedding
379
+
380
+ def calculate_similarity(self, embedding1, embedding2):
381
+ """Calculate similarity score"""
382
+ similarity = torch.nn.functional.cosine_similarity(embedding1, embedding2, dim=0)
383
+ return similarity.item() * 100
384
+
385
+ def generate_feedback(self, transcription, similarity_score):
386
+ """Generate feedback in natural Roman Urdu using LLM"""
387
+ prompt = f"""
388
+ Is Azan ki tilawat ka jaiza len aur natural Roman Urdu main feedback den:
389
+
390
+ Tilawat: {transcription}
391
+ Mutabiqat Score: {similarity_score:.2f}%
392
+
393
+ Feedback ko in 3 hisson main takseem karen:
394
+
395
+ 1. Talaffuz (Pronunciation):
396
+ - Har lafz ka talaffuz kaisa hai
397
+ - Huroof ki tartib theek hai ya nahi
398
+ - Allah ke lafz ka talaffuz kaisa hai
399
+ - Mukammal Azan ki tarteeb kaisi hai
400
+
401
+ 2. Waqt aur Lehja (Timing):
402
+ - Har hissay ka sahi dohrao
403
+ - Waqfay ki durustagi
404
+ - Aawaz ka utaar chadhao
405
+
406
+ 3. Behtar Karne Ke Liye Mashwaray:
407
+ - Kahan ghaltiyan hain
408
+ - Kya behtar karna hai
409
+ - Kis cheez par zyada mehnat ki zaroorat hai
410
+
411
+ Note: Feedback zabaan-e-urdu main likhen, lekin English huroof istimal karen.
412
+ Lehja mohtaram aur madadgaar hona chahiye.
413
+ """
414
+
415
+ response = self.groq_client.chat.completions.create(
416
+ model="llama3-70b-8192",
417
+ messages=[{"role": "user", "content": prompt}],
418
+ temperature=0.7,
419
+ max_tokens=1000
420
+ )
421
+
422
+ return response.choices[0].message.content
423
+
424
+ def generate_audio_feedback(self, feedback_text):
425
+ """Generate audio feedback"""
426
+ try:
427
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
428
+ audio_path = f"feedback_audio/feedback_{timestamp}.mp3"
429
+
430
+ response = self.openai_client.audio.speech.create(
431
+ model="tts-1",
432
+ voice="alloy",
433
+ input=feedback_text
434
+ )
435
+
436
+ response.stream_to_file(audio_path)
437
+ return audio_path
438
+
439
+ except Exception as e:
440
+ st.error(f"Error generating audio feedback: {str(e)}")
441
+ return None
442
+
443
+ def run(self):
444
+ """Run the enhanced Streamlit application with Persian/Masjid-inspired UI"""
445
+ st.set_page_config(
446
+ page_title="Azan Pronunciation Trainer",
447
+ layout="wide",
448
+ initial_sidebar_state="collapsed"
449
+ )
450
+
451
+ # Custom CSS with Persian/Masjid-inspired theme (Keep your existing CSS here)
452
+ st.markdown("""
453
+ <style>
454
+ /* Global Styles */
455
+ @import url('https://fonts.googleapis.com/css2?family=Amiri:wght@400;700&display=swap');
456
+
457
+ :root {
458
+ --primary-color: #1F4C6B;
459
+ --secondary-color: #C3934B;
460
+ --accent-color: #E6B17E;
461
+ --background-color: #F7F3E9;
462
+ --text-color: #2C3E50;
463
+ --card-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
464
+ }
465
+
466
+ .stApp {
467
+ background-color: var(--background-color);
468
+ font-family: 'Amiri', serif;
469
+ }
470
+
471
+ /* Header Styles */
472
+ .app-header {
473
+ background: linear-gradient(135deg, var(--primary-color), #2C3E50);
474
+ color: white;
475
+ padding: 2rem;
476
+ border-radius: 15px;
477
+ text-align: center;
478
+ margin-bottom: 2rem;
479
+ box-shadow: var(--card-shadow);
480
+ }
481
+
482
+ .app-title {
483
+ font-size: 2.5rem;
484
+ margin-bottom: 0.5rem;
485
+ font-weight: 700;
486
+ background: linear-gradient(45deg, var(--accent-color), #FFD700);
487
+ -webkit-background-clip: text;
488
+ -webkit-text-fill-color: transparent;
489
+ }
490
+
491
+ .app-subtitle {
492
+ font-size: 1.2rem;
493
+ opacity: 0.9;
494
+ margin: 0.5rem 0;
495
+ }
496
+
497
+ .arabic-text {
498
+ font-family: 'Amiri', serif;
499
+ font-size: 2rem;
500
+ direction: rtl;
501
+ margin: 1rem 0;
502
+ color: var(--secondary-color);
503
+ }
504
+
505
+ /* Card Styles */
506
+ .card {
507
+ background: white;
508
+ border-radius: 15px;
509
+ padding: 1.5rem;
510
+ margin-bottom: 1.5rem;
511
+ box-shadow: var(--card-shadow);
512
+ border: 1px solid rgba(195, 147, 75, 0.2);
513
+ transition: transform 0.2s ease;
514
+ }
515
+
516
+ .card:hover {
517
+ transform: translateY(-2px);
518
+ }
519
+
520
+ .card-header {
521
+ display: flex;
522
+ align-items: center;
523
+ margin-bottom: 1rem;
524
+ border-bottom: 2px solid var(--accent-color);
525
+ padding-bottom: 0.5rem;
526
+ }
527
+
528
+ .card-title {
529
+ font-size: 1.3rem;
530
+ margin: 0 0 0 0.5rem;
531
+ color: var(--primary-color);
532
+ }
533
+
534
+ /* Button Styles */
535
+ .stButton button {
536
+ background: linear-gradient(45deg, var(--primary-color), var(--secondary-color));
537
+ color: white;
538
+ border: none;
539
+ padding: 0.75rem 1.5rem;
540
+ border-radius: 25px;
541
+ font-weight: bold;
542
+ transition: all 0.3s ease;
543
+ width: 100%;
544
+ margin: 0.5rem 0;
545
+ }
546
+
547
+ .stButton button:hover {
548
+ transform: translateY(-2px);
549
+ box-shadow: 0 4px 12px rgba(31, 76, 107, 0.2);
550
+ }
551
+
552
+ /* Score Display */
553
+ .score-container {
554
+ background: linear-gradient(135deg, var(--primary-color), var(--secondary-color));
555
+ color: white;
556
+ padding: 2rem;
557
+ border-radius: 15px;
558
+ text-align: center;
559
+ margin: 1.5rem 0;
560
+ }
561
+
562
+ .score-value {
563
+ font-size: 3rem;
564
+ font-weight: bold;
565
+ margin-bottom: 0.5rem;
566
+ }
567
+
568
+ .score-label {
569
+ font-size: 1.2rem;
570
+ opacity: 0.9;
571
+ }
572
+
573
+ /* Feedback Styles */
574
+ .feedback-item {
575
+ background-color: rgba(195, 147, 75, 0.1);
576
+ padding: 1rem;
577
+ border-radius: 10px;
578
+ margin: 1rem 0;
579
+ border-left: 4px solid var(--secondary-color);
580
+ }
581
+
582
+ /* Help Section Styling */
583
+ .help-container {
584
+ background: white;
585
+ padding: 1.5rem;
586
+ border-radius: 15px;
587
+ margin-top: 1rem;
588
+ }
589
+
590
+ .help-item {
591
+ display: flex;
592
+ align-items: center;
593
+ margin-bottom: 1rem;
594
+ padding: 0.5rem;
595
+ border-radius: 8px;
596
+ background-color: rgba(31, 76, 107, 0.05);
597
+ }
598
+
599
+ .help-number {
600
+ background-color: var(--primary-color);
601
+ color: white;
602
+ width: 24px;
603
+ height: 24px;
604
+ border-radius: 50%;
605
+ display: flex;
606
+ align-items: center;
607
+ justify-content: center;
608
+ margin-right: 1rem;
609
+ font-size: 0.9rem;
610
+ }
611
+ </style>
612
+ """, unsafe_allow_html=True)
613
+
614
+ # Enhanced Header with Arabic Styling
615
+ st.markdown(f"""
616
+ <div class="app-header">
617
+ <h1 class="app-title">Azan Pronunciation Trainer</h1>
618
+ <p class="app-subtitle">Perfect Your Recitation</p>
619
+ <div class="arabic-text">{self.IDEAL_TEXT}</div>
620
+ <p class="app-subtitle">{self.IDEAL_TEXT_MEANING}</p>
621
+ </div>
622
+ """, unsafe_allow_html=True)
623
+
624
+ # Expert demonstration card
625
+ st.markdown("""
626
+ <div class="card">
627
+ <div class="card-header">
628
+ <span style="font-size: 2rem;">πŸ“Ή</span>
629
+ <h2 class="card-title">Expert Demonstration</h2>
630
+ </div>
631
+ """, unsafe_allow_html=True)
632
+ st.video("qari part-1.mp4")
633
+ st.markdown("</div>", unsafe_allow_html=True)
634
+
635
+ # Expert audio card
636
+ st.markdown("""
637
+ <div class="card">
638
+ <div class="card-header">
639
+ <span style="font-size: 2rem;">🎡</span>
640
+ <h2 class="card-title">Reference Audio</h2>
641
+ </div>
642
+ """, unsafe_allow_html=True)
643
+ st.audio("qari_part_1.mp3")
644
+ st.markdown("</div>", unsafe_allow_html=True)
645
+
646
+ # Recording controls card
647
+ st.markdown("""
648
+ <div class="card">
649
+ <div class="card-header">
650
+ <span style="font-size: 2rem;">πŸŽ™οΈ</span>
651
+ <h2 class="card-title">Recording Controls</h2>
652
+ </div>
653
+ """, unsafe_allow_html=True)
654
+
655
+ col1, col2 = st.columns(2)
656
+
657
+ with col1:
658
+ if st.button("Start Recording", help="Click to start recording (6 seconds)", key="start_rec"):
659
+ with st.spinner("Recording in progress..."):
660
+ audio_data = self.record_audio()
661
+ if audio_data is not None:
662
+ audio_path = self.save_audio(audio_data)
663
+ st.session_state['audio_file'] = audio_path
664
+ st.markdown("""
665
+ <div class="feedback-item" style="background-color: rgba(46, 204, 113, 0.1); border-left-color: #2ecc71;">
666
+ Recording completed successfully! βœ…
667
+ </div>
668
+ """, unsafe_allow_html=True)
669
+
670
+ with col2:
671
+ if st.button("Clear Recording", key="clear_rec"):
672
+ if 'audio_file' in st.session_state:
673
+ if os.path.exists(st.session_state['audio_file']):
674
+ os.remove(st.session_state['audio_file'])
675
+ st.session_state['audio_file'] = None
676
+ st.markdown("""
677
+ <div class="feedback-item" style="background-color: rgba(231, 76, 60, 0.1); border-left-color: #e74c3c;">
678
+ Recording cleared! πŸ—‘οΈ
679
+ </div>
680
+ """, unsafe_allow_html=True)
681
+
682
+ st.markdown("</div>", unsafe_allow_html=True)
683
+
684
+ # Analysis section
685
+ if 'audio_file' in st.session_state and st.session_state['audio_file']:
686
+ st.markdown("""
687
+ <div class="card">
688
+ <div class="card-header">
689
+ <span style="font-size: 2rem;">🎡</span>
690
+ <h2 class="card-title">Your Recording</h2>
691
+ </div>
692
+ """, unsafe_allow_html=True)
693
+
694
+ st.audio(st.session_state['audio_file'])
695
+
696
+ if st.button("Analyze Recording", key="analyze"):
697
+ with st.spinner("Analyzing your recitation..."):
698
+ transcription, similarity, feedback = self.analyze_recording(
699
+ st.session_state['audio_file']
700
+ )
701
+
702
+ if all([transcription, similarity, feedback]):
703
+ # Enhanced similarity score display
704
+ st.markdown(f"""
705
+ <div class="score-container">
706
+ <div class="score-value">{similarity:.1f}%</div>
707
+ <div class="score-label">Similarity Score</div>
708
+ </div>
709
+ """, unsafe_allow_html=True)
710
+
711
+ # Waveform visualization
712
+ fig = self.create_waveform_visualization(
713
+ st.session_state['audio_file'],
714
+ "qari_part_1.mp3"
715
+ )
716
+ st.plotly_chart(fig, use_container_width=True)
717
+
718
+ # Feedback display
719
+ st.markdown(f"""
720
+ <div class="card">
721
+ <div class="card-header">
722
+ <span style="font-size: 2rem;">πŸ“</span>
723
+ <h2 class="card-title">Detailed Feedback</h2>
724
+ </div>
725
+ <div class="feedback-item">
726
+ {feedback}
727
+ </div>
728
+ </div>
729
+ """, unsafe_allow_html=True)
730
+
731
+ # Audio feedback
732
+ audio_feedback_path = self.generate_audio_feedback(feedback)
733
+ if audio_feedback_path:
734
+ st.markdown("""
735
+ <div class="card">
736
+ <div class="card-header">
737
+ <span style="font-size: 2rem;">πŸ”Š</span>
738
+ <h2 class="card-title">Audio Feedback</h2>
739
+ </div>
740
+ """, unsafe_allow_html=True)
741
+ st.audio(audio_feedback_path)
742
+ st.markdown("</div>", unsafe_allow_html=True)
743
+
744
+ st.markdown("</div>", unsafe_allow_html=True)
745
+
746
+ # Enhanced help section with numbered steps
747
+ with st.expander("❓ How to Use"):
748
+ st.markdown("""
749
+ <div class="help-container">
750
+ <div class="help-item">
751
+ <div class="help-number">1</div>
752
+ <div>Watch the expert demonstration video carefully</div>
753
+ </div>
754
+ <div class="help-item">
755
+ <div class="help-number">2</div>
756
+ <div>Listen to the reference audio to understand proper pronunciation</div>
757
+ </div>
758
+ <div class="help-item">
759
+ <div class="help-number">3</div>
760
+ <div>Click 'Start Recording' and recite the phrase (6 seconds)</div>
761
+ </div>
762
+ <div class="help-item">
763
+ <div class="help-number">4</div>
764
+ <div>Wait for the recording to complete</div>
765
+ </div>
766
+ <div class="help-item">
767
+ <div class="help-number">5</div>
768
+ <div>Click 'Analyze Recording' to get detailed feedback</div>
769
+ </div>
770
+ <div class="help-item">
771
+ <div class="help-number">6</div>
772
+ <div>Review your score and feedback to improve</div>
773
+ </div>
774
+ <div class="help-item">
775
+ <div class="help-number">7</div>
776
+ <div>Practice until you achieve 90% or higher similarity</div>
777
+ </div>
778
+ </div>
779
+ """, unsafe_allow_html=True)
780
+
781
if __name__ == "__main__":
    # Script entry point: construct the trainer (API clients, folders and
    # models are set up in its constructor), then render the page.
    app = AzanTrainerApp()
    app.run()
qari_part_1.mp3 ADDED
Binary file (103 kB). View file