PranaviPenumetcha committed on
Commit
e27de60
·
verified ·
1 Parent(s): 850b882

Update main_app.py

Browse files
Files changed (1) hide show
  1. main_app.py +429 -123
main_app.py CHANGED
@@ -1,123 +1,429 @@
1
- import streamlit as st
2
- import os
3
- import vosk
4
- import pyaudio
5
- import threading
6
- import queue
7
- import json
8
-
9
- # --- UI Setup ---
10
- st.set_page_config(page_title="DHWANI", layout="wide")
11
- st.markdown(
12
- """
13
- <style>
14
- body, .stApp { background-color: #e3f0ff !important; }
15
- </style>
16
- """, unsafe_allow_html=True
17
- )
18
- st.markdown(
19
- """
20
- <h1 style="text-align:center; color:#1a237e;" id="main-title">DHWANI</h1>
21
- <div style="text-align:center; color:#1a237e;" id="subtitle">
22
- Real-time speech-to-text transcription for Indic languages (Offline)
23
- </div>
24
- """, unsafe_allow_html=True
25
- )
26
-
27
- # --- Language Selection ---
28
- LANGUAGES = {
29
- "Hindi": "vosk-model-small-hi-0.22",
30
- "Telugu": "vosk-model-small-te-0.4",
31
- "Tamil": "vosk-model-small-ta-0.4",
32
- "Bengali": "vosk-model-small-bn-0.22",
33
- "Gujarati": "vosk-model-small-gu-0.4",
34
- "Kannada": "vosk-model-small-kn-0.22",
35
- "Malayalam": "vosk-model-small-ml-0.22",
36
- "Marathi": "vosk-model-small-mr-0.22",
37
- "Punjabi": "vosk-model-small-pa-0.22",
38
- "Urdu": "vosk-model-small-ur-0.22",
39
- "English (India)": "vosk-model-small-en-in-0.4"
40
- }
41
- selected_lang = st.selectbox("Select Language you want to record:", list(LANGUAGES.keys()), index=0)
42
- model_name = LANGUAGES[selected_lang]
43
- model_path = os.path.join("models", model_name) # You must download and place models in ./models/
44
-
45
- # --- App State ---
46
- if "recording" not in st.session_state:
47
- st.session_state.recording = False
48
- if "transcript" not in st.session_state:
49
- st.session_state.transcript = ""
50
-
51
- # --- Helper Functions ---
52
- def recognize_audio(q, model_path, transcript_callback):
53
- vosk.SetLogLevel(-1)
54
- if not os.path.exists(model_path):
55
- q.put("ERROR: Model not found. Download and place it in the 'models' folder.")
56
- return
57
- model = vosk.Model(model_path)
58
- recognizer = vosk.KaldiRecognizer(model, 16000)
59
- mic = pyaudio.PyAudio()
60
- stream = mic.open(format=pyaudio.paInt16, channels=1, rate=16000, input=True, frames_per_buffer=4000)
61
- stream.start_stream()
62
- try:
63
- while st.session_state.recording:
64
- data = stream.read(4000, exception_on_overflow=False)
65
- if recognizer.AcceptWaveform(data):
66
- result = recognizer.Result()
67
- text = json.loads(result)["text"]
68
- if text.strip():
69
- transcript_callback(text)
70
- except Exception as e:
71
- q.put(f"ERROR: {str(e)}")
72
- finally:
73
- stream.stop_stream()
74
- stream.close()
75
- mic.terminate()
76
-
77
- def start_recording():
78
- st.session_state.recording = True
79
- q = queue.Queue()
80
- def update_transcript(text):
81
- st.session_state.transcript += text + " "
82
- thread = threading.Thread(target=recognize_audio, args=(q, model_path, update_transcript), daemon=True)
83
- thread.start()
84
- st.session_state.thread = thread
85
-
86
- def stop_recording():
87
- st.session_state.recording = False
88
-
89
- def clear_text():
90
- st.session_state.transcript = ""
91
-
92
- def print_transcript():
93
- st.write("Printing is not supported in Streamlit directly. Please copy the text below and print from your editor:")
94
-
95
- # --- Controls ---
96
- col1, col2, col3 = st.columns([1,1,1])
97
- with col1:
98
- if not st.session_state.recording:
99
- if st.button("🎤 Start Recording"):
100
- start_recording()
101
- else:
102
- if st.button("🛑 Stop Recording"):
103
- stop_recording()
104
- with col2:
105
- st.button("Clear Text", on_click=clear_text)
106
- with col3:
107
- st.button("🖨 Print", on_click=print_transcript)
108
-
109
- # --- Status and Transcript Display ---
110
- if st.session_state.recording:
111
- st.info("🎙 Listening... Speak now!")
112
- else:
113
- st.warning("Click the microphone to start recording")
114
-
115
- st.text_area("Your transcribed text will appear here...", value=st.session_state.transcript, height=200)
116
-
117
- # --- Instructions for Models ---
118
- st.markdown("""
119
- **Note:**
120
- - Download Vosk models for your chosen language from [alphacephei.com/vosk/models](https://alphacephei.com/vosk/models)
121
- - Place them in a `models` folder in your app directory, e.g. `models/vosk-model-small-hi-0.22` for Hindi.
122
- """)
123
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import streamlit.components.v1 as components
3
+
4
+ def main():
5
+ st.set_page_config(
6
+ page_title="DHWANI",
7
+ layout="wide"
8
+ )
9
+
10
+ # Inject pastel blue background for the whole page (including behind header)
11
+ st.markdown(
12
+ """
13
+ <style>
14
+ body, .stApp {
15
+ background-color: #e3f0ff !important;
16
+ }
17
+ </style>
18
+ """,
19
+ unsafe_allow_html=True
20
+ )
21
+
22
+ # Centered, navy blue headings
23
+ st.markdown(
24
+ """
25
+ <h1 style="text-align:center; color:#1a237e;" id="main-title">DHWANI</h1>
26
+ <div style="text-align:center; color:#1a237e;" id="subtitle">
27
+ Real-time speech-to-text transcription for Indic languages
28
+ </div>
29
+ """,
30
+ unsafe_allow_html=True
31
+ )
32
+
33
+ # Language selection; the selectbox label is styled blue
34
+ st.markdown(
35
+ """
36
+ <style>
37
+ .stSelectbox > label {
38
+ color: blue !important;
39
+ }
40
+ </style>
41
+ """,
42
+ unsafe_allow_html=True
43
+ )
44
+
45
+ languages = {
46
+ "Hindi": "hi-IN",
47
+ "Telugu": "te-IN",
48
+ "Tamil": "ta-IN",
49
+ "Bengali": "bn-IN",
50
+ "Gujarati": "gu-IN",
51
+ "Kannada": "kn-IN",
52
+ "Malayalam": "ml-IN",
53
+ "Marathi": "mr-IN",
54
+ "Punjabi": "pa-IN",
55
+ "Urdu": "ur-IN",
56
+ "English (India)": "en-IN"
57
+ }
58
+
59
+ selected_lang = st.selectbox("Select Language you want to record:", list(languages.keys()), index=0)
60
+ lang_code = languages[selected_lang]
61
+
62
+ html_code = f"""
63
+ <!DOCTYPE html>
64
+ <html>
65
+ <head>
66
+ <style>
67
+ :root {{
68
+ --pastel-blue: #e3f0ff;
69
+ --navy-blue: #1a237e;
70
+ --white: #fff;
71
+ --black: #000;
72
+ }}
73
+ body {{
74
+ font-family: Arial, sans-serif;
75
+ margin: 0;
76
+ padding: 0;
77
+ background: var(--pastel-blue) !important;
78
+ color: var(--navy-blue);
79
+ transition: background 0.2s, color 0.2s;
80
+ }}
81
+ .container {{
82
+ max-width: 800px;
83
+ margin: 0 auto;
84
+ padding: 20px;
85
+ }}
86
+ #main-title, #subtitle {{
87
+ text-align: center;
88
+ color: var(--navy-blue);
89
+ transition: color 0.2s;
90
+ }}
91
+ .mic-button {{
92
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
93
+ border: none;
94
+ border-radius: 50%;
95
+ width: 80px;
96
+ height: 80px;
97
+ color: white;
98
+ font-size: 24px;
99
+ cursor: pointer;
100
+ transition: all 0.3s ease;
101
+ box-shadow: 0 4px 15px rgba(0,0,0,0.2);
102
+ margin: 20px 0;
103
+ }}
104
+ .mic-button:hover {{
105
+ transform: scale(1.05);
106
+ box-shadow: 0 6px 20px rgba(0,0,0,0.3);
107
+ }}
108
+ .mic-button.recording {{
109
+ background: linear-gradient(135deg, #ff6b6b 0%, #ee5a52 100%);
110
+ animation: pulse 1.5s infinite;
111
+ }}
112
+ @keyframes pulse {{
113
+ 0% {{
114
+ box-shadow: 0 0 0 0 rgba(255, 107, 107, 0.7);
115
+ }}
116
+ 70% {{
117
+ box-shadow: 0 0 0 10px rgba(255, 107, 107, 0);
118
+ }}
119
+ 100% {{
120
+ box-shadow: 0 0 0 0 rgba(255, 107, 107, 0);
121
+ }}
122
+ }}
123
+ .transcription-box {{
124
+ width: 100%;
125
+ min-height: 200px;
126
+ padding: 15px;
127
+ border: 2px solid #e0e0e0;
128
+ border-radius: 10px;
129
+ font-size: 16px;
130
+ line-height: 1.5;
131
+ resize: vertical;
132
+ margin-top: 20px;
133
+ background: #f9f9f9;
134
+ color: var(--navy-blue);
135
+ text-align: left;
136
+ transition: background 0.2s, color 0.2s;
137
+ }}
138
+ .status {{
139
+ padding: 10px;
140
+ border-radius: 5px;
141
+ margin: 10px 0;
142
+ font-weight: bold;
143
+ }}
144
+ .status.listening {{
145
+ background-color: #d4edda;
146
+ color: #155724;
147
+ border: 1px solid #c3e6cb;
148
+ }}
149
+ .status.stopped {{
150
+ background-color: #f8d7da;
151
+ color: #721c24;
152
+ border: 1px solid #f5c6cb;
153
+ }}
154
+ .status.error {{
155
+ background-color: #fff3cd;
156
+ color: #856404;
157
+ border: 1px solid #ffeaa7;
158
+ }}
159
+ .controls {{
160
+ display: flex;
161
+ align-items: center;
162
+ gap: 20px;
163
+ margin: 20px 0;
164
+ }}
165
+ .clear-btn {{
166
+ background: #6c757d;
167
+ color: white;
168
+ border: none;
169
+ padding: 10px 20px;
170
+ border-radius: 5px;
171
+ cursor: pointer;
172
+ transition: background 0.3s ease;
173
+ }}
174
+ .clear-btn:hover {{
175
+ background: #545b62;
176
+ }}
177
+ .print-btn {{
178
+ background: #28a745;
179
+ color: white;
180
+ border: none;
181
+ padding: 10px 20px;
182
+ border-radius: 5px;
183
+ cursor: pointer;
184
+ transition: background 0.3s ease;
185
+ }}
186
+ .print-btn:hover {{
187
+ background: #218838;
188
+ }}
189
+ @media (prefers-color-scheme: dark) {{
190
+ body {{
191
+ background: var(--pastel-blue) !important;
192
+ color: var(--white);
193
+ }}
194
+ #main-title, #subtitle {{
195
+ color: var(--white);
196
+ }}
197
+ .transcription-box {{
198
+ background: #222;
199
+ color: var(--white);
200
+ border-color: #444;
201
+ }}
202
+ }}
203
+ </style>
204
+ </head>
205
+ <body>
206
+ <div class="container">
207
+ <div class="controls">
208
+ <button id="micButton" class="mic-button" onclick="toggleRecording()">
209
+ 🎤
210
+ </button>
211
+ <button id="clearButton" class="clear-btn" onclick="clearTranscription()">
212
+ Clear Text
213
+ </button>
214
+ <button id="printButton" class="print-btn" onclick="printTranscription()">
215
+ 🖨 Print
216
+ </button>
217
+ </div>
218
+ <div id="status" class="status stopped">
219
+ Click the microphone to start recording
220
+ </div>
221
+ <textarea id="transcription" class="transcription-box"
222
+ placeholder="Your transcribed text will appear here..."></textarea>
223
+ </div>
224
+ <script>
225
+ let recognition = null;
226
+ let isRecording = false;
227
+ let finalTranscript = ''; // <-- Moved to global scope
228
+ const micButton = document.getElementById('micButton');
229
+ const statusDiv = document.getElementById('status');
230
+ const transcriptionArea = document.getElementById('transcription');
231
+
232
+ if ('webkitSpeechRecognition' in window || 'SpeechRecognition' in window) {{
233
+ const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
234
+ recognition = new SpeechRecognition();
235
+ recognition.continuous = true;
236
+ recognition.interimResults = true;
237
+ recognition.lang = '{lang_code}';
238
+ recognition.onresult = function(event) {{
239
+ let interimTranscript = '';
240
+ for (let i = event.resultIndex; i < event.results.length; i++) {{
241
+ const transcript = event.results[i][0].transcript;
242
+ if (event.results[i].isFinal) {{
243
+ finalTranscript += transcript + ' ';
244
+ }} else {{
245
+ interimTranscript += transcript;
246
+ }}
247
+ }}
248
+ transcriptionArea.value = finalTranscript + interimTranscript;
249
+ transcriptionArea.scrollTop = transcriptionArea.scrollHeight;
250
+ }};
251
+ recognition.onstart = function() {{
252
+ statusDiv.textContent = '🎙 Listening... Speak now!';
253
+ statusDiv.className = 'status listening';
254
+ }};
255
+ recognition.onend = function() {{
256
+ if (isRecording) {{
257
+ recognition.start();
258
+ }} else {{
259
+ statusDiv.textContent = '🛑 Recording stopped';
260
+ statusDiv.className = 'status stopped';
261
+ micButton.classList.remove('recording');
262
+ }}
263
+ }};
264
+ recognition.onerror = function(event) {{
265
+ console.error('Speech recognition error:', event.error);
266
+ let errorMessage = 'Error occurred: ';
267
+ switch(event.error) {{
268
+ case 'no-speech':
269
+ errorMessage += 'No speech detected. Try speaking louder.';
270
+ break;
271
+ case 'audio-capture':
272
+ errorMessage += 'Microphone not accessible.';
273
+ break;
274
+ case 'not-allowed':
275
+ errorMessage += 'Microphone permission denied.';
276
+ break;
277
+ case 'network':
278
+ errorMessage += 'Network error occurred.';
279
+ break;
280
+ default:
281
+ errorMessage += event.error;
282
+ }}
283
+ statusDiv.textContent = errorMessage;
284
+ statusDiv.className = 'status error';
285
+ isRecording = false;
286
+ micButton.classList.remove('recording');
287
+ }};
288
+ }} else {{
289
+ statusDiv.textContent = '❌ Speech recognition not supported in this browser';
290
+ statusDiv.className = 'status error';
291
+ micButton.disabled = true;
292
+ }}
293
+ function toggleRecording() {{
294
+ if (!recognition) return;
295
+ if (isRecording) {{
296
+ isRecording = false;
297
+ recognition.stop();
298
+ micButton.classList.remove('recording');
299
+ statusDiv.textContent = '🛑 Stopping...';
300
+ statusDiv.className = 'status stopped';
301
+ }} else {{
302
+ isRecording = true;
303
+ micButton.classList.add('recording');
304
+ try {{
305
+ recognition.start();
306
+ }} catch (error) {{
307
+ console.error('Error starting recognition:', error);
308
+ isRecording = false;
309
+ micButton.classList.remove('recording');
310
+ }}
311
+ }}
312
+ }}
313
+ function clearTranscription() {{
314
+ transcriptionArea.value = '';
315
+ finalTranscript = ''; // <-- This now resets the transcript for new recordings
316
+ }}
317
+ function printTranscription() {{
318
+ const transcriptionText = transcriptionArea.value;
319
+ if (!transcriptionText.trim()) {{
320
+ alert('No text to print. Please transcribe some speech first.');
321
+ return;
322
+ }}
323
+
324
+ const printWindow = window.open('', '_blank');
325
+ const currentDate = new Date().toLocaleDateString();
326
+ const currentTime = new Date().toLocaleTimeString();
327
+
328
+ printWindow.document.write(`
329
+ <!DOCTYPE html>
330
+ <html>
331
+ <head>
332
+ <title>DHWANI Transcription</title>
333
+ <style>
334
+ body {{
335
+ font-family: Arial, sans-serif;
336
+ line-height: 1.6;
337
+ margin: 40px;
338
+ color: #333;
339
+ }}
340
+ .header {{
341
+ text-align: center;
342
+ margin-bottom: 30px;
343
+ border-bottom: 2px solid #1a237e;
344
+ padding-bottom: 20px;
345
+ }}
346
+ .header h1 {{
347
+ color: #1a237e;
348
+ margin: 0;
349
+ font-size: 28px;
350
+ }}
351
+ .header p {{
352
+ margin: 5px 0;
353
+ color: #666;
354
+ }}
355
+ .transcription {{
356
+ background: #f9f9f9;
357
+ padding: 20px;
358
+ border-radius: 8px;
359
+ border-left: 4px solid #1a237e;
360
+ font-size: 16px;
361
+ line-height: 1.8;
362
+ white-space: pre-wrap;
363
+ word-wrap: break-word;
364
+ }}
365
+ .footer {{
366
+ margin-top: 30px;
367
+ text-align: center;
368
+ font-size: 12px;
369
+ color: #888;
370
+ border-top: 1px solid #ddd;
371
+ padding-top: 20px;
372
+ }}
373
+ @media print {{
374
+ body {{ margin: 20px; }}
375
+ .header {{ page-break-after: avoid; }}
376
+ }}
377
+ </style>
378
+ </head>
379
+ <body>
380
+ <div class="header">
381
+ <h1>DHWANI</h1>
382
+ <p>Speech-to-Text Transcription</p>
383
+ <p>Date: ${{currentDate}} | Time: ${{currentTime}}</p>
384
+ </div>
385
+ <div class="transcription">
386
+ ${{transcriptionText}}
387
+ </div>
388
+ <div class="footer">
389
+ <p>Generated by DHWANI - Real-time speech-to-text transcription for Indic languages</p>
390
+ </div>
391
+ </body>
392
+ </html>
393
+ `);
394
+
395
+ printWindow.document.close();
396
+ printWindow.focus();
397
+
398
+ // Wait a moment for the content to load, then trigger print dialog
399
+ setTimeout(() => {{
400
+ printWindow.print();
401
+ }}, 500);
402
+ }}
403
+ function changeLanguage(langCode) {{
404
+ if (recognition) {{
405
+ recognition.lang = langCode;
406
+ if (isRecording) {{
407
+ recognition.stop();
408
+ setTimeout(() => {{
409
+ if (isRecording) {{
410
+ recognition.start();
411
+ }}
412
+ }}, 100);
413
+ }}
414
+ }}
415
+ }}
416
+ window.addEventListener('message', function(event) {{
417
+ if (event.data.type === 'language-change') {{
418
+ changeLanguage(event.data.langCode);
419
+ }}
420
+ }});
421
+ </script>
422
+ </body>
423
+ </html>
424
+ """
425
+
426
+ components.html(html_code, height=500, scrolling=False)
427
+
428
+ if __name__ == "__main__":
429
+ main()