NightPrince committed on
Commit
df7af7d
·
verified ·
1 Parent(s): 066dfa2

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +285 -0
app.py ADDED
@@ -0,0 +1,285 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # filename: elegant_arabic_transcriber.py
2
+
3
+ import streamlit as st
4
+ import nemo.collections.asr as nemo_asr
5
+ import soundfile as sf
6
+ import tempfile
7
+ import os
8
+ from pydub import AudioSegment
9
+ import time
10
+
11
+ # Custom CSS for gloomy elegant styling
12
+ st.markdown("""
13
+ <style>
14
+ :root {
15
+ --primary: #3a506b;
16
+ --secondary: #5bc0be;
17
+ --accent: #e55934;
18
+ --background: #1c2541;
19
+ --card: #0b132b;
20
+ --text: #e0e0e0;
21
+ --text-secondary: #b8b8b8;
22
+ }
23
+
24
+ .stApp {
25
+ background-color: var(--background);
26
+ color: var(--text);
27
+ }
28
+
29
+ .main .block-container {
30
+ max-width: 1200px;
31
+ padding: 2rem 3rem;
32
+ }
33
+
34
+ .card {
35
+ background-color: var(--card);
36
+ border-radius: 8px;
37
+ padding: 1.5rem;
38
+ margin-bottom: 1.5rem;
39
+ border-left: 3px solid var(--secondary);
40
+ }
41
+
42
+ .header {
43
+ background: linear-gradient(135deg, #0b132b, #1c2541);
44
+ color: white;
45
+ padding: 2rem 3rem;
46
+ margin: -2rem -3rem 2rem -3rem;
47
+ border-bottom: 1px solid rgba(91, 192, 190, 0.2);
48
+ }
49
+
50
+ .stButton>button {
51
+ background: var(--primary);
52
+ color: white;
53
+ border: none;
54
+ border-radius: 6px;
55
+ padding: 0.7rem 1.5rem;
56
+ font-weight: 500;
57
+ transition: all 0.2s ease;
58
+ border: 1px solid rgba(91, 192, 190, 0.3);
59
+ }
60
+
61
+ .stButton>button:hover {
62
+ background: #2c3e5a;
63
+ color: white;
64
+ }
65
+
66
+ .stDownloadButton>button {
67
+ background: var(--secondary);
68
+ color: #0b132b;
69
+ }
70
+
71
+ .stDownloadButton>button:hover {
72
+ background: #4aa8a6;
73
+ color: #0b132b;
74
+ }
75
+
76
+ .transcript-container {
77
+ background-color: rgba(11, 19, 43, 0.7);
78
+ border-radius: 8px;
79
+ padding: 1.5rem;
80
+ margin-top: 1rem;
81
+ border: 1px solid rgba(91, 192, 190, 0.1);
82
+ }
83
+
84
+ .transcript-box {
85
+ background-color: transparent;
86
+ font-size: 1.1rem;
87
+ line-height: 1.8;
88
+ min-height: 150px;
89
+ direction: rtl;
90
+ font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
91
+ color: var(--text);
92
+ white-space: pre-wrap;
93
+ }
94
+
95
+ .stats {
96
+ display: flex;
97
+ gap: 1rem;
98
+ margin-top: 1rem;
99
+ }
100
+
101
+ .stat-box {
102
+ background-color: rgba(58, 80, 107, 0.5);
103
+ padding: 0.8rem 1rem;
104
+ border-radius: 6px;
105
+ flex: 1;
106
+ min-width: 100px;
107
+ text-align: center;
108
+ border: 1px solid rgba(91, 192, 190, 0.1);
109
+ }
110
+
111
+ .stat-value {
112
+ font-size: 1.2rem;
113
+ font-weight: bold;
114
+ color: var(--secondary);
115
+ }
116
+
117
+ .progress-container {
118
+ height: 6px;
119
+ background-color: rgba(58, 80, 107, 0.5);
120
+ border-radius: 3px;
121
+ margin: 1.5rem 0;
122
+ overflow: hidden;
123
+ }
124
+
125
+ .progress-bar {
126
+ height: 100%;
127
+ background: linear-gradient(90deg, var(--secondary), #4aa8a6);
128
+ border-radius: 3px;
129
+ transition: width 0.4s ease;
130
+ }
131
+
132
+ h1, h2, h3 {
133
+ color: var(--text) !important;
134
+ }
135
+
136
+ .file-uploader {
137
+ border: 2px dashed var(--secondary);
138
+ border-radius: 8px;
139
+ padding: 2rem;
140
+ text-align: center;
141
+ background-color: rgba(91, 192, 190, 0.05);
142
+ margin-bottom: 1.5rem;
143
+ }
144
+
145
+ .feature-icon {
146
+ color: var(--secondary);
147
+ margin-right: 0.5rem;
148
+ }
149
+
150
+ .stSpinner > div {
151
+ border-color: var(--secondary) transparent transparent transparent !important;
152
+ }
153
+ </style>
154
+ """, unsafe_allow_html=True)
155
+
156
+ SUPPORTED_TYPES = ['wav', 'mp3', 'ogg', 'flac', 'm4a']
157
+
158
+ # Load NeMo model once
159
@st.cache_resource
def load_model():
    """Return the pretrained Arabic FastConformer hybrid (RNNT/CTC) ASR model.

    The function is wrapped in ``st.cache_resource`` so the model is meant to
    be loaded once and reused, rather than re-created on every script run.
    """
    asr_model_cls = nemo_asr.models.EncDecHybridRNNTCTCBPEModel
    return asr_model_cls.from_pretrained(
        model_name="nvidia/stt_ar_fastconformer_hybrid_large_pcd_v1.0"
    )
165
+
166
+ model = load_model()
167
+
168
+ # Helper: Convert any audio to 16kHz mono WAV
169
def convert_audio(uploaded_file, target_sample_rate=16000):
    """Decode an audio file and write it to a temporary mono WAV file.

    Args:
        uploaded_file: Path or file-like object that
            ``AudioSegment.from_file`` can decode.
        target_sample_rate: Frame rate (Hz) of the exported WAV.

    Returns:
        Path of the newly created ``.wav`` file. The caller owns the file and
        is responsible for deleting it.

    Raises:
        Exception: Whatever pydub raises while decoding or exporting. The
            temporary file is removed before the error propagates, so a
            failed conversion does not leave a stray file behind.
    """
    # Only a unique path is needed; close the descriptor right away so no
    # unused handle stays open while pydub writes to the same path.
    fd, wav_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    try:
        audio = AudioSegment.from_file(uploaded_file)
        audio = audio.set_frame_rate(target_sample_rate).set_channels(1)
        # export() hands back the file object it wrote to; close it instead
        # of leaving that to the garbage collector.
        audio.export(wav_path, format="wav").close()
    except Exception:
        os.remove(wav_path)
        raise
    return wav_path
175
+
176
+ # App UI
177
+ st.markdown("""
178
+ <div class="header">
179
+ <h1 style="margin-bottom: 0.5rem;">Arabic Transcriber</h1>
180
+ <p style="color: var(--text-secondary); margin-top: 0;">Convert speech to text with precision</p>
181
+ </div>
182
+ """, unsafe_allow_html=True)
183
+
184
+ # Main content - single wide column layout
185
+ st.markdown("""
186
+ <div class="card">
187
+ <div style="display: flex; gap: 1rem; margin-bottom: 1rem;">
188
+ <span class="feature-icon">🔊</span>
189
+ <span>Supports WAV, MP3, OGG, FLAC, M4A</span>
190
+ </div>
191
+ <div style="display: flex; gap: 1rem; margin-bottom: 1rem;">
192
+ <span class="feature-icon">⚡</span>
193
+ <span>Fast processing with advanced AI</span>
194
+ </div>
195
+ </div>
196
+ """, unsafe_allow_html=True)
197
+
198
import html  # stdlib; escapes the transcript before it is embedded in raw HTML


def _show_progress(slot, percent, label):
    """Render the custom HTML progress bar and its caption into ``slot``."""
    # Built from single-line literals so no line of the HTML carries leading
    # indentation that Markdown could interpret as a code block.
    slot.markdown(
        '<div class="progress-container">'
        f'<div class="progress-bar" style="width: {percent}%;"></div>'
        '</div>'
        '<div style="text-align: center; margin-top: 5px; '
        f'color: var(--secondary);">{label}</div>',
        unsafe_allow_html=True,
    )


def _first_transcript(result):
    """Return the text of the first transcription in ``result``.

    NOTE(review): the original code assumed ``result[0].text``. Some NeMo
    releases are understood to return plain strings, or a ``(best, all)``
    tuple for RNNT/hybrid models; those shapes are tolerated here as well.
    Confirm against the installed NeMo version.
    """
    if isinstance(result, tuple):
        result = result[0]
    first = result[0]
    return getattr(first, "text", first)


uploaded_file = st.file_uploader("Drag and drop audio file here", type=SUPPORTED_TYPES)

if uploaded_file is not None:
    # Convert to 16kHz mono wav
    with st.spinner("Preparing audio for transcription..."):
        processed_wav = convert_audio(uploaded_file)

    # From here on the temporary WAV exists; the ``finally`` below removes it
    # on every exit path (success, error, or Streamlit interrupting the run).
    try:
        # Show audio info
        data, sample_rate = sf.read(processed_wav)
        channels = 1 if len(data.shape) == 1 else data.shape[1]
        duration = len(data) / sample_rate

        # Show audio player and info
        st.audio(processed_wav, format="audio/wav")

        st.markdown("### Audio Details")
        st.markdown(
            '<div class="stats">'
            '<div class="stat-box">'
            '<div>Duration</div>'
            f'<div class="stat-value">{duration:.1f}s</div>'
            '</div>'
            '<div class="stat-box">'
            '<div>Sample Rate</div>'
            f'<div class="stat-value">{sample_rate} Hz</div>'
            '</div>'
            '<div class="stat-box">'
            '<div>Channels</div>'
            f'<div class="stat-value">{channels}</div>'
            '</div>'
            '</div>',
            unsafe_allow_html=True,
        )

        # Transcription
        if st.button("Transcribe Audio", type="primary"):
            # Placeholder that is overwritten as the (cosmetic) progress advances
            progress_container = st.empty()
            _show_progress(progress_container, 30, "Processing audio...")

            time.sleep(0.8)
            _show_progress(progress_container, 70, "Transcribing content...")

            # Actual transcription
            with st.spinner(""):
                result = model.transcribe([processed_wav])
                transcript = _first_transcript(result)

            # Update progress to complete
            _show_progress(progress_container, 100, "Transcription complete")

            time.sleep(0.5)
            progress_container.empty()

            st.markdown("### Transcription Results")
            # The transcript is model output rendered with unsafe_allow_html,
            # so escape it: characters such as "<" or "&" must show up as
            # text, not be parsed as markup.
            st.markdown(
                '<div class="transcript-container">'
                f'<div class="transcript-box">{html.escape(str(transcript))}</div>'
                '</div>',
                unsafe_allow_html=True,
            )

            # Download button (gets the raw, unescaped text)
            st.download_button("Download Transcript", transcript,
                               file_name="arabic_transcript.txt")
    finally:
        # Cleanup
        os.remove(processed_wav)
279
+
280
+ # Minimal footer
281
+ st.markdown("""
282
+ <div style="text-align: center; color: var(--text-secondary); padding: 2rem 0; font-size: 0.8rem;">
283
+ <p>Powered by NeMo ASR • Secure local processing</p>
284
+ </div>
285
+ """, unsafe_allow_html=True)