MiakOnline committed on
Commit
ea817e9
·
verified ·
1 Parent(s): 857038c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +98 -118
app.py CHANGED
@@ -11,62 +11,54 @@ from docx import Document
11
  from docx.shared import Pt
12
  from io import BytesIO
13
 
14
- # -----------------------------------------------------
15
- # PAGE CONFIG
16
- # -----------------------------------------------------
17
  st.set_page_config(page_title="RecToText Pro", layout="wide")
18
 
19
- # Increase upload limit to 200MB
20
- st.markdown("""
21
- <style>
22
- .block-container {padding-top: 2rem;}
23
- </style>
24
- """, unsafe_allow_html=True)
25
-
26
- # -----------------------------------------------------
27
- # HEADER
28
- # -----------------------------------------------------
29
  st.title("🎤 RecToText Pro – Intelligent Lecture Transcriber")
30
- st.caption("Upload Lecture | AI Transcription | Excel & Word Export")
31
-
32
- # -----------------------------------------------------
33
- # SIDEBAR CONTROLS
34
- # -----------------------------------------------------
35
- st.sidebar.header("⚙️ Settings")
36
-
37
- model_size = st.sidebar.selectbox(
38
- "Whisper Model",
39
- ["base", "small"]
40
- )
41
-
42
- output_format = st.sidebar.radio(
43
- "Output Format",
44
- ["English", "Roman Urdu"]
45
- )
46
-
47
- if st.sidebar.button("🧹 Clear Session"):
48
- st.session_state.clear()
49
- st.rerun()
50
 
51
- # -----------------------------------------------------
52
  # LOAD WHISPER MODEL (CPU INT8 OPTIMIZED)
53
- # -----------------------------------------------------
54
  @st.cache_resource
55
- def load_model(size):
56
- return WhisperModel(size, device="cpu", compute_type="int8")
 
 
57
 
58
- model = load_model(model_size)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
 
60
- # -----------------------------------------------------
61
- # TEXT PROCESSING FUNCTIONS
62
- # -----------------------------------------------------
63
- def clean_text(text):
64
- filler_words = ["um", "hmm", "acha", "matlab", "uh"]
65
- pattern = r'\b(?:' + '|'.join(filler_words) + r')\b'
66
  text = re.sub(pattern, "", text, flags=re.IGNORECASE)
67
  text = re.sub(r'\s+', ' ', text).strip()
68
 
69
  sentences = re.split(r'(?<=[.!?]) +', text)
 
70
  paragraphs = []
71
  temp = ""
72
 
@@ -81,53 +73,44 @@ def clean_text(text):
81
 
82
  return "\n\n".join(paragraphs)
83
 
84
- def convert_to_roman_urdu(text):
85
- replacements = {
86
- "ہے": "hai",
87
- "میں": "main",
88
- "اور": "aur",
89
- "کیا": "kya",
90
- "کی": "ki",
91
- "کا": "ka"
92
- }
93
- for urdu, roman in replacements.items():
94
- text = text.replace(urdu, roman)
95
- return text
96
 
97
- # -----------------------------------------------------
 
 
98
  # EXPORT EXCEL
99
- # -----------------------------------------------------
100
- def export_excel(segments):
101
  wb = Workbook()
102
  ws = wb.active
103
  ws.title = "Transcription"
104
 
105
- headers = ["Timestamp", "Original Text", "Cleaned Text"]
106
- ws.append(headers)
107
-
108
- for col in range(1, 4):
109
- ws.cell(row=1, column=col).font = Font(bold=True)
110
-
111
- for segment in segments:
112
- timestamp = f"{round(segment.start,2)} - {round(segment.end,2)}"
113
- original = segment.text.strip()
114
- cleaned = clean_text(original)
115
- ws.append([timestamp, original, cleaned])
116
 
117
  buffer = BytesIO()
118
  wb.save(buffer)
119
  buffer.seek(0)
120
  return buffer
121
 
122
- # -----------------------------------------------------
123
  # EXPORT WORD
124
- # -----------------------------------------------------
125
- def export_word(title, cleaned_text):
126
  doc = Document()
127
  doc.add_heading(title, level=1)
128
- doc.add_paragraph("")
129
- paragraphs = cleaned_text.split("\n\n")
130
 
 
131
  for para in paragraphs:
132
  p = doc.add_paragraph(para)
133
  p.paragraph_format.space_after = Pt(12)
@@ -137,87 +120,84 @@ def export_word(title, cleaned_text):
137
  buffer.seek(0)
138
  return buffer
139
 
140
- # -----------------------------------------------------
141
- # FILE UPLOADER (200MB SUPPORT)
142
- # -----------------------------------------------------
143
- uploaded_file = st.file_uploader(
144
- "Upload Lecture Recording (Max 200MB) – MP3, WAV, M4A, AAC",
145
  type=["mp3", "wav", "m4a", "aac"]
146
  )
147
 
148
- if uploaded_file:
149
 
 
150
  try:
151
- st.audio(uploaded_file)
152
 
 
153
  with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
154
- ext = uploaded_file.name.split(".")[-1]
155
- audio = AudioSegment.from_file(uploaded_file, format=ext)
156
  audio.export(tmp.name, format="wav")
157
- temp_audio_path = tmp.name
158
 
159
  start_time = time.time()
160
 
161
- with st.spinner("🔄 Transcribing... Please wait"):
162
- segments, info = model.transcribe(temp_audio_path)
 
 
 
 
 
163
 
164
- os.remove(temp_audio_path)
 
165
 
166
- full_text = ""
167
- segment_list = []
168
 
169
- for segment in segments:
170
- full_text += segment.text + " "
171
- segment_list.append(segment)
172
 
173
- cleaned_text = clean_text(full_text)
 
 
 
174
 
175
- if output_format == "Roman Urdu":
176
- cleaned_text = convert_to_roman_urdu(cleaned_text)
177
 
178
- word_count = len(cleaned_text.split())
179
  processing_time = round(time.time() - start_time, 2)
180
- detected_language = info.language
181
 
182
  col1, col2 = st.columns(2)
183
 
184
  with col1:
185
- st.subheader("📜 Raw Transcription")
186
  st.text_area("", full_text, height=300)
187
 
188
  with col2:
189
- st.subheader("Clean Story Format")
190
- st.text_area("", cleaned_text, height=300)
191
 
192
  st.divider()
193
 
194
- st.write(f"**Detected Language:** {detected_language}")
195
- st.write(f"**Word Count:** {word_count}")
196
- st.write(f"**Processing Time:** {processing_time} sec")
197
 
198
- excel_file = export_excel(segment_list)
199
- word_file = export_word("Lecture Transcription", cleaned_text)
200
 
201
  colA, colB = st.columns(2)
202
 
203
  with colA:
204
- st.download_button(
205
- "📥 Download Excel (.xlsx)",
206
- data=excel_file,
207
- file_name="RecToText_Transcription.xlsx"
208
- )
209
 
210
  with colB:
211
- st.download_button(
212
- "📄 Download Word (.docx)",
213
- data=word_file,
214
- file_name="RecToText_Lecture.docx"
215
- )
216
 
217
- st.success(" Transcription Completed Successfully!")
218
 
219
  except Exception as e:
220
- st.error(" Error Occurred During Processing")
221
  st.exception(e)
222
 
223
  st.markdown("---")
 
11
  from docx.shared import Pt
12
  from io import BytesIO
13
 
 
 
 
14
  st.set_page_config(page_title="RecToText Pro", layout="wide")
15
 
 
 
 
 
 
 
 
 
 
 
16
  st.title("🎤 RecToText Pro – Intelligent Lecture Transcriber")
17
+ st.caption("Strict English / Roman Urdu Output | No Script Mixing")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
+ # -------------------------------------------------------
20
  # LOAD WHISPER MODEL (CPU INT8 OPTIMIZED)
21
+ # -------------------------------------------------------
22
  @st.cache_resource
23
def load_model(size="base"):
    """Build the Whisper model used for transcription.

    The surrounding file decorates this function with ``st.cache_resource``.

    Args:
        size: Whisper model size name passed to ``WhisperModel``. Defaults
            to ``"base"``, the value that was previously hard-coded, so
            existing zero-argument calls behave as before.

    Returns:
        A ``WhisperModel`` configured for CPU with int8 compute.
    """
    return WhisperModel(size, device="cpu", compute_type="int8")
25
+
26
+ model = load_model()
27
 
28
+ # -------------------------------------------------------
29
+ # STRICT ROMAN URDU TRANSLITERATION
30
+ # -------------------------------------------------------
31
def transliterate_to_roman(text):
    """Replace known Urdu words with Roman spellings and drop other non-ASCII.

    Only whole words are replaced. A plain substring replace would also hit
    the inside of longer, unrelated words and leave stray Roman fragments
    behind once the remaining Urdu letters are stripped.

    Args:
        text: Transcribed text that may contain Urdu script.

    Returns:
        The text with the known words romanised and every remaining
        non-ASCII run removed. Whitespace is left untouched.
    """
    replacements = {
        "ہے": "hai",
        "میں": "main",
        "اور": "aur",
        "کیا": "kya",
        "کی": "ki",
        "کا": "ka",
        "سے": "se",
        "کو": "ko",
        "پر": "par",
        "نہیں": "nahin",
    }

    # Longest alternatives first so "کیا" is tried before its prefix "کی".
    ordered_words = sorted(replacements, key=len, reverse=True)
    alternation = "|".join(re.escape(word) for word in ordered_words)
    # The lookarounds require that no word character touches the match,
    # i.e. the Urdu word stands on its own.
    whole_word = re.compile(r"(?<!\w)(?:" + alternation + r")(?!\w)")
    text = whole_word.sub(lambda match: replacements[match.group(0)], text)

    # remove any remaining non-ASCII characters
    text = re.sub(r'[^\x00-\x7F]+', '', text)
    return text
50
 
51
+ # -------------------------------------------------------
52
+ # CLEAN + STRUCTURE STORY
53
+ # -------------------------------------------------------
54
+ def clean_and_structure(text):
55
+ filler = ["um", "hmm", "acha", "matlab", "uh"]
56
+ pattern = r'\b(?:' + '|'.join(filler) + r')\b'
57
  text = re.sub(pattern, "", text, flags=re.IGNORECASE)
58
  text = re.sub(r'\s+', ' ', text).strip()
59
 
60
  sentences = re.split(r'(?<=[.!?]) +', text)
61
+
62
  paragraphs = []
63
  temp = ""
64
 
 
73
 
74
  return "\n\n".join(paragraphs)
75
 
76
+ # -------------------------------------------------------
77
+ # AUDIO CHUNKING (30 SEC SAFE)
78
+ # -------------------------------------------------------
79
def chunk_audio(audio_path, chunk_length_ms=30 * 1000):
    """Split a WAV file into consecutive fixed-length pieces.

    Args:
        audio_path: Path to a WAV file readable by ``AudioSegment.from_wav``.
        chunk_length_ms: Length of each piece in milliseconds. Defaults to
            30 seconds, the value that was previously hard-coded.

    Returns:
        A list of audio slices in order. The last slice may be shorter than
        ``chunk_length_ms``. An empty recording yields an empty list.

    Raises:
        ValueError: If ``chunk_length_ms`` is not positive.
    """
    if chunk_length_ms <= 0:
        raise ValueError("chunk_length_ms must be a positive number of milliseconds")

    audio = AudioSegment.from_wav(audio_path)
    # NOTE(review): cuts fall at fixed offsets, so a word spanning a boundary
    # is split across two chunks — confirm this is acceptable for accuracy.
    return [
        audio[start:start + chunk_length_ms]
        for start in range(0, len(audio), chunk_length_ms)
    ]
88
+
89
+ # -------------------------------------------------------
90
  # EXPORT EXCEL
91
+ # -------------------------------------------------------
92
def export_excel(text):
    """Write the transcript to an in-memory .xlsx workbook.

    The sheet has a bold title in A1 followed by one row per paragraph
    (paragraphs are separated by a blank line). Writing the whole transcript
    into a single cell would lose data on long lectures, because a
    spreadsheet cell holds at most 32,767 characters and longer strings are
    cut off. Paragraphs above that limit are continued on the next row.

    Args:
        text: Transcript text with paragraphs separated by ``"\\n\\n"``.

    Returns:
        A ``BytesIO`` positioned at the start of the saved workbook.
    """
    max_cell_chars = 32767

    wb = Workbook()
    ws = wb.active
    ws.title = "Transcription"

    ws.append(["Lecture Transcription"])
    ws["A1"].font = Font(bold=True)

    for paragraph in text.split("\n\n"):
        for start in range(0, len(paragraph), max_cell_chars):
            ws.append([paragraph[start:start + max_cell_chars]])

    buffer = BytesIO()
    wb.save(buffer)
    buffer.seek(0)
    return buffer
105
 
106
+ # -------------------------------------------------------
107
  # EXPORT WORD
108
+ # -------------------------------------------------------
109
+ def export_word(title, text):
110
  doc = Document()
111
  doc.add_heading(title, level=1)
 
 
112
 
113
+ paragraphs = text.split("\n\n")
114
  for para in paragraphs:
115
  p = doc.add_paragraph(para)
116
  p.paragraph_format.space_after = Pt(12)
 
120
  buffer.seek(0)
121
  return buffer
122
 
123
+ # -------------------------------------------------------
124
+ # FILE UPLOADER
125
+ # -------------------------------------------------------
126
# -------------------------------------------------------
# FILE UPLOADER
# -------------------------------------------------------
uploaded = st.file_uploader(
    "Upload Lecture Recording (MP3, WAV, M4A, AAC) – Max 200MB",
    type=["mp3", "wav", "m4a", "aac"]
)

output_mode = st.radio("Output Language", ["English", "Roman Urdu"])

if uploaded:
    try:
        st.audio(uploaded)

        # Reserve a path for the WAV copy; the handle is closed before
        # anything else writes to that path.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
            temp_path = tmp.name

        pieces = []
        try:
            # Convert to WAV
            ext = uploaded.name.split(".")[-1]
            audio = AudioSegment.from_file(uploaded, format=ext)
            audio.export(temp_path, format="wav")

            start_time = time.time()

            for chunk in chunk_audio(temp_path):
                with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as ctmp:
                    chunk_path = ctmp.name
                try:
                    chunk.export(chunk_path, format="wav")
                    segments, _ = model.transcribe(chunk_path)
                    # Consume the segments before the chunk file is removed.
                    pieces.extend(segment.text for segment in segments)
                finally:
                    # Remove the chunk file even when transcription fails.
                    os.remove(chunk_path)
        finally:
            # Remove the converted recording on success and on failure.
            os.remove(temp_path)

        # Each segment is followed by a single space, as before.
        full_text = "".join(f"{piece} " for piece in pieces)

        if output_mode == "Roman Urdu":
            full_text = transliterate_to_roman(full_text)
        else:
            # NOTE(review): this discards non-English text rather than
            # translating it — confirm that is the intended "English" mode.
            full_text = re.sub(r'[^\x00-\x7F]+', '', full_text)

        structured_text = clean_and_structure(full_text)

        word_count = len(structured_text.split())
        processing_time = round(time.time() - start_time, 2)

        col1, col2 = st.columns(2)

        # Explicit keys keep the two unlabeled text areas distinct even when
        # both hold the same text (for example an empty transcript).
        with col1:
            st.subheader("Raw Transcription")
            st.text_area("", full_text, height=300, key="raw_transcription")

        with col2:
            st.subheader("Clean Story Format")
            st.text_area("", structured_text, height=300, key="clean_story")

        st.divider()

        st.write(f"Word Count: {word_count}")
        st.write(f"Processing Time: {processing_time} sec")

        excel_file = export_excel(structured_text)
        word_file = export_word("Lecture Transcription", structured_text)

        colA, colB = st.columns(2)

        with colA:
            st.download_button("Download Excel (.xlsx)", excel_file, "RecToText.xlsx")

        with colB:
            st.download_button("Download Word (.docx)", word_file, "RecToText.docx")

        st.success("Complete Clean Story Generated Successfully.")

    except Exception as e:
        st.error("Processing Error")
        st.exception(e)
202
 
203
  st.markdown("---")