MiakOnline committed on
Commit
a892cef
·
verified ·
1 Parent(s): ebef77e

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +183 -0
app.py ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import whisper
3
+ import tempfile
4
+ import os
5
+ import time
6
+ import re
7
+ from pydub import AudioSegment
8
+ from openpyxl import Workbook
9
+ from openpyxl.styles import Font
10
+ from io import BytesIO
11
+
12
# ---------------------------
# PAGE CONFIG
# ---------------------------
# Kept ahead of every other st.* call: Streamlit has historically required
# set_page_config() to be the first Streamlit command in the script.
st.set_page_config(
    page_title="RecToText Pro",
    layout="wide",
    page_icon="🎤",
)
20
+
21
# ---------------------------
# SIDEBAR
# ---------------------------
st.sidebar.title("⚙️ Settings")

# Name of the Whisper checkpoint to load for transcription.
model_option = st.sidebar.selectbox(
    "Select Whisper Model",
    ["base", "small"],
)

# Decides whether the cleaned transcript is additionally run through the
# Roman Urdu word replacement before it is displayed.
output_mode = st.sidebar.radio(
    "Output Format",
    ["Roman Urdu", "English"],
)

if st.sidebar.button("🧹 Clear Session"):
    st.session_state.clear()
    # st.experimental_rerun() was deprecated in favour of st.rerun() and is
    # missing from current Streamlit releases. Prefer the new name and fall
    # back to the old one only on installations that predate st.rerun().
    _rerun = getattr(st, "rerun", None) or st.experimental_rerun
    _rerun()
38
+
39
# ---------------------------
# HEADER
# ---------------------------
# Raw HTML is used only to centre the static title and tagline; no user
# input is interpolated into these strings.
st.markdown(
    "<h1 style='text-align:center;'>🎤 RecToText Pro</h1>",
    unsafe_allow_html=True,
)
st.markdown(
    "<p style='text-align:center;'>Intelligent Urdu + English Lecture Transcriber</p>",
    unsafe_allow_html=True,
)
st.divider()
45
+
46
+ # ---------------------------
47
+ # FUNCTIONS
48
+ # ---------------------------
49
+
50
@st.cache_resource
def load_model(model_size):
    """Load the Whisper model identified by ``model_size``.

    The function is wrapped in ``st.cache_resource`` so that Streamlit can
    reuse the loaded model across script reruns instead of loading it again
    on every widget interaction.

    Args:
        model_size: Whisper model name, passed unchanged to
            ``whisper.load_model`` (the sidebar offers "base" and "small").

    Returns:
        The object returned by ``whisper.load_model``.
    """
    return whisper.load_model(model_size)
53
+
54
# Filler words removed from transcripts (matched case-insensitively).
_FILLER_WORDS = ("um", "hmm", "acha", "matlab", "uh", "huh")

# A filler as a whole word, together with a comma that directly follows it
# (Latin "," or Arabic "\u060C"), so "Well, um, yes" does not become
# "Well, , yes".
_FILLER_RE = re.compile(
    r"\b(?:" + "|".join(re.escape(word) for word in _FILLER_WORDS) + r")\b[,\u060C]?",
    flags=re.IGNORECASE,
)

# Whitespace left dangling in front of punctuation once a filler is gone.
_SPACE_BEFORE_PUNCT_RE = re.compile(r"\s+([,.;:!?\u060C\u06D4\u061F])")

# A comma that ends up directly before sentence-final punctuation.
_COMMA_BEFORE_END_RE = re.compile(r"[,\u060C]\s*([.!?\u06D4\u061F])")

_WHITESPACE_RE = re.compile(r"\s+")


def clean_text(text):
    """Strip filler words from ``text`` and tidy the whitespace around them.

    Fillers are removed only as whole words, so "umbrella" or "Hummus" are
    left alone. A comma attached to a removed filler is dropped with it, and
    spaces that would otherwise sit in front of punctuation are removed.

    Args:
        text: Transcript text. ``None`` or an empty string is accepted.

    Returns:
        The cleaned text with runs of whitespace collapsed to one space and
        no leading or trailing whitespace; ``""`` for empty input.
    """
    if not text:
        return ""

    text = _FILLER_RE.sub("", text)
    text = _SPACE_BEFORE_PUNCT_RE.sub(r"\1", text)
    text = _COMMA_BEFORE_END_RE.sub(r"\1", text)
    return _WHITESPACE_RE.sub(" ", text).strip()
60
+
61
def convert_to_roman_urdu(text):
    """Replace a handful of common Urdu words with Roman Urdu spellings.

    This is a placeholder, not a transliterator: only the five words in the
    table below are converted and all other text is returned unchanged.

    Words are matched as whole words only. A plain ``str.replace`` would also
    rewrite the same letters when they occur inside a longer word, producing
    a mix of Urdu and Latin letters within one word.

    Args:
        text: Text that may contain Urdu-script words.

    Returns:
        ``text`` with each standalone occurrence of a known word replaced.
    """
    # Basic placeholder conversion logic
    replacements = {
        "ہے": "hai",
        "میں": "main",
        "اور": "aur",
        "کیا": "kya",
        "آپ": "aap",
    }
    # (?<!\w) / (?!\w) act as word boundaries: the match may not be directly
    # preceded or followed by another letter, digit or underscore.
    pattern = re.compile(
        r"(?<!\w)(?:" + "|".join(re.escape(word) for word in replacements) + r")(?!\w)"
    )
    return pattern.sub(lambda match: replacements[match.group(0)], text)
73
+
74
def process_audio(file_path, model):
    """Transcribe the audio file at ``file_path`` using ``model``.

    Args:
        file_path: Path of the audio file to transcribe.
        model: Object exposing ``transcribe(path)``; the app passes the
            model returned by ``load_model``.

    Returns:
        Whatever ``model.transcribe`` returns. The caller reads the
        ``"text"``, ``"segments"`` and ``"language"`` entries from it.
    """
    return model.transcribe(file_path)
77
+
78
def create_excel(segments):
    """Build an in-memory Excel workbook with one row per transcript segment.

    The sheet is named "Transcription" and has a bold header row followed by
    three columns: the segment's time range, its raw text, and the same text
    after ``clean_text``.

    Args:
        segments: Iterable of mappings, each providing ``"start"``,
            ``"end"`` and ``"text"``.

    Returns:
        A ``BytesIO`` holding the saved workbook, rewound to position 0 so
        it can be handed straight to a download widget.
    """
    wb = Workbook()
    ws = wb.active
    ws.title = "Transcription"

    headers = ["Timestamp", "Transcribed Text", "Cleaned Output"]
    ws.append(headers)

    # Bold every header cell; openpyxl columns are 1-based.
    for col in range(1, len(headers) + 1):
        ws.cell(row=1, column=col).font = Font(bold=True)

    for seg in segments:
        timestamp = f"{round(seg['start'], 2)} - {round(seg['end'], 2)}"
        raw_text = seg["text"]
        ws.append([timestamp, raw_text, clean_text(raw_text)])

    excel_buffer = BytesIO()
    wb.save(excel_buffer)
    # Rewind so the consumer reads the workbook from its first byte.
    excel_buffer.seek(0)
    return excel_buffer
99
+
100
# ---------------------------
# FILE UPLOADER
# ---------------------------
# Evaluates as falsy until the user has selected a file, which is what gates
# the transcription section of the script.
uploaded_file = st.file_uploader(
    "Upload Lecture Recording (.mp3, .wav, .m4a)",
    type=["mp3", "wav", "m4a"],
)
107
+
108
if uploaded_file:

    st.audio(uploaded_file)

    # Only the path is needed here; the handle is closed before pydub writes
    # the converted audio to it. delete=False keeps the file on disk until it
    # is removed explicitly below.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
        temp_audio_path = tmp.name

    try:
        # Rewind in case the upload buffer was read earlier in this run.
        uploaded_file.seek(0)
        audio = AudioSegment.from_file(uploaded_file)
        audio.export(temp_audio_path, format="wav")

        st.info("Loading model...")
        model = load_model(model_option)

        progress = st.progress(0)
        start_time = time.time()

        with st.spinner("Transcribing..."):
            result = process_audio(temp_audio_path, model)
            progress.progress(100)

        end_time = time.time()
    finally:
        # Remove the temp WAV even when decoding or transcription fails, so
        # failed runs do not pile up files in the temp directory.
        os.remove(temp_audio_path)

    detected_lang = result.get("language", "Unknown")
    segments = result["segments"]

    full_text = result["text"]
    cleaned_text = clean_text(full_text)

    if output_mode == "Roman Urdu":
        cleaned_text = convert_to_roman_urdu(cleaned_text)

    word_count = len(cleaned_text.split())
    processing_time = round(end_time - start_time, 2)

    # ---------------------------
    # DISPLAY RESULTS
    # ---------------------------
    col1, col2 = st.columns(2)

    # The two text areas get distinct, non-empty labels (hidden from view).
    # Streamlit derives the identity of a keyless widget from its parameters,
    # so two areas sharing an empty label would collide whenever the raw and
    # cleaned text are identical (e.g. both empty).
    with col1:
        st.subheader("📜 Raw Transcription")
        st.text_area(
            "Raw Transcription",
            full_text,
            height=300,
            label_visibility="collapsed",
        )

    with col2:
        st.subheader("✨ Cleaned Output")
        st.text_area(
            "Cleaned Output",
            cleaned_text,
            height=300,
            label_visibility="collapsed",
        )

    st.divider()

    st.write(f"**Detected Language:** {detected_lang}")
    st.write(f"**Word Count:** {word_count}")
    st.write(f"**Processing Time:** {processing_time} sec")

    # ---------------------------
    # EXCEL DOWNLOAD
    # ---------------------------
    # NOTE(review): clicking a Streamlit button normally reruns the script,
    # which would presumably repeat the transcription above. Consider caching
    # the result if that proves slow; confirm against the deployed version.
    excel_file = create_excel(segments)

    st.download_button(
        label="📥 Download Excel File",
        data=excel_file,
        file_name="transcription.xlsx",
        mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    )
175
+
176
# ---------------------------
# FOOTER
# ---------------------------
# Rendered on every run, whether or not a file has been uploaded.
st.divider()
st.markdown(
    "<p style='text-align:center; font-size:12px;'>Developed with ❤️ using Whisper & Streamlit</p>",
    unsafe_allow_html=True,
)