Spaces:

MiakOnline
/

RecToTextPro

Sleeping

App Files Community

MiakOnline committed on Mar 14

Commit

ea817e9

verified ·

1 Parent(s): 857038c

Update app.py

Browse files

Files changed (1) hide show

app.py +98 -118

app.py CHANGED Viewed

@@ -11,62 +11,54 @@ from docx import Document
 from docx.shared import Pt
 from io import BytesIO
-# -----------------------------------------------------
-# PAGE CONFIG
-# -----------------------------------------------------
 st.set_page_config(page_title="RecToText Pro", layout="wide")
-# Increase upload limit to 200MB
-st.markdown("""
-    <style>
-    .block-container {padding-top: 2rem;}
-    </style>
-""", unsafe_allow_html=True)
-# -----------------------------------------------------
-# HEADER
-# -----------------------------------------------------
 st.title("🎤 RecToText Pro – Intelligent Lecture Transcriber")
-st.caption("Upload Lecture | AI Transcription | Excel & Word Export")
-# -----------------------------------------------------
-# SIDEBAR CONTROLS
-# -----------------------------------------------------
-st.sidebar.header("⚙️ Settings")
-model_size = st.sidebar.selectbox(
-    "Whisper Model",
-    ["base", "small"]
-)
-output_format = st.sidebar.radio(
-    "Output Format",
-    ["English", "Roman Urdu"]
-)
-if st.sidebar.button("🧹 Clear Session"):
-    st.session_state.clear()
-    st.rerun()
-# -----------------------------------------------------
 # LOAD WHISPER MODEL (CPU INT8 OPTIMIZED)
-# -----------------------------------------------------
 @st.cache_resource
-def load_model(size):
-    return WhisperModel(size, device="cpu", compute_type="int8")
-model = load_model(model_size)
-# -----------------------------------------------------
-# TEXT PROCESSING FUNCTIONS
-# -----------------------------------------------------
-def clean_text(text):
-    filler_words = ["um", "hmm", "acha", "matlab", "uh"]
-    pattern = r'\b(?:' + '|'.join(filler_words) + r')\b'
     text = re.sub(pattern, "", text, flags=re.IGNORECASE)
     text = re.sub(r'\s+', ' ', text).strip()
     sentences = re.split(r'(?<=[.!?]) +', text)
     paragraphs = []
     temp = ""
@@ -81,53 +73,44 @@ def clean_text(text):
     return "\n\n".join(paragraphs)
-def convert_to_roman_urdu(text):
-    replacements = {
-        "ہے": "hai",
-        "میں": "main",
-        "اور": "aur",
-        "کیا": "kya",
-        "کی": "ki",
-        "کا": "ka"
-    }
-    for urdu, roman in replacements.items():
-        text = text.replace(urdu, roman)
-    return text
-# -----------------------------------------------------
 # EXPORT EXCEL
-# -----------------------------------------------------
-def export_excel(segments):
     wb = Workbook()
     ws = wb.active
     ws.title = "Transcription"
-    headers = ["Timestamp", "Original Text", "Cleaned Text"]
-    ws.append(headers)
-    for col in range(1, 4):
-        ws.cell(row=1, column=col).font = Font(bold=True)
-    for segment in segments:
-        timestamp = f"{round(segment.start,2)} - {round(segment.end,2)}"
-        original = segment.text.strip()
-        cleaned = clean_text(original)
-        ws.append([timestamp, original, cleaned])
     buffer = BytesIO()
     wb.save(buffer)
     buffer.seek(0)
     return buffer
-# -----------------------------------------------------
 # EXPORT WORD
-# -----------------------------------------------------
-def export_word(title, cleaned_text):
     doc = Document()
     doc.add_heading(title, level=1)
-    doc.add_paragraph("")
-    paragraphs = cleaned_text.split("\n\n")
     for para in paragraphs:
         p = doc.add_paragraph(para)
         p.paragraph_format.space_after = Pt(12)
@@ -137,87 +120,84 @@ def export_word(title, cleaned_text):
     buffer.seek(0)
     return buffer
-# -----------------------------------------------------
-# FILE UPLOADER (200MB SUPPORT)
-# -----------------------------------------------------
-uploaded_file = st.file_uploader(
-    "Upload Lecture Recording (Max 200MB) – MP3, WAV, M4A, AAC",
     type=["mp3", "wav", "m4a", "aac"]
 )
-if uploaded_file:
     try:
-        st.audio(uploaded_file)
         with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
-            ext = uploaded_file.name.split(".")[-1]
-            audio = AudioSegment.from_file(uploaded_file, format=ext)
             audio.export(tmp.name, format="wav")
-            temp_audio_path = tmp.name
         start_time = time.time()
-        with st.spinner("🔄 Transcribing... Please wait"):
-            segments, info = model.transcribe(temp_audio_path)
-        os.remove(temp_audio_path)
-        full_text = ""
-        segment_list = []
-        for segment in segments:
-            full_text += segment.text + " "
-            segment_list.append(segment)
-        cleaned_text = clean_text(full_text)
-        if output_format == "Roman Urdu":
-            cleaned_text = convert_to_roman_urdu(cleaned_text)
-        word_count = len(cleaned_text.split())
         processing_time = round(time.time() - start_time, 2)
-        detected_language = info.language
         col1, col2 = st.columns(2)
         with col1:
-            st.subheader("📜 Raw Transcription")
             st.text_area("", full_text, height=300)
         with col2:
-            st.subheader("✨ Clean Story Format")
-            st.text_area("", cleaned_text, height=300)
         st.divider()
-        st.write(f"**Detected Language:** {detected_language}")
-        st.write(f"**Word Count:** {word_count}")
-        st.write(f"**Processing Time:** {processing_time} sec")
-        excel_file = export_excel(segment_list)
-        word_file = export_word("Lecture Transcription", cleaned_text)
         colA, colB = st.columns(2)
         with colA:
-            st.download_button(
-                "📥 Download Excel (.xlsx)",
-                data=excel_file,
-                file_name="RecToText_Transcription.xlsx"
-            )
         with colB:
-            st.download_button(
-                "📄 Download Word (.docx)",
-                data=word_file,
-                file_name="RecToText_Lecture.docx"
-            )
-        st.success("✅ Transcription Completed Successfully!")
     except Exception as e:
-        st.error("❌ Error Occurred During Processing")
         st.exception(e)
 st.markdown("---")

 from docx.shared import Pt
 from io import BytesIO
 st.set_page_config(page_title="RecToText Pro", layout="wide")
 st.title("🎤 RecToText Pro – Intelligent Lecture Transcriber")
+st.caption("Strict English / Roman Urdu Output | No Script Mixing")
+# -------------------------------------------------------
 # LOAD WHISPER MODEL (CPU INT8 OPTIMIZED)
+# -------------------------------------------------------
 @st.cache_resource
+def load_model():
+    return WhisperModel("base", device="cpu", compute_type="int8")
+model = load_model()
+# -------------------------------------------------------
+# STRICT ROMAN URDU TRANSLITERATION
+# -------------------------------------------------------
+def transliterate_to_roman(text):
+    replacements = {
+        "ہے": "hai",
+        "میں": "main",
+        "اور": "aur",
+        "کیا": "kya",
+        "کی": "ki",
+        "کا": "ka",
+        "سے": "se",
+        "کو": "ko",
+        "پر": "par",
+        "نہیں": "nahin"
+    }
+    for urdu, roman in replacements.items():
+        text = text.replace(urdu, roman)
+    # remove any remaining non-ASCII characters
+    text = re.sub(r'[^\x00-\x7F]+', '', text)
+    return text
+# -------------------------------------------------------
+# CLEAN + STRUCTURE STORY
+# -------------------------------------------------------
+def clean_and_structure(text):
+    filler = ["um", "hmm", "acha", "matlab", "uh"]
+    pattern = r'\b(?:' + '|'.join(filler) + r')\b'
     text = re.sub(pattern, "", text, flags=re.IGNORECASE)
     text = re.sub(r'\s+', ' ', text).strip()
     sentences = re.split(r'(?<=[.!?]) +', text)
     paragraphs = []
     temp = ""
     return "\n\n".join(paragraphs)
+# -------------------------------------------------------
+# AUDIO CHUNKING (30 SEC SAFE)
+# -------------------------------------------------------
+def chunk_audio(audio_path):
+    audio = AudioSegment.from_wav(audio_path)
+    chunk_length = 30 * 1000
+    chunks = []
+    for i in range(0, len(audio), chunk_length):
+        chunks.append(audio[i:i + chunk_length])
+    return chunks
+# -------------------------------------------------------
 # EXPORT EXCEL
+# -------------------------------------------------------
+def export_excel(text):
     wb = Workbook()
     ws = wb.active
     ws.title = "Transcription"
+    ws.append(["Lecture Transcription"])
+    ws["A1"].font = Font(bold=True)
+    ws.append([text])
     buffer = BytesIO()
     wb.save(buffer)
     buffer.seek(0)
     return buffer
+# -------------------------------------------------------
 # EXPORT WORD
+# -------------------------------------------------------
+def export_word(title, text):
     doc = Document()
     doc.add_heading(title, level=1)
+    paragraphs = text.split("\n\n")
     for para in paragraphs:
         p = doc.add_paragraph(para)
         p.paragraph_format.space_after = Pt(12)
     buffer.seek(0)
     return buffer
+# -------------------------------------------------------
+# FILE UPLOADER
+# -------------------------------------------------------
+uploaded = st.file_uploader(
+    "Upload Lecture Recording (MP3, WAV, M4A, AAC) – Max 200MB",
     type=["mp3", "wav", "m4a", "aac"]
 )
+output_mode = st.radio("Output Language", ["English", "Roman Urdu"])
+if uploaded:
     try:
+        st.audio(uploaded)
+        # Convert to WAV
         with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
+            ext = uploaded.name.split(".")[-1]
+            audio = AudioSegment.from_file(uploaded, format=ext)
             audio.export(tmp.name, format="wav")
+            temp_path = tmp.name
         start_time = time.time()
+        chunks = chunk_audio(temp_path)
+        full_text = ""
+        for chunk in chunks:
+            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as ctmp:
+                chunk.export(ctmp.name, format="wav")
+                segments, info = model.transcribe(ctmp.name)
+                for segment in segments:
+                    full_text += segment.text + " "
+                os.remove(ctmp.name)
+        os.remove(temp_path)
+        if output_mode == "Roman Urdu":
+            full_text = transliterate_to_roman(full_text)
+        else:
+            full_text = re.sub(r'[^\x00-\x7F]+', '', full_text)
+        structured_text = clean_and_structure(full_text)
+        word_count = len(structured_text.split())
         processing_time = round(time.time() - start_time, 2)
         col1, col2 = st.columns(2)
         with col1:
+            st.subheader("Raw Transcription")
             st.text_area("", full_text, height=300)
         with col2:
+            st.subheader("Clean Story Format")
+            st.text_area("", structured_text, height=300)
         st.divider()
+        st.write(f"Word Count: {word_count}")
+        st.write(f"Processing Time: {processing_time} sec")
+        excel_file = export_excel(structured_text)
+        word_file = export_word("Lecture Transcription", structured_text)
         colA, colB = st.columns(2)
         with colA:
+            st.download_button("Download Excel (.xlsx)", excel_file, "RecToText.xlsx")
         with colB:
+            st.download_button("Download Word (.docx)", word_file, "RecToText.docx")
+        st.success("Complete Clean Story Generated Successfully.")
     except Exception as e:
+        st.error("Processing Error")
         st.exception(e)
 st.markdown("---")