Spaces:

MiakOnline
/

RecToTextPro

Sleeping

App Files Files Community

MiakOnline committed on Mar 14

Commit

a892cef

verified ·

1 Parent(s): ebef77e

Create app.py

Browse files

Files changed (1) hide show

app.py +183 -0

app.py ADDED Viewed

	@@ -0,0 +1,183 @@

+import streamlit as st
+import whisper
+import tempfile
+import os
+import time
+import re
+from pydub import AudioSegment
+from openpyxl import Workbook
+from openpyxl.styles import Font
+from io import BytesIO
+# ---------------------------
+# PAGE CONFIG
+# ---------------------------
+# Browser-tab title, icon and wide layout for the whole app.
+page_settings = {
+    "page_icon": "🎤",
+    "page_title": "RecToText Pro",
+    "layout": "wide",
+}
+st.set_page_config(**page_settings)
+# ---------------------------
+# SIDEBAR
+# ---------------------------
+st.sidebar.title("⚙️ Settings")
+# Whisper checkpoint used for transcription.
+model_option = st.sidebar.selectbox(
+    "Select Whisper Model",
+    ["base", "small"]
+)
+# Decides whether the cleaned text is additionally passed through the
+# Roman Urdu conversion.
+output_mode = st.sidebar.radio(
+    "Output Format",
+    ["Roman Urdu", "English"]
+)
+if st.sidebar.button("🧹 Clear Session"):
+    st.session_state.clear()
+    # st.experimental_rerun is deprecated in favour of st.rerun and is absent
+    # from newer Streamlit releases; prefer the new name and fall back to the
+    # old one only when st.rerun does not exist.
+    rerun = getattr(st, "rerun", None) or st.experimental_rerun
+    rerun()
+# ---------------------------
+# HEADER
+# ---------------------------
+# Centered title and tagline rendered as raw HTML, followed by a rule.
+title_html = "<h1 style='text-align:center;'>🎤 RecToText Pro</h1>"
+tagline_html = "<p style='text-align:center;'>Intelligent Urdu + English Lecture Transcriber</p>"
+for html_snippet in (title_html, tagline_html):
+    st.markdown(html_snippet, unsafe_allow_html=True)
+st.divider()
+# ---------------------------
+# FUNCTIONS
+# ---------------------------
+@st.cache_resource
+def load_model(model_size):
+    """Load the Whisper model identified by ``model_size``.
+
+    The function is wrapped in ``st.cache_resource``, so the loading work is
+    delegated to ``whisper.load_model`` and the returned object is handed to
+    Streamlit's resource cache.
+    """
+    whisper_model = whisper.load_model(model_size)
+    return whisper_model
+_FILLER_WORDS = ("um", "hmm", "acha", "matlab", "uh", "huh")
+# A filler as a whole word plus one directly attached comma (Latin or Arabic),
+# so that removing "um," does not leave an orphaned comma behind.
+_FILLER_PATTERN = re.compile(
+    r'\b(?:' + '|'.join(re.escape(word) for word in _FILLER_WORDS) + r')\b[,،]?',
+    flags=re.IGNORECASE,
+)
+# Whitespace that ends up directly in front of punctuation after a removal.
+_SPACE_BEFORE_PUNCT = re.compile(r'\s+(?=[,.;:!?،۔])')
+def clean_text(text):
+    """Remove filler words from ``text`` and normalise its whitespace.
+
+    Fillers are matched case-insensitively as whole words, so "umbrella" is
+    left untouched. A comma attached to a removed filler is dropped with it,
+    runs of whitespace are collapsed to one space, whitespace directly before
+    punctuation is removed, and the result is stripped.
+
+    Args:
+        text: The transcription text; ``None`` or an empty string yields "".
+
+    Returns:
+        The cleaned string.
+    """
+    if not text:
+        return ""
+    text = _FILLER_PATTERN.sub('', text)
+    text = re.sub(r'\s+', ' ', text).strip()
+    # "I think um." -> "I think ." after removal; close that gap.
+    return _SPACE_BEFORE_PUNCT.sub('', text)
+# Basic placeholder vocabulary: Urdu word -> Roman Urdu spelling.
+_ROMAN_URDU_MAP = {
+    "ہے": "hai",
+    "میں": "main",
+    "اور": "aur",
+    "کیا": "kya",
+    "آپ": "aap",
+}
+# Match a vocabulary entry only when it is not glued to another word
+# character, so entries are never replaced inside longer words.
+_ROMAN_URDU_PATTERN = re.compile(
+    r'(?<!\w)(?:' + '|'.join(re.escape(word) for word in _ROMAN_URDU_MAP) + r')(?!\w)'
+)
+def convert_to_roman_urdu(text):
+    """Replace known Urdu words in ``text`` with their Roman Urdu spelling.
+
+    Only whole words from the small placeholder vocabulary are converted, in
+    a single pass; every other character is returned unchanged.
+
+    Args:
+        text: Text that may contain Urdu-script words. Falsy input is
+            returned as-is.
+
+    Returns:
+        The text with the known words romanised.
+    """
+    if not text:
+        return text
+    return _ROMAN_URDU_PATTERN.sub(
+        lambda match: _ROMAN_URDU_MAP[match.group(0)], text
+    )
+def process_audio(file_path, model):
+    """Transcribe the audio at ``file_path`` with ``model``.
+
+    Calls ``model.transcribe(file_path)`` and returns its result unchanged.
+    """
+    return model.transcribe(file_path)
+def create_excel(segments):
+    """Build an in-memory Excel workbook from transcription segments.
+
+    The sheet "Transcription" gets a bold header row followed by one row per
+    segment: a "start - end" timestamp (both rounded to two decimals), the
+    segment text, and that text passed through ``clean_text``.
+
+    Args:
+        segments: Iterable of mappings with "start", "end" and "text" keys.
+
+    Returns:
+        A ``BytesIO`` holding the saved workbook, rewound to position 0.
+    """
+    workbook = Workbook()
+    sheet = workbook.active
+    sheet.title = "Transcription"
+    sheet.append(["Timestamp", "Transcribed Text", "Cleaned Output"])
+    # Embolden the three header cells of the first row.
+    for column_index in (1, 2, 3):
+        sheet.cell(row=1, column=column_index).font = Font(bold=True)
+    for segment in segments:
+        begin = round(segment['start'], 2)
+        finish = round(segment['end'], 2)
+        spoken = segment["text"]
+        sheet.append([f"{begin} - {finish}", spoken, clean_text(spoken)])
+    buffer = BytesIO()
+    workbook.save(buffer)
+    # Rewind so the caller reads the workbook from its first byte.
+    buffer.seek(0)
+    return buffer
+# ---------------------------
+# FILE UPLOADER
+# ---------------------------
+uploaded_file = st.file_uploader(
+    "Upload Lecture Recording (.mp3, .wav, .m4a)",
+    type=["mp3", "wav", "m4a"]
+)
+if uploaded_file:
+    st.audio(uploaded_file)
+    # Streamlit re-executes this script on every widget interaction (including
+    # a click on the download button), so the transcription is kept in session
+    # state and reused until the uploaded file or the selected model changes.
+    cache_key = (uploaded_file.name, uploaded_file.size, model_option)
+    transcription = None
+    if "transcription" in st.session_state:
+        stored = st.session_state["transcription"]
+        if stored["key"] == cache_key:
+            transcription = stored
+    if transcription is None:
+        # Only reserve a path here: the handle is closed before pydub writes
+        # to it, because a still-open NamedTemporaryFile cannot be opened a
+        # second time on Windows.
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
+            temp_audio_path = tmp.name
+        try:
+            try:
+                # Rewind in case the upload buffer was already consumed.
+                uploaded_file.seek(0)
+                audio = AudioSegment.from_file(uploaded_file)
+                audio.export(temp_audio_path, format="wav")
+            except Exception as exc:
+                # UI boundary: report an undecodable upload instead of crashing.
+                st.error(f"Could not decode the uploaded audio: {exc}")
+            else:
+                st.info("Loading model...")
+                model = load_model(model_option)
+                progress = st.progress(0)
+                start_time = time.time()
+                with st.spinner("Transcribing..."):
+                    result = process_audio(temp_audio_path, model)
+                    progress.progress(100)
+                end_time = time.time()
+                transcription = {
+                    "key": cache_key,
+                    "result": result,
+                    "processing_time": round(end_time - start_time, 2),
+                }
+                st.session_state["transcription"] = transcription
+        finally:
+            # Remove the temporary WAV even when decoding or transcription fails.
+            if os.path.exists(temp_audio_path):
+                os.remove(temp_audio_path)
+    if transcription is not None:
+        result = transcription["result"]
+        processing_time = transcription["processing_time"]
+        detected_lang = result.get("language", "Unknown")
+        segments = result["segments"]
+        full_text = result["text"]
+        cleaned_text = clean_text(full_text)
+        if output_mode == "Roman Urdu":
+            cleaned_text = convert_to_roman_urdu(cleaned_text)
+        word_count = len(cleaned_text.split())
+        # ---------------------------
+        # DISPLAY RESULTS
+        # ---------------------------
+        col1, col2 = st.columns(2)
+        # The two text areas get distinct (hidden) labels so they can never
+        # resolve to the same widget, even when raw and cleaned text are equal.
+        with col1:
+            st.subheader("📜 Raw Transcription")
+            st.text_area(
+                "Raw Transcription",
+                full_text,
+                height=300,
+                label_visibility="collapsed",
+            )
+        with col2:
+            st.subheader("✨ Cleaned Output")
+            st.text_area(
+                "Cleaned Output",
+                cleaned_text,
+                height=300,
+                label_visibility="collapsed",
+            )
+        st.divider()
+        st.write(f"**Detected Language:** {detected_lang}")
+        st.write(f"**Word Count:** {word_count}")
+        st.write(f"**Processing Time:** {processing_time} sec")
+        # ---------------------------
+        # EXCEL DOWNLOAD
+        # ---------------------------
+        excel_file = create_excel(segments)
+        st.download_button(
+            label="📥 Download Excel File",
+            data=excel_file,
+            file_name="transcription.xlsx",
+            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
+        )
+# ---------------------------
+# FOOTER
+# ---------------------------
+# Closing rule and a small centered credit line rendered as raw HTML.
+footer_html = "<p style='text-align:center; font-size:12px;'>Developed with ❤️ using Whisper & Streamlit</p>"
+st.divider()
+st.markdown(footer_html, unsafe_allow_html=True)