Spaces:

MiakOnline
/

RecToTextPro

Sleeping

File size: 5,943 Bytes

a892cef
 
 
 
ddfd8e5
a892cef
9a53ebf
a892cef
eebed8e
b98298a
eebed8e
a892cef
 
ddfd8e5
a892cef
88bdbd2
 
 
 
 
 
 
 
 
 
 
 
55e1272
ea817e9
 
 
 
ddfd8e5
88bdbd2
 
 
 
ea817e9
 
 
 
 
 
 
 
 
 
 
 
88bdbd2
 
 
 
eebed8e
88bdbd2
 
 
 
 
eebed8e
ea817e9
eebed8e
 
 
 
88bdbd2
 
 
eebed8e
88bdbd2
 
eebed8e
 
ddfd8e5
eebed8e
 
 
 
 
88bdbd2
 
 
 
 
ea817e9
88bdbd2
 
 
ea817e9
 
88bdbd2
a53d6d2
88bdbd2
ea817e9
a892cef
 
ea817e9
 
 
eebed8e
55e1272
 
 
 
a892cef
88bdbd2
a53d6d2
88bdbd2
 
b98298a
88bdbd2
eebed8e
ea817e9
eebed8e
 
 
 
55e1272
 
 
 
b98298a
88bdbd2
 
 
 
 
 
 
 
ea817e9
88bdbd2
ea817e9
88bdbd2
b98298a
a892cef
 
ea817e9
a892cef
ea817e9
55e1272
ea817e9
1be1b95
55e1272
ea817e9
 
55e1272
ea817e9
a892cef
eebed8e
ea817e9
 
 
 
 
 
 
a892cef
ea817e9
 
a892cef
ea817e9
a53d6d2
ea817e9
a892cef
88bdbd2
ea817e9
88bdbd2
ea817e9
 
a892cef
88bdbd2
a892cef
88bdbd2
b98298a
88bdbd2
 
eebed8e
88bdbd2
 
eebed8e
ea817e9
 
ddfd8e5
88bdbd2
 
eebed8e
88bdbd2
eebed8e
88bdbd2
 
 
eebed8e
88bdbd2
 
 
eebed8e
88bdbd2
a892cef
55e1272
ea817e9
eebed8e
 
 
88bdbd2

import streamlit as st
import tempfile
import os
import re
import time
from pydub import AudioSegment
from faster_whisper import WhisperModel
from openpyxl import Workbook
from openpyxl.styles import Font
from docx import Document
from docx.shared import Pt
from io import BytesIO

# Page chrome: configure the page first, then render the heading and tagline.
st.set_page_config(layout="wide", page_title="RecToText Pro")

st.title("🎤 RecToText Pro – AI Polished Edition")
st.caption("Professional Lecture Transcriber | Clean Story | Grammar Polished")

# --------------------------------------------------
# SESSION STATE
# --------------------------------------------------
# Seed the transcript slot once per session; reruns that already have the key
# leave its current value untouched.
if "processed_text" not in st.session_state:
    st.session_state["processed_text"] = None

# --------------------------------------------------
# LOAD MODEL
# --------------------------------------------------
@st.cache_resource
def load_model():
    """Construct the faster-whisper "base" model for int8 inference on the CPU.

    Decorated with ``st.cache_resource`` so Streamlit hands back the same
    model object on later calls instead of constructing it again.
    """
    whisper_options = {"device": "cpu", "compute_type": "int8"}
    return WhisperModel("base", **whisper_options)


# Module-level handle used by the transcription code further down the script.
model = load_model()

# --------------------------------------------------
# STRICT ROMAN URDU
# --------------------------------------------------
def transliterate(text):
    """Romanise a few common Urdu words and drop every other non-ASCII run.

    Only whole words are replaced.  A plain substring replace would also fire
    inside longer Urdu words; once the untranslated remainder of such a word
    is stripped, a stray Roman fragment (for example a lone "ki") would be
    left behind in the output.

    Args:
        text: Transcript text, possibly containing Urdu script.

    Returns:
        An ASCII-only string: mapped Urdu words become their Roman Urdu
        spelling, all remaining non-ASCII characters are removed, and ASCII
        text (including whitespace) is passed through unchanged.
    """
    replacements = {
        "ہے": "hai",
        "میں": "main",
        "اور": "aur",
        "کیا": "kya",
        "کی": "ki",
        "کا": "ka",
        "سے": "se",
        "کو": "ko",
        "پر": "par",
        "نہیں": "nahin"
    }

    # Longest keys first so a key is always tried before any shorter key that
    # is a prefix of it.
    alternatives = "|".join(
        re.escape(word) for word in sorted(replacements, key=len, reverse=True)
    )
    # The lookarounds require a non-word character (or the string edge) on
    # both sides, so a key never matches inside a longer word.  Urdu
    # punctuation is not a word character, so a key followed by it still
    # matches.
    whole_word = re.compile(rf"(?<!\w)(?:{alternatives})(?!\w)")
    text = whole_word.sub(lambda match: replacements[match.group(0)], text)

    # Anything still outside ASCII could not be romanised; remove it.
    return re.sub(r'[^\x00-\x7F]+', '', text)

# --------------------------------------------------
# AI STYLE POLISHING (RULE BASED SAFE)
# --------------------------------------------------
def polish_text(text):
    """Tidy a raw transcript into capitalised sentences grouped in paragraphs.

    Whitespace runs are collapsed to single spaces, the text is split after
    ``.``, ``!`` or ``?``, each sentence gets an upper-case first character
    and a closing full stop if it has no terminal punctuation, and every four
    sentences form one paragraph.

    Only the first character of a sentence is changed; the rest keeps its
    original casing so acronyms, names and "I" are not lowercased.

    Args:
        text: Raw transcript text.

    Returns:
        Paragraphs separated by a blank line, or ``""`` when *text* is empty
        or contains only whitespace.
    """
    text = re.sub(r'\s+', ' ', text).strip()
    if not text:
        # Nothing was transcribed; do not invent a lone "." sentence.
        return ""

    sentences = []
    for raw in re.split(r'(?<=[.!?]) +', text):
        sentence = raw.strip()
        if not sentence:
            continue
        # Upper-case the first character only (str.capitalize would also
        # lowercase everything after it).
        sentence = sentence[0].upper() + sentence[1:]
        if not sentence.endswith((".", "!", "?")):
            sentence += "."
        sentences.append(sentence)

    per_paragraph = 4
    paragraphs = [
        " ".join(sentences[start:start + per_paragraph])
        for start in range(0, len(sentences), per_paragraph)
    ]
    return "\n\n".join(paragraphs)

# --------------------------------------------------
# AUDIO CHUNKING
# --------------------------------------------------
def chunk_audio(path):
    """Load the WAV file at *path* and cut it into consecutive 30-second slices.

    Returns the slices as a list in playback order; the final slice holds
    whatever audio remains and may therefore be shorter than the others.
    """
    recording = AudioSegment.from_wav(path)
    window_ms = 30 * 1000  # pydub measures length and slices in milliseconds
    total_ms = len(recording)
    return [
        recording[offset:offset + window_ms]
        for offset in range(0, total_ms, window_ms)
    ]

# --------------------------------------------------
# EXPORT EXCEL
# --------------------------------------------------
def export_excel(text):
    """Build an .xlsx workbook containing the transcript.

    Row 1 holds a bold "Lecture Transcription" heading.  Each paragraph of
    *text* (paragraphs are separated by a blank line) is written to its own
    row in column A.  Writing the whole transcript into a single cell would
    silently lose the tail of long lectures, because a cell string is capped
    at 32,767 characters.  A single paragraph longer than that cap is still
    subject to the limit.

    Args:
        text: Transcript text with paragraphs separated by ``"\\n\\n"``.

    Returns:
        A ``BytesIO`` positioned at offset 0 holding the saved workbook.
    """
    wb = Workbook()
    ws = wb.active
    ws.append(["Lecture Transcription"])
    ws["A1"].font = Font(bold=True)

    paragraphs = [para for para in text.split("\n\n") if para.strip()]
    # Keep a (possibly empty) body row even when there are no paragraphs so
    # the sheet always has the heading plus at least one data row.
    for para in paragraphs or [text]:
        ws.append([para])

    buffer = BytesIO()
    wb.save(buffer)
    buffer.seek(0)
    return buffer

# --------------------------------------------------
# EXPORT WORD
# --------------------------------------------------
def export_word(text):
    """Render the transcript as a .docx file held in memory.

    The document starts with a level-1 "Lecture Transcription" heading; every
    blank-line-separated block of *text* becomes one paragraph followed by
    12 pt of spacing.  Returns a ``BytesIO`` rewound to the start.
    """
    document = Document()
    document.add_heading("Lecture Transcription", level=1)

    for block in text.split("\n\n"):
        paragraph = document.add_paragraph(block)
        paragraph.paragraph_format.space_after = Pt(12)

    stream = BytesIO()
    document.save(stream)
    stream.seek(0)
    return stream

# --------------------------------------------------
# CLEAR BUTTON
# --------------------------------------------------
# The uploader's widget key embeds a counter.  Bumping the counter makes
# Streamlit build a fresh, empty uploader on the next run, which is what lets
# "Clear All" actually drop the uploaded file instead of immediately
# re-processing it.
if "uploader_generation" not in st.session_state:
    st.session_state["uploader_generation"] = 0

if st.sidebar.button("🧹 Clear All"):
    st.session_state["processed_text"] = None
    st.session_state["processed_meta"] = None
    st.session_state["uploader_generation"] += 1
    st.rerun()

# --------------------------------------------------
# FILE UPLOADER
# --------------------------------------------------
uploaded = st.file_uploader(
    "Upload Lecture (MP3, WAV, M4A, AAC) – Max 200MB",
    type=["mp3", "wav", "m4a", "aac"],
    key=f"lecture_upload_{st.session_state['uploader_generation']}",
)

output_mode = st.radio("Output Language", ["English", "Roman Urdu"])


def _transcribe_upload(upload):
    """Decode an uploaded audio file and return its raw transcript text.

    The upload is converted to WAV, split by ``chunk_audio`` and each piece is
    passed to the module-level Whisper ``model``.  All intermediate files live
    in one temporary directory, which is removed even when decoding or
    transcription raises.
    """
    extension = os.path.splitext(upload.name)[1].lstrip(".").lower()
    # Rewind in case an earlier consumer left the read position mid-stream.
    upload.seek(0)
    audio = AudioSegment.from_file(upload, format=extension or None)

    pieces = []
    with tempfile.TemporaryDirectory() as workdir:
        wav_path = os.path.join(workdir, "lecture.wav")
        # export() returns the open output handle; close it so the file can
        # be deleted on every platform.
        audio.export(wav_path, format="wav").close()

        for index, chunk in enumerate(chunk_audio(wav_path)):
            chunk_path = os.path.join(workdir, f"chunk_{index}.wav")
            chunk.export(chunk_path, format="wav").close()
            segments, _info = model.transcribe(chunk_path)
            # NOTE(review): faster-whisper appears to yield segments lazily,
            # so they are fully consumed before the chunk file is removed.
            pieces.extend(segment.text for segment in segments)
            os.remove(chunk_path)

    return " ".join(pieces)


if uploaded:
    try:
        st.audio(uploaded)

        # Streamlit reruns the whole script on every interaction (radio
        # change, download click, ...).  The result is cached in session
        # state, keyed by the upload and the output mode, so the audio is
        # transcribed only when one of them changes.
        cache_key = (
            getattr(uploaded, "file_id", None),
            uploaded.name,
            uploaded.size,
            output_mode,
        )
        meta = st.session_state.get("processed_meta")
        polished = st.session_state.get("processed_text")

        if polished is None or meta is None or meta["key"] != cache_key:
            start_time = time.time()
            full_text = _transcribe_upload(uploaded)

            # Strict output control
            if output_mode == "Roman Urdu":
                full_text = transliterate(full_text)
            else:
                full_text = re.sub(r'[^\x00-\x7F]+', '', full_text)

            polished = polish_text(full_text)
            meta = {
                "key": cache_key,
                "seconds": round(time.time() - start_time, 2),
            }
            st.session_state["processed_text"] = polished
            st.session_state["processed_meta"] = meta

        word_count = len(polished.split())
        processing_time = meta["seconds"]

        st.subheader("✨ Clean AI Polished Story")
        # A non-empty label keeps the widget accessible; it is hidden because
        # the subheader above already titles the box.
        st.text_area(
            "Polished transcript",
            polished,
            height=350,
            label_visibility="collapsed",
        )

        st.write(f"Word Count: {word_count}")
        st.write(f"Processing Time: {processing_time} sec")

        excel_file = export_excel(polished)
        word_file = export_word(polished)

        col1, col2 = st.columns(2)

        # Explicit file names and MIME types so the browser saves the
        # downloads with the right extension.
        with col1:
            st.download_button(
                "Download Excel (.xlsx)",
                excel_file,
                file_name="lecture_transcription.xlsx",
                mime=(
                    "application/vnd.openxmlformats-officedocument"
                    ".spreadsheetml.sheet"
                ),
            )

        with col2:
            st.download_button(
                "Download Word (.docx)",
                word_file,
                file_name="lecture_transcription.docx",
                mime=(
                    "application/vnd.openxmlformats-officedocument"
                    ".wordprocessingml.document"
                ),
            )

        st.success("Story Generated Successfully.")

    except Exception as e:
        # Top-level UI boundary: show the failure in the page rather than
        # letting the script crash.
        st.error("Processing Error")
        st.exception(e)

# Footer: a horizontal rule followed by a centred product line (raw HTML).
footer_html = "<center>RecToText Pro – AI Polished Edition</center>"
st.markdown("---")
st.markdown(footer_html, unsafe_allow_html=True)