File size: 5,943 Bytes
a892cef
 
 
 
ddfd8e5
a892cef
9a53ebf
a892cef
eebed8e
b98298a
eebed8e
a892cef
 
ddfd8e5
a892cef
88bdbd2
 
 
 
 
 
 
 
 
 
 
 
55e1272
ea817e9
 
 
 
ddfd8e5
88bdbd2
 
 
 
ea817e9
 
 
 
 
 
 
 
 
 
 
 
88bdbd2
 
 
 
eebed8e
88bdbd2
 
 
 
 
eebed8e
ea817e9
eebed8e
 
 
 
88bdbd2
 
 
eebed8e
88bdbd2
 
eebed8e
 
ddfd8e5
eebed8e
 
 
 
 
88bdbd2
 
 
 
 
ea817e9
88bdbd2
 
 
ea817e9
 
88bdbd2
a53d6d2
88bdbd2
ea817e9
a892cef
 
ea817e9
 
 
eebed8e
55e1272
 
 
 
a892cef
88bdbd2
a53d6d2
88bdbd2
 
b98298a
88bdbd2
eebed8e
ea817e9
eebed8e
 
 
 
55e1272
 
 
 
b98298a
88bdbd2
 
 
 
 
 
 
 
ea817e9
88bdbd2
ea817e9
88bdbd2
b98298a
a892cef
 
ea817e9
a892cef
ea817e9
55e1272
ea817e9
1be1b95
55e1272
ea817e9
 
55e1272
ea817e9
a892cef
eebed8e
ea817e9
 
 
 
 
 
 
a892cef
ea817e9
 
a892cef
ea817e9
a53d6d2
ea817e9
a892cef
88bdbd2
ea817e9
88bdbd2
ea817e9
 
a892cef
88bdbd2
a892cef
88bdbd2
b98298a
88bdbd2
 
eebed8e
88bdbd2
 
eebed8e
ea817e9
 
ddfd8e5
88bdbd2
 
eebed8e
88bdbd2
eebed8e
88bdbd2
 
 
eebed8e
88bdbd2
 
 
eebed8e
88bdbd2
a892cef
55e1272
ea817e9
eebed8e
 
 
88bdbd2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
import streamlit as st
import tempfile
import os
import re
import time
from pydub import AudioSegment
from faster_whisper import WhisperModel
from openpyxl import Workbook
from openpyxl.styles import Font
from docx import Document
from docx.shared import Pt
from io import BytesIO

# Page-level configuration: browser tab title and the wide page layout.
st.set_page_config(page_title="RecToText Pro", layout="wide")

# Page header: the app title followed by a one-line caption.
st.title("🎤 RecToText Pro – AI Polished Edition")
st.caption("Professional Lecture Transcriber | Clean Story | Grammar Polished")

# --------------------------------------------------
# SESSION STATE
# --------------------------------------------------
# Seed the transcript slot once per session so that later code can assign to
# it without first checking that the key exists.
if "processed_text" not in st.session_state:
    st.session_state["processed_text"] = None

# --------------------------------------------------
# LOAD MODEL
# --------------------------------------------------
@st.cache_resource
def load_model():
    """Build the Whisper "base" model for CPU inference with int8 compute.

    The function is wrapped in ``st.cache_resource``, so the model object is
    created once and then reused by later script runs instead of being
    rebuilt each time.

    Returns:
        The constructed ``WhisperModel`` instance.
    """
    whisper = WhisperModel("base", device="cpu", compute_type="int8")
    return whisper

# Module-level model handle used by the transcription step further down.
model = load_model()

# --------------------------------------------------
# STRICT ROMAN URDU
# --------------------------------------------------
def transliterate(text):
    """Romanize a small set of common Urdu words and drop other non-ASCII text.

    Each Urdu word in the table below is replaced by its Roman Urdu spelling,
    but only when it appears as a whole word.  Matching whole words prevents
    fragments of longer, unrelated words from being rewritten (for example
    the first two letters of "کام" must not turn into "ka").  Anything that
    is still non-ASCII afterwards is removed so the result is pure ASCII.

    Args:
        text: Input string, possibly containing Urdu script.

    Returns:
        The ASCII-only string.  Whitespace around removed words is left as
        is; callers that need tidy spacing should normalize it themselves.
    """
    replacements = {
        "ہے": "hai",
        "میں": "main",
        "اور": "aur",
        "کیا": "kya",
        "کی": "ki",
        "کا": "ka",
        "سے": "se",
        "کو": "ko",
        "پر": "par",
        "نہیں": "nahin"
    }

    # One alternation handles every word in a single pass.  Longest keys come
    # first so a word is never shadowed by one of its own prefixes, and the
    # \b anchors restrict matches to complete words (Arabic-script letters
    # count as word characters for str patterns).
    ordered_keys = sorted(replacements, key=len, reverse=True)
    word_pattern = re.compile(
        r"\b(?:" + "|".join(re.escape(key) for key in ordered_keys) + r")\b"
    )
    text = word_pattern.sub(lambda match: replacements[match.group(0)], text)

    # Strip whatever non-ASCII characters were not covered by the table.
    return re.sub(r'[^\x00-\x7F]+', '', text)

# --------------------------------------------------
# AI STYLE POLISHING (RULE BASED SAFE)
# --------------------------------------------------
def polish_text(text, sentences_per_paragraph=4):
    """Tidy raw transcript text into capitalized sentences grouped in paragraphs.

    Whitespace runs are collapsed to single spaces, the text is split after
    sentence-ending punctuation (``.``, ``!``, ``?``), the first character of
    every sentence is upper-cased, and a final period is added where a
    sentence has no terminator.  Sentences are then grouped into paragraphs
    separated by a blank line.

    Only the first character of a sentence is changed; the rest keeps its
    original casing so names and acronyms survive.

    Args:
        text: Raw transcript text.
        sentences_per_paragraph: Number of sentences per paragraph
            (default 4).

    Returns:
        The polished text, or an empty string when *text* contains nothing
        but whitespace.

    Raises:
        ValueError: If *sentences_per_paragraph* is smaller than 1.
    """
    if sentences_per_paragraph < 1:
        raise ValueError("sentences_per_paragraph must be at least 1")

    text = re.sub(r'\s+', ' ', text).strip()
    if not text:
        # Nothing to polish; do not fabricate a lone "." sentence.
        return ""

    polished = []
    for sentence in re.split(r'(?<=[.!?]) +', text):
        sentence = sentence.strip()
        if not sentence:
            continue
        # Upper-case the first character only; str.capitalize() would also
        # lower-case everything after it.
        sentence = sentence[0].upper() + sentence[1:]
        if not sentence.endswith((".", "!", "?")):
            sentence += "."
        polished.append(sentence)

    paragraphs = [
        " ".join(polished[start:start + sentences_per_paragraph])
        for start in range(0, len(polished), sentences_per_paragraph)
    ]
    return "\n\n".join(paragraphs)

# --------------------------------------------------
# AUDIO CHUNKING
# --------------------------------------------------
def chunk_audio(path):
    """Load the WAV file at *path* and cut it into consecutive pieces.

    The audio is sliced in steps of ``30 * 1000`` units of the segment's own
    length (presumably milliseconds, i.e. 30-second windows; confirm against
    pydub).  The last piece holds whatever remains and may be shorter.

    Args:
        path: Filesystem path of a WAV file.

    Returns:
        A list of audio slices in playback order; empty when the audio has
        zero length.
    """
    recording = AudioSegment.from_wav(path)
    window = 30 * 1000
    return [
        recording[start:start + window]
        for start in range(0, len(recording), window)
    ]

# --------------------------------------------------
# EXPORT EXCEL
# --------------------------------------------------
def export_excel(text):
    """Write the transcript to an in-memory ``.xlsx`` workbook.

    The sheet has a bold "Lecture Transcription" header in A1 followed by one
    row per paragraph (paragraphs are separated by a blank line in *text*).
    A spreadsheet cell can hold at most 32,767 characters and longer strings
    are cut off when written, so putting a whole lecture into a single cell
    would silently lose text.  Paragraphs that exceed the limit on their own
    are spread over several consecutive rows.

    Args:
        text: Transcript text with paragraphs separated by ``"\\n\\n"``.

    Returns:
        A ``BytesIO`` positioned at offset 0 containing the workbook.
    """
    max_cell_chars = 32767

    wb = Workbook()
    ws = wb.active
    ws.append(["Lecture Transcription"])
    ws["A1"].font = Font(bold=True)

    for paragraph in text.split("\n\n"):
        # max(..., 1) keeps one (empty) row for an empty paragraph, matching
        # the previous behavior of always writing a data row.
        for start in range(0, max(len(paragraph), 1), max_cell_chars):
            ws.append([paragraph[start:start + max_cell_chars]])

    buffer = BytesIO()
    wb.save(buffer)
    buffer.seek(0)
    return buffer

# --------------------------------------------------
# EXPORT WORD
# --------------------------------------------------
def export_word(text):
    """Render the transcript as an in-memory ``.docx`` document.

    The document starts with a level-1 heading "Lecture Transcription".
    Every chunk of *text* delimited by a blank line becomes its own
    paragraph with 12 pt of spacing after it.

    Args:
        text: Transcript text with paragraphs separated by ``"\\n\\n"``.

    Returns:
        A ``BytesIO`` rewound to offset 0 that holds the saved document.
    """
    document = Document()
    document.add_heading("Lecture Transcription", level=1)

    for block in text.split("\n\n"):
        document.add_paragraph(block).paragraph_format.space_after = Pt(12)

    stream = BytesIO()
    document.save(stream)
    stream.seek(0)
    return stream

# --------------------------------------------------
# CLEAR BUTTON
# --------------------------------------------------
# Sidebar reset: drop the stored transcript, then restart the script run.
clear_clicked = st.sidebar.button("🧹 Clear All")
if clear_clicked:
    st.session_state.processed_text = None
    st.rerun()

# --------------------------------------------------
# FILE UPLOADER
# --------------------------------------------------
# Audio input: only the listed container/codec extensions are accepted.
_accepted_audio_types = ["mp3", "wav", "m4a", "aac"]
uploaded = st.file_uploader(
    label="Upload Lecture (MP3, WAV, M4A, AAC) – Max 200MB",
    type=_accepted_audio_types,
)

# Target script/language of the final transcript.
_output_choices = ["English", "Roman Urdu"]
output_mode = st.radio(label="Output Language", options=_output_choices)

def _new_temp_wav():
    """Create an empty, already-closed temporary ``.wav`` file; return its path."""
    # The handle is closed before the path is used so that other writers can
    # open the file and it can be deleted afterwards on every platform.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as handle:
        return handle.name


def _discard(path):
    """Delete *path*, tolerating the case where it no longer exists."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass


def _transcribe_upload(upload):
    """Convert the uploaded audio to WAV, transcribe it chunk by chunk.

    Every temporary file created along the way is removed even when
    conversion or transcription raises.

    Args:
        upload: The uploaded file object (needs ``name`` and file-like reads).

    Returns:
        The segment texts of all chunks joined with single spaces.
    """
    # Lower-case so "LECTURE.MP3" is handled like "lecture.mp3"; fall back to
    # format auto-detection when the name has no extension.
    ext = os.path.splitext(upload.name)[1].lstrip(".").lower()

    wav_path = _new_temp_wav()
    try:
        # Rewind in case an earlier reader left the stream mid-file.
        upload.seek(0)
        source_audio = AudioSegment.from_file(upload, format=ext or None)
        source_audio.export(wav_path, format="wav")

        pieces = []
        for chunk in chunk_audio(wav_path):
            chunk_path = _new_temp_wav()
            try:
                chunk.export(chunk_path, format="wav")
                segments, _info = model.transcribe(chunk_path)
                # Consume the segments while the chunk file still exists;
                # they may be produced lazily from it.
                pieces.extend(segment.text for segment in segments)
            finally:
                _discard(chunk_path)
        return " ".join(pieces)
    finally:
        _discard(wav_path)


# Main flow: transcribe the upload (once), post-process, display and offer
# downloads.  Streamlit re-executes the script on every widget interaction,
# so the raw transcript is kept in session state and reused as long as the
# same file is selected and the stored result has not been cleared.
if uploaded:
    try:
        st.audio(uploaded)

        cache_key = (
            getattr(uploaded, "file_id", None),
            uploaded.name,
            uploaded.size,
        )
        cached = None
        if "_transcript_cache" in st.session_state:
            cached = st.session_state["_transcript_cache"]

        # "processed_text" is reset to None by the Clear All button; treat
        # that as a request to transcribe again.
        has_result = (
            "processed_text" in st.session_state
            and st.session_state["processed_text"] is not None
        )
        reusable = (
            cached is not None and cached["key"] == cache_key and has_result
        )

        if not reusable:
            start_time = time.time()
            raw_text = _transcribe_upload(uploaded)
            cached = {
                "key": cache_key,
                "text": raw_text,
                "seconds": round(time.time() - start_time, 2),
            }
            st.session_state["_transcript_cache"] = cached

        # Strict output control: the result is always ASCII-only.  Applied on
        # every run so switching the language does not re-transcribe.
        if output_mode == "Roman Urdu":
            full_text = transliterate(cached["text"])
        else:
            full_text = re.sub(r'[^\x00-\x7F]+', '', cached["text"])

        polished = polish_text(full_text)
        st.session_state.processed_text = polished

        st.subheader("✨ Clean AI Polished Story")
        # A non-empty label (hidden visually) avoids Streamlit's empty-label
        # warning and keeps the widget accessible.
        st.text_area(
            "Transcript", polished, height=350, label_visibility="collapsed"
        )

        st.write(f"Word Count: {len(polished.split())}")
        st.write(f"Processing Time: {cached['seconds']} sec")

        col1, col2 = st.columns(2)

        # Explicit file names and MIME types so the browser saves files with
        # the right extension.  The stored transcript is intentionally not
        # reset on click: a click triggers a rerun, and a reset would force a
        # full re-transcription on the following interaction.
        with col1:
            st.download_button(
                "Download Excel (.xlsx)",
                export_excel(polished),
                file_name="lecture_transcription.xlsx",
                mime=(
                    "application/vnd.openxmlformats-officedocument"
                    ".spreadsheetml.sheet"
                ),
            )

        with col2:
            st.download_button(
                "Download Word (.docx)",
                export_word(polished),
                file_name="lecture_transcription.docx",
                mime=(
                    "application/vnd.openxmlformats-officedocument"
                    ".wordprocessingml.document"
                ),
            )

        st.success("Story Generated Successfully.")

    except Exception as e:
        # Top-level UI boundary: report any failure in the page instead of
        # crashing the script run.
        st.error("Processing Error")
        st.exception(e)

# Footer: a horizontal rule followed by a centered product line; raw HTML is
# enabled only for the <center> tag around this fixed string.
st.markdown("---")
st.markdown("<center>RecToText Pro – AI Polished Edition</center>", unsafe_allow_html=True)