File size: 10,815 Bytes
46ebbe7
 
 
980892f
ef9a67d
67748bb
d97d093
f5712db
 
67748bb
f5712db
ef9a67d
67748bb
ef9a67d
67748bb
 
46ebbe7
67748bb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d97d093
f5712db
 
ef9a67d
 
67748bb
ef9a67d
67748bb
 
fbfb3b5
 
b292d46
0a869f5
67748bb
fbfb3b5
ef9a67d
 
67748bb
 
 
0a869f5
b292d46
67748bb
 
 
 
 
 
 
 
 
ef9a67d
 
67748bb
fbfb3b5
67748bb
 
fbfb3b5
 
 
67748bb
 
 
 
 
 
 
 
 
 
fbfb3b5
67748bb
 
 
 
 
 
 
 
 
fbfb3b5
 
67748bb
f5712db
67748bb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f5712db
 
 
67748bb
ef9a67d
67748bb
46ebbe7
67748bb
fbfb3b5
 
67748bb
 
 
b292d46
67748bb
 
 
fbfb3b5
67748bb
 
2d3b613
fbfb3b5
b292d46
fbfb3b5
67748bb
 
 
 
d816888
67748bb
 
 
 
 
d816888
67748bb
 
980892f
ef9a67d
67748bb
 
 
 
 
d816888
67748bb
 
 
 
 
2d3b613
46ebbe7
67748bb
 
 
 
 
 
 
 
 
 
46ebbe7
ef9a67d
67748bb
ef9a67d
67748bb
 
fbfb3b5
67748bb
 
fbfb3b5
67748bb
 
 
 
 
 
fbfb3b5
ef9a67d
67748bb
ef9a67d
 
fbfb3b5
67748bb
 
 
fbfb3b5
67748bb
 
 
fbfb3b5
67748bb
 
 
 
 
ef9a67d
67748bb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
980892f
67748bb
 
 
 
 
 
 
 
 
 
 
 
 
f5712db
67748bb
 
f5712db
2d3b613
 
67748bb
 
 
 
 
ef9a67d
67748bb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d816888
 
67748bb
 
 
 
 
 
 
 
 
 
 
2d3b613
67748bb
 
46ebbe7
67748bb
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
import inspect
import os
import shutil
import subprocess
import tempfile
import threading

import gradio as gr
import yt_dlp
from faster_whisper import WhisperModel
from indic_transliteration import sanscript
from indic_transliteration.sanscript import transliterate
import torch

# ===============================
# 🔒 GLOBALS & CONFIG
# ===============================
# Directory passed to WhisperModel as `download_root`; created at import time
# so it exists before the first model load.
MODEL_CACHE_DIR = "/tmp/qwen_whisper_cache"
os.makedirs(MODEL_CACHE_DIR, exist_ok=True)

# Lazily created WhisperModel instance: stays None until load_whisper_model()
# is first called, then is reused by every later call.
_model = None

# Serialises the first-time model load. The Gradio queue configured at the
# bottom of this file runs more than one request at a time, so two callers can
# reach the lazy initialisation together.
_model_lock = threading.Lock()


def load_whisper_model():
    """Return the shared Whisper model, creating it on first use.

    The model is the faster-whisper "base" checkpoint, run on CPU with int8
    compute, with weights stored under MODEL_CACHE_DIR. The instance is kept
    in the module-level ``_model`` so later calls return it immediately.

    Returns:
        The process-wide WhisperModel instance.
    """
    global _model
    if _model is None:
        with _model_lock:
            # Re-check under the lock: another caller may have finished
            # loading while this one was waiting.
            if _model is None:
                print("📥 Loading Whisper 'base' model (CPU/int8)...")
                _model = WhisperModel(
                    "base",
                    device="cpu",
                    compute_type="int8",
                    download_root=MODEL_CACHE_DIR,
                )
                print("✅ Model loaded.")
    return _model

def get_ffmpeg():
    """Return the ffmpeg executable to invoke.

    Prefers the binary found on PATH; when none is found, falls back to the
    fixed location "/usr/bin/ffmpeg".
    """
    located = shutil.which("ffmpeg")
    if located:
        return located
    return "/usr/bin/ffmpeg"

# ===============================
# 📥 SAFE DOWNLOAD (YouTube, TikTok, etc.)
# ===============================
def download_video(url):
    """Download the media at *url* with yt-dlp and return ``(path, title)``.

    Every call writes to its own uniquely named file in the system temp
    directory, so requests running at the same time cannot overwrite each
    other's download. The caller owns the returned file and should delete it
    once it is no longer needed.

    Args:
        url: Page or media URL to hand to yt-dlp.

    Returns:
        A tuple of the downloaded file's path and the video title
        ("Untitled" when yt-dlp reports no title).

    Raises:
        RuntimeError: If yt-dlp fails or finishes without producing the file.
    """
    # Reserve a unique name, then free it again: the download is started
    # against a target that does not exist yet (presumably yt-dlp would
    # otherwise treat an existing file as already downloaded).
    fd, video_path = tempfile.mkstemp(prefix="downloaded_video_", suffix=".mp4")
    os.close(fd)
    os.remove(video_path)

    ydl_opts = {
        "format": "bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best",
        "outtmpl": video_path,
        "quiet": True,
        # NOTE(review): this turns off TLS certificate verification for the
        # download; confirm it is really required in the deployment.
        "nocheckcertificate": True,
        "noplaylist": True,
        "extract_audio": False,
        "retries": 10,
        "fragment_retries": 10,
    }

    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=True)
    except Exception as e:
        # Do not leave a partial download behind on failure.
        try:
            if os.path.exists(video_path):
                os.remove(video_path)
        except OSError:
            pass
        raise RuntimeError(f"Download failed: {str(e)}") from e

    # Checked outside the try block so the message is not wrapped a second
    # time with another "Download failed:" prefix.
    if not os.path.exists(video_path):
        raise RuntimeError("Download failed: no file created")

    # yt-dlp may return no info dict, or one whose title is missing/None.
    title = (info or {}).get("title") or "Untitled"
    return video_path, title

# ===============================
# 🎧 EXTRACT AUDIO (robust)
# ===============================
def extract_audio(video_path, timeout=60):
    """Extract a mono 16 kHz 16-bit PCM WAV track from *video_path* via ffmpeg.

    Every call writes to its own uniquely named file in the system temp
    directory, so requests running at the same time cannot overwrite each
    other's audio. On success the caller owns the returned file; on failure
    the partial output is removed before the error is raised.

    Args:
        video_path: Path of the media file to read.
        timeout: Seconds to wait for ffmpeg before giving up (default 60).

    Returns:
        Path of the extracted WAV file.

    Raises:
        RuntimeError: If ffmpeg cannot be started, exits with an error, times
            out, or produces a missing/too-small (< 5000 bytes) file.
    """
    fd, audio_path = tempfile.mkstemp(prefix="extracted_audio_", suffix=".wav")
    os.close(fd)

    cmd = [
        get_ffmpeg(),
        "-y",  # overwrite the empty placeholder created by mkstemp
        "-i", video_path,
        "-vn",
        "-ac", "1",
        "-ar", "16000",
        "-c:a", "pcm_s16le",
        audio_path,
    ]

    succeeded = False
    try:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            errors="replace",  # stderr may contain bytes that are not valid text
            timeout=timeout,
        )
        if result.returncode != 0:
            # Keep only the last lines of stderr: the output starts with a
            # long banner and the actual error comes at the end.
            tail = "\n".join(result.stderr.strip().splitlines()[-3:])
            raise RuntimeError(f"FFmpeg failed: {tail}")
        if not os.path.exists(audio_path) or os.path.getsize(audio_path) < 5000:
            raise RuntimeError("Audio extraction produced empty/invalid file")
        succeeded = True
        return audio_path
    except subprocess.TimeoutExpired as exc:
        raise RuntimeError(f"Audio extraction timed out (>{timeout}s)") from exc
    except OSError as exc:
        # Raised e.g. when the ffmpeg executable does not exist.
        raise RuntimeError(f"FFmpeg could not be run: {exc}") from exc
    finally:
        if not succeeded:
            try:
                if os.path.exists(audio_path):
                    os.remove(audio_path)
            except OSError:
                pass

# ===============================
# ๐ŸŒ LANGUAGE-AWARE TRANSLITERATION & NORMALIZATION
# ===============================
def normalize_to_hindi(text):
    """Convert *text* to cleaned-up Devanagari.

    Steps:
      1. Best-effort transliteration to Devanagari, first from the ARABIC
         scheme and then from ITRANS. Each attempt is independent, so one
         failing (for example because the scheme is not available in the
         installed indic_transliteration) does not skip the other.
      2. Remove every character that is not Devanagari, a space, or one of
         ``. , ? ! ; : - ( )``; collapse whitespace and repeated ``.``/``?``/``!``.
      3. Append a danda (U+0964) when the text does not already end with
         sentence-final punctuation.

    Args:
        text: Raw transcript text; ``None`` and blank strings are accepted.

    Returns:
        The normalized string, or ``""`` when nothing is left.
    """
    import re

    if not text or not text.strip():
        return ""

    # Step 1: transliterate non-Devanagari scripts to Devanagari.
    for scheme_name in ("ARABIC", "ITRANS"):
        try:
            source_scheme = getattr(sanscript, scheme_name)
            text = transliterate(text, source_scheme, sanscript.DEVANAGARI)
        except Exception:
            # Deliberately best effort: keep whatever text we have so far.
            continue

    # Step 2: clean punctuation and spacing. Whitespace is normalised to
    # single spaces first; otherwise newlines/tabs would be deleted by the
    # filter below and the words on either side would be fused together.
    text = re.sub(r'\s+', ' ', text)
    text = re.sub(r'[^\u0900-\u097F\u0020\u002E\u002C\u003F\u0021\u003B\u003A\u002D\u0028\u0029]', '', text)
    text = re.sub(r'\s+', ' ', text).strip()
    # Collapse whole runs ("...", "? ?", "!!!") to a single mark.
    text = re.sub(r'\.(?:\s*\.)+', '.', text)
    text = re.sub(r'\?(?:\s*\?)+', '?', text)
    text = re.sub(r'!(?:\s*!)+', '!', text)

    # Step 3: add a full stop at the end if missing. The danda is written as
    # an escape so it cannot be damaged by a file-encoding mix-up.
    danda = "\u0964"
    if text and text[-1] not in danda + ".!?":
        text += danda

    return text

# ===============================
# 🎯 CORE TRANSCRIBE FUNCTION (ALWAYS OUTPUT HINDI)
# ===============================
def transcribe_to_hindi(url=None, file=None, lang_choice="Auto Detect"):
    """Transcribe a URL or an uploaded file and return Devanagari text.

    An uploaded *file* takes priority over *url*. Uploads ending in .mp3,
    .wav, .m4a or .ogg are transcribed directly; any other upload, and every
    URL download, goes through extract_audio() first. The transcript is passed
    through normalize_to_hindi() and prefixed with a short header (title and
    detected language).

    Args:
        url: Media URL to download, or None.
        file: Uploaded file: either a path string or an object exposing the
            path as ``.name``. May be None.
        lang_choice: Accepted for the UI binding but not used; the model is
            always called with ``language=None`` (auto-detect).

    Returns:
        The header plus transcript on success, otherwise a user-facing
        warning/error message. Exceptions are not propagated.
    """
    # Intermediate files created by this call (downloaded video, extracted
    # audio). They are deleted in ``finally``; the user's upload never is.
    scratch_files = []
    try:
        # ======== INPUT HANDLING ========
        url = url.strip() if isinstance(url, str) else url
        if file:
            # NOTE(review): some Gradio versions pass a tempfile-like object
            # (path in ``.name``) instead of a path string - confirm against
            # the pinned version. Both shapes are accepted here.
            file_path = getattr(file, "name", file)
            ext = os.path.splitext(file_path)[1].lower()
            title = os.path.basename(file_path)
            if ext in (".mp3", ".wav", ".m4a", ".ogg"):
                audio_path = file_path
            else:
                audio_path = extract_audio(file_path)
                scratch_files.append(audio_path)
        elif url:
            video_path, title = download_video(url)
            scratch_files.append(video_path)
            audio_path = extract_audio(video_path)
            scratch_files.append(audio_path)
        else:
            return "⚠️ Please paste a URL or upload a file."

        # Safety check
        if not os.path.exists(audio_path) or os.path.getsize(audio_path) < 5000:
            return "❌ Audio file too small or missing. Try again."

        # ======== TRANSCRIPTION ========
        model = load_whisper_model()
        segments, info = model.transcribe(
            audio_path,
            beam_size=5,
            best_of=3,
            patience=1.0,
            temperature=(0.0, 0.2, 0.4, 0.6, 0.8, 1.0),
            vad_filter=True,
            word_timestamps=False,
            language=None,  # Auto-detect
        )

        # Joined inside the try block so the audio file is still present
        # while the segments are being read.
        raw_text = " ".join(seg.text for seg in segments).strip()

        # ======== FORCE HINDI OUTPUT ========
        # Whatever language was detected, convert the text to Hindi script.
        final_text = normalize_to_hindi(raw_text)

        # Title & metadata header; the title can be missing for some sources.
        title = title or "Untitled"
        header = f"🎬 {title[:50]}{'...' if len(title) > 50 else ''}\n"
        header += f"🌐 Detected: {info.language or 'Unknown'} → 🇮🇳 Output: Hindi (Devanagari)\n\n"

        return header + final_text

    except Exception as e:
        # Top-level boundary for the UI: turn any failure into a message.
        err_msg = str(e).lower()
        if "instagram" in err_msg:
            return (
                "❌ Instagram URLs are blocked on Hugging Face.\n\n"
                "✅ Solution: Download the video manually (e.g., via online downloader), then upload it here."
            )
        elif "timeout" in err_msg or "network" in err_msg:
            return "⚠️ Network timeout. Try again or upload file directly."
        else:
            return f"❌ Error: {str(e)[:200]}..."
    finally:
        for path in scratch_files:
            try:
                os.remove(path)
            except OSError:
                pass

# ===============================
# 🎨 MODERN UI (HUGGING FACE OPTIMIZED)
# ===============================
# Custom stylesheet passed to gr.Blocks(css=...): dark gradient page, a
# translucent "glass" card, gradient primary buttons and tinted inputs.
CSS = """
/* Glassmorphism + Dark Gradient */
body {
    background: radial-gradient(circle at top, #0c1445, #1a2a6c, #2c3e50);
    font-family: 'Inter', system-ui, sans-serif;
}
.glass-card {
    background: rgba(255, 255, 255, 0.07);
    -webkit-backdrop-filter: blur(12px);
    backdrop-filter: blur(12px);
    border-radius: 20px;
    padding: 28px;
    box-shadow: 0 12px 32px rgba(0, 0, 0, 0.4);
    border: 1px solid rgba(255, 255, 255, 0.1);
}
.gr-button-primary {
    background: linear-gradient(135deg, #6a11cb 0%, #2575fc 100%);
    border: none;
    color: white;
    font-weight: 600;
    padding: 12px 24px;
    border-radius: 12px;
    transition: all 0.3s ease;
}
.gr-button-primary:hover {
    transform: translateY(-2px);
    box-shadow: 0 6px 15px rgba(37, 117, 252, 0.4);
}
.gr-input, .gr-textarea, .gr-dropdown {
    background: rgba(255, 255, 255, 0.08) !important;
    color: #e0e0ff !important;
    border: 1px solid rgba(255, 255, 255, 0.15) !important;
    border-radius: 10px;
}
.gr-markdown p, .gr-markdown h2 {
    color: #f0f4ff !important;
}
footer { display: none !important; }
.title {
    font-size: 2.2rem;
    font-weight: 800;
    background: linear-gradient(90deg, #ffd700, #ff8c00);
    -webkit-background-clip: text;
    background-clip: text;
    color: transparent;
    margin-bottom: 12px;
}
.subtitle {
    color: #a0d2eb;
    font-size: 1.1rem;
    margin-bottom: 24px;
}
.feature-badge {
    display: inline-block;
    background: rgba(106, 17, 203, 0.3);
    color: #ffd700;
    padding: 3px 10px;
    border-radius: 20px;
    font-size: 0.85rem;
    margin: 0 4px;
}"""

# Build the Gradio UI: a URL tab and a file-upload tab, each with its own
# button, feeding one shared read-only transcript box.
with gr.Blocks(
    css=CSS,
    # gr.themes.Color needs the complete c50..c950 shade set, so the previous
    # two/three-shade custom palettes could not be constructed. Built-in named
    # hues close to those colours are used instead; the CSS above still
    # supplies the exact brand colours.
    theme=gr.themes.Default(
        primary_hue="violet",
        secondary_hue="orange",
        neutral_hue="slate",
    ),
    title="🗣️ AI Hindi Transcript Studio",
) as demo:
    with gr.Column(elem_classes=["glass-card"]):
        gr.HTML("<div class='title'>AI Hindi Transcript Studio</div>")
        gr.HTML("<div class='subtitle'>Upload or paste any video → Get clean Devanagari Hindi transcript instantly</div>")

        gr.Markdown(
            "✨ Supports: YouTube, TikTok, Facebook, Twitter/X, Instagram (via upload), local files<br>"
            "⚡ Zero ffprobe errors • Auto-script conversion • Real-time cleanup"
        )

        with gr.Tabs():
            with gr.TabItem("🔗 URL"):
                url_input = gr.Textbox(
                    label="🎥 Video URL",
                    placeholder="https://youtu.be/...",
                    info="Instagram? Upload file instead (HF restriction)",
                )
                btn_url = gr.Button("🔊 Transcribe to Hindi", variant="primary", size="lg")

            with gr.TabItem("📂 File"):
                # NOTE(review): `info` may not be accepted by gr.File in every
                # Gradio release - confirm against the pinned version.
                file_input = gr.File(
                    label="📁 Upload Video/Audio",
                    file_types=["video", "audio"],
                    info="MP4, MOV, MP3, WAV, M4A, etc.",
                )
                btn_file = gr.Button("📖 Convert to Hindi", variant="primary", size="lg")

        # Hidden placeholder that fills the handler's third argument; the
        # output is always Hindi, so there is nothing for the user to choose.
        lang_dummy = gr.Dropdown(
            choices=["Auto (→ Hindi)"],
            value="Auto (→ Hindi)",
            interactive=False,
            visible=False,
        )

        output_box = gr.Textbox(
            label="📝 Hindi Transcript (Devanagari)",
            lines=16,
            max_lines=25,
            show_copy_button=True,
            interactive=False,
            elem_classes=["gr-textarea"],
        )

        gr.Markdown(
            "<div style='text-align:center; margin-top:20px; color:#a0d2eb; font-size:0.9rem;'>"
            "🚀 Powered by Faster-Whisper + Indic Transliteration | Deployed on Hugging Face Spaces"
            "</div>"
        )

    # Event bindings. Both buttons call the same handler; gr.State(None)
    # fills whichever of (url, file) the tab does not provide.
    btn_url.click(
        fn=transcribe_to_hindi,
        inputs=[url_input, gr.State(None), lang_dummy],
        outputs=output_box,
    )
    btn_file.click(
        fn=transcribe_to_hindi,
        inputs=[gr.State(None), file_input, lang_dummy],
        outputs=output_box,
    )

# Enable request queuing for HF Spaces (runs at import time, not only under
# __main__). Older Gradio releases take `concurrency_count`; newer ones
# replaced it with `default_concurrency_limit`, so use whichever keyword the
# installed `queue()` accepts.
if "default_concurrency_limit" in inspect.signature(demo.queue).parameters:
    demo.queue(default_concurrency_limit=2, max_size=10)
else:
    demo.queue(concurrency_count=2, max_size=10)

if __name__ == "__main__":
    # Listen on all interfaces on port 7860.
    demo.launch(server_name="0.0.0.0", server_port=7860)