staraks committed on
Commit
8208346
·
verified ·
1 Parent(s): daf797c

Upload 6 files

Browse files
LICENSE_Version6.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2025 staraks486
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction...
README_Version6.md ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ```markdown
2
+ # Whisper Transcription Tool — Hugging Face Space (enhanced)
3
+
4
+ This Space-ready Gradio app transcribes audio files using multiple backends and exports TXT, SRT, merged DOCX, and ZIP outputs.
5
+
6
+ What's included:
7
+ - main.py: Gradio app with backend selection (openai-whisper, faster-whisper, openai-api), chunking, SRT export, merged DOCX, ZIP creation.
8
+ - requirements.txt: Python dependencies.
9
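+ - apt.txt: system dependency (ffmpeg). Hugging Face Spaces installs Debian packages from a file named packages.txt, so rename apt.txt to packages.txt when deploying.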
10
+
11
+ Quick deployment steps (CLI method)
12
+ 1. Install the Hugging Face CLI:
13
+ pip install huggingface-hub
14
+
15
+ 2. Login to the Hub:
16
+ huggingface-cli login
17
+ (enter your token from https://huggingface.co/settings/tokens)
18
+
19
+ 3. Create a new Space (replace <space-name> with your chosen name):
20
+ huggingface-cli repo create YOUR_USERNAME/<space-name> --type space --space-sdk gradio
21
+
22
+ 4. Clone the new Space repo:
23
+ git clone https://huggingface.co/spaces/YOUR_USERNAME/<space-name>
24
+ cd <space-name>
25
+
26
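+ 5. Copy main.py, requirements.txt, apt.txt, README.md into the repo directory (Spaces runs app.py by default, so either rename main.py to app.py or set `app_file: main.py` in the README.md front matter), then: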
27
+ git add -A
28
+ git commit -m "Initial commit: Whisper Transcription Space"
29
+ git push
30
+
31
+ 6. In the Space settings (web UI):
32
+ - If you plan to run medium/large models or faster-whisper, choose "Hardware accelerator: GPU".
33
+ - Add OPENAI_API_KEY as a Space secret if you want to use the openai-api backend securely.
34
+
35
+ 7. Wait for build to finish. The app will be available at:
36
+ https://huggingface.co/spaces/YOUR_USERNAME/<space-name>
37
+
38
+ Notes & tips
39
+ - Use smaller models (tiny/base) on CPU Spaces to avoid OOM. For better GPU performance use faster-whisper.
40
+ - If build fails due to memory or dependency issues, try removing faster-whisper from requirements.txt (or select CPU/GPU appropriately).
41
+ - You can paste an OpenAI API key into the UI for quick tests, but prefer saving it as a Space secret named OPENAI_API_KEY.
42
+
43
+ Next improvements:
44
+ - Streaming transcripts to the UI while running.
45
+ - Add VTT/JSON timestamp exports.
46
+ - Integrate remote storage backends (S3, Google Drive).
47
+ ```
apt_Version4.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ ffmpeg
gitignore_Version6.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ outputs/
2
+ *.zip
3
+ *.pyc
4
+ __pycache__/
5
+ .env
main_Version7.py ADDED
@@ -0,0 +1,449 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Whisper Transcription Tool (Gradio) — Spaces-ready with backend selection, chunking, SRT export, and OpenAI API option.
4
+
5
+ Features:
6
+ - Backend selection: "openai-whisper" (local), "faster-whisper" (local, faster on GPU), "openai-api" (hosted whisper-1).
7
+ - Optional audio chunking (split long files with ffmpeg) to avoid OOM and speed up processing.
8
+ - SRT export (from segments) and per-file .txt exports.
9
+ - Merged Word (.docx) export.
10
+ - Zipped download containing all generated transcripts (TXT + SRT) if requested.
11
+ - Gradio UI updated to select backend and options, and to accept an OpenAI API key (when using openai-api).
12
+
13
+ Notes:
14
+ - On Hugging Face Spaces, select GPU in Space settings to run medium/large models or faster-whisper efficiently.
15
+ - Store your OpenAI API key in the Space "Secrets" as OPENAI_API_KEY to avoid exposing it in the UI.
16
+ """
17
+
18
+ import os
19
+ import sys
20
+ import tempfile
21
+ import shutil
22
+ import subprocess
23
+ import traceback
24
+ from typing import List, Optional, Tuple
25
+ import json
26
+ import time
27
+ import zipfile
28
+
29
+ import gradio as gr
30
+ from docx import Document
31
+ import pyzipper
32
+
33
+ # optional imports
34
+ try:
35
+ import whisper # openai-whisper
36
+ except Exception:
37
+ whisper = None
38
+
39
+ try:
40
+ from faster_whisper import WhisperModel # faster-whisper
41
+ except Exception:
42
+ WhisperModel = None
43
+
44
+ try:
45
+ import openai # openai API
46
+ except Exception:
47
+ openai = None
48
+
49
# File extensions (lower-case, with leading dot) that are treated as audio input.
AUDIO_EXTS = {
    ".mp3",
    ".wav",
    ".m4a",
    ".flac",
    ".aac",
    ".ogg",
    ".webm",
    ".dat",
    ".dct",
}
# Default length of one chunk when long files are split: 15 minutes.
DEFAULT_CHUNK_SECONDS = 900
51
+
52
+
53
+ # -------------------------
54
+ # Utilities
55
+ # -------------------------
56
def save_as_word(text: str, filename: str = "merged_transcripts.docx") -> str:
    """Write ``text`` as a single paragraph into a new .docx file.

    Args:
        text: Content placed in the document's only paragraph.
        filename: Destination path of the Word file.

    Returns:
        The absolute path of the saved document.
    """
    doc = Document()
    doc.add_paragraph(text)
    doc.save(filename)
    saved_at = os.path.abspath(filename)
    return saved_at
61
+
62
+
63
def srt_time(sec: float) -> str:
    """Convert seconds to an SRT timecode ``HH:MM:SS,mmm``.

    The value is rounded to the nearest millisecond on the *total* duration
    and then split with integer arithmetic. Truncating the fractional part
    separately (as done previously) turned 2.3 s into ``,299`` because of
    binary floating-point error and could never carry into the seconds field.

    Args:
        sec: Time offset in seconds. ``None`` and negative values are
            clamped to zero because SRT has no negative timestamps.

    Returns:
        The formatted timecode string.
    """
    total_ms = max(0, round(float(sec or 0.0) * 1000))
    hours, remainder = divmod(total_ms, 3600 * 1000)
    minutes, remainder = divmod(remainder, 60 * 1000)
    seconds, millis = divmod(remainder, 1000)
    return f"{hours:02}:{minutes:02}:{seconds:02},{millis:03}"
70
+
71
+
72
def segments_to_srt(segments: List[dict]) -> str:
    """Render timed segments as SRT subtitle text.

    Each segment is a mapping with optional ``start``, ``end`` and ``text``
    keys. A missing ``start`` counts as 0.0, a missing ``end`` as one second
    after the start, and a missing ``text`` as the empty string. Cues are
    numbered from 1 and each cue is followed by an empty line.
    """
    cue_lines = []
    number = 0
    for segment in segments:
        number += 1
        begin_code = srt_time(segment.get("start", 0.0))
        finish_code = srt_time(segment.get("end", segment.get("start", 0.0) + 1.0))
        caption = segment.get("text", "").strip()
        cue_lines.extend([
            f"{number}",
            f"{begin_code} --> {finish_code}",
            caption,
            "",  # blank separator line between cues
        ])
    return "\n".join(cue_lines)
84
+
85
+
86
def safe_mkdir(path: str):
    """Create ``path`` (including missing parents); an existing directory is not an error."""
    os.makedirs(name=path, exist_ok=True)
88
+
89
+
90
def chunk_audio_ffmpeg(input_path: str, out_dir: str, chunk_seconds: int) -> List[str]:
    """Split an audio file into fixed-length chunks with ffmpeg's segment muxer.

    A stream copy (``-c copy``) into the input's own container is tried first.
    If ffmpeg exits with an error, any partial chunks from that attempt are
    removed and the file is re-encoded to 16 kHz mono WAV chunks instead.

    Args:
        input_path: Audio file to split.
        out_dir: Directory that receives the ``chunk_NNNN`` files (created
            if missing).
        chunk_seconds: Target length of each chunk in seconds.

    Returns:
        Sorted list of the chunk file paths found in ``out_dir``.

    Raises:
        RuntimeError: If both the stream-copy attempt and the WAV fallback fail.
    """
    safe_mkdir(out_dir)
    _, ext = os.path.splitext(input_path)
    base_cmd = [
        "ffmpeg", "-y", "-i", input_path,
        "-f", "segment",
        "-segment_time", str(chunk_seconds),
    ]

    def _list_chunks() -> List[str]:
        # Only report files this function produced, not whatever else is in out_dir.
        return sorted(
            os.path.join(out_dir, name)
            for name in os.listdir(out_dir)
            if name.startswith("chunk_")
        )

    copy_pattern = os.path.join(out_dir, "chunk_%04d" + ext)
    try:
        subprocess.check_output(base_cmd + ["-c", "copy", copy_pattern], stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as copy_err:
        # A failed copy run can leave partial chunks behind; drop them so they
        # are not returned alongside (and transcribed in addition to) the
        # fallback chunks.
        for stale in _list_chunks():
            try:
                os.remove(stale)
            except OSError:
                pass  # best effort; the fallback below still runs
        wav_pattern = os.path.join(out_dir, "chunk_%04d.wav")
        try:
            subprocess.check_output(
                base_cmd + ["-ar", "16000", "-ac", "1", wav_pattern],
                stderr=subprocess.STDOUT,
            )
        except Exception as fallback_err:
            raise RuntimeError(
                f"ffmpeg chunking failed: {copy_err}\nFallback failed: {fallback_err}"
            ) from fallback_err
    return _list_chunks()
122
+
123
+
124
def _extract_audio_from_zip(zip_path: str, password: Optional[str], extract_dir: str, logs: List[str]) -> List[str]:
    """Extract the audio members of a zip archive into ``extract_dir``.

    Only members whose extension is in ``AUDIO_EXTS`` are extracted. Problems
    are reported by appending messages to ``logs`` rather than by raising.

    Args:
        zip_path: Path of the archive to read.
        password: Optional archive password (encoded before use).
        extract_dir: Destination directory (created if missing).
        logs: List that receives progress and error messages.

    Returns:
        Absolute paths of the files that were actually written.
    """
    extracted_paths: List[str] = []
    try:
        with pyzipper.ZipFile(zip_path, "r") as zf:
            if password:
                try:
                    zf.setpassword(password.encode())
                except RuntimeError:
                    logs.append("Error: Incorrect password for the zip file.")
                    return []
            os.makedirs(extract_dir, exist_ok=True)
            for info in zf.infolist():
                if info.is_dir():
                    continue
                _, ext = os.path.splitext(info.filename)
                if ext.lower() not in AUDIO_EXTS:
                    continue
                try:
                    # extract() returns the path it really wrote. Member names
                    # are sanitized on extraction (absolute prefixes and ".."
                    # parts are dropped), so rebuilding the path from
                    # info.filename can point at a file that does not exist.
                    written_path = zf.extract(info, path=extract_dir)
                except Exception as e:
                    logs.append(f"Error extracting {info.filename}: {e}")
                else:
                    extracted_paths.append(os.path.abspath(written_path))
                    logs.append(f"Extracted: {info.filename}")
    except pyzipper.BadZipFile:
        logs.append("Error: Invalid zip file format.")
    except FileNotFoundError:
        logs.append("Error: Zip file not found.")
    except Exception as e:
        logs.append(f"Unexpected error while extracting zip: {e}\n{traceback.format_exc()}")
    return extracted_paths
154
+
155
+
156
+ # -------------------------
157
+ # Backend wrappers
158
+ # -------------------------
159
def transcribe_with_openai_whisper(model, audio_path: str, **kwargs) -> Tuple[str, List[dict]]:
    """Run ``model.transcribe`` on one file and unpack its result mapping.

    Args:
        model: Object exposing ``transcribe(path, **kwargs)`` that returns a
            mapping with ``text`` and ``segments`` entries.
        audio_path: File to transcribe.
        **kwargs: Forwarded unchanged to ``model.transcribe``.

    Returns:
        ``(text, segments)``; a missing text becomes ``""`` and missing or
        empty segments become ``[]``.
    """
    outcome = model.transcribe(audio_path, **kwargs)
    return outcome.get("text", ""), (outcome.get("segments", []) or [])
164
+
165
+
166
def transcribe_with_faster_whisper(model_obj, audio_path: str, **kwargs) -> Tuple[str, List[dict]]:
    """Transcribe one file with a faster-whisper style model.

    Three result shapes from ``model_obj.transcribe`` are accepted:
    a mapping with ``text``/``segments``, a ``(segments, info)`` pair, or a
    bare iterable of segment objects exposing ``start``, ``end`` and ``text``.

    Args:
        model_obj: Object exposing ``transcribe(path, **kwargs)``.
        audio_path: File to transcribe.
        **kwargs: Forwarded unchanged to ``model_obj.transcribe``.

    Returns:
        ``(text, segments)`` where ``segments`` is a list of dicts with
        ``start``, ``end`` and ``text`` keys, and ``text`` is the segment
        texts joined by single spaces.
    """
    result = model_obj.transcribe(audio_path, **kwargs)
    if isinstance(result, dict):
        return result.get("text", ""), (result.get("segments", []) or [])

    # For a (segments, info) pair only the segment iterable is needed.
    is_pair = isinstance(result, tuple) and len(result) == 2
    raw_segments = result[0] if is_pair else result

    segments = [{"start": s.start, "end": s.end, "text": s.text} for s in raw_segments]
    # Join from the dicts just built (the earlier code read ``.text`` off these
    # dicts and raised AttributeError). Pieces are stripped so segment texts
    # that carry their own leading space do not produce double spaces.
    text = " ".join(seg["text"].strip() for seg in segments)
    return text, segments
188
+
189
+
190
def transcribe_with_openai_api(api_key: str, audio_path: str, model_name: str = "whisper-1") -> Tuple[str, List[dict]]:
    """Transcribe one file through the hosted OpenAI transcription endpoint.

    Works with both generations of the ``openai`` package: the client-based
    interface (``openai.OpenAI``) when it is available, otherwise the legacy
    module-level ``openai.Audio.transcribe``. ``verbose_json`` is requested so
    that timed segments are returned and SRT export can work for this backend.

    Args:
        api_key: OpenAI API key used for this request.
        audio_path: File to upload.
        model_name: Hosted model identifier.

    Returns:
        ``(text, segments)`` where ``segments`` is a list of dicts with
        ``start``, ``end`` and ``text`` keys (empty if none were returned).

    Raises:
        RuntimeError: If the ``openai`` package is not installed.
    """
    if openai is None:
        raise RuntimeError("openai package not installed")

    def _field(obj, key, default=None):
        # Responses are dict-like in the legacy package and attribute-based
        # objects in the client-based one; read either form.
        if isinstance(obj, dict):
            return obj.get(key, default)
        return getattr(obj, key, default)

    with open(audio_path, "rb") as f:
        if hasattr(openai, "OpenAI"):
            # A per-call client keeps a user-supplied key out of process-global state.
            client = openai.OpenAI(api_key=api_key)
            resp = client.audio.transcriptions.create(
                model=model_name, file=f, response_format="verbose_json"
            )
        else:
            openai.api_key = api_key
            resp = openai.Audio.transcribe(model_name, f, response_format="verbose_json")

    text = _field(resp, "text", "") or ""
    segments = [
        {
            "start": _field(seg, "start", 0.0),
            "end": _field(seg, "end", 0.0),
            "text": _field(seg, "text", "") or "",
        }
        for seg in (_field(resp, "segments") or [])
    ]
    return text, segments
207
+
208
+
209
+ # -------------------------
210
+ # Main processing function
211
+ # -------------------------
212
def _probe_duration_seconds(path: str) -> float:
    """Return the duration ffprobe reports for ``path``, or 0.0 if it cannot be read."""
    cmd = [
        "ffprobe", "-v", "error", "-show_entries", "format=duration",
        "-of", "default=noprint_wrappers=1:nokey=1", path,
    ]
    try:
        return float(subprocess.check_output(cmd, stderr=subprocess.DEVNULL).decode().strip())
    except Exception:
        # Best effort: without a duration the caller simply skips chunking.
        return 0.0


def _shift_segments(segments: List[dict], offset: float) -> List[dict]:
    """Return copies of ``segments`` with ``start``/``end`` moved later by ``offset`` seconds."""
    if not offset:
        return list(segments)
    shifted = []
    for seg in segments:
        moved = dict(seg)
        for key in ("start", "end"):
            if moved.get(key) is not None:
                moved[key] = moved[key] + offset
        shifted.append(moved)
    return shifted


def process_files(
    audio_list: Optional[List[str]],
    backend: str,
    model_name: str,
    use_chunks: bool,
    chunk_seconds: int,
    export_srt: bool,
    merge_docx: bool,
    zip_outputs: bool,
    zip_file: Optional[str],
    zip_password: Optional[str],
    openai_api_key_input: Optional[str],
):
    """Transcribe the given audio files and write the requested outputs.

    Inputs are the uploaded files plus any audio extracted from ``zip_file``.
    Each file is optionally split into chunks, transcribed with the selected
    backend, and saved as ``outputs/<name>.txt`` (and ``.srt`` when requested
    and segments are available). Optionally a merged DOCX and a ZIP of all
    created files are produced in the working directory.

    Args:
        audio_list: Paths of uploaded audio files, or None.
        backend: "openai-whisper", "faster-whisper" or "openai-api".
        model_name: Local model name (the hosted backend always uses "whisper-1").
        use_chunks: Whether files longer than ``chunk_seconds`` are split.
        chunk_seconds: Chunk length in seconds.
        export_srt: Whether to write an SRT file per input.
        merge_docx: Whether to write one DOCX containing all transcripts.
        zip_outputs: Whether to bundle all created files into a ZIP.
        zip_file: Optional path of a zip archive containing audio.
        zip_password: Optional password for ``zip_file``.
        openai_api_key_input: API key typed in the UI; falls back to the
            OPENAI_API_KEY environment variable when empty.

    Returns:
        A 4-tuple ``(log_text, transcripts_text, docx_update, zip_update)``
        where the last two are ``gr.update`` values for the download widgets.
        Errors are reported in ``log_text``; nothing is raised to the caller.
    """
    logs: List[str] = []
    transcript_outputs: List[str] = []
    created_files: List[str] = []
    temp_dirs: List[str] = []

    def _abort():
        # Result shape for every early exit: logs only, both downloads hidden.
        return (
            "\n".join(logs),
            "",
            gr.update(value=None, visible=False),
            gr.update(value=None, visible=False),
        )

    try:
        # A UI slider may deliver a float; normalize once for comparisons, ffmpeg and logs.
        chunk_seconds = int(chunk_seconds or 0)

        extracted_paths: List[str] = []
        if zip_file:
            logs.append(f"Processing zip file: {zip_file}")
            temp_extract_dir = tempfile.mkdtemp(prefix="extracted_audio_")
            temp_dirs.append(temp_extract_dir)
            extracted_paths = _extract_audio_from_zip(zip_file, zip_password, temp_extract_dir, logs)

        all_audio_files: List[str] = []
        if audio_list:
            all_audio_files.extend(os.path.abspath(p) for p in audio_list if p)
        all_audio_files.extend(extracted_paths)

        if not all_audio_files:
            logs.append("No audio files provided.")
            return _abort()

        model_local = None
        faster_model = None
        api_key = openai_api_key_input or os.environ.get("OPENAI_API_KEY")

        if backend == "openai-whisper":
            if whisper is None:
                logs.append("openai-whisper package not installed.")
                return _abort()
            logs.append(f"Loading openai-whisper model: {model_name}")
            model_local = whisper.load_model(model_name)
            logs.append("Model loaded.")
        elif backend == "faster-whisper":
            if WhisperModel is None:
                logs.append("faster-whisper package not installed.")
                return _abort()
            logs.append(f"Loading faster-whisper model: {model_name}")
            device = "cuda" if (os.environ.get("CUDA_VISIBLE_DEVICES") or os.path.exists('/usr/local/cuda')) else "cpu"
            faster_model = WhisperModel(model_name, device=device)
            logs.append("Faster-Whisper model loaded.")
        elif backend == "openai-api":
            if openai is None:
                logs.append("openai package not installed.")
                return _abort()
            if not api_key:
                logs.append("OpenAI API key not provided (use Space secret OPENAI_API_KEY or enter in UI).")
                return _abort()
            logs.append("Using OpenAI hosted Whisper (whisper-1).")

        out_dir = os.path.abspath("outputs")
        safe_mkdir(out_dir)
        used_bases = set()  # output stems already written during this run

        for audio_path in all_audio_files:
            try:
                if not os.path.exists(audio_path):
                    logs.append(f"Missing file, skipping: {audio_path}")
                    transcript_outputs.append(f"Could not transcribe {os.path.basename(audio_path)} — missing.")
                    continue
                _, ext = os.path.splitext(audio_path)
                if ext.lower() not in AUDIO_EXTS:
                    logs.append(f"Skipping unsupported file type: {audio_path}")
                    transcript_outputs.append(f"Skipped unsupported {os.path.basename(audio_path)}.")
                    continue

                logs.append(f"Processing: {os.path.basename(audio_path)}")
                to_transcribe_paths = [audio_path]
                chunked = False

                if use_chunks and chunk_seconds > 0:
                    duration = _probe_duration_seconds(audio_path)
                    if duration > chunk_seconds:
                        logs.append(f"Chunking {os.path.basename(audio_path)} ({int(duration)}s) into {chunk_seconds}s parts...")
                        chunk_dir = tempfile.mkdtemp(prefix="chunks_")
                        temp_dirs.append(chunk_dir)
                        try:
                            chunks = chunk_audio_ffmpeg(audio_path, chunk_dir, chunk_seconds)
                            if chunks:
                                to_transcribe_paths = chunks
                                chunked = True
                                logs.append(f"Created {len(chunks)} chunks.")
                            else:
                                logs.append("No chunks created, using original file.")
                        except Exception as e:
                            logs.append(f"Chunking failed, will use original file. Error: {e}")

                combined_texts = []
                combined_segments = []
                # Each chunk is transcribed with timestamps starting at zero, so
                # its segments are moved by the running length of the chunks
                # before it to keep the SRT aligned with the original file.
                offset = 0.0
                for piece in to_transcribe_paths:
                    try:
                        if backend == "openai-whisper":
                            text, segments = transcribe_with_openai_whisper(model_local, piece)
                        elif backend == "faster-whisper":
                            text, segments = transcribe_with_faster_whisper(faster_model, piece)
                        elif backend == "openai-api":
                            text, segments = transcribe_with_openai_api(api_key, piece, model_name="whisper-1")
                        else:
                            raise RuntimeError("Unknown backend")

                        if text:
                            combined_texts.append(text.strip())
                        if segments:
                            combined_segments.extend(_shift_segments(segments, offset))
                        logs.append(f"Transcribed: {os.path.basename(piece)}")
                    except Exception as e:
                        logs.append(f"Error transcribing {os.path.basename(piece)}: {e}\n{traceback.format_exc()}")
                        combined_texts.append(f"[Error transcribing {os.path.basename(piece)}]")
                    if chunked:
                        # Advance even when a chunk failed so later chunks stay aligned;
                        # fall back to the nominal length if the chunk cannot be probed.
                        offset += _probe_duration_seconds(piece) or float(chunk_seconds)

                final_text = "\n".join(combined_texts).strip()
                transcript_outputs.append(f"Transcript for {os.path.basename(audio_path)}:\n{final_text}")

                # Inputs with the same file name (e.g. from different zip folders)
                # get a numeric suffix instead of overwriting each other.
                stem = os.path.splitext(os.path.basename(audio_path))[0]
                base = stem
                suffix = 1
                while base in used_bases:
                    suffix += 1
                    base = f"{stem}_{suffix}"
                used_bases.add(base)

                txt_path = os.path.join(out_dir, f"{base}.txt")
                with open(txt_path, "w", encoding="utf-8") as f:
                    f.write(final_text)
                created_files.append(txt_path)
                logs.append(f"Saved TXT: {txt_path}")

                if export_srt and combined_segments:
                    srt_text = segments_to_srt(combined_segments)
                    srt_path = os.path.join(out_dir, f"{base}.srt")
                    with open(srt_path, "w", encoding="utf-8") as f:
                        f.write(srt_text)
                    created_files.append(srt_path)
                    logs.append(f"Saved SRT: {srt_path}")

            except Exception as e:
                logs.append(f"Fatal error while processing {audio_path}: {e}\n{traceback.format_exc()}")
                transcript_outputs.append(f"Could not transcribe {os.path.basename(audio_path)} due to an error.")

        merged_docx_path = None
        if merge_docx:
            combined_all = "\n\n---\n\n".join(transcript_outputs)
            if combined_all.strip():
                merged_docx_path = save_as_word(combined_all, filename=os.path.abspath("merged_transcripts.docx"))
                created_files.append(merged_docx_path)
                logs.append(f"Saved merged DOCX: {merged_docx_path}")

        zip_path = None
        if zip_outputs and created_files:
            zip_path = os.path.abspath("transcripts_outputs.zip")
            with zipfile.ZipFile(zip_path, "w", compression=zipfile.ZIP_DEFLATED) as zf:
                for fpath in created_files:
                    zf.write(fpath, arcname=os.path.basename(fpath))
            logs.append(f"Created outputs ZIP: {zip_path}")

        docx_update = gr.update(value=merged_docx_path, visible=bool(merged_docx_path))
        zip_update = gr.update(value=zip_path, visible=bool(zip_path))
        return ("\n".join(logs), "\n\n".join(transcript_outputs), docx_update, zip_update)

    except Exception as e:
        logs.append(f"Unhandled error: {e}\n{traceback.format_exc()}")
        return _abort()
    finally:
        # Runs on every exit path (success, early return, error) so extracted
        # archives and chunk directories never accumulate on disk.
        for d in temp_dirs:
            shutil.rmtree(d, ignore_errors=True)
388
+
389
+
390
+ # -------------------------
391
+ # Gradio UI
392
+ # -------------------------
393
def build_ui():
    """Build the Gradio Blocks interface and wire it to ``process_files``.

    Returns:
        The constructed ``gr.Blocks`` app (not yet launched).
    """
    with gr.Blocks() as demo:
        gr.Markdown("## Whisper Transcription Tool — Spaces-ready\nSelect backend, upload audio files or a ZIP, and choose options like chunking, SRT export, and merged DOCX/ZIP outputs.")

        with gr.Row():
            backend_dropdown = gr.Dropdown(choices=["openai-whisper", "faster-whisper", "openai-api"], value="openai-whisper", label="Backend")
            model_dropdown = gr.Dropdown(
                choices=["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large"],
                value="base",
                label="Model"
            )

        with gr.Row():
            audio_input = gr.File(file_count="multiple", type="filepath", label="Upload Audio Files (Optional)")
            zip_input = gr.File(file_count="single", type="filepath", label="Upload Zip of Audio (Optional)")
            zip_password = gr.Textbox(label="Zip Password (Optional)", type="password")

        with gr.Row():
            use_chunks = gr.Checkbox(label="Enable chunking for long files (recommended for large files)", value=True)
            chunk_seconds = gr.Slider(minimum=60, maximum=3600, value=DEFAULT_CHUNK_SECONDS, step=60, label="Chunk length (seconds)")

        with gr.Row():
            export_srt = gr.Checkbox(label="Export SRT files (timestamped subtitles)", value=True)
            merge_docx = gr.Checkbox(label="Merge transcripts into one DOCX", value=False)
            zip_outputs = gr.Checkbox(label="Produce ZIP with all outputs (TXT/SRT/DOCX)", value=True)

        # Never prefill this field from the environment: a component's initial
        # value is sent to every visitor's browser, which would leak the
        # OPENAI_API_KEY secret. process_files already falls back to the
        # environment variable when the field is left empty. The field starts
        # hidden because the default backend does not use it.
        openai_key = gr.Textbox(
            label="OpenAI API Key (only needed for openai-api backend)",
            type="password",
            placeholder="Leave empty to use the OPENAI_API_KEY secret",
            visible=False,
        )

        transcribe_btn = gr.Button("Start Transcription")

        log_output = gr.Textbox(label="Log Output", lines=12)
        transcript_output = gr.Textbox(label="Transcripts", lines=20)
        docx_file_output = gr.File(label="Download Merged Transcript (.docx)", visible=False)
        zip_file_output = gr.File(label="Download Outputs (.zip)", visible=False)

        def toggle_openai_key(backend_choice):
            # Show the key field only while the hosted backend is selected.
            return gr.update(visible=(backend_choice == "openai-api"))

        backend_dropdown.change(
            toggle_openai_key,
            inputs=[backend_dropdown],
            outputs=[openai_key]
        )

        transcribe_btn.click(
            process_files,
            inputs=[audio_input, backend_dropdown, model_dropdown, use_chunks, chunk_seconds, export_srt, merge_docx, zip_outputs, zip_input, zip_password, openai_key],
            outputs=[log_output, transcript_output, docx_file_output, zip_file_output],
        )

    return demo
444
+
445
+
446
if __name__ == "__main__":
    app = build_ui()
    port = int(os.environ.get("PORT", 7860))
    # Queueing is enabled through Blocks.queue(); the ``enable_queue`` keyword
    # of launch() is no longer accepted by current Gradio releases, and the
    # requirement on gradio is not pinned to an older version.
    app.queue()
    app.launch(server_name="0.0.0.0", server_port=port)
requirements_Version5.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ gradio>=3.0
2
+ openai-whisper>=20230314
3
+ faster-whisper>=0.7.0
4
+ openai>=0.27.0
5
+ pyzipper>=0.3.6
6
+ python-docx>=0.8.11