# Hugging Face Space: Whisper V3 transcriber (Gradio app)
import os, tempfile

import torch
import gradio as gr
from transformers import pipeline
from transformers.pipelines.audio_utils import ffmpeg_read

# -----------------------------
# CONFIG
# -----------------------------
ASR_MODEL = "openai/whisper-large-v3"  # HF Hub model id loaded into the ASR pipeline
BATCH_SIZE = 8  # batch size handed to the pipeline per call (see _transcribe_from_any)
HAS_CUDA = torch.cuda.is_available()
# transformers pipeline device: 0 = first CUDA GPU, "cpu" otherwise.
DEVICE = 0 if HAS_CUDA else "cpu"
# fp16 only on GPU; CPU inference stays in fp32.
DTYPE = torch.float16 if HAS_CUDA else torch.float32

# Built once at import time; downloads model weights on the first run.
asr_pipe = pipeline(
    task="automatic-speech-recognition",
    model=ASR_MODEL,
    device=DEVICE,
    torch_dtype=DTYPE,
    chunk_length_s=30,  # chunked long-form decoding window, in seconds
)
| def _save_text_file(text: str, suffix: str = ".txt") -> str: | |
| fd, path = tempfile.mkstemp(suffix=suffix) | |
| with os.fdopen(fd, "w", encoding="utf-8") as f: | |
| f.write(text) | |
| return path | |
| def _resolve_path(x): | |
| """ | |
| รับค่า input ได้ทั้ง: | |
| - str (filepath) | |
| - dict ที่มี key 'name' หรือ 'path' | |
| - gradio FileData (มี .path) | |
| คืนค่าเป็น filepath เสมอ | |
| """ | |
| if x is None: | |
| return None | |
| if isinstance(x, str): | |
| return x | |
| # Gradio v4/v5 บางทีให้เป็น dict | |
| if isinstance(x, dict): | |
| return x.get("path") or x.get("name") | |
| # Gradio FileData | |
| path = getattr(x, "path", None) | |
| if path: | |
| return path | |
| # เผื่อกรณี edge | |
| return str(x) | |
def _transcribe_from_any(file_like, task: str):
    """Run Whisper on an uploaded audio/video file.

    Parameters
    ----------
    file_like : str | dict | object
        Anything ``_resolve_path`` can turn into a filepath.
    task : str
        ``'transcribe'`` keeps the source language; ``'translate'``
        produces English output.

    Returns
    -------
    tuple[str, str]
        The transcript text and the path of a .txt file containing it.

    Raises
    ------
    gr.Error
        If the resolved path is empty or the file no longer exists.
    """
    path = _resolve_path(file_like)
    if not path or not os.path.exists(path):
        # Thai: "Uploaded file not found (path empty or file missing)."
        raise gr.Error("ไม่พบไฟล์ที่อัปโหลด (path ว่างหรือไฟล์หาย)")
    # Read the raw bytes and let ffmpeg decode them to mono float32 at the
    # sampling rate the model's feature extractor expects. This also works
    # for video containers, since ffmpeg extracts the audio stream.
    with open(path, "rb") as f:
        payload = f.read()
    audio = ffmpeg_read(payload, asr_pipe.feature_extractor.sampling_rate)
    inputs = {"array": audio, "sampling_rate": asr_pipe.feature_extractor.sampling_rate}
    out = asr_pipe(
        inputs,
        batch_size=BATCH_SIZE,
        generate_kwargs={"task": task},  # 'transcribe' = source language, 'translate' = English
        return_timestamps=True,  # timestamped decoding for long-form audio
    )
    text = out["text"]
    return text, _save_text_file(text, ".txt")
def transcribe_mic(mic_path: str, task: str):
    """Gradio handler: transcribe a microphone recording (filepath input)."""
    return _transcribe_from_any(mic_path, task)
def transcribe_audio(file_path: str, task: str):
    """Gradio handler: transcribe an uploaded audio file (filepath input)."""
    return _transcribe_from_any(file_path, task)
def transcribe_video(video_file, task: str):
    """Gradio handler: transcribe the audio track of an uploaded video.

    Accepts any container ffmpeg understands (mp4, mov, webm, ...);
    ffmpeg_read extracts the audio downstream.
    """
    return _transcribe_from_any(video_file, task)
# -----------------------------
# UI
# -----------------------------
with gr.Blocks(title="Whisper V3 – Transcriber (Audio + MP4)") as demo:
    gr.Markdown("## 🎙️ Whisper V3 – Record/Upload Audio or MP4 → Transcript → Download (.txt)")

    # Tab 1: record directly from the microphone.
    with gr.Tab("🎤 Microphone"):
        mic_audio = gr.Audio(sources="microphone", type="filepath", label="Record")
        mic_task = gr.Radio(["transcribe", "translate"], value="transcribe", label="Task")
        mic_text = gr.Textbox(label="Transcript", lines=10)
        mic_file = gr.File(label="Download Transcript (.txt)")
        gr.Button("Run").click(transcribe_mic, inputs=[mic_audio, mic_task], outputs=[mic_text, mic_file])

    # Tab 2: upload an audio file.
    with gr.Tab("📁 Audio file"):
        up_audio = gr.Audio(sources="upload", type="filepath", label="Upload audio")
        up_task = gr.Radio(["transcribe", "translate"], value="transcribe", label="Task")
        up_text = gr.Textbox(label="Transcript", lines=10)
        up_file = gr.File(label="Download Transcript (.txt)")
        gr.Button("Run").click(transcribe_audio, inputs=[up_audio, up_task], outputs=[up_text, up_file])

    # Tab 3: upload a video; the audio track is transcribed.
    with gr.Tab("🎬 Video MP4"):
        # gr.File gives the most stable filepath for arbitrary uploads.
        up_video = gr.File(file_count="single", file_types=["video"], label="Upload MP4 / Video")
        vd_task = gr.Radio(["transcribe", "translate"], value="transcribe", label="Task")
        vd_text = gr.Textbox(label="Transcript", lines=10)
        vd_file = gr.File(label="Download Transcript (.txt)")
        gr.Button("Run").click(transcribe_video, inputs=[up_video, vd_task], outputs=[vd_text, vd_file])

# Queue requests so long transcriptions don't block concurrent users.
demo.queue().launch()