Spaces:
Sleeping
Sleeping
File size: 4,457 Bytes
e8939bc 64644c3 6c69ff9 64644c3 6c69ff9 64644c3 7ffbf27 6c69ff9 7ffbf27 6c69ff9 64644c3 7ffbf27 e8939bc 64644c3 6c69ff9 64644c3 7ffbf27 64644c3 6c69ff9 64644c3 e8939bc 6c69ff9 64644c3 7ffbf27 6c69ff9 e8939bc 7ffbf27 6c69ff9 7ffbf27 e8939bc 64644c3 6c69ff9 64644c3 e8939bc 6c69ff9 64644c3 6c69ff9 64644c3 e8939bc 7ffbf27 e8939bc 6c69ff9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 |
import os, tempfile
import torch
import gradio as gr
from transformers import pipeline
from transformers.pipelines.audio_utils import ffmpeg_read
# -----------------------------
# CONFIG
# -----------------------------
ASR_MODEL = "openai/whisper-large-v3"  # Hugging Face hub id of the Whisper checkpoint
BATCH_SIZE = 8  # audio chunks decoded per forward pass
HAS_CUDA = torch.cuda.is_available()
DEVICE = 0 if HAS_CUDA else "cpu"  # transformers pipeline device: first GPU index, else CPU
DTYPE = torch.float16 if HAS_CUDA else torch.float32  # fp16 only when a GPU is present
# Build the ASR pipeline once at import time (downloads/loads the model).
# chunk_length_s=30 enables long-form transcription by processing the audio
# in 30-second windows.
asr_pipe = pipeline(
    task="automatic-speech-recognition",
    model=ASR_MODEL,
    device=DEVICE,
    torch_dtype=DTYPE,
    chunk_length_s=30,
)
def _save_text_file(text: str, suffix: str = ".txt") -> str:
fd, path = tempfile.mkstemp(suffix=suffix)
with os.fdopen(fd, "w", encoding="utf-8") as f:
f.write(text)
return path
def _resolve_path(x):
"""
รับค่า input ได้ทั้ง:
- str (filepath)
- dict ที่มี key 'name' หรือ 'path'
- gradio FileData (มี .path)
คืนค่าเป็น filepath เสมอ
"""
if x is None:
return None
if isinstance(x, str):
return x
# Gradio v4/v5 บางทีให้เป็น dict
if isinstance(x, dict):
return x.get("path") or x.get("name")
# Gradio FileData
path = getattr(x, "path", None)
if path:
return path
# เผื่อกรณี edge
return str(x)
def _transcribe_from_any(file_like, task: str):
    """Run Whisper on an uploaded media file and return ``(text, txt_path)``.

    ``file_like`` may be anything `_resolve_path` understands (filepath,
    dict payload, FileData). ``task`` is ``'transcribe'`` (keep source
    language) or ``'translate'`` (translate to English).

    Raises gr.Error when the resolved path is missing or the file is gone.
    """
    path = _resolve_path(file_like)
    if not path or not os.path.exists(path):
        raise gr.Error("ไม่พบไฟล์ที่อัปโหลด (path ว่างหรือไฟล์หาย)")

    # Read raw bytes and let ffmpeg decode any container (audio or video)
    # into mono float32 at the model's sampling rate.
    sample_rate = asr_pipe.feature_extractor.sampling_rate
    with open(path, "rb") as fh:
        raw_bytes = fh.read()
    waveform = ffmpeg_read(raw_bytes, sample_rate)

    result = asr_pipe(
        {"array": waveform, "sampling_rate": sample_rate},
        batch_size=BATCH_SIZE,
        generate_kwargs={"task": task},
        return_timestamps=True,
    )
    transcript = result["text"]
    return transcript, _save_text_file(transcript, ".txt")
def transcribe_mic(mic_path: str, task: str):
    """Transcribe a microphone recording (filepath from gr.Audio)."""
    return _transcribe_from_any(mic_path, task)
def transcribe_audio(file_path: str, task: str):
    """Transcribe an uploaded audio file (filepath from gr.Audio)."""
    return _transcribe_from_any(file_path, task)
def transcribe_video(video_file, task: str):
    """Transcribe an uploaded video (mp4, mov, webm, ...).

    ffmpeg_read inside `_transcribe_from_any` extracts the audio track.
    """
    return _transcribe_from_any(video_file, task)
# -----------------------------
# UI
# -----------------------------
def _add_tab(tab_label: str, make_source, handler):
    """Build one transcription tab: source widget, task picker, transcript box,
    download slot, and a Run button wired to *handler*."""
    with gr.Tab(tab_label):
        source = make_source()
        task = gr.Radio(["transcribe", "translate"], value="transcribe", label="Task")
        transcript = gr.Textbox(label="Transcript", lines=10)
        download = gr.File(label="Download Transcript (.txt)")
        gr.Button("Run").click(handler, inputs=[source, task], outputs=[transcript, download])


with gr.Blocks(title="Whisper V3 – Transcriber (Audio + MP4)") as demo:
    gr.Markdown("## 🎙️ Whisper V3 – Record/Upload Audio or MP4 → Transcript → Download (.txt)")
    _add_tab(
        "🎤 Microphone",
        lambda: gr.Audio(sources="microphone", type="filepath", label="Record"),
        transcribe_mic,
    )
    _add_tab(
        "📁 Audio file",
        lambda: gr.Audio(sources="upload", type="filepath", label="Upload audio"),
        transcribe_audio,
    )
    _add_tab(
        "🎬 Video MP4",
        # gr.File yields the most stable filepath for uploaded video containers.
        lambda: gr.File(file_count="single", file_types=["video"], label="Upload MP4 / Video"),
        transcribe_video,
    )

demo.queue().launch()
|