Spaces:

PPloychor
/

Youtubetranscript

Sleeping

App Files Files Community

PPloychor committed on Nov 4, 2025

Commit

e8939bc

verified ·

1 Parent(s): abd5d27

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -18

app.py CHANGED Viewed

@@ -1,4 +1,4 @@
-import os, time, tempfile
 import torch
 import gradio as gr
 from transformers import pipeline
@@ -13,7 +13,7 @@ HAS_CUDA = torch.cuda.is_available()
 DEVICE = 0 if HAS_CUDA else "cpu"
 DTYPE = torch.float16 if HAS_CUDA else torch.float32
-# Create the model once
 asr_pipe = pipeline(
     task="automatic-speech-recognition",
     model=ASR_MODEL,
@@ -23,45 +23,45 @@ asr_pipe = pipeline(
 )
 def _save_text_file(text: str, suffix: str = ".txt") -> str:
-    """Save the text to a temporary file and return its path (so that Gradio can create a download button)."""
     fd, path = tempfile.mkstemp(suffix=suffix)
     with os.fdopen(fd, "w", encoding="utf-8") as f:  # wrap the raw descriptor so it is closed when the block exits
         f.write(text)
     return path
-def _transcribe_from_path(audio_path: str, task: str) -> tuple[str, str]:
-    """Read the audio file -> transcribe it -> return (text, path of the .txt file for download)"""
-    if not audio_path:
-        raise gr.Error("โปรดอัปโหลดหรืออัดเสียงก่อน")
-    # Read the file as bytes and let ffmpeg convert it to a waveform (float32 mono)
-    with open(audio_path, "rb") as f:
         payload = f.read()
     audio = ffmpeg_read(payload, asr_pipe.feature_extractor.sampling_rate)
     inputs = {"array": audio, "sampling_rate": asr_pipe.feature_extractor.sampling_rate}
-    # task = "transcribe" (keep the original language) or "translate" (translate to English)
     out = asr_pipe(
         inputs,
         batch_size=BATCH_SIZE,
-        generate_kwargs={"task": task},
         return_timestamps=True,
     )
     text = out["text"]  # timestamps are requested above, but only the plain text is used
-    txt_path = _save_text_file(text, suffix=".txt")
-    return text, txt_path
 def transcribe_mic(mic_path: str, task: str):
     """Forward *mic_path* and *task* unchanged to ``_transcribe_from_path`` and return its result."""
     return _transcribe_from_path(mic_path, task)
-def transcribe_file(file_path: str, task: str):  # thin wrapper: forwards file_path and task unchanged
     return _transcribe_from_path(file_path, task)
 # -----------------------------
 # UI
 # -----------------------------
-with gr.Blocks(title="Whisper V3 – Transcriber") as demo:
-    gr.Markdown("## 🎙️ Whisper V3 – Upload/Record → Transcript → Download (.txt)")
     with gr.Tab("🎤 Microphone"):
         mic_audio = gr.Audio(sources="microphone", type="filepath", label="Record")
@@ -75,6 +75,13 @@ with gr.Blocks(title="Whisper V3 – Transcriber") as demo:
         up_task   = gr.Radio(["transcribe", "translate"], value="transcribe", label="Task")
         up_text   = gr.Textbox(label="Transcript", lines=10)
         up_file   = gr.File(label="Download Transcript (.txt)")
-        gr.Button("Run").click(transcribe_file, inputs=[up_audio, up_task], outputs=[up_text, up_file])
 demo.queue().launch()

+import os, tempfile
 import torch
 import gradio as gr
 from transformers import pipeline
 DEVICE = 0 if HAS_CUDA else "cpu"
 DTYPE = torch.float16 if HAS_CUDA else torch.float32
+# Load the model once
 asr_pipe = pipeline(
     task="automatic-speech-recognition",
     model=ASR_MODEL,
 )
 def _save_text_file(text: str, suffix: str = ".txt") -> str:
     """Write *text* to a newly created temporary file and return the file's path.

     The file is created with ``tempfile.mkstemp`` using *suffix* and written
     as UTF-8. This function does not delete the file; the caller receives the
     path and owns whatever happens to it afterwards.
     """
     fd, path = tempfile.mkstemp(suffix=suffix)
     # Wrap the raw descriptor so it is closed when the block exits.
     with os.fdopen(fd, "w", encoding="utf-8") as f:
         f.write(text)
     return path
+def _transcribe_from_path(path: str, task: str):  # shared worker for the UI entry points; returns (text, path of the saved .txt)
+    if not path:  # nothing to transcribe: raise a Gradio error instead of opening a missing path
+        raise gr.Error("โปรดอัปโหลดไฟล์ก่อน")
+    # Read the file as bytes and let ffmpeg convert it to a waveform (mono float32)
+    with open(path, "rb") as f:
         payload = f.read()
     audio = ffmpeg_read(payload, asr_pipe.feature_extractor.sampling_rate)
     inputs = {"array": audio, "sampling_rate": asr_pipe.feature_extractor.sampling_rate}
     out = asr_pipe(
         inputs,
         batch_size=BATCH_SIZE,
+        generate_kwargs={"task": task},  # 'transcribe' = keep the original language, 'translate' = translate to English
         return_timestamps=True,
     )
     text = out["text"]  # timestamps are requested above, but only the plain text is used
+    return text, _save_text_file(text, ".txt")
+# ---- entry points for the three UI tabs ----
 def transcribe_mic(mic_path: str, task: str):
     """Forward *mic_path* and *task* unchanged to ``_transcribe_from_path`` and return its result."""
     return _transcribe_from_path(mic_path, task)
+def transcribe_audio(file_path: str, task: str):  # thin wrapper: forwards file_path and task unchanged
     return _transcribe_from_path(file_path, task)
+def transcribe_video(video_path: str, task: str):  # thin wrapper: forwards video_path and task unchanged
+    # ffmpeg_read can handle video files (it extracts the audio track for us)
+    return _transcribe_from_path(video_path, task)
 # -----------------------------
 # UI
 # -----------------------------
+with gr.Blocks(title="Whisper V3 – Transcriber (Audio + MP4)") as demo:
+    gr.Markdown("## 🎙️ Whisper V3 – Record/Upload Audio or MP4 → Transcript → Download (.txt)")
     with gr.Tab("🎤 Microphone"):
         mic_audio = gr.Audio(sources="microphone", type="filepath", label="Record")
         up_task   = gr.Radio(["transcribe", "translate"], value="transcribe", label="Task")
         up_text   = gr.Textbox(label="Transcript", lines=10)
         up_file   = gr.File(label="Download Transcript (.txt)")
+        gr.Button("Run").click(transcribe_audio, inputs=[up_audio, up_task], outputs=[up_text, up_file])
+    with gr.Tab("🎬 Video MP4"):
+        up_video  = gr.Video(sources=["upload"], format="mp4", label="Upload MP4")
+        vd_task   = gr.Radio(["transcribe", "translate"], value="transcribe", label="Task")
+        vd_text   = gr.Textbox(label="Transcript", lines=10)
+        vd_file   = gr.File(label="Download Transcript (.txt)")
+        gr.Button("Run").click(transcribe_video, inputs=[up_video, vd_task], outputs=[vd_text, vd_file])
 demo.queue().launch()