Spaces:

PPloychor
/

Youtubetranscript

Sleeping

App Files Files Community

PPloychor committed on Nov 4, 2025

Commit

7ffbf27

verified ·

1 Parent(s): e8939bc

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -15

app.py CHANGED Viewed

@@ -7,19 +7,18 @@ from transformers.pipelines.audio_utils import ffmpeg_read
 # -----------------------------
 # CONFIG
 # -----------------------------
-ASR_MODEL = "openai/whisper-large-v3"   # เปลี่ยนรุ่นได้ตามต้องการ
 BATCH_SIZE = 8
 HAS_CUDA = torch.cuda.is_available()
 DEVICE = 0 if HAS_CUDA else "cpu"
 DTYPE = torch.float16 if HAS_CUDA else torch.float32
-# โหลดโมเดลครั้งเดียว
 asr_pipe = pipeline(
     task="automatic-speech-recognition",
     model=ASR_MODEL,
     device=DEVICE,
     torch_dtype=DTYPE,
-    chunk_length_s=30,   # ปลอดภัยสำหรับไฟล์ยาว
 )
 def _save_text_file(text: str, suffix: str = ".txt") -> str:
@@ -28,10 +27,34 @@ def _save_text_file(text: str, suffix: str = ".txt") -> str:
         f.write(text)
     return path
-def _transcribe_from_path(path: str, task: str):
-    if not path:
-        raise gr.Error("โปรดอัปโหลดไฟล์ก่อน")
-    # อ่านเป็น bytes แล้วให้ ffmpeg แปลงเป็น waveform (mono float32)
     with open(path, "rb") as f:
         payload = f.read()
     audio = ffmpeg_read(payload, asr_pipe.feature_extractor.sampling_rate)
@@ -40,22 +63,21 @@ def _transcribe_from_path(path: str, task: str):
     out = asr_pipe(
         inputs,
         batch_size=BATCH_SIZE,
-        generate_kwargs={"task": task},  # 'transcribe' = คงภาษาเดิม, 'translate' = แปลเป็นอังกฤษ
         return_timestamps=True,
     )
     text = out["text"]
     return text, _save_text_file(text, ".txt")
-# ---- entry points สำหรับ UI สามแท็บ ----
 def transcribe_mic(mic_path: str, task: str):
-    return _transcribe_from_path(mic_path, task)
 def transcribe_audio(file_path: str, task: str):
-    return _transcribe_from_path(file_path, task)
-def transcribe_video(video_path: str, task: str):
-    # ffmpeg_read รองรับไฟล์วิดีโอได้ (จะดึงเสียงออกมาให้)
-    return _transcribe_from_path(video_path, task)
 # -----------------------------
 # UI
@@ -78,7 +100,8 @@ with gr.Blocks(title="Whisper V3 – Transcriber (Audio + MP4)") as demo:
         gr.Button("Run").click(transcribe_audio, inputs=[up_audio, up_task], outputs=[up_text, up_file])
     with gr.Tab("🎬 Video MP4"):
-        up_video  = gr.Video(sources=["upload"], format="mp4", label="Upload MP4")
         vd_task   = gr.Radio(["transcribe", "translate"], value="transcribe", label="Task")
         vd_text   = gr.Textbox(label="Transcript", lines=10)
         vd_file   = gr.File(label="Download Transcript (.txt)")

 # -----------------------------
 # CONFIG
 # -----------------------------
 # Model identifier handed to pipeline() as `model=` below.
+ASR_MODEL = "openai/whisper-large-v3"
 # Forwarded as `batch_size` when asr_pipe is called during transcription.
 BATCH_SIZE = 8
 # Device and dtype are chosen together: device 0 with float16 when CUDA is
 # available, otherwise the CPU with float32.
 HAS_CUDA = torch.cuda.is_available()
 DEVICE = 0 if HAS_CUDA else "cpu"
 DTYPE = torch.float16 if HAS_CUDA else torch.float32
 # Built once at module level so every request reuses the same pipeline object.
 asr_pipe = pipeline(
     task="automatic-speech-recognition",
     model=ASR_MODEL,
     device=DEVICE,
     torch_dtype=DTYPE,
     # Chunk length in seconds; per the author's earlier note this is meant to
     # keep long files safe to process.
+    chunk_length_s=30,
 )
 def _save_text_file(text: str, suffix: str = ".txt") -> str:
         f.write(text)
     return path
def _resolve_path(x):
    """Normalize an uploaded-file value to a filesystem path.

    Upload components may hand back different shapes depending on the
    component and library version, so every supported shape is reduced to a
    single path value here.

    Args:
        x: One of
            - ``None``: nothing was uploaded.
            - ``str``: already a file path.
            - ``dict``: the path is read from the ``"path"`` key, falling
              back to the ``"name"`` key.
            - an object with a truthy ``.path`` attribute (e.g. a FileData-
              style object).
            - an ``os.PathLike`` object.
            - an object with a non-empty string ``.name`` attribute (e.g. a
              temporary-file wrapper).
            - anything else, which is converted with ``str()``.

    Returns:
        The resolved path, or ``None`` when ``x`` is ``None`` or a dict that
        has neither a ``"path"`` nor a ``"name"`` value.
    """
    # Local import keeps the helper self-contained.
    import os

    if x is None:
        return None
    if isinstance(x, str):
        return x

    # Some versions deliver the upload as a plain dict.
    if isinstance(x, dict):
        return x.get("path") or x.get("name")

    # FileData-style objects expose the location as ``.path``.
    path = getattr(x, "path", None)
    if path:
        return path

    # Path objects: checked before ``.name`` because ``pathlib.Path.name`` is
    # only the final component, not the full path.
    if isinstance(x, os.PathLike):
        return os.fsdecode(x)

    # Temp-file wrappers keep the path in ``.name``; mirror the dict branch.
    # Only non-empty strings are accepted, since ``.name`` can be an integer
    # file descriptor on some file objects.
    name = getattr(x, "name", None)
    if isinstance(name, str) and name:
        return name

    # Last resort for unexpected types.
    return str(x)
+def _transcribe_from_any(file_like, task: str):
+    path = _resolve_path(file_like)
+    if not path or not os.path.exists(path):
+        raise gr.Error("ไม่พบไฟล์ที่อัปโหลด (path ว่างหรือไฟล์หาย)")
+    # อ่านเป็น bytes แล้วให้ ffmpeg แปลงเป็นโมโน float32
     with open(path, "rb") as f:
         payload = f.read()
     audio = ffmpeg_read(payload, asr_pipe.feature_extractor.sampling_rate)
     out = asr_pipe(
         inputs,
         batch_size=BATCH_SIZE,
+        generate_kwargs={"task": task},  # 'transcribe' = ตามภาษาเดิม, 'translate' = แปลอังกฤษ
         return_timestamps=True,
     )
     text = out["text"]
     return text, _save_text_file(text, ".txt")
 def transcribe_mic(mic_path: str, task: str):
     # Thin wrapper: hands the microphone input and the selected task to the
     # shared _transcribe_from_any helper and returns its result unchanged.
+    return _transcribe_from_any(mic_path, task)
 def transcribe_audio(file_path: str, task: str):
     # Thin wrapper: hands the uploaded audio file and the selected task to the
     # shared _transcribe_from_any helper and returns its result unchanged.
+    return _transcribe_from_any(file_path, task)
+def transcribe_video(video_file, task: str):
+    # Accepts a video file (mp4, mov, or webm also work) and lets ffmpeg_read extract the audio
+    return _transcribe_from_any(video_file, task)
 # -----------------------------
 # UI
         gr.Button("Run").click(transcribe_audio, inputs=[up_audio, up_task], outputs=[up_text, up_file])
     with gr.Tab("🎬 Video MP4"):
+        # ใช้ gr.File เพื่อให้ได้ path ที่นิ่งที่สุด
+        up_video  = gr.File(file_count="single", file_types=["video"], label="Upload MP4 / Video")
         vd_task   = gr.Radio(["transcribe", "translate"], value="transcribe", label="Task")
         vd_text   = gr.Textbox(label="Transcript", lines=10)
         vd_file   = gr.File(label="Download Transcript (.txt)")