Ppreyy committed on
Commit
c8b6818
·
verified ·
1 Parent(s): 792d32a

Upload batch.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. batch.py +21 -21
batch.py CHANGED
@@ -57,7 +57,7 @@ def load_model():
57
  return_timestamps=True,
58
  ignore_warning=True,
59
  )
60
- print("βœ… Model loaded successfully!\n")
61
 
62
  return _model_cache["apex"]
63
 
@@ -366,44 +366,44 @@ def process_video(
366
 
367
  with tempfile.TemporaryDirectory() as tmp:
368
  # Step 1 — extract audio
369
- print(f" 🎡 Extracting audio...")
370
  try:
371
  audio_path = extract_audio(video_path, tmp)
372
  except Exception as e:
373
- print(f" ❌ Audio extraction failed: {e}")
374
  return None
375
 
376
  # Step 2 — transcribe
377
  if word_level:
378
- print(f" πŸ€– Transcribing with word-level timestamps...")
379
  try:
380
  segments = transcribe_word_level(
381
  audio_path, words_per_line=words_per_line
382
  )
383
  except Exception as e:
384
- print(f" ❌ Word-level transcription failed: {e}")
385
  return None
386
  else:
387
- print(f" πŸ€– Transcribing... (may take a while on CPU)")
388
  try:
389
  segments = transcribe(audio_path)
390
  except Exception as e:
391
- print(f" ❌ Transcription failed: {e}")
392
  return None
393
 
394
  if not segments:
395
- print(f" ⚠️ No speech detected β€” skipping.")
396
  return None
397
 
398
  # Step 3 — detect FPS for Premiere Pro formats
399
  fps = 25.0
400
  if output_format in ["pr-text", "pr-srt"]:
401
- print(f" 🎬 Detecting video FPS...")
402
  fps = get_video_fps(video_path)
403
  print(f" FPS: {fps}")
404
 
405
  # Step 4 — generate output based on format
406
- print(f" πŸ“ Generating caption file ({output_format})...")
407
 
408
  if output_format == "pr-text":
409
  # Premiere Pro Text format (.txt)
@@ -418,7 +418,7 @@ def process_video(
418
  with open(output_path, "w", encoding="utf-8") as f:
419
  f.write(content)
420
 
421
- print(f" βœ… Done! {len(segments)} segments β†’ {output_path}")
422
  return output_path
423
 
424
 
@@ -442,7 +442,7 @@ def collect_videos(inputs: list[str]) -> list[str]:
442
  if ext in VIDEO_EXTENSIONS:
443
  videos.append(path)
444
  else:
445
- print(f"⚠️ Skipping '{path}' β€” not a supported video format.")
446
 
447
  elif os.path.isdir(path):
448
  found = [
@@ -451,11 +451,11 @@ def collect_videos(inputs: list[str]) -> list[str]:
451
  if os.path.splitext(f)[1].lower() in VIDEO_EXTENSIONS
452
  ]
453
  if not found:
454
- print(f"⚠️ No videos found in folder: {path}")
455
  videos.extend(found)
456
 
457
  else:
458
- print(f"⚠️ Path not found: {path}")
459
 
460
  return videos
461
 
@@ -512,8 +512,8 @@ def run_batch(
512
 
513
  print("─" * 60)
514
  print(f"Batch complete in {minutes}m {seconds}s")
515
- print(f" βœ… Succeeded : {len(succeeded)}/{total}")
516
- print(f" ❌ Failed : {len(failed)}/{total}")
517
 
518
  if failed:
519
  print("\nFailed videos:")
@@ -604,10 +604,10 @@ def main():
604
  videos = collect_videos(args.inputs)
605
 
606
  if not videos:
607
- print("❌ No valid video files found. Nothing to do.")
608
  sys.exit(1)
609
 
610
- print(f"\n🎬 Found {len(videos)} video(s) to process:")
611
  for v in videos:
612
  print(f" {v}")
613
  print()
@@ -625,17 +625,17 @@ def main():
625
  else:
626
  output_dir = os.getcwd()
627
 
628
- print(f"πŸ“‚ SRT files will be saved to: {output_dir}\n")
629
 
630
  if args.word_level:
631
- print(f"πŸ“ Word-level mode enabled: {args.words_per_line} words per line")
632
 
633
  format_name = {
634
  "srt": "Standard SRT",
635
  "pr-srt": "Premiere Pro SRT",
636
  "pr-text": "Premiere Pro Text",
637
  }.get(args.format, "SRT")
638
- print(f"🎬 Output format: {format_name}\n")
639
 
640
  run_batch(videos, output_dir, args.word_level, args.words_per_line, args.format)
641
 
 
57
  return_timestamps=True,
58
  ignore_warning=True,
59
  )
60
+ print("Model loaded successfully!\n")
61
 
62
  return _model_cache["apex"]
63
 
 
366
 
367
  with tempfile.TemporaryDirectory() as tmp:
368
  # Step 1 — extract audio
369
+ print(" Extracting audio...")
370
  try:
371
  audio_path = extract_audio(video_path, tmp)
372
  except Exception as e:
373
+ print(f" Audio extraction failed: {e}")
374
  return None
375
 
376
  # Step 2 — transcribe
377
  if word_level:
378
+ print(" Transcribing with word-level timestamps...")
379
  try:
380
  segments = transcribe_word_level(
381
  audio_path, words_per_line=words_per_line
382
  )
383
  except Exception as e:
384
+ print(f" Word-level transcription failed: {e}")
385
  return None
386
  else:
387
+ print(" Transcribing... (may take a while on CPU)")
388
  try:
389
  segments = transcribe(audio_path)
390
  except Exception as e:
391
+ print(f" Transcription failed: {e}")
392
  return None
393
 
394
  if not segments:
395
+ print("No speech detected - skipping.")
396
  return None
397
 
398
  # Step 3 — detect FPS for Premiere Pro formats
399
  fps = 25.0
400
  if output_format in ["pr-text", "pr-srt"]:
401
+ print(" Detecting video FPS...")
402
  fps = get_video_fps(video_path)
403
  print(f" FPS: {fps}")
404
 
405
  # Step 4 — generate output based on format
406
+ print(f" Generating caption file ({output_format})...")
407
 
408
  if output_format == "pr-text":
409
  # Premiere Pro Text format (.txt)
 
418
  with open(output_path, "w", encoding="utf-8") as f:
419
  f.write(content)
420
 
421
+ print(f" Done! {len(segments)} segments -> {output_path}")
422
  return output_path
423
 
424
 
 
442
  if ext in VIDEO_EXTENSIONS:
443
  videos.append(path)
444
  else:
445
+ print(f"Skipping '{path}' β€” not a supported video format.")
446
 
447
  elif os.path.isdir(path):
448
  found = [
 
451
  if os.path.splitext(f)[1].lower() in VIDEO_EXTENSIONS
452
  ]
453
  if not found:
454
+ print(f"No videos found in folder: {path}")
455
  videos.extend(found)
456
 
457
  else:
458
+ print(f"Path not found: {path}")
459
 
460
  return videos
461
 
 
512
 
513
  print("─" * 60)
514
  print(f"Batch complete in {minutes}m {seconds}s")
515
+ print(f" Succeeded : {len(succeeded)}/{total}")
516
+ print(f" Failed : {len(failed)}/{total}")
517
 
518
  if failed:
519
  print("\nFailed videos:")
 
604
  videos = collect_videos(args.inputs)
605
 
606
  if not videos:
607
+ print("No valid video files found. Nothing to do.")
608
  sys.exit(1)
609
 
610
+ print(f"\nFound {len(videos)} video(s) to process:")
611
  for v in videos:
612
  print(f" {v}")
613
  print()
 
625
  else:
626
  output_dir = os.getcwd()
627
 
628
+ print(f"Output directory: {output_dir}\n")
629
 
630
  if args.word_level:
631
+ print(f"Word-level mode: {args.words_per_line} words per line")
632
 
633
  format_name = {
634
  "srt": "Standard SRT",
635
  "pr-srt": "Premiere Pro SRT",
636
  "pr-text": "Premiere Pro Text",
637
  }.get(args.format, "SRT")
638
+ print(f"Output format: {format_name}\n")
639
 
640
  run_batch(videos, output_dir, args.word_level, args.words_per_line, args.format)
641