JS6969 committed on
Commit
a67035c
·
verified ·
1 Parent(s): 88a0012

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +100 -1003
app.py CHANGED
@@ -1,112 +1,36 @@
1
- # =============================
2
- # app.py
3
- # FFmpeg Frames + Real-ESRGAN Upscale + Re-encode (3-step) + Quick Mode
4
- # - Step 1: Extract frames (with live estimate & progress)
5
- # - Step 2: Upscale frames (now supports uploading your own images directly)
6
- # - Step 3: Re-encode frames (now supports uploading your own frames/ZIP and optional audio source)
7
- # - Previews: 30 frames sampled evenly; scrollable galleries
8
- # - Prefix defaults to input video filename if left blank
9
- # =============================
10
-
11
-
12
  # ────────────────────────────────────────────────────────
13
  # Standard imports
14
  # ────────────────────────────────────────────────────────
15
# ---- TorchVision shim so basicsr can import without torchvision installed ----
import sys
import types

try:
    # If the real module is importable, nothing needs to be patched.
    import torchvision.transforms.functional_tensor as _ft  # noqa: F401
except Exception:
    # Deliberately broad: any failure to import the real module (missing
    # package, removed submodule, binary mismatch) falls back to the stub.
    import torch

    _mod = types.ModuleType("torchvision.transforms.functional_tensor")

    def rgb_to_grayscale(img: "torch.Tensor", num_output_channels: int = 1) -> "torch.Tensor":
        """Minimal replacement for torchvision's ``rgb_to_grayscale``.

        Args:
            img: Tensor shaped ``(..., 3, H, W)`` (channels third from last).
            num_output_channels: 1 for ``(..., 1, H, W)`` output, 3 to repeat
                the gray plane into ``(..., 3, H, W)``.

        Returns:
            The luminance tensor, cast back to ``img.dtype``.

        Raises:
            TypeError: ``img`` is not a tensor.
            ValueError: ``img`` has no 3-channel axis at position -3, or
                ``num_output_channels`` is neither 1 nor 3.
        """
        if not torch.is_tensor(img):
            raise TypeError("rgb_to_grayscale expects a torch.Tensor")
        if img.ndim < 3 or img.shape[-3] != 3:
            raise ValueError(f"expected tensor with C=3 as the third-from-last dim, got shape {tuple(img.shape)}")
        if num_output_channels not in (1, 3):
            raise ValueError("num_output_channels should be either 1 or 3")

        r, g, b = img.unbind(dim=-3)
        # Same luma weights as torchvision; cast back so integer inputs stay integer.
        gray = (0.2989 * r + 0.5870 * g + 0.1140 * b).to(img.dtype)
        if num_output_channels == 3:
            return torch.stack([gray, gray, gray], dim=-3)
        return gray.unsqueeze(-3)

    _mod.rgb_to_grayscale = rgb_to_grayscale
    # Register the stub under the dotted name so later
    # `from torchvision.transforms.functional_tensor import ...` resolves to it.
    sys.modules["torchvision.transforms.functional_tensor"] = _mod
# ---------------------------------------------------------------------------
51
 
52
- import os
53
- import re
54
- import cv2
55
- import json
56
- import math
57
- import time
58
- import shutil
59
- import zipfile
60
- import tempfile
61
- import subprocess
62
- import inspect
63
  from pathlib import Path
64
- from typing import List, Optional, Tuple
65
-
66
  import gradio as gr
67
  import numpy as np
68
- from PIL import Image
69
-
70
- from basicsr.archs.rrdbnet_arch import RRDBNet as _RRDBNet
71
- from basicsr.utils.download_util import load_file_from_url
72
-
73
- from realesrgan import RealESRGANer
74
- from realesrgan.archs.srvgg_arch import SRVGGNetCompact
75
 
76
  _num = re.compile(r'(\d+)')
77
 
78
def _rgb_to_grayscale_np(arr: np.ndarray) -> np.ndarray:
    """Convert an ``H x W x 3`` RGB array to a 3-channel gray array of the same dtype.

    The luma weights sum to 0.9999, so plain truncation would map pure white
    (255) to 254; integer inputs are therefore rounded and clipped to the
    dtype's range before casting back.
    """
    weights = np.array([0.2989, 0.5870, 0.1140])
    gray = arr[..., :3].astype(np.float64) @ weights
    if np.issubdtype(arr.dtype, np.integer):
        bounds = np.iinfo(arr.dtype)
        gray = np.clip(np.rint(gray), bounds.min, bounds.max)
    gray = gray.astype(arr.dtype)
    return np.stack([gray, gray, gray], axis=-1)
83
-
84
def _natural_key(p: Path | str):
    """Return a sort key that orders embedded numbers numerically ("f2" < "f10").

    ``_num.split`` uses one capturing group, so its result always alternates
    text, digits, text, ...  Position parity is therefore used to decide what
    to convert, rather than ``str.isdigit()``, which also accepts characters
    such as "²" that ``int()`` rejects.
    """
    pieces = _num.split(str(p))
    return [int(tok) if idx % 2 else tok.lower() for idx, tok in enumerate(pieces)]
87
 
88
def sample_paths(paths: List[Path] | List[str], n: int = 30) -> List[str]:
    """Evenly sample up to ``n`` items across the whole list, in natural order.

    Args:
        paths: File paths (``Path`` or ``str``); sorted with ``_natural_key``
            first so that 00002 precedes 00010.
        n: Maximum number of items to return; values below 1 are treated as 1.

    Returns:
        The selected paths as strings, always including the first item and,
        when more than one is selected, the last.
    """
    if not paths:
        return []
    ordered = sorted(paths, key=_natural_key)
    total = len(ordered)
    n = max(1, min(n, total))
    if n == total:
        return [str(p) for p in ordered]
    if n == 1:
        # A single sample has no spacing to compute; (n - 1) would be zero below.
        return [str(ordered[0])]
    step = (total - 1) / (n - 1)
    # dict.fromkeys keeps first-seen order while dropping any rounding duplicates.
    picks = dict.fromkeys(round(i * step) for i in range(n))
    return [str(ordered[i]) for i in picks]
109
-
110
  import base64
111
 
112
  APP_DIR = os.getcwd()
@@ -129,69 +53,6 @@ def render_logo_html(px: int = 96) -> str:
129
  <hr>
130
  """
131
 
132
# Flag the UI can consult before offering the upscale step.
# NOTE(review): this is hard-coded; the realesrgan/basicsr imports at the top
# of the file are unconditional, so nothing in view ever sets it to False.
HAVE_REALESRGAN = True
134
-
135
def build_rrdb(scale: int, num_block: int):
    """Build an ``RRDBNet`` with 3 in/out channels, 64 features and 32 growth channels.

    Args:
        scale: Upscale factor passed to the network.
        num_block: Number of RRDB blocks.
    """
    return _RRDBNet(
        num_in_ch=3,
        num_out_ch=3,
        num_feat=64,
        num_block=num_block,
        num_grow_ch=32,
        scale=scale,
    )
138
-
139
def _weights_dir() -> str:
    """Return the ``weights`` directory next to this file, creating it if needed."""
    root = os.path.dirname(os.path.abspath(__file__))
    wdir = os.path.join(root, "weights")
    os.makedirs(wdir, exist_ok=True)
    return wdir
144
-
145
# model_id -> (native network scale, RRDB block count, weights URL)
_REALESRGAN_MODELS = {
    "x4plus": (
        4,
        23,
        "https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth",
    ),
    "x4plus-anime": (
        4,
        6,
        "https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth",
    ),
    "x2plus": (
        2,
        23,
        "https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.1/RealESRGAN_x2plus.pth",
    ),
}


def get_realesrganer(model_id: str, scale: int, tile: int, half: bool, device: str = "cpu") -> RealESRGANer:
    """Download weights if missing and return a configured ``RealESRGANer``.

    Args:
        model_id: One of ``"x4plus"``, ``"x4plus-anime"``, ``"x2plus"``.
        scale: Accepted for interface compatibility; the network's native
            scale is determined by ``model_id`` and this value is not used.
        tile: Tile size; a falsy value (0/None) falls back to 256.
        half: Request fp16 inference; only honored when ``device == "cuda"``.
        device: ``"cuda"`` selects GPU 0; anything else passes ``gpu_id=None``.

    Raises:
        ValueError: ``model_id`` is not a known model.
    """
    try:
        netscale, num_block, url = _REALESRGAN_MODELS[model_id]
    except KeyError:
        raise ValueError(f"Unknown model_id: {model_id}") from None

    wdir = _weights_dir()
    # The weights file is stored under the URL's basename, so the download
    # target and the path handed to RealESRGANer cannot drift apart.
    model_path = os.path.join(wdir, os.path.basename(url))
    if not os.path.isfile(model_path):
        load_file_from_url(url=url, model_dir=wdir, progress=True)

    use_cuda = device == "cuda"
    return RealESRGANer(
        scale=netscale,
        model_path=model_path,
        dni_weight=None,
        model=build_rrdb(scale=netscale, num_block=num_block),
        tile=tile or 256,
        tile_pad=10,
        pre_pad=10,
        half=bool(half and use_cuda),
        gpu_id=0 if use_cuda else None,
    )
195
 
196
 
197
  # ─────────────────────────────────────────────────────────────
@@ -220,25 +81,6 @@ else:
220
  # Helpers
221
  # ─────────────────────────────────────────────────────────────
222
 
223
# Map UI model names (demo) to our internal model IDs.
_UI_MODEL_TO_INTERNAL = {
    "RealESRGAN_x4plus": "x4plus",
    "RealESRGAN_x4plus_anime_6B": "x4plus-anime",
    "RealESRGAN_x2plus": "x2plus",
    # Unsupported in our current RRDBNet wiring – fall back to x4plus:
    "RealESRNet_x4plus": "x4plus",
    "realesr-general-x4v3": "x4plus",
}


def map_ui_model_to_internal(ui_name: str) -> str:
    """Translate a UI model name to an internal model id; unknown names map to ``"x4plus"``."""
    return _UI_MODEL_TO_INTERNAL.get(ui_name, "x4plus")
234
-
235
def clamp_scale_for_model(outscale: int, model_id: str) -> int:
    """Return the scale actually used for ``model_id``.

    The supported models are x2 or x4 only, so the requested ``outscale`` is
    ignored: ``"x2plus"`` yields 2 and every other model id yields 4.
    """
    return 2 if model_id == "x2plus" else 4
241
-
242
  def sanitize_prefix(txt: str) -> str:
243
  txt = (txt or "").strip()
244
  if not txt:
@@ -369,131 +211,6 @@ def build_ffmpeg_extract(
369
  details = f"Frames extracted: {len(frames)} | Saved to: {raw_dir}"
370
  return gallery, str(zip_path), details, cmd_preview, render_progress(100.0, f"Extracted {len(frames)} frames"), [str(p) for p in frames], str(raw_dir), prefix
371
 
372
-
373
def realesrgan(img, model_name, denoise_strength, face_enhance, outscale):
    """Upscale one image with Real-ESRGAN and return it as an RGB(A) ndarray.

    Args:
        img: Input image (anything ``np.array`` turns into HxW, HxWx3 or
            HxWx4); ``None`` returns ``None``.
        model_name: One of ``RealESRGAN_x4plus``, ``RealESRNet_x4plus``,
            ``RealESRGAN_x4plus_anime_6B``, ``RealESRGAN_x2plus``,
            ``realesr-general-x4v3``.
        denoise_strength: 0..1, only used by ``realesr-general-x4v3``
            (1 = the plain model, 0 = the weak-denoise "wdn" model).
        face_enhance: When true, run GFPGAN with the upsampler as background.
        outscale: Final output scale.

    Returns:
        The enhanced image in RGB/RGBA channel order, or ``None`` when the
        enhancement raised ``RuntimeError`` (e.g. CUDA OOM).

    Raises:
        ValueError: ``model_name`` is not recognized.

    Side effects:
        Downloads missing weights into ``<this file's dir>/weights``, writes
        the result to ``output_<random>.<ext>`` in the working directory and
        stores that file name in the global ``last_file``.
    """
    import random
    import string

    global last_file

    if img is None:
        return None

    # ----- Select backbone + weights -----
    if model_name == 'RealESRGAN_x4plus':
        model = build_rrdb(scale=4, num_block=23)
        netscale = 4
        file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth']
    elif model_name == 'RealESRNet_x4plus':
        model = build_rrdb(scale=4, num_block=23)
        netscale = 4
        file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.1/RealESRNet_x4plus.pth']
    elif model_name == 'RealESRGAN_x4plus_anime_6B':
        model = build_rrdb(scale=4, num_block=6)
        netscale = 4
        file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth']
    elif model_name == 'RealESRGAN_x2plus':
        model = build_rrdb(scale=2, num_block=23)
        netscale = 2
        file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.1/RealESRGAN_x2plus.pth']
    elif model_name == 'realesr-general-x4v3':
        model = SRVGGNetCompact(num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=32, upscale=4, act_type='prelu')
        netscale = 4
        file_url = [
            'https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-general-x4v3.pth',
            'https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-general-wdn-x4v3.pth',
        ]
    else:
        raise ValueError(f"Unknown model: {model_name}")

    # ----- Ensure weights on disk -----
    root_dir = os.path.dirname(os.path.abspath(__file__))
    weights_dir = os.path.join(root_dir, 'weights')
    os.makedirs(weights_dir, exist_ok=True)
    for url in file_url:
        if not os.path.isfile(os.path.join(weights_dir, os.path.basename(url))):
            load_file_from_url(url=url, model_dir=weights_dir, progress=True)

    if model_name == 'realesr-general-x4v3':
        strength = min(1.0, max(0.0, float(denoise_strength)))
        model_path = [
            os.path.join(weights_dir, 'realesr-general-x4v3.pth'),
            os.path.join(weights_dir, 'realesr-general-wdn-x4v3.pth'),
        ]
        # Upstream interpolates as [strength, 1 - strength] over (base, wdn):
        # strength 1 is the plain model, 0 is the weak-denoise model.
        dni_weight = [strength, 1.0 - strength]
    else:
        model_path = os.path.join(weights_dir, f"{model_name}.pth")
        dni_weight = None

    # ----- CUDA / precision / tiling -----
    # Inference runs on torch, so ask torch (not OpenCV's CUDA build) for a GPU.
    try:
        import torch
        use_cuda = bool(torch.cuda.is_available())
    except ImportError:
        use_cuda = False

    upsampler = RealESRGANer(
        scale=netscale,
        model_path=model_path,
        dni_weight=dni_weight,
        model=model,
        tile=256,  # VRAM-safe default; lower to 128 if OOM
        tile_pad=10,
        pre_pad=10,
        half=use_cuda,
        gpu_id=0 if use_cuda else None,
    )

    # ----- Optional face enhancement -----
    face_enhancer = None
    if face_enhance:
        from gfpgan import GFPGANer
        face_enhancer = GFPGANer(
            model_path='https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.3.pth',
            upscale=outscale,
            arch='clean',
            channel_multiplier=2,
            bg_upsampler=upsampler,
        )

    def rnd_string(n: int = 8) -> str:
        # Defined unconditionally: the output file name needs it on every path.
        return "".join(random.choice(string.ascii_lowercase + string.digits) for _ in range(n))

    # ----- PIL -> cv2 -----
    cv_img = np.array(img)
    if cv_img.ndim == 2:
        cv_img = cv2.cvtColor(cv_img, cv2.COLOR_GRAY2BGR)
    elif cv_img.shape[2] == 4:
        cv_img = cv2.cvtColor(cv_img, cv2.COLOR_RGBA2BGRA)
    else:
        cv_img = cv2.cvtColor(cv_img, cv2.COLOR_RGB2BGR)

    # ----- Enhance -----
    try:
        if face_enhancer:
            _, _, output = face_enhancer.enhance(cv_img, has_aligned=False, only_center_face=False, paste_back=True)
        else:
            output, _ = upsampler.enhance(cv_img, outscale=int(outscale))
    except RuntimeError as error:
        print('Error', error)
        print('Tip: If you hit CUDA OOM, try a smaller tile size (e.g., 128).')
        return None

    # ----- cv2 -> display ndarray, also save -----
    if output.ndim == 3 and output.shape[2] == 4:
        display_img = cv2.cvtColor(output, cv2.COLOR_BGRA2RGBA)
        extension = 'png'  # keeps the alpha channel
    else:
        display_img = cv2.cvtColor(output, cv2.COLOR_BGR2RGB)
        extension = 'jpg'

    out_filename = f"output_{rnd_string(8)}.{extension}"
    try:
        cv2.imwrite(out_filename, output)
        last_file = out_filename
    except Exception as e:
        # Saving is best-effort; the in-memory result is still returned.
        print("Save error:", e)

    return display_img
495
-
496
-
497
def render_progress(pct: float, label: str = "") -> str:
    """Return an HTML progress bar for ``pct`` percent with a caption.

    ``pct`` is clamped to 0..100 and shown with one decimal; the caption is
    ``"<label> <pct>%"``.  ``label`` is inserted verbatim (not HTML-escaped).
    """
    pct = max(0.0, min(100.0, pct))
    return f'''<div style="width:100%;border:1px solid #ddd;border-radius:8px;overflow:hidden;height:18px;"><div style="height:100%;width:{pct:.1f}%;background:#3b82f6;"></div></div><div style="font-size:12px;opacity:.8;margin-top:4px;">{label} {pct:.1f}%</div>'''
@@ -627,726 +344,106 @@ def step1_extract(
627
  details = f"Frames extracted: {len(frames)} | Saved to: {raw_dir}"
628
  yield gallery, str(zip_path), details, cmd_preview, render_progress(100.0, f"Extracted {len(frames)} frames"), [str(p) for p in frames], str(raw_dir), prefix
629
 
630
- # ───────────────── Upscale (Step 2) — supports uploaded images OR frames from Step 1
631
-
632
- # Manual-batch Step 2 helpers (resumable, click-to-advance)
633
def _ensure_dir(p: Path) -> Path:
    """Create directory ``p`` (and parents) if missing and return ``p`` itself."""
    p.mkdir(parents=True, exist_ok=True)
    return p
636
-
637
def _save_zip_of_dir(dir_path: Path, zip_path: Path) -> str:
    """Zip every .jpg/.jpeg/.png directly inside ``dir_path`` into ``zip_path``.

    Files are stored flat (by file name) in natural order; an existing
    archive at ``zip_path`` is overwritten.  Returns ``str(zip_path)``.
    """
    image_suffixes = {".jpg", ".jpeg", ".png"}
    members = [f for f in dir_path.glob("*.*") if f.suffix.lower() in image_suffixes]
    with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as archive:
        for member in sorted(members, key=_natural_key):
            archive.write(member, member.name)
    return str(zip_path)
643
-
644
def _list_image_paths_from_upload(files: List[gr.File] | None) -> List[str]:
    """Return the paths of uploaded .jpg/.jpeg/.png files, in upload order.

    Each entry may be an object exposing the path as ``.name`` or a plain
    path string; anything that is not an image by extension is dropped.
    """
    if not files:
        return []
    image_suffixes = {".jpg", ".jpeg", ".png"}
    # getattr fallback: some upload configurations hand over str paths.
    candidates = (Path(getattr(f, "name", f)) for f in files)
    return [str(path) for path in candidates if path.suffix.lower() in image_suffixes]
647
-
648
def _build_gallery_from_dir(dir_path: Path, n: int = 30) -> List[str]:
    """Return up to ``n`` evenly sampled image paths from ``dir_path``.

    Considers .jpg, .jpeg and .png files (the same set the zip helper
    packs).  Ordering is left to ``sample_paths``, which sorts naturally.
    """
    images = [
        *dir_path.glob("*.jpg"),
        *dir_path.glob("*.jpeg"),
        *dir_path.glob("*.png"),
    ]
    return sample_paths(images, n)
651
-
652
def step2_prepare_sources(frames_list, uploaded_imgs, max_images):
    """Resolve the Step 2 inputs and create a fresh output directory.

    Uploaded images take priority over the frames from Step 1.

    Args:
        frames_list: Frame paths produced by Step 1 (may be empty/None).
        uploaded_imgs: Uploaded image files (may be empty/None).
        max_images: Optional cap on how many sources to keep; 0, blank or
            unparsable means "all".

    Returns:
        ``(source_paths, output_dir, done_index, total, message, progress_html)``.
        When no sources are found, the output dir is ``""`` and counts are 0.
    """
    sources = _list_image_paths_from_upload(uploaded_imgs) or (frames_list or [])
    if not sources:
        return [], "", 0, 0, "No images found. Upload files or run Step 1 first.", render_progress(0.0, "Idle")

    try:
        limit = int(max_images or 0)
    except (TypeError, ValueError):
        limit = 0
    if limit > 0:
        sources = sources[:limit]

    work_dir = Path(tempfile.mkdtemp(prefix="up_manual_"))
    out_dir = _ensure_dir(work_dir / "upscaled")
    total = len(sources)
    message = f"Sources loaded: {total} image(s). Click 'Process Next Batch' to start."
    return sources, str(out_dir), 0, total, message, render_progress(0.0, "Ready")
669
-
670
def step2_process_next_batch(
    up_src_paths, up_out_dir, up_done_idx, up_total,
    ui_model_name, outscale, tile, precision, denoise_strength, face_enhance, batch_size,
):
    """Upscale the next ``batch_size`` sources, streaming progress (generator).

    Every yield is ``(gallery, zip_file, label, progress_html, next_index,
    out_dir)``.  Per-image yields report progress within this batch; the last
    yield reports global progress and the index the next click resumes from.

    Args:
        up_src_paths: All source image paths prepared for Step 2.
        up_out_dir: Directory the upscaled JPEGs are written to.
        up_done_idx: Index of the first source not yet processed.
        up_total: Total number of sources.
        ui_model_name: Model name as shown in the UI.
        outscale: Requested scale (clamped to what the model supports).
        tile: Tile size (falsy -> 256).
        precision: ``"half"`` enables fp16 when running on CUDA.
        denoise_strength: Shown in the progress label only; the upsampler
            built here has no denoise control.
        face_enhance: When true, try to run GFPGAN on top of the upsampler.
        batch_size: Images per click (falsy -> 8, minimum 1).
    """
    if not up_src_paths or not up_out_dir:
        yield None, None, "Load sources first.", render_progress(0.0, "Idle"), up_done_idx, up_out_dir
        return

    out_dir = Path(up_out_dir)
    zip_path = out_dir.parent / "upscaled.zip"
    total = int(up_total or 0)
    start = int(up_done_idx or 0)
    batch_size = max(1, int(batch_size or 8))
    end = min(start + batch_size, total)

    # Nothing left: report the final state without loading any model.
    if start >= total:
        gallery = _build_gallery_from_dir(out_dir, 30)
        zip_file = _save_zip_of_dir(out_dir, zip_path)
        count = len(list(out_dir.glob('*.jpg'))) + len(list(out_dir.glob('*.png')))
        yield gallery, zip_file, f"Done. Total upscaled: {count}", render_progress(100.0, "All images processed"), start, up_out_dir
        return

    model_id = map_ui_model_to_internal(ui_model_name)
    scale = clamp_scale_for_model(int(outscale or 4), model_id)
    device = "cuda" if os.environ.get("CUDA_VISIBLE_DEVICES") else "cpu"
    half = (precision == "half") and (device == "cuda")
    tile = int(tile or 256)
    upsampler = get_realesrganer(model_id, scale, tile, half, device=device)

    # Optional: GFPGAN face enhancer (best-effort; fall back to plain upscaling)
    face_enhancer = None
    if face_enhance:
        try:
            from gfpgan import GFPGANer
            face_enhancer = GFPGANer(
                model_path="https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.3.pth",
                upscale=scale,
                arch="clean",
                channel_multiplier=2,
                bg_upsampler=upsampler,
            )
        except Exception as e:
            print("GFPGAN load failed:", e)
            face_enhancer = None

    batch_paths = up_src_paths[start:end]
    total_in_batch = len(batch_paths)

    t0 = time.time()  # for the per-batch ETA
    for processed_now, fp in enumerate(batch_paths, start=1):
        try:
            with Image.open(fp) as im:
                rgb = np.array(im.convert("RGB"))
            # Both enhancers follow the OpenCV convention (BGR in, BGR out),
            # so flip the channel order on the way in and on the way out.
            bgr = np.ascontiguousarray(rgb[:, :, ::-1])
            if face_enhancer:
                _, _, out_bgr = face_enhancer.enhance(
                    bgr, has_aligned=False, only_center_face=False, paste_back=True
                )
            else:
                # enhance() has no denoise_strength parameter; passing one
                # made every call raise and no image was ever written.
                out_bgr, _ = upsampler.enhance(bgr, outscale=scale)
            out_rgb = np.ascontiguousarray(out_bgr[:, :, ::-1])
            Image.fromarray(out_rgb).save(out_dir / (Path(fp).stem + ".jpg"), quality=95)
        except Exception as e:
            # Best-effort: a bad image must not abort the rest of the batch.
            print("Upscale error:", fp, e)

        # Progress & ETA for THIS batch
        pct_batch = (processed_now / total_in_batch) * 100.0
        elapsed = time.time() - t0
        secs_per_img = elapsed / max(1, processed_now)
        eta = (total_in_batch - processed_now) * secs_per_img
        label = (f"Batch: {processed_now}/{total_in_batch} · "
                 f"~{eta:.1f}s ETA · global {start+processed_now}/{total} "
                 f"(x{scale}, model={ui_model_name}, denoise={denoise_strength}, face={face_enhance})")

        gallery = _build_gallery_from_dir(out_dir, 30)
        zip_file = _save_zip_of_dir(out_dir, zip_path)
        yield gallery, zip_file, label, render_progress(pct_batch, f"Upscaling… {pct_batch:.0f}% (this batch)"), start + processed_now, up_out_dir

    # Batch complete — final emit for this click
    next_idx = end
    gallery = _build_gallery_from_dir(out_dir, 30)
    zip_file = _save_zip_of_dir(out_dir, zip_path)

    # Total (global) percentage across all sources
    pct_global = (next_idx / total) * 100.0 if total else 100.0
    final_label = (f"Processed batch {total_in_batch} image(s). "
                   f"{next_idx}/{total} done (global {pct_global:.0f}%).")
    yield gallery, zip_file, final_label, render_progress(pct_global, "Upscaling… (global)"), next_idx, up_out_dir
770
-
771
-
772
def save_uploaded_images(files: List[gr.File] | None, prefix: str = "upload") -> Tuple[List[Path], Path]:
    """Copy uploaded files into a fresh temp ``input`` directory.

    Each copy is named ``<prefix>_<original name>``.

    Returns:
        ``(copied_paths, input_dir)``; the directory is created even when
        ``files`` is empty, in which case the list is empty.
    """
    in_dir = Path(tempfile.mkdtemp(prefix="imgup_")) / "input"
    in_dir.mkdir(parents=True, exist_ok=True)

    copied: List[Path] = []
    for f in files or []:
        # Accept objects exposing the path as .name as well as plain path strings.
        src = Path(getattr(f, "name", f))
        dst = in_dir / f"{prefix}_{src.name}"
        shutil.copy2(src, dst)
        copied.append(dst)
    return copied, in_dir
785
-
786
def step2_upscale(
    frames_list: List[str] | None,
    ui_model_name: str,
    outscale: int,
    tile: int,
    precision: str,
    prog_html: str,
    uploaded_imgs: List[gr.File] | None,
    denoise_strength: float = 0.5,
    face_enhance: bool = False,
    batch_size: int = 16,
    max_images: int = 0,
):
    """Upscale frames in batches with live progress updates (generator).

    Every yield is ``(gallery, zip_path, label, progress_html)``.  After each
    batch the first two are ``None`` and the label looks like
    "Upscaling… 20% · 80/100 remaining (batch 2/10)"; the final yield carries
    the sampled gallery and the ZIP of all outputs.

    Args:
        frames_list: Frame paths from Step 1, used when nothing is uploaded.
        ui_model_name: Model name as shown in the UI.
        outscale: Requested scale (clamped to what the model supports).
        tile: Tile size passed to the upsampler.
        precision: ``"half"`` enables fp16 when running on CUDA.
        prog_html: Current progress HTML, echoed back on early exits.
        uploaded_imgs: Uploaded images; take priority over ``frames_list``.
        denoise_strength: Accepted for interface compatibility; not used here.
        face_enhance: Accepted for interface compatibility; not used here.
        batch_size: Images per progress update (minimum 1).
        max_images: Optional cap on the number of sources (0 = all).
    """
    if not HAVE_REALESRGAN:
        msg = ("Real-ESRGAN not available. Check requirements.txt includes: --prefer-binary, "
               "numpy==1.26.4, scipy==1.11.4, scikit-image==0.22.0, opencv-python-headless, "
               "torch==2.2.2, realesrgan==0.3.0, basicsr==1.4.2, pillow, gradio.")
        yield None, None, msg, prog_html
        return

    # Source: uploaded > frames from Step 1
    if uploaded_imgs:
        # Use the uploaded paths directly (objects with .name or plain strings).
        src_paths = [str(Path(getattr(f, "name", f))) for f in uploaded_imgs]
    else:
        src_paths = list(frames_list or [])

    if not src_paths:
        yield None, None, "No images provided. Upload files or run Step 1 first.", prog_html
        return

    # Optional cap
    try:
        max_images = int(max_images or 0)
    except (TypeError, ValueError):
        max_images = 0
    if max_images > 0:
        src_paths = src_paths[:max_images]

    # Batch size
    try:
        batch_size = max(1, int(batch_size or 1))
    except (TypeError, ValueError):
        batch_size = 16

    # Map UI model -> internal id; clamp scale to model
    model_id = map_ui_model_to_internal(ui_model_name)
    scale = clamp_scale_for_model(int(outscale or 4), model_id)
    device = "cuda" if os.environ.get("CUDA_VISIBLE_DEVICES") else "cpu"
    half = (precision == "half") and (device == "cuda")
    upsampler = get_realesrganer(model_id, scale, tile, half, device=device)

    work = Path(tempfile.mkdtemp(prefix="up_"))
    out_dir = work / "upscaled"
    out_dir.mkdir(parents=True, exist_ok=True)

    total = len(src_paths)
    num_batches = (total + batch_size - 1) // batch_size
    done = 0
    up_paths: List[Path] = []

    for i in range(0, total, batch_size):
        for fp in src_paths[i:i + batch_size]:
            try:
                with Image.open(fp) as im:
                    rgb = np.array(im.convert("RGB"))
                # The upsampler follows the OpenCV convention (BGR in/out).
                out_bgr, _ = upsampler.enhance(np.ascontiguousarray(rgb[:, :, ::-1]), outscale=scale)
                out_file = out_dir / (Path(fp).stem + ".jpg")
                Image.fromarray(np.ascontiguousarray(out_bgr[:, :, ::-1])).save(out_file, quality=95)
                up_paths.append(out_file)
            except Exception as e:
                # Best-effort: keep going, but leave a trace of what failed.
                print("Upscale error:", fp, e)
            finally:
                done += 1

        # Emit progress after each batch
        pct = int(round((done / total) * 100)) if total else 0
        remaining = max(0, total - done)
        label = f"Upscaling… {pct}% · {remaining}/{total} remaining (batch {i//batch_size+1}/{num_batches})"
        prog_html = render_progress(pct, label)
        yield None, None, label, prog_html

    if not up_paths:
        yield None, None, "Upscaling produced no outputs.", prog_html
        return

    up_paths = sorted(up_paths, key=_natural_key)
    gallery = sample_paths(up_paths, 30)
    zip_path = work / "upscaled.zip"
    with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf:
        for p in up_paths:
            zf.write(p, p.name)

    final_label = (f"Upscaled: {len(up_paths)} | Model: {ui_model_name}→{model_id} | "
                   f"Scale: x{scale} | Tile: {tile} | Precision: {precision} | Batch: {batch_size}")
    yield gallery, str(zip_path), final_label, render_progress(100.0, "Upscaling complete")
892
-
893
-
894
- # ───────────────── Encode (Step 3) — supports uploaded frames/ZIP & optional audio source
895
-
896
def prepare_frames_from_upload(files: List[gr.File] | None, prefix: str = "enc") -> Tuple[Optional[str], Optional[str]]:
    """Stage uploaded frames (one ZIP or loose images) into a temp ``frames`` dir.

    * A single ``.zip`` is extracted as-is; the prefix is taken from the first
      top-level .jpg/.png by dropping its trailing ``_<suffix>``, falling back
      to ``prefix`` when no image is found.
    * Otherwise every .jpg/.jpeg/.png is copied and renamed to
      ``<prefix>_00001<ext>``, ``<prefix>_00002<ext>``, ... in upload order.

    Returns:
        ``(frames_dir, prefix)``, or ``(None, None)`` when ``files`` is empty.
    """
    if not files:
        return None, None

    # Accept objects exposing the path as .name as well as plain path strings.
    uploads = [Path(getattr(f, "name", f)) for f in files]
    frames_dir = Path(tempfile.mkdtemp(prefix="enc_")) / "frames"
    frames_dir.mkdir(parents=True, exist_ok=True)

    # If a single ZIP is uploaded, unzip
    if len(uploads) == 1 and uploads[0].suffix.lower() == ".zip":
        with zipfile.ZipFile(uploads[0], "r") as zf:
            zf.extractall(frames_dir)
        imgs = sorted(frames_dir.glob("*.jpg")) + sorted(frames_dir.glob("*.png"))
        # Frames are named "<prefix>_<number>"; strip only the last "_<number>"
        # so prefixes that themselves contain underscores survive intact.
        detected_prefix = imgs[0].stem.rsplit("_", 1)[0] if imgs else None
        return str(frames_dir), detected_prefix or prefix

    # else, copy images directly
    counter = 1
    for src in uploads:
        ext = src.suffix.lower()
        if ext not in (".jpg", ".jpeg", ".png"):
            continue
        shutil.copy2(src, frames_dir / f"{prefix}_{counter:05d}{ext}")
        counter += 1
    return str(frames_dir), prefix
923
-
924
-
925
def build_ffmpeg_encode(frames_dir: str, prefix: str, fps: float, fmt: str, include_audio: bool, orig_video: str | None) -> List[str]:
    """Build the ffmpeg argv that encodes ``<prefix>_%05d.(jpg|png)`` frames.

    Args:
        frames_dir: Directory holding the numbered frames (numbering starts at 1).
        prefix: Frame file-name prefix.
        fps: Input frame rate.
        fmt: ``"h265"`` or ``"vp9"``; anything else encodes with libx264.
        include_audio: Mux the first audio stream of ``orig_video`` if present.
        orig_video: Path of the audio source, or ``None``.

    Returns:
        The argument list.  The output name is relative (``output.mp4`` for
        h264/h265, else ``output.webm``), so the caller chooses the directory
        through the process working directory.
    """
    frames = Path(frames_dir)
    # A printf-style pattern is not a real file, so probe for actual frames.
    ext = "jpg" if any(frames.glob(f"{prefix}_*.jpg")) else "png"
    pattern = str(frames / f"{prefix}_%05d.{ext}")

    args = [FFMPEG, "-y", "-start_number", "1", "-framerate", f"{fps:.6f}", "-i", pattern]
    if include_audio and orig_video:
        # Trailing "?" makes the audio map optional: silent sources still encode.
        args += ["-i", orig_video, "-map", "0:v:0", "-map", "1:a:0?", "-shortest"]

    if fmt == "h265":
        codec = "libx265"
    elif fmt == "vp9":
        codec = "libvpx-vp9"
    else:
        codec = "libx264"
    args += ["-c:v", codec, "-pix_fmt", "yuv420p", "-crf", "18"]
    if fmt == "vp9":
        # libvpx-vp9 needs -b:v 0 for constant-quality CRF and has no -preset.
        args += ["-b:v", "0"]
    else:
        args += ["-preset", "medium"]

    out_name = "output.mp4" if fmt in ("h264", "h265") else "output.webm"
    args.append(out_name)
    return args
942
-
943
-
944
def step3_encode(
    frames_dir_state: str | None,
    prefix_state: str | None,
    orig_video: gr.File | None,
    fps: float | None,
    fmt: str,
    include_audio: bool,
    prog_html: str,
    uploaded_frames: List[gr.File] | None,
    uploaded_audio_video: gr.File | None
):
    """Encode frames into a video with ffmpeg, streaming progress (generator).

    Every yield is ``(video_path, message, progress_html)``; ``video_path`` is
    ``None`` until the final, successful yield.

    Args:
        frames_dir_state: Frames directory from earlier steps.
        prefix_state: Frame file-name prefix from earlier steps.
        orig_video: Original video, used as audio source when no separate
            audio source is uploaded.
        fps: Output frame rate (falsy -> 30.0).
        fmt: Codec selector passed to ``build_ffmpeg_encode``.
        include_audio: Whether to mux audio from the audio source.
        prog_html: Current progress HTML, echoed back on early exit.
        uploaded_frames: Uploaded frames/ZIP; take priority over the state.
        uploaded_audio_video: Optional explicit audio source.
    """
    from collections import deque

    # Choose frames source: uploaded takes priority
    frames_dir = frames_dir_state
    prefix = prefix_state
    if uploaded_frames:
        frames_dir, detected = prepare_frames_from_upload(uploaded_frames, prefix or "enc")
        if detected:
            prefix = detected
    if not frames_dir or not prefix:
        yield None, "No frames available. Upload frames (ZIP/images) or run Step 1.", prog_html
        return

    fps = float(fps or 30.0)
    audio_src = uploaded_audio_video or orig_video
    # The file may arrive as an object exposing .name or as a plain path string.
    orig_path = getattr(audio_src, "name", audio_src) if audio_src else None

    cmd = build_ffmpeg_encode(frames_dir, prefix, fps, fmt, include_audio, orig_path)
    # Ask ffmpeg for machine-readable key=value progress on stderr.
    cmd[1:1] = ["-progress", "pipe:2"]

    # Total frame count, for the percentage
    frames_path = Path(frames_dir)
    total_frames = (len(list(frames_path.glob(f"{prefix}_*.jpg")))
                    + len(list(frames_path.glob(f"{prefix}_*.png"))))

    last_html = prog_html
    reported_frame = None
    # Keep the tail of stderr: once the stream is drained there is nothing
    # left to read for the failure message.
    stderr_tail = deque(maxlen=40)

    with subprocess.Popen(
        cmd,
        stderr=subprocess.PIPE,
        stdout=subprocess.DEVNULL,
        text=True,
        bufsize=1,
        cwd=frames_dir,
    ) as proc:
        for line in proc.stderr:
            stderr_tail.append(line)
            key, _, value = line.strip().partition("=")
            if key != "frame":
                continue
            try:
                current_frame = int(value)
            except ValueError:
                continue  # human-readable stats line, not a progress record
            if current_frame == reported_frame:
                continue  # nothing new to show
            reported_frame = current_frame

            if total_frames > 0:
                pct = min(100.0, (current_frame / total_frames) * 100.0)
                last_html = render_progress(pct, f"Encoding… {current_frame}/{total_frames} frames")
                yield None, f"Encoding in progress… {current_frame}/{total_frames}", last_html
            else:
                last_html = render_progress(50.0, "Encoding…")
                yield None, "Encoding in progress…", last_html
    ret = proc.returncode

    out_file = frames_path / ("output.mp4" if fmt in ("h264", "h265") else "output.webm")
    if ret != 0 or not out_file.exists():
        err = "".join(stderr_tail)
        yield None, f"Encoding failed.\n\n{err}", last_html
        return

    yield str(out_file), f"Video created: {out_file.name}", render_progress(100.0, "Encoding complete")
1024
-
1025
-
1026
- # ───────────────── Quick Mode — one click: All frames → Upscale ×4 → MP4 (audio)
1027
-
1028
- #def quick_mode(video: gr.File | None, start_time: str, end_time: str, resize_long: int, prefix_in: str, prog_html: str):
1029
- # if not video or not video.name:
1030
- # return None, None, None, "Upload a video.", prog_html
1031
- # if not (FFMPEG and FFPROBE and HAVE_REALESRGAN):
1032
- # return None, None, None, "Missing deps (ffmpeg/ffprobe/realesrgan). See requirements.txt.", prog_html
1033
-
1034
- # info = parse_video_info(ffprobe_json(video.name))
1035
- # in_fps = info.get("fps") or 30.0
1036
- # prefix = sanitize_prefix(prefix_in) or Path(video.name).stem
1037
-
1038
- # work = Path(tempfile.mkdtemp(prefix="quick_"))
1039
- # raw_dir = work / "frames_raw"; raw_dir.mkdir(parents=True, exist_ok=True)
1040
- # up_dir = work / "upscaled"; up_dir.mkdir(parents=True, exist_ok=True)
1041
-
1042
- # Extract all frames
1043
- # extract_cmd = build_ffmpeg_extract(
1044
- # input_path=video.name,
1045
- # mode="All frames",
1046
- # every_seconds=1.0,
1047
- # nth_frame=1,
1048
- # exact_fps=in_fps,
1049
- # start_time=(start_time or "").strip(),
1050
- # end_time=(end_time or "").strip(),
1051
- # long_side=resize_long,
1052
- # out_format="jpg",
1053
- # jpg_quality=3,
1054
- # png_level=2,
1055
- # scene_detect=False,
1056
- # scene_thresh=0.3,
1057
- # out_pattern=str(raw_dir / f"{prefix}_%05d.jpg"),
1058
- # )
1059
- # proc = subprocess.Popen(extract_cmd, stderr=subprocess.PIPE, stdout=subprocess.DEVNULL, text=True, bufsize=1)
1060
- # est = estimate_output_count("All frames", info.get("duration"), in_fps, 1.0, 1, in_fps)
1061
- # created = 0
1062
- # while True:
1063
- # line = proc.stderr.readline()
1064
- # if not line and proc.poll() is not None:
1065
- # break
1066
- # if int(time.time()*10) % 3 == 0:
1067
- # created = len(list(raw_dir.glob(f"{prefix}_*.jpg")))
1068
- # pct = min(100.0, (created / est) * 100.0) if est else 0
1069
- # prog_html = render_progress(pct, f"Phase 1/3: Extracting {created}/{est or '?'}")
1070
- # proc.wait()
1071
-
1072
- # frames = sorted(raw_dir.glob(f"{prefix}_*.jpg"))
1073
- # if not frames:
1074
- # return None, None, None, "No frames extracted in Quick Mode.", prog_html
1075
-
1076
- # Upscale x4
1077
- # device = "cuda" if os.environ.get("CUDA_VISIBLE_DEVICES") else "cpu"
1078
- # upsampler = get_realesrganer("x4plus", 4, 0, (device=="cuda"), device=device)
1079
-
1080
- # total = len(frames)
1081
- # done = 0
1082
- # for fp in frames:
1083
- # img = Image.open(fp).convert("RGB")
1084
- # output, _ = upsampler.enhance(np.array(img), outscale=4)
1085
- # Image.fromarray(output).save(up_dir / (Path(fp).stem + ".jpg"), quality=95)
1086
- # done += 1
1087
- # pct = (done/total)*100 if total else 0
1088
- # prog_html = render_progress(pct, f"Phase 2/3: Upscaling {done}/{total}")
1089
-
1090
- # Encode MP4 with audio
1091
- # encode_cmd = build_ffmpeg_encode(str(up_dir), prefix, in_fps, "h264", True, video.name)
1092
- # proc2 = subprocess.Popen(encode_cmd, stderr=subprocess.PIPE, stdout=subprocess.DEVNULL, text=True, bufsize=1, cwd=str(up_dir))
1093
- # while True:
1094
- # line = proc2.stderr.readline()
1095
- # if not line and proc2.poll() is not None:
1096
- # break
1097
- # if int(time.time()*10) % 5 == 0:
1098
- # prog_html = render_progress(50.0, "Phase 3/3: Encoding…")
1099
- # proc2.wait()
1100
-
1101
- # out_file = Path(up_dir) / "output.mp4"
1102
- # if not out_file.exists():
1103
- # return None, None, None, "Encoding failed in Quick Mode.", prog_html
1104
-
1105
- # Intermediates
1106
- # zip_frames = work / "frames.zip"
1107
- # with zipfile.ZipFile(zip_frames, "w", zipfile.ZIP_DEFLATED) as zf:
1108
- # for p in frames:
1109
- # zf.write(p, p.name)
1110
- # zip_up = work / "upscaled.zip"
1111
- # with zipfile.ZipFile(zip_up, "w", zipfile.ZIP_DEFLATED) as zf:
1112
- # for p in sorted(up_dir.glob("*.jpg"), key=_natural_key):
1113
- # zf.write(p, p.name)
1114
-
1115
- # return str(out_file), str(zip_frames), str(zip_up), "Quick Mode complete.", render_progress(100.0, "All done")
1116
-
1117
  # ───────────────── UI
1118
 
1119
  def build_ui():
1120
- with gr.Blocks(theme=gr.themes.Soft(), css="""
1121
- .cf-title { font-size: 1.6rem; font-weight: 800; }
1122
- .cmdbox textarea { font-family: ui-monospace, Menlo, monospace; font-size: 12px; }
1123
- """) as demo:
1124
- gr.HTML(render_logo_html(96))
1125
- gr.Markdown("Three-step workflow. Video → Frames → Upscale → Re-encode")
1126
-
1127
-
1128
- # Shared states (from Step 1)
1129
- frames_state = gr.State([]) # list[str]
1130
- frames_dir_state = gr.State("") # str
1131
- prefix_state = gr.State("") # str
1132
- fps_state = gr.State(30.0) # float
1133
- # Shared Step 2 states (manual batching)
1134
- up_src_paths_state = gr.State([]) # list[str] absolute paths to process
1135
- up_out_dir_state = gr.State("") # str: output dir path
1136
- up_done_idx_state = gr.State(0) # int: next index to start from
1137
- up_total_state = gr.State(0) # int: total images
1138
-
1139
- with gr.Tabs():
1140
- # STEP 1
1141
- with gr.Tab("Step 1 · Extract Frames"):
1142
- with gr.Row():
1143
- video = gr.File(label="Upload video", file_types=[".mp4", ".mov", ".mkv", ".avi", ".webm", ".m4v"], type="filepath")
1144
- with gr.Accordion("Extraction Settings", open=True):
1145
- with gr.Row():
1146
- mode = gr.Dropdown(["Every N seconds", "Every Nth frame", "Exact FPS", "All frames"], value="Every N seconds", label="Mode")
1147
- every_seconds = gr.Number(value=1.0, label="Every N seconds")
1148
- nth_frame = gr.Number(value=30, label="Every Nth frame")
1149
- exact_fps = gr.Number(value=1.0, label="Exact FPS")
1150
- with gr.Row():
1151
- start_time = gr.Textbox(value="", label="Start (HH:MM:SS.mmm)")
1152
- end_time = gr.Textbox(value="", label="End (HH:MM:SS.mmm)")
1153
- long_side = gr.Number(value=0, label="Resize long side px (0 = none)")
1154
- with gr.Row():
1155
- out_format = gr.Dropdown(["jpg", "png"], value="jpg", label="Output format")
1156
- jpg_quality = gr.Slider(2, 31, value=3, step=1, label="JPG quality (2=best)")
1157
- png_level = gr.Slider(0, 9, value=2, step=1, label="PNG compression level")
1158
- with gr.Row():
1159
- scene_detect = gr.Checkbox(False, label="Scene-change detect")
1160
- scene_thresh = gr.Slider(0.0, 1.0, value=0.3, step=0.01, label="Scene threshold")
1161
- prefix_vid = gr.Textbox(value="", label="Filename prefix (defaults to input file name)")
1162
- estimate_md = gr.Markdown("Estimated output: —")
1163
- with gr.Row():
1164
- btn_extract = gr.Button("Step 1: Extract Frames", variant="primary")
1165
- prog1 = gr.HTML(render_progress(0.0, "Idle"))
1166
- gallery = gr.Gallery(label="Preview (all ≤100, else sample 100)", columns=6, height=480)
1167
- zip_out = gr.File(label="Download frames ZIP")
1168
- details1 = gr.Markdown("Ready.")
1169
- with gr.Accordion("Show FFmpeg command", open=False):
1170
- cmd_preview = gr.Textbox(label="ffmpeg command", lines=4, elem_classes=["cmdbox"])
1171
- if MISSING_MSG:
1172
- gr.Markdown(f"<span style='color:#b45309'>{MISSING_MSG}</span>")
1173
- # Wire behavior: enable/disable param groups depending on mode / format
1174
- def _toggle_params(mode_val, fmt):
1175
- return (
1176
- gr.update(visible=(mode_val == "Every N seconds")),
1177
- gr.update(visible=(mode_val == "Every Nth frame")),
1178
- gr.update(visible=(mode_val == "Exact FPS")),
1179
- gr.update(visible=(fmt == "jpg")),
1180
- gr.update(visible=(fmt == "png")),
1181
- )
1182
-
1183
- mode.change(
1184
- _toggle_params,
1185
- inputs=[mode, out_format],
1186
- outputs=[every_seconds, nth_frame, exact_fps, jpg_quality, png_level],
1187
- )
1188
- out_format.change(
1189
- _toggle_params,
1190
- inputs=[mode, out_format],
1191
- outputs=[every_seconds, nth_frame, exact_fps, jpg_quality, png_level],
1192
- )
1193
- # Initialize visibility
1194
- demo.load(_toggle_params, inputs=[mode, out_format], outputs=[every_seconds, nth_frame, exact_fps, jpg_quality, png_level])
1195
-
1196
- def update_estimate(vfile, mode_val, evs, nth, exfps, st, et):
1197
- if not vfile or not getattr(vfile, 'name', None):
1198
- return "Estimated output: —"
1199
- info = parse_video_info(ffprobe_json(vfile.name))
1200
- dur = info.get("duration")
1201
- def parse_ts(ts: str):
1202
- if not ts: return 0.0
1203
- parts = ts.split(":")
1204
- if len(parts) == 3:
1205
- try: return float(parts[0])*3600 + float(parts[1])*60 + float(parts[2])
1206
- except Exception: return 0.0
1207
- return 0.0
1208
- st_s = parse_ts(st or ""); et_s = parse_ts(et or "")
1209
- if dur:
1210
- if st_s: dur = max(0.0, dur - st_s)
1211
- if et_s and et_s < info.get("duration", 0) and et_s > 0:
1212
- dur = min(dur, et_s)
1213
- est = estimate_output_count(mode_val, dur, info.get("fps"), evs or 1.0, int(nth or 1), exfps or 1.0)
1214
- return f"Estimated output: **~{est} frames**" if est else "Estimated output: —"
1215
-
1216
- for ctrl in [video, mode, every_seconds, nth_frame, exact_fps, start_time, end_time]:
1217
- ctrl.change(update_estimate, inputs=[video, mode, every_seconds, nth_frame, exact_fps, start_time, end_time], outputs=[estimate_md])
1218
-
1219
- btn_extract.click(
1220
- step1_extract,
1221
- inputs=[
1222
- video, mode, every_seconds, nth_frame, exact_fps,
1223
- start_time, end_time, long_side, out_format, jpg_quality, png_level,
1224
- scene_detect, scene_thresh, prefix_vid,
1225
- prog1,
1226
- ],
1227
- outputs=[gallery, zip_out, details1, cmd_preview, prog1, frames_state, frames_dir_state, prefix_state],
1228
- )
1229
-
1230
- # STEP 2 — Upscale
1231
- with gr.Tab("Step 2 · Upscale Frames"):
1232
- if not HAVE_REALESRGAN:
1233
- gr.Markdown("⚠️ Upscaling disabled. Install dependencies in requirements.txt (realesrgan, basicsr, torch, etc.).")
1234
-
1235
- gr.Markdown("Use frames from Step 1 **or** upload images below.")
1236
- imgs_override = gr.Files(
1237
- label="Upload images to upscale (JPG/PNG)",
1238
- file_types=[".jpg", ".jpeg", ".png"],
1239
- type="filepath"
1240
- )
1241
-
1242
- with gr.Accordion("Upscaling options", open=True):
1243
- with gr.Row():
1244
- ui_model_name = gr.Dropdown(
1245
- label="Upscaler model",
1246
- choices=[
1247
- "RealESRGAN_x4plus",
1248
- "RealESRNet_x4plus",
1249
- "RealESRGAN_x4plus_anime_6B",
1250
- "RealESRGAN_x2plus",
1251
- "realesr-general-x4v3",
1252
- ],
1253
- value="RealESRGAN_x4plus",
1254
- show_label=True
1255
- )
1256
- denoise_strength = gr.Slider(0, 1, value=0.5, step=0.1, label="Denoise (only general-x4v3)")
1257
- outscale = gr.Slider(1, 6, value=4, step=1, label="Resolution upscale")
1258
- face_enhance = gr.Checkbox(value=False, label="Face Enhancement (GFPGAN)")
1259
-
1260
- with gr.Row():
1261
- tile = gr.Number(value=256, label="Tile size (try 128 if OOM; 0=auto)")
1262
- precision = gr.Dropdown(["auto", "half", "full"], value="auto", label="Precision (GPU=half, CPU=full)")
1263
- with gr.Row():
1264
- batch_size = gr.Number(value=12, precision=0, label="Batch size per click")
1265
- max_images = gr.Number(value=0, precision=0, label="Max images to process (0 = all)")
1266
-
1267
- with gr.Row():
1268
- btn_prepare = gr.Button("Step 2: Load / Reset Sources", variant="secondary")
1269
- btn_next = gr.Button("Process Next Batch", variant="primary")
1270
-
1271
- prog2 = gr.HTML(render_progress(0.0, "Idle"))
1272
- gallery_up = gr.Gallery(label="Upscaled preview (30 sampled)", columns=6, height=480)
1273
- zip_up = gr.File(label="Download upscaled ZIP")
1274
- details2 = gr.Markdown("")
1275
-
1276
- # 1) load/reset sources
1277
- btn_prepare.click(
1278
- step2_prepare_sources,
1279
- inputs=[frames_state, imgs_override, max_images],
1280
- outputs=[up_src_paths_state, up_out_dir_state, up_done_idx_state, up_total_state, details2, prog2]
1281
- )
1282
-
1283
- # 2) process one batch per click
1284
- btn_next.click(
1285
- step2_process_next_batch,
1286
- inputs=[
1287
- up_src_paths_state, up_out_dir_state, up_done_idx_state, up_total_state,
1288
- ui_model_name, outscale, tile, precision, denoise_strength, face_enhance, batch_size
1289
- ],
1290
- outputs=[gallery_up, zip_up, details2, prog2, up_done_idx_state, up_out_dir_state]
1291
- )
1292
-
1293
-
1294
- # STEP 3 — Re-encode
1295
- with gr.Tab("Step 3 · Re-encode Video"):
1296
- gr.Markdown("Use frames from Step 1 **or** upload a frames ZIP / images. Optionally provide a video for audio track.")
1297
- uploaded_frames = gr.Files(label="Upload frames (ZIP or images)", type="filepath")
1298
- uploaded_audio = gr.File(label="Optional: video/audio source for audio track", file_types=[".mp4", ".mov", ".mkv", ".webm", ".mp3", ".wav"], type="filepath")
1299
- with gr.Row():
1300
- fmt = gr.Dropdown(["h264", "h265", "vp9"], value="h264", label="Format")
1301
- include_audio = gr.Checkbox(True, label="Include audio if available")
1302
- with gr.Row():
1303
- btn_encode = gr.Button("Step 3: Create Video", variant="primary")
1304
- prog3 = gr.HTML(render_progress(0.0, "Idle"))
1305
- video_player = gr.Video(label="Preview video")
1306
- details3 = gr.Markdown("")
1307
-
1308
- def set_fps(vfile):
1309
- if not vfile or not getattr(vfile, 'name', None):
1310
- return 30.0
1311
- info = parse_video_info(ffprobe_json(vfile.name))
1312
- return float(info.get("fps") or 30.0)
1313
- # capture FPS from the original step1 video when it changes
1314
- video.change(set_fps, inputs=[video], outputs=[fps_state])
1315
 
1316
- btn_encode.click(
1317
- step3_encode,
1318
- inputs=[frames_dir_state, prefix_state, video, fps_state, fmt, include_audio, prog3, uploaded_frames, uploaded_audio],
1319
- outputs=[video_player, details3, prog3],
 
 
1320
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1321
 
1322
- # QUICK MODE
1323
- # with gr.Tab("⚡ Quick Mode"):
1324
- # gr.Markdown("Extract ALL frames → Upscale ×4 → MP4 (H.264) with original audio. No toggles.")
1325
- # with gr.Row():
1326
- # q_video = gr.File(label="Upload video", file_types=[".mp4", ".mov", ".mkv", ".avi", ".webm", ".m4v"], type="filepath")
1327
- # with gr.Row():
1328
- # q_start = gr.Textbox(value="", label="Start (HH:MM:SS.mmm, optional)")
1329
- # q_end = gr.Textbox(value="", label="End (HH:MM:SS.mmm, optional)")
1330
- # q_resize = gr.Number(value=0, label="Resize long side before upscale (0 = none)")
1331
- # q_prefix = gr.Textbox(value="", label="Filename prefix (defaults to input file name)")
1332
-
1333
- # q_btn = gr.Button("Run Quick Pipeline", variant="primary")
1334
- # q_prog = gr.HTML(render_progress(0.0, "Idle"))
1335
- # q_video_out = gr.Video(label="Output video")
1336
- # with gr.Accordion("Show intermediates", open=False):
1337
- # q_zip_frames = gr.File(label="frames.zip")
1338
- # q_zip_up = gr.File(label="upscaled.zip")
1339
- # q_details = gr.Markdown("")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1340
 
1341
- # q_btn.click(
1342
- # quick_mode,
1343
- # inputs=[q_video, q_start, q_end, q_resize, q_prefix, q_prog],
1344
- # outputs=[q_video_out, q_zip_frames, q_zip_up, q_details, q_prog],
1345
- # )
1346
 
1347
  return demo
1348
-
1349
-
1350
- if __name__ == "__main__":
1351
- demo = build_ui()
1352
- demo.queue().launch()
 
 
 
 
 
 
 
 
 
 
 
 
1
  # ────────────────────────────────────────────────────────
2
  # Standard imports
3
  # ────────────────────────────────────────────────────────
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
+ import os, re, json, math, time, zipfile, tempfile, subprocess, base64
 
 
 
 
 
 
 
 
 
 
6
  from pathlib import Path
7
+ from typing import List, Optional
 
8
  import gradio as gr
9
  import numpy as np
 
 
 
 
 
 
 
10
 
11
  _num = re.compile(r'(\d+)')
12
 
 
 
 
 
 
 
13
  def _natural_key(p: Path | str):
14
  s = str(p)
15
  return [int(t) if t.isdigit() else t.lower() for t in _num.split(s)]
16
 
17
  def sample_paths(paths: List[Path] | List[str], n: int = 30) -> List[str]:
 
18
  if not paths:
19
  return []
 
20
  paths = sorted(paths, key=_natural_key)
21
  total = len(paths)
22
  n = max(1, min(n, total))
23
  if n == total:
24
  return [str(p) for p in paths]
 
25
  step = (total - 1) / (n - 1)
26
  idxs = [round(i * step) for i in range(n)]
27
+ out, seen = [], set()
 
 
28
  for i in idxs:
29
  if i not in seen:
30
+ out.append(str(paths[int(i)]))
31
+ seen.add(int(i))
32
  return out
33
+
34
  import base64
35
 
36
  APP_DIR = os.getcwd()
 
53
  <hr>
54
  """
55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
 
57
 
58
  # ─────────────────────────────────────────────────────────────
 
81
  # Helpers
82
  # ─────────────────────────────────────────────────────────────
83
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  def sanitize_prefix(txt: str) -> str:
85
  txt = (txt or "").strip()
86
  if not txt:
 
211
  details = f"Frames extracted: {len(frames)} | Saved to: {raw_dir}"
212
  return gallery, str(zip_path), details, cmd_preview, render_progress(100.0, f"Extracted {len(frames)} frames"), [str(p) for p in frames], str(raw_dir), prefix
213
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
214
def render_progress(pct: float, label: str = "") -> str:
    """Render a small HTML progress bar with a caption underneath.

    Args:
        pct: Completion percentage; clamped into the range 0-100.
        label: Text shown before the percentage in the caption. It is
            inserted into the markup as given (no escaping is applied).

    Returns:
        An HTML snippet: an outlined track containing a filled bar whose
        width is the clamped percentage, followed by a caption line of the
        form ``"<label> <pct>%"`` with one decimal place.
    """
    clamped = max(0.0, min(100.0, pct))
    shown = f"{clamped:.1f}"
    track = (
        '<div style="width:100%;border:1px solid #ddd;border-radius:8px;overflow:hidden;height:18px;">'
        f'<div style="height:100%;width:{shown}%;background:#3b82f6;"></div>'
        '</div>'
    )
    caption = (
        '<div style="font-size:12px;opacity:.8;margin-top:4px;">'
        f'{label} {shown}%'
        '</div>'
    )
    return track + caption
 
344
  details = f"Frames extracted: {len(frames)} | Saved to: {raw_dir}"
345
  yield gallery, str(zip_path), details, cmd_preview, render_progress(100.0, f"Extracted {len(frames)} frames"), [str(p) for p in frames], str(raw_dir), prefix
346
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
347
  # ───────────────── UI
348
 
349
def build_ui():
    """Build the Gradio Blocks app for extracting frames from a video.

    The layout has a video upload, an "Extraction Settings" accordion
    (sampling mode, time window, resize, output format, scene detection,
    filename prefix), an extract button, a progress bar, a preview gallery,
    a ZIP download, a details line, the ffmpeg command preview and a live
    estimate of how many frames will be produced.

    Returns:
        The constructed ``gr.Blocks`` instance (not yet launched).
    """
    with gr.Blocks(theme=gr.themes.Soft()) as demo:
        gr.HTML(render_logo_html(88))
        gr.Markdown("Extract frames from a video with live progress.")

        # Upload video
        with gr.Row():
            video = gr.File(
                label="Upload video",
                file_types=[".mp4", ".mov", ".mkv", ".avi", ".webm", ".m4v"],
                type="filepath",
            )

        # Extraction settings
        with gr.Accordion("Extraction Settings", open=True):
            with gr.Row():
                mode = gr.Dropdown(
                    ["Every N seconds", "Every Nth frame", "Exact FPS", "All frames"],
                    value="Every N seconds", label="Mode",
                )
                every_seconds = gr.Number(value=1.0, label="Every N seconds")
                nth_frame = gr.Number(value=30, label="Every Nth frame")
                exact_fps = gr.Number(value=1.0, label="Exact FPS")
            with gr.Row():
                start_time = gr.Textbox(value="", label="Start (HH:MM:SS.mmm)")
                end_time = gr.Textbox(value="", label="End (HH:MM:SS.mmm)")
                long_side = gr.Number(value=0, label="Resize long side px (0 = none)")
            with gr.Row():
                out_format = gr.Dropdown(["jpg", "png"], value="jpg", label="Output format")
                jpg_quality = gr.Slider(2, 31, value=3, step=1, label="JPG quality (2=best)")
                png_level = gr.Slider(0, 9, value=2, step=1, label="PNG compression level")
            with gr.Row():
                scene_detect = gr.Checkbox(False, label="Scene-change detect")
                scene_thresh = gr.Slider(0.0, 1.0, value=0.3, step=0.01, label="Scene threshold")
                prefix_vid = gr.Textbox(value="", label="Filename prefix (defaults to input file name)")

        # Controls & outputs
        btn_extract = gr.Button("Extract Frames", variant="primary")
        prog = gr.HTML(render_progress(0.0, "Idle"))
        gallery = gr.Gallery(label="Preview (≤100, else sample 100)", columns=6, height=480)
        zip_out = gr.File(label="Download frames ZIP")
        details = gr.Markdown("Ready.")
        with gr.Accordion("Show FFmpeg command", open=False):
            cmd_preview = gr.Textbox(label="ffmpeg command", lines=4)
        estimate_md = gr.Markdown("Estimated output: —")

        # The extraction handler's final yield carries eight values: the five
        # visible outputs plus the frame path list, the frames directory and
        # the prefix. These hidden states receive the last three so the
        # handler's arity matches the wired outputs.
        # NOTE(review): assumes every yield of step1_extract has eight values,
        # as the final one does - confirm against the handler.
        frames_state = gr.State([])
        frames_dir_state = gr.State("")
        prefix_state = gr.State("")

        # === Functions wired into UI ===
        def _toggle_params(mode_val, fmt):
            """Show only the inputs relevant to the chosen mode and format."""
            return (
                gr.update(visible=(mode_val == "Every N seconds")),
                gr.update(visible=(mode_val == "Every Nth frame")),
                gr.update(visible=(mode_val == "Exact FPS")),
                gr.update(visible=(fmt == "jpg")),
                gr.update(visible=(fmt == "png")),
            )

        def parse_ts(ts: str) -> float:
            """Convert ``HH:MM:SS[.mmm]`` to seconds; 0.0 if empty or malformed."""
            if not ts:
                return 0.0
            parts = ts.split(":")
            if len(parts) != 3:
                return 0.0
            try:
                return float(parts[0]) * 3600 + float(parts[1]) * 60 + float(parts[2])
            except ValueError:
                return 0.0

        def update_estimate(vfile, mode_val, evs, nth, exfps, st, et):
            """Return the Markdown text estimating the number of output frames."""
            # The upload may arrive as a plain path string or as an object
            # exposing the path via ``.name``; accept both.
            path = vfile if isinstance(vfile, str) else getattr(vfile, "name", None)
            if not path:
                return "Estimated output: —"

            info = parse_video_info(ffprobe_json(path))
            total = info.get("duration")
            dur = total
            if total:
                st_s = parse_ts(st or "")
                et_s = parse_ts(et or "")
                # An End value is honoured only when it falls inside the clip;
                # otherwise the window runs to the end of the video.
                # NOTE(review): treats End as an absolute timestamp, per its
                # "End (HH:MM:SS.mmm)" label - confirm against the extractor.
                end_s = et_s if 0 < et_s < total else total
                dur = max(0.0, end_s - st_s)

            est = estimate_output_count(
                mode_val, dur, info.get("fps"), evs or 1.0, int(nth or 1), exfps or 1.0
            )
            return f"Estimated output: **~{est} frames**" if est else "Estimated output: —"

        # Wire up dynamic visibility
        visibility_inputs = [mode, out_format]
        visibility_outputs = [every_seconds, nth_frame, exact_fps, jpg_quality, png_level]
        mode.change(_toggle_params, visibility_inputs, visibility_outputs)
        out_format.change(_toggle_params, visibility_inputs, visibility_outputs)
        demo.load(_toggle_params, visibility_inputs, visibility_outputs)

        # Wire up estimate updater: any control that affects the count re-runs it.
        estimate_inputs = [video, mode, every_seconds, nth_frame, exact_fps, start_time, end_time]
        for ctrl in estimate_inputs:
            ctrl.change(update_estimate, inputs=estimate_inputs, outputs=[estimate_md])

        # Extract button
        btn_extract.click(
            step1_extract,
            inputs=[
                video, mode, every_seconds, nth_frame, exact_fps,
                start_time, end_time, long_side, out_format, jpg_quality, png_level,
                scene_detect, scene_thresh, prefix_vid,
                prog,
            ],
            outputs=[
                gallery, zip_out, details, cmd_preview, prog,
                frames_state, frames_dir_state, prefix_state,
            ],
        )

        if MISSING_MSG:
            gr.Markdown(f"<span style='color:#b45309'>{MISSING_MSG}</span>")

    return demo
449
if __name__ == "__main__":
    # Script entry point: build the interface, enable Gradio's request queue,
    # then start the server. Each statement needs its own line; chained on
    # one line after the colon they do not parse.
    demo = build_ui()
    demo.queue().launch()