baenacoco commited on
Commit
b64777f
·
verified ·
1 Parent(s): c78baae

Upload folder using huggingface_hub

Browse files
Files changed (5) hide show
  1. README.md +6 -7
  2. app.py +401 -0
  3. hub_utils.py +64 -0
  4. packages.txt +6 -0
  5. requirements.txt +12 -0
README.md CHANGED
@@ -1,12 +1,11 @@
1
  ---
2
- title: Talking Head Frames
3
- emoji: 🐠
4
- colorFrom: indigo
5
- colorTo: red
6
  sdk: gradio
7
- sdk_version: 6.9.0
8
  app_file: app.py
9
  pinned: false
 
10
  ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Talking Head - Frames
3
+ emoji: 🎞️
4
+ colorFrom: blue
5
+ colorTo: indigo
6
  sdk: gradio
7
+ sdk_version: 5.9.1
8
  app_file: app.py
9
  pinned: false
10
+ hardware: t4-medium
11
  ---
 
 
app.py ADDED
@@ -0,0 +1,401 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Space 1: Extract Frames + Caption (Florence-2)
2
+
3
+ Uploads videos -> extracts frames with face detection -> captions with Florence-2 -> saves to Hub.
4
+ GPU: T4 medium (~4GB VRAM for Florence-2)
5
+ """
6
+ import gc
7
+ import json
8
+ import logging
9
+ import os
10
+ import shutil
11
+ import subprocess
12
+ import traceback
13
+ from pathlib import Path
14
+
15
+ import cv2
16
+ import gradio as gr
17
+ import numpy as np
18
+ import torch
19
+ from PIL import Image
20
+
21
+ from hub_utils import upload_step, list_projects
22
+
23
+ logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(name)s: %(message)s")
24
+ logger = logging.getLogger(__name__)
25
+
26
# ── Config ──
# Use the Space's persistent /data volume when it exists and is writable;
# otherwise fall back to a local ./data directory (local dev).
IS_HF_SPACE = os.environ.get("SPACE_ID") is not None
_data_path = Path("/data")
if IS_HF_SPACE and _data_path.exists() and os.access(_data_path, os.W_OK):
    BASE_DIR = _data_path
else:
    BASE_DIR = Path("data")

FRAMES_DIR = BASE_DIR / "frames"      # final selected frames + caption sidecars
TEMP_DIR = BASE_DIR / "temp"          # scratch space for raw extracted frames
HF_CACHE_DIR = BASE_DIR / "hf_cache"  # model weight cache

for d in [FRAMES_DIR, TEMP_DIR, HF_CACHE_DIR]:
    d.mkdir(parents=True, exist_ok=True)

# Route model downloads into the persistent cache dir.
# NOTE(review): TRANSFORMERS_CACHE is deprecated in newer transformers —
# HF_HOME alone should suffice; confirm against the pinned version.
os.environ["HF_HOME"] = str(HF_CACHE_DIR)
os.environ["TRANSFORMERS_CACHE"] = str(HF_CACHE_DIR)

FLORENCE2_MODEL_ID = "microsoft/Florence-2-large"
FRAME_EXTRACT_FPS = 1        # sample rate (frames/sec) passed to ffmpeg
MIN_SHARPNESS = 50.0         # Laplacian-variance floor for accepting a face crop
TARGET_NUM_FRAMES = 100      # default number of frames to keep (slider default)
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

APP_VERSION = "1.0.0"
51
+
52
+ # ── FFmpeg utils ──
53
+
54
def _ffmpeg_extract_frames(video_path: str, output_dir: str, fps: float = 1.0):
    """Dump frames of *video_path* into *output_dir* at the given sample rate.

    Frames are written as high-quality JPEGs named frame_000001.jpg, ....
    Raises RuntimeError (with the tail of stderr) when ffmpeg exits non-zero.
    """
    Path(output_dir).mkdir(parents=True, exist_ok=True)
    command = [
        "ffmpeg",
        "-y",
        "-i", video_path,
        "-vf", f"fps={fps}",
        "-qmin", "1",
        "-q:v", "2",
        f"{output_dir}/frame_%06d.jpg",
    ]
    proc = subprocess.run(command, capture_output=True, text=True)
    if proc.returncode != 0:
        raise RuntimeError(f"FFmpeg failed: {proc.stderr[-500:]}")
65
+
66
+
67
+ # ── Face detection & scoring ──
68
+
69
# Cached Haar-cascade detector, created on first use.
_face_net = None


def _get_face_detector():
    """Return the process-wide Haar-cascade frontal-face detector (lazy)."""
    global _face_net
    if _face_net is None:
        cascade_xml = cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
        _face_net = cv2.CascadeClassifier(cascade_xml)
    return _face_net
78
+
79
+
80
def _compute_sharpness(gray):
    """Variance of the Laplacian — a standard blur metric (higher = sharper)."""
    laplacian = cv2.Laplacian(gray, cv2.CV_64F)
    return laplacian.var()
82
+
83
+
84
def _detect_faces(image_bgr):
    """Detect faces in a BGR image; return boxes normalized to [0, 1].

    Haar cascades expose no real confidence score, so a fixed 0.9
    placeholder is reported for every detection.
    """
    height, width = image_bgr.shape[:2]
    gray = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2GRAY)
    boxes = _get_face_detector().detectMultiScale(
        gray, scaleFactor=1.1, minNeighbors=5, minSize=(60, 60)
    )
    return [
        {"confidence": 0.9, "x": x / width, "y": y / height, "w": bw / width, "h": bh / height}
        for (x, y, bw, bh) in boxes
    ]
93
+
94
+
95
def _score_frame(image_path):
    """Score a frame for dataset suitability; return None if unusable.

    A frame is rejected when it cannot be read, has no detectable face,
    or its face crop is blurrier than MIN_SHARPNESS. Otherwise the score
    is a weighted mix of sharpness, detector confidence, face area, and
    how close the face sits to the frame center.
    """
    frame = cv2.imread(image_path)
    if frame is None:
        return None
    height, width = frame.shape[:2]
    detections = _detect_faces(frame)
    if not detections:
        return None
    # Keep the largest face (by normalized area).
    face = max(detections, key=lambda f: f["w"] * f["h"])
    left = max(0, int(face["x"] * width))
    top = max(0, int(face["y"] * height))
    box_w = int(face["w"] * width)
    box_h = int(face["h"] * height)
    crop = frame[top:top + box_h, left:left + box_w]
    if crop.size == 0:
        return None
    sharpness = _compute_sharpness(cv2.cvtColor(crop, cv2.COLOR_BGR2GRAY))
    if sharpness < MIN_SHARPNESS:
        return None
    area_ratio = face["w"] * face["h"]
    # Reward faces centered horizontally and slightly above vertical center.
    center_x = face["x"] + face["w"] / 2
    center_y = face["y"] + face["h"] / 2
    centering = 1.0 - (abs(center_x - 0.5) + abs(center_y - 0.45))
    total = (
        (sharpness / 500.0) * 0.4
        + face["confidence"] * 0.3
        + area_ratio * 10 * 0.15
        + max(0, centering) * 0.15
    )
    return {"path": image_path, "sharpness": sharpness, "score": total}
124
+
125
+
126
def _select_diverse(scored, target):
    """Pick up to *target* frames spread across the timeline.

    *scored* is expected to be sorted best-first. The top 3x*target*
    candidates are re-ordered by path (i.e. chronologically, since frame
    filenames sort by time) and sampled at a fixed stride so the result
    covers the whole video rather than one moment. Any shortfall is
    topped up from the best remaining frames.
    """
    if len(scored) <= target:
        return scored
    pool = sorted(scored[: target * 3], key=lambda entry: entry["path"])
    stride = max(1, len(pool) // target)
    picked = pool[::stride][:target]
    if len(picked) < target:
        taken = {entry["path"] for entry in picked}
        for entry in scored:
            if entry["path"] in taken:
                continue
            picked.append(entry)
            if len(picked) >= target:
                break
    return picked
141
+
142
+
143
def extract_and_select_frames(video_paths, num_frames, fps, progress_callback=None):
    """Extract frames from videos, score them, and keep the best diverse subset.

    Args:
        video_paths: local filesystem paths of the input videos.
        num_frames: target number of frames to keep.
        fps: sampling rate passed to ffmpeg.
        progress_callback: optional fn(fraction, message) for UI progress.

    Returns:
        Paths (str) of the selected frames, copied into FRAMES_DIR.

    Raises:
        ValueError: if no frames could be extracted, or none pass the
            face/sharpness filters.
        RuntimeError: if ffmpeg fails on any video.
    """
    temp_frames_dir = TEMP_DIR / "raw_frames"
    if temp_frames_dir.exists():
        shutil.rmtree(temp_frames_dir)
    temp_frames_dir.mkdir(parents=True)

    # Bug fix: the scratch dir used to leak (filling persistent storage)
    # whenever scoring/selection raised — clean it up unconditionally.
    try:
        all_frame_paths = []
        for i, vpath in enumerate(video_paths):
            if progress_callback:
                progress_callback(i / len(video_paths) * 0.3, f"Extrayendo frames del video {i+1}/{len(video_paths)}...")
            out_dir = str(temp_frames_dir / f"video_{i}")
            _ffmpeg_extract_frames(vpath, out_dir, fps)
            frames = sorted(Path(out_dir).glob("*.jpg"))
            all_frame_paths.extend(str(f) for f in frames)

        logger.info(f"Extracted {len(all_frame_paths)} raw frames")
        if not all_frame_paths:
            # Fail fast instead of silently scoring an empty list.
            raise ValueError("No se encontraron frames validos con caras. Revisa la calidad del video.")

        scored = []
        total = len(all_frame_paths)
        for i, fpath in enumerate(all_frame_paths):
            if progress_callback and i % 50 == 0:
                progress_callback(0.3 + (i / total) * 0.5, f"Puntuando frame {i+1}/{total}...")
            result = _score_frame(fpath)
            if result:
                scored.append(result)

        if not scored:
            raise ValueError("No se encontraron frames validos con caras. Revisa la calidad del video.")

        # Rank best-first, then spread the selection across the timeline.
        scored.sort(key=lambda x: x["score"], reverse=True)
        selected = _select_diverse(scored, num_frames)

        # FRAMES_DIR is rebuilt from scratch on every run.
        output_dir = FRAMES_DIR
        if output_dir.exists():
            shutil.rmtree(output_dir)
        output_dir.mkdir(parents=True)

        output_paths = []
        for i, item in enumerate(selected):
            dst = output_dir / f"frame_{i:04d}.jpg"
            shutil.copy2(item["path"], dst)
            output_paths.append(str(dst))
    finally:
        shutil.rmtree(temp_frames_dir, ignore_errors=True)

    logger.info(f"Selected {len(output_paths)} diverse, high-quality frames")
    return output_paths
188
+
189
+
190
+ # ── Florence-2 captioner ──
191
+
192
# Florence-2 model/processor singletons, populated by _load_florence2().
_florence_model = None
_florence_processor = None


def _load_florence2():
    """Load Florence-2 (fp16) onto DEVICE once; later calls are no-ops.

    Also monkey-patches the inner language model's
    prepare_inputs_for_generation so generation survives signature drift
    across transformers versions: if the original hook raises
    AttributeError/TypeError, fall back to a minimal inputs dict.
    NOTE(review): the model is cast to fp16 even on the CPU fallback path,
    and caption_single() also casts inputs to fp16 — confirm the CPU path
    is ever exercised before changing either side.
    """
    global _florence_model, _florence_processor
    if _florence_model is not None:
        return

    # Imported lazily so the app can start before the model is needed.
    from transformers import AutoModelForCausalLM, AutoProcessor

    logger.info(f"Loading Florence-2 from {FLORENCE2_MODEL_ID}...")
    _florence_model = AutoModelForCausalLM.from_pretrained(
        FLORENCE2_MODEL_ID,
        torch_dtype=torch.float16,
        trust_remote_code=True,
        attn_implementation="eager",
    ).to(DEVICE)
    _florence_processor = AutoProcessor.from_pretrained(
        FLORENCE2_MODEL_ID, trust_remote_code=True,
    )
    # Monkey-patch for transformers compatibility
    _orig = _florence_model.language_model.prepare_inputs_for_generation
    def _patched(input_ids, past_key_values=None, **kwargs):
        try:
            return _orig(input_ids, past_key_values=past_key_values, **kwargs)
        except (AttributeError, TypeError):
            # Minimal fallback: ids plus the attention mask if one was given.
            model_inputs = {"input_ids": input_ids}
            if "attention_mask" in kwargs:
                model_inputs["attention_mask"] = kwargs["attention_mask"]
            return model_inputs
    _florence_model.language_model.prepare_inputs_for_generation = _patched
    logger.info("Florence-2 loaded")
225
+
226
+
227
def _unload_florence2():
    """Drop the Florence-2 singletons and reclaim GPU/CPU memory."""
    global _florence_model, _florence_processor
    if _florence_model is not None:
        # Move off the GPU before dropping the last reference.
        _florence_model.to("cpu")
        del _florence_model
        _florence_model = None
        _florence_processor = None
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
237
+
238
+
239
def caption_single(image_path):
    """Caption one image via Florence-2's <MORE_DETAILED_CAPTION> task.

    Loads the model on first use. Returns a generic fallback caption
    when the model produces empty text.
    """
    _load_florence2()
    task = "<MORE_DETAILED_CAPTION>"
    img = Image.open(image_path).convert("RGB")
    inputs = _florence_processor(text=task, images=img, return_tensors="pt").to(DEVICE, torch.float16)
    with torch.inference_mode():
        output_ids = _florence_model.generate(**inputs, max_new_tokens=150, num_beams=1, do_sample=False)
    decoded = _florence_processor.batch_decode(output_ids, skip_special_tokens=True)[0].strip()
    return decoded or "a photo of a person"
249
+
250
+
251
def caption_dataset(image_paths, progress_callback=None):
    """Caption every frame; write captions.json plus one .txt per frame.

    Args:
        image_paths: paths of the frame images to caption.
        progress_callback: optional fn(fraction, message) for UI progress.

    Returns:
        Dict mapping each input path to its generated caption.

    Raises:
        ValueError: if image_paths is empty.
    """
    if not image_paths:
        raise ValueError("No hay imagenes para captar")
    _load_florence2()
    captions = {}
    total = len(image_paths)
    for i, img_path in enumerate(image_paths):
        if progress_callback:
            progress_callback(i / total, f"Captioning {i+1}/{total}...")
        captions[img_path] = caption_single(img_path)
        logger.info(f"[{i+1}/{total}] {Path(img_path).name}: {captions[img_path][:80]}...")

    # Portable manifest keyed by bare filename (absolute paths differ per host).
    captions_file = FRAMES_DIR / "captions.json"
    portable = {Path(k).name: v for k, v in captions.items()}
    # Bug fix: explicit UTF-8 — ensure_ascii=False emits raw non-ASCII, which
    # crashed under a non-UTF-8 default locale encoding.
    with open(captions_file, "w", encoding="utf-8") as f:
        json.dump(portable, f, indent=2, ensure_ascii=False)

    # One caption sidecar .txt next to each frame image.
    for img_path, caption in captions.items():
        Path(img_path).with_suffix(".txt").write_text(caption, encoding="utf-8")

    _unload_florence2()
    return captions
272
+
273
+
274
+ # ── Gradio handlers ──
275
+
276
def process_videos(project_name, videos, num_frames, progress=gr.Progress()):
    """Gradio handler: extract, score, and caption frames from uploaded videos.

    Returns a (gallery_items, status_message) pair; gallery_items is None
    on validation failure or any processing error.
    NOTE(review): project_name is only validated here — output always lands
    in the shared FRAMES_DIR; per-project separation happens at save time.
    """
    if not project_name or not project_name.strip():
        return None, "Error: Debes introducir un nombre de proyecto"
    if not videos:
        return None, "Error: No se han subido videos"

    # gr.File entries may be tempfile wrappers (.name) or plain path strings.
    video_paths = [v.name if hasattr(v, "name") else v for v in videos]
    logger.info(f"=== Frame Extraction Started === Videos: {len(video_paths)}, Target: {num_frames}")

    try:
        progress(0.0, desc="Extrayendo frames...")
        # Extraction occupies the first half of the progress bar.
        frame_paths = extract_and_select_frames(
            video_paths, num_frames=int(num_frames), fps=FRAME_EXTRACT_FPS,
            progress_callback=lambda p, m: progress(p * 0.5, desc=m),
        )

        progress(0.5, desc="Captioning con Florence-2...")
        # Captioning occupies the second half.
        captions = caption_dataset(
            frame_paths,
            progress_callback=lambda p, m: progress(0.5 + p * 0.5, desc=m),
        )

        gallery = [(p, Path(p).stem) for p in frame_paths]
        status = f"OK - {len(frame_paths)} frames extraidos, {len(captions)} captions generados"
        logger.info(f"=== Frame Extraction Complete === {status}")
        return gallery, status

    except Exception as e:
        # Broad catch is deliberate at this UI boundary: surface the error as text.
        logger.error(f"=== Frame Extraction Failed ===\n{traceback.format_exc()}")
        return None, f"Error: {e}"
306
+
307
+
308
def save_to_hub(project_name):
    """Upload the current FRAMES_DIR to the Hub under the given project name.

    Returns a human-readable status string (success or error).
    """
    if not project_name or not project_name.strip():
        return "Error: Debes introducir un nombre de proyecto"
    name = project_name.strip()
    if not list(FRAMES_DIR.glob("*.jpg")):
        return "Error: No hay frames para guardar. Procesa videos primero."
    try:
        return upload_step(name, "step1_frames", str(FRAMES_DIR))
    except Exception as e:
        return f"Error: {e}"
319
+
320
+
321
def delete_selected_frame(gallery, selected_index):
    """Delete the selected frame: image, caption .txt, and manifest entry.

    Args:
        gallery: current gallery items, each a (path, label) pair or bare path.
        selected_index: index of the item to delete. Coerced to int since
            the gr.Number widget can deliver floats.

    Returns:
        (updated_gallery, status_message)
    """
    if gallery is None or selected_index is None:
        return gallery, "Selecciona una imagen para eliminar"
    idx = int(selected_index)  # robustness: Number components may emit floats
    if idx < 0 or idx >= len(gallery):
        return gallery, "Indice fuera de rango"

    item = gallery[idx]
    img_path = Path(item[0] if isinstance(item, (list, tuple)) else item)

    # Match by basename as well as full path — gallery paths may not be the
    # FRAMES_DIR paths (presumably Gradio serves cached copies; TODO confirm).
    deleted = False
    for frame_file in FRAMES_DIR.glob("*.jpg"):
        if frame_file.name == img_path.name or str(frame_file) == str(img_path):
            frame_file.unlink(missing_ok=True)
            frame_file.with_suffix(".txt").unlink(missing_ok=True)
            deleted = True
            break

    if not deleted:
        return gallery, "No se encontro el archivo para eliminar"

    # Keep captions.json in sync. Bug fix: explicit UTF-8 on both read and
    # write — the file is written with ensure_ascii=False elsewhere, so a
    # non-UTF-8 default locale encoding corrupted or crashed the round-trip.
    captions_file = FRAMES_DIR / "captions.json"
    if captions_file.exists():
        with open(captions_file, encoding="utf-8") as f:
            captions = json.load(f)
        captions.pop(img_path.name, None)
        with open(captions_file, "w", encoding="utf-8") as f:
            json.dump(captions, f, indent=2, ensure_ascii=False)

    remaining = sorted(FRAMES_DIR.glob("*.jpg"))
    new_gallery = [(str(p), p.stem) for p in remaining]
    return new_gallery, f"Eliminado. Quedan {len(remaining)} frames"
352
+
353
+
354
+ # ── UI ──
355
+
356
# Top-level Gradio app: upload videos -> process -> review gallery -> save to Hub.
with gr.Blocks(title="Talking Head - Frames", theme=gr.themes.Soft()) as demo:
    gr.Markdown(f"# Talking Head - Extraer Frames `v{APP_VERSION}`\nExtrae frames con deteccion facial y genera captions con Florence-2")

    # Required: used as the project folder name in the Hub dataset repo.
    project_name = gr.Textbox(
        label="Nombre del proyecto",
        placeholder="mi_proyecto",
        info="Obligatorio. Se usa como carpeta en el Hub.",
    )

    with gr.Row():
        with gr.Column():
            video_input = gr.File(
                label="Videos (MP4/MOV/AVI/MKV)", file_count="multiple",
                file_types=[".mp4", ".mov", ".avi", ".mkv"],
            )
            num_frames = gr.Slider(20, 200, value=TARGET_NUM_FRAMES, step=10, label="Numero de frames a extraer")
            process_btn = gr.Button("Procesar Videos", variant="primary")
        with gr.Column():
            frame_gallery = gr.Gallery(label="Frames extraidos", columns=5, height=500, object_fit="contain")
            with gr.Row():
                selected_idx = gr.Number(value=0, label="Indice seleccionado", precision=0)
                delete_btn = gr.Button("Eliminar frame", variant="stop", size="sm")
            status_box = gr.Textbox(label="Estado", interactive=False)

    save_btn = gr.Button("Guardar en Hub", variant="secondary")
    save_status = gr.Textbox(label="Estado guardado", interactive=False)

    def on_gallery_select(evt: gr.SelectData):
        # Mirror the clicked gallery index into the Number box for deletion.
        return evt.index

    frame_gallery.select(fn=on_gallery_select, inputs=None, outputs=[selected_idx])

    process_btn.click(
        process_videos,
        inputs=[project_name, video_input, num_frames],
        outputs=[frame_gallery, status_box],
    )
    delete_btn.click(
        delete_selected_frame,
        inputs=[frame_gallery, selected_idx],
        outputs=[frame_gallery, status_box],
    )
    save_btn.click(save_to_hub, inputs=[project_name], outputs=[save_status])

if __name__ == "__main__":
    # queue() is required for progress tracking; 0.0.0.0:7860 is the Space default.
    demo.queue().launch(server_name="0.0.0.0", server_port=7860)
hub_utils.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Hub utilities for uploading/downloading step data to HF Dataset repo."""
2
+ import os
3
+ import logging
4
+ from pathlib import Path
5
+ from huggingface_hub import HfApi, hf_hub_download, list_repo_tree
6
+
7
+ logger = logging.getLogger(__name__)
8
+
9
+ HF_DATASET_REPO_ID = "baenacoco/talking-head-avatar"
10
+
11
+
12
def _get_api():
    """Return an authenticated HfApi, ensuring the dataset repo exists.

    Raises:
        ValueError: when HF_TOKEN is missing from the environment.
    """
    token = os.environ.get("HF_TOKEN")
    if not token:
        raise ValueError("HF_TOKEN no encontrado en variables de entorno")
    hub = HfApi(token=token)
    hub.create_repo(repo_id=HF_DATASET_REPO_ID, repo_type="dataset", exist_ok=True)
    return hub
19
+
20
+
21
def upload_step(name: str, step_folder: str, local_dir: str):
    """Upload a local directory to {name}/{step_folder}/ in the dataset repo."""
    destination = f"{name}/{step_folder}"
    _get_api().upload_folder(
        folder_path=local_dir,
        path_in_repo=destination,
        repo_id=HF_DATASET_REPO_ID,
        repo_type="dataset",
    )
    logger.info(f"Uploaded {local_dir} -> {destination}")
    return f"Subido a Hub: {destination}"
32
+
33
+
34
def download_step(name: str, step_folder: str, local_dir: str):
    """Download {name}/{step_folder}/ from the dataset repo into *local_dir*."""
    # Lazy import keeps module import light.
    from huggingface_hub import snapshot_download

    prefix = f"{name}/{step_folder}"
    snapshot_download(
        repo_id=HF_DATASET_REPO_ID,
        repo_type="dataset",
        local_dir=local_dir,
        allow_patterns=[f"{prefix}/**"],
        token=os.environ.get("HF_TOKEN"),
    )
    logger.info(f"Downloaded {prefix} -> {local_dir}")
    return f"Descargado de Hub: {prefix}"
47
+
48
+
49
def list_projects() -> list[str]:
    """List project names (top-level folders) in the dataset repo.

    Returns an empty list on any Hub error (missing repo, network, auth).
    """
    token = os.environ.get("HF_TOKEN")
    try:
        api = HfApi(token=token)
        entries = api.list_repo_tree(
            repo_id=HF_DATASET_REPO_ID, repo_type="dataset", path_in_repo="",
        )
        projects = set()
        for entry in entries:
            # Entries expose .path (current hub versions) or .rfilename (older).
            path = getattr(entry, "path", None) or getattr(entry, "rfilename", "")
            if not path:
                continue
            if "/" in path:
                # Nested entry: first segment is the project folder.
                projects.add(path.split("/", 1)[0])
            elif getattr(entry, "tree_id", None) is not None:
                # Bug fix: the old filter also matched top-level FILES
                # (e.g. README.md) as "projects". RepoFolder entries carry
                # tree_id while RepoFile entries do not — TODO confirm
                # against the installed huggingface_hub version.
                projects.add(path)
        return sorted(projects)
    except Exception as e:
        logger.warning(f"Could not list projects: {e}")
        return []
packages.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ ffmpeg
2
+ libgl1-mesa-glx
3
+ libglib2.0-0
4
+ libsm6
5
+ libxext6
6
+ libxrender-dev
requirements.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ setuptools>=69.0.0
2
+ gradio>=5.9.1
3
+ torch>=2.1.0
4
+ transformers>=4.36.0,<5.0.0
5
+ huggingface_hub>=0.20.0
6
+ opencv-python-headless>=4.8.0
7
+ numpy>=1.24.0
8
+ Pillow>=10.0.0
9
+ timm>=0.9.0
10
+ sentencepiece>=0.1.99
11
+ protobuf>=3.20.0
12
+ einops>=0.7.0