Upload 2 files
Browse files- preprocessing_router.py +35 -5
- svision_client.py +36 -8
preprocessing_router.py
CHANGED
|
@@ -492,9 +492,25 @@ def process_video_job(job_id: str):
|
|
| 492 |
fn = f"face_{frame_idx:06d}_{saved_count:03d}.jpg"
|
| 493 |
local_crop_path = faces_root / fn
|
| 494 |
|
| 495 |
-
|
| 496 |
-
|
| 497 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 498 |
# If no crop from svision, use original frame
|
| 499 |
shutil.copy2(frame_path, local_crop_path)
|
| 500 |
|
|
@@ -536,8 +552,18 @@ def process_video_job(job_id: str):
|
|
| 536 |
chars_dir = base / "characters"
|
| 537 |
chars_dir.mkdir(parents=True, exist_ok=True)
|
| 538 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 539 |
for ci, idxs in sorted(cluster_map.items(), key=lambda x: x[0]):
|
| 540 |
char_id = f"char_{ci:02d}"
|
|
|
|
| 541 |
|
| 542 |
if not idxs:
|
| 543 |
continue
|
|
@@ -555,10 +581,12 @@ def process_video_job(job_id: str):
|
|
| 555 |
|
| 556 |
for j in selected_idxs:
|
| 557 |
if j >= len(crops_meta):
|
|
|
|
| 558 |
continue
|
| 559 |
meta = crops_meta[j]
|
| 560 |
fname = meta.get("file")
|
| 561 |
if not fname:
|
|
|
|
| 562 |
continue
|
| 563 |
|
| 564 |
src = faces_root / fname
|
|
@@ -568,8 +596,10 @@ def process_video_job(job_id: str):
|
|
| 568 |
shutil.copy2(src, dst)
|
| 569 |
files.append(fname)
|
| 570 |
file_urls.append(f"/files/{video_name}/{char_id}/{fname}")
|
| 571 |
-
|
| 572 |
-
|
|
|
|
|
|
|
| 573 |
|
| 574 |
# Create representative image
|
| 575 |
rep = files[0] if files else None
|
|
|
|
| 492 |
fn = f"face_{frame_idx:06d}_{saved_count:03d}.jpg"
|
| 493 |
local_crop_path = faces_root / fn
|
| 494 |
|
| 495 |
+
crop_saved = False
|
| 496 |
+
if crop_path:
|
| 497 |
+
# Handle remote URLs from svision (Gradio)
|
| 498 |
+
if isinstance(crop_path, str) and crop_path.startswith("http"):
|
| 499 |
+
try:
|
| 500 |
+
import requests
|
| 501 |
+
resp = requests.get(crop_path, timeout=30)
|
| 502 |
+
if resp.status_code == 200:
|
| 503 |
+
with open(local_crop_path, "wb") as f:
|
| 504 |
+
f.write(resp.content)
|
| 505 |
+
crop_saved = True
|
| 506 |
+
except Exception as dl_err:
|
| 507 |
+
print(f"[{job_id}] Error descargando crop: {dl_err}")
|
| 508 |
+
# Handle local paths
|
| 509 |
+
elif isinstance(crop_path, str) and os.path.exists(crop_path):
|
| 510 |
+
shutil.copy2(crop_path, local_crop_path)
|
| 511 |
+
crop_saved = True
|
| 512 |
+
|
| 513 |
+
if not crop_saved:
|
| 514 |
# If no crop from svision, use original frame
|
| 515 |
shutil.copy2(frame_path, local_crop_path)
|
| 516 |
|
|
|
|
| 552 |
chars_dir = base / "characters"
|
| 553 |
chars_dir.mkdir(parents=True, exist_ok=True)
|
| 554 |
|
| 555 |
+
print(f"[{job_id}] cluster_map: {cluster_map}")
|
| 556 |
+
print(f"[{job_id}] crops_meta count: {len(crops_meta)}")
|
| 557 |
+
print(f"[{job_id}] faces_root: {faces_root}, exists: {faces_root.exists()}")
|
| 558 |
+
if faces_root.exists():
|
| 559 |
+
existing_files = list(faces_root.glob("*"))
|
| 560 |
+
print(f"[{job_id}] Files in faces_root: {len(existing_files)}")
|
| 561 |
+
for ef in existing_files[:5]:
|
| 562 |
+
print(f"[{job_id}] - {ef.name}")
|
| 563 |
+
|
| 564 |
for ci, idxs in sorted(cluster_map.items(), key=lambda x: x[0]):
|
| 565 |
char_id = f"char_{ci:02d}"
|
| 566 |
+
print(f"[{job_id}] Processing cluster {char_id} with {len(idxs)} indices: {idxs[:5]}...")
|
| 567 |
|
| 568 |
if not idxs:
|
| 569 |
continue
|
|
|
|
| 581 |
|
| 582 |
for j in selected_idxs:
|
| 583 |
if j >= len(crops_meta):
|
| 584 |
+
print(f"[{job_id}] Index {j} out of range (crops_meta len={len(crops_meta)})")
|
| 585 |
continue
|
| 586 |
meta = crops_meta[j]
|
| 587 |
fname = meta.get("file")
|
| 588 |
if not fname:
|
| 589 |
+
print(f"[{job_id}] No filename in meta for index {j}")
|
| 590 |
continue
|
| 591 |
|
| 592 |
src = faces_root / fname
|
|
|
|
| 596 |
shutil.copy2(src, dst)
|
| 597 |
files.append(fname)
|
| 598 |
file_urls.append(f"/files/{video_name}/{char_id}/{fname}")
|
| 599 |
+
else:
|
| 600 |
+
print(f"[{job_id}] Source file not found: {src}")
|
| 601 |
+
except Exception as cp_err:
|
| 602 |
+
print(f"[{job_id}] Error copying {fname}: {cp_err}")
|
| 603 |
|
| 604 |
# Create representative image
|
| 605 |
rep = files[0] if files else None
|
svision_client.py
CHANGED
|
@@ -3,6 +3,7 @@ os.environ["CUDA_VISIBLE_DEVICES"] = "1"
|
|
| 3 |
|
| 4 |
from gradio_client import Client, handle_file
|
| 5 |
from typing import Any, Dict, List, Optional, Tuple, Union
|
|
|
|
| 6 |
import json
|
| 7 |
|
| 8 |
# Lazy initialization to avoid crash if Space is down at import time
|
|
@@ -123,6 +124,20 @@ def extract_descripcion_escena(imagen_path: str) -> str:
|
|
| 123 |
return result
|
| 124 |
|
| 125 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 126 |
def get_face_embeddings_from_image(image_path: str) -> List[Dict[str, Any]]:
|
| 127 |
"""
|
| 128 |
Call the /face_image_embedding_casting endpoint to detect faces and get embeddings.
|
|
@@ -137,31 +152,44 @@ def get_face_embeddings_from_image(image_path: str) -> List[Dict[str, Any]]:
|
|
| 137 |
Returns
|
| 138 |
-------
|
| 139 |
List[Dict[str, Any]]
|
| 140 |
-
List of dicts with 'embedding' (list of floats) and '
|
| 141 |
Returns empty list if no faces detected or on error.
|
| 142 |
"""
|
| 143 |
try:
|
| 144 |
-
# Returns: (face_crops: list of images, face_embeddings: list of dicts)
|
| 145 |
result = _get_svision_client().predict(
|
| 146 |
image=handle_file(image_path),
|
| 147 |
api_name="/face_image_embedding_casting"
|
| 148 |
)
|
| 149 |
-
|
|
|
|
| 150 |
if result and len(result) >= 2:
|
| 151 |
-
|
| 152 |
face_embeddings = result[1] if result[1] else []
|
| 153 |
-
|
|
|
|
| 154 |
faces = []
|
| 155 |
for i, emb_dict in enumerate(face_embeddings):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 156 |
faces.append({
|
| 157 |
-
"embedding":
|
| 158 |
-
"face_crop_path":
|
| 159 |
-
"index": emb_dict.get("index", i),
|
| 160 |
})
|
|
|
|
|
|
|
| 161 |
return faces
|
| 162 |
return []
|
| 163 |
except Exception as e:
|
| 164 |
print(f"[svision_client] get_face_embeddings_from_image error: {e}")
|
|
|
|
|
|
|
| 165 |
return []
|
| 166 |
|
| 167 |
|
|
|
|
| 3 |
|
| 4 |
from gradio_client import Client, handle_file
|
| 5 |
from typing import Any, Dict, List, Optional, Tuple, Union
|
| 6 |
+
import requests
|
| 7 |
import json
|
| 8 |
|
| 9 |
# Lazy initialization to avoid crash if Space is down at import time
|
|
|
|
| 124 |
return result
|
| 125 |
|
| 126 |
|
| 127 |
+
def _extract_path_from_gradio_file(file_obj) -> Optional[str]:
|
| 128 |
+
"""Extract file path from Gradio file object (can be dict, str, or other)."""
|
| 129 |
+
if file_obj is None:
|
| 130 |
+
return None
|
| 131 |
+
if isinstance(file_obj, str):
|
| 132 |
+
return file_obj
|
| 133 |
+
if isinstance(file_obj, dict):
|
| 134 |
+
# Gradio returns dicts like {"path": "...", "url": "...", "orig_name": "..."}
|
| 135 |
+
return file_obj.get("path") or file_obj.get("url") or file_obj.get("name")
|
| 136 |
+
if hasattr(file_obj, "name"):
|
| 137 |
+
return file_obj.name
|
| 138 |
+
return str(file_obj)
|
| 139 |
+
|
| 140 |
+
|
| 141 |
def get_face_embeddings_from_image(image_path: str) -> List[Dict[str, Any]]:
|
| 142 |
"""
|
| 143 |
Call the /face_image_embedding_casting endpoint to detect faces and get embeddings.
|
|
|
|
| 152 |
Returns
|
| 153 |
-------
|
| 154 |
List[Dict[str, Any]]
|
| 155 |
+
List of dicts with 'embedding' (list of floats) and 'face_crop_path' (image path string).
|
| 156 |
Returns empty list if no faces detected or on error.
|
| 157 |
"""
|
| 158 |
try:
|
| 159 |
+
# Returns: (face_crops: list of images/dicts, face_embeddings: list of dicts)
|
| 160 |
result = _get_svision_client().predict(
|
| 161 |
image=handle_file(image_path),
|
| 162 |
api_name="/face_image_embedding_casting"
|
| 163 |
)
|
| 164 |
+
|
| 165 |
+
# result is a tuple: (list of image paths/dicts, list of embedding dicts)
|
| 166 |
if result and len(result) >= 2:
|
| 167 |
+
face_crops_raw = result[0] if result[0] else []
|
| 168 |
face_embeddings = result[1] if result[1] else []
|
| 169 |
+
|
| 170 |
+
# Combine into unified structure, extracting paths correctly
|
| 171 |
faces = []
|
| 172 |
for i, emb_dict in enumerate(face_embeddings):
|
| 173 |
+
# Extract path from Gradio file object (might be dict or string)
|
| 174 |
+
crop_path = None
|
| 175 |
+
if i < len(face_crops_raw):
|
| 176 |
+
crop_path = _extract_path_from_gradio_file(face_crops_raw[i])
|
| 177 |
+
|
| 178 |
+
embedding = emb_dict.get("embedding", []) if isinstance(emb_dict, dict) else []
|
| 179 |
+
|
| 180 |
faces.append({
|
| 181 |
+
"embedding": embedding,
|
| 182 |
+
"face_crop_path": crop_path,
|
| 183 |
+
"index": emb_dict.get("index", i) if isinstance(emb_dict, dict) else i,
|
| 184 |
})
|
| 185 |
+
|
| 186 |
+
print(f"[svision_client] Detected {len(faces)} faces from image")
|
| 187 |
return faces
|
| 188 |
return []
|
| 189 |
except Exception as e:
|
| 190 |
print(f"[svision_client] get_face_embeddings_from_image error: {e}")
|
| 191 |
+
import traceback
|
| 192 |
+
traceback.print_exc()
|
| 193 |
return []
|
| 194 |
|
| 195 |
|