Upload 2 files
Browse files
api.py
CHANGED
|
@@ -49,6 +49,95 @@ class JobStatus(str, Enum):
|
|
| 49 |
|
| 50 |
jobs: Dict[str, dict] = {}
|
| 51 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
def normalize_face_lighting(image):
|
| 53 |
"""
|
| 54 |
Normaliza el brillo de una imagen de cara usando técnicas combinadas:
|
|
@@ -496,9 +585,24 @@ def process_video_job(job_id: str):
|
|
| 496 |
_sh.copy2(rep_src, rep_dst)
|
| 497 |
except Exception:
|
| 498 |
pass
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 499 |
characters.append({
|
| 500 |
"id": char_id,
|
| 501 |
-
"name":
|
|
|
|
| 502 |
"folder": str(out_dir),
|
| 503 |
"num_faces": len(files),
|
| 504 |
"image_url": f"/files/{video_name}/{char_id}/representative.jpg" if rep else "",
|
|
@@ -896,8 +1000,25 @@ async def detect_scenes(
|
|
| 896 |
# Representative
|
| 897 |
rep = frame_files[0] if frame_files else None
|
| 898 |
image_url = f"/files_scene/{video_name}/{scene_id}/{rep}" if rep else ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 899 |
scene_list.append({
|
| 900 |
"id": scene_id,
|
|
|
|
|
|
|
| 901 |
"folder": str(out_dir),
|
| 902 |
"num_frames": len(frame_files),
|
| 903 |
"image_url": image_url,
|
|
|
|
| 49 |
|
| 50 |
jobs: Dict[str, dict] = {}
|
| 51 |
|
| 52 |
+
def describe_image_with_svision(image_path: str, is_face: bool = True) -> tuple[str, str]:
    """
    Describe an image (face or scene) through the svision space.

    Loads ``config.yaml`` from the directory of this module, builds an
    ``LLMRouter`` from it and asks the ``salamandra-vision`` model for a
    description of the image. A short display name is then derived from the
    returned description text by simple keyword matching (faces) or by taking
    its first four words (scenes).

    Args:
        image_path: Path to the image (the original note asks for an absolute
            path); it is handed to the router as a string.
        is_face: True to describe a person's face, False to describe a scene.

    Returns:
        A ``(full_description, short_name)`` tuple. ``("", "")`` is returned
        when the config file is missing, when no description comes back, or
        when any exception is raised along the way (the error and its
        traceback are printed; nothing propagates to the caller).
    """
    try:
        # The imports live inside the try block, so a missing dependency ends
        # up in the except branch below and yields ("", "").
        from pathlib import Path as _P
        import yaml
        from llm_router import LLMRouter

        # Load the configuration
        config_path = _P(__file__).parent / "config.yaml"
        if not config_path.exists():
            print(f"[svision] Config no encontrado: {config_path}")
            return ("", "")

        with open(config_path, 'r', encoding='utf-8') as f:
            cfg = yaml.safe_load(f) or {}

        router = LLMRouter(cfg)

        # Different context for faces vs. scenes
        if is_face:
            context = {
                "task": "describe_person",
                "instructions": "Describe la persona en la imagen. Incluye: edad aproximada (joven/adulto), género, características físicas notables (gafas, barba, bigote, etc.), expresión y vestimenta."
            }
        else:
            context = {
                "task": "describe_scene",
                "instructions": "Describe la escena en la imagen. Incluye: tipo de locación (interior/exterior), elementos principales, ambiente, iluminación."
            }

        # Call svision
        descriptions = router.vision_describe([str(image_path)], context=context, model="salamandra-vision")
        full_description = descriptions[0] if descriptions else ""

        if not full_description:
            return ("", "")

        # Build the short name
        if is_face:
            # Keyword matching is done on the lower-cased description
            desc_lower = full_description.lower()

            # Age group and gender. The keywords must be real UTF-8 Spanish
            # ("niño"/"niña"); a mis-encoded literal would never match.
            is_young = any(word in desc_lower for word in ["joven", "niño", "niña", "adolescente", "chico", "chica"])
            is_female = any(word in desc_lower for word in ["mujer", "chica", "niña", "femenin"])

            if is_young:
                base_name = "Chica" if is_female else "Chico"
            else:
                base_name = "Mujer" if is_female else "Hombre"

            # Append distinctive features; "gafa" is a stem so it matches
            # both "gafa" and "gafas"
            features = []
            if "gafa" in desc_lower:
                features.append("gafas")
            if "barba" in desc_lower:
                features.append("barba")
            if "bigote" in desc_lower:
                features.append("bigote")

            if features:
                short_name = f"{base_name} con {', '.join(features)}"
            else:
                short_name = base_name
        else:
            # For scenes, use the first four words of the description
            words = full_description.split()[:4]
            short_name = " ".join(words).capitalize()

        print(f"[svision] Descripción generada: {full_description[:100]}...")
        print(f"[svision] Nombre: {short_name}")

        return (full_description, short_name)

    except Exception as e:
        # Best-effort: report the failure and let the caller fall back to its
        # own default name.
        print(f"[svision] Error al describir imagen: {e}")
        import traceback
        traceback.print_exc()
        return ("", "")
|
| 140 |
+
|
| 141 |
def normalize_face_lighting(image):
|
| 142 |
"""
|
| 143 |
Normaliza el brillo de una imagen de cara usando técnicas combinadas:
|
|
|
|
| 585 |
_sh.copy2(rep_src, rep_dst)
|
| 586 |
except Exception:
|
| 587 |
pass
|
| 588 |
+
# Llamar a svision para describir la cara representativa
|
| 589 |
+
description = ""
|
| 590 |
+
short_name = f"Personatge {ci+1}"
|
| 591 |
+
if rep:
|
| 592 |
+
rep_full_path = out_dir / "representative.jpg"
|
| 593 |
+
if rep_full_path.exists():
|
| 594 |
+
print(f"[{job_id}] Llamando a svision para describir {char_id}...")
|
| 595 |
+
try:
|
| 596 |
+
description, short_name = describe_image_with_svision(str(rep_full_path), is_face=True)
|
| 597 |
+
if not short_name:
|
| 598 |
+
short_name = f"Personatge {ci+1}"
|
| 599 |
+
except Exception as e:
|
| 600 |
+
print(f"[{job_id}] Error describiendo {char_id}: {e}")
|
| 601 |
+
|
| 602 |
characters.append({
|
| 603 |
"id": char_id,
|
| 604 |
+
"name": short_name,
|
| 605 |
+
"description": description,
|
| 606 |
"folder": str(out_dir),
|
| 607 |
"num_faces": len(files),
|
| 608 |
"image_url": f"/files/{video_name}/{char_id}/representative.jpg" if rep else "",
|
|
|
|
| 1000 |
# Representative
|
| 1001 |
rep = frame_files[0] if frame_files else None
|
| 1002 |
image_url = f"/files_scene/{video_name}/{scene_id}/{rep}" if rep else ""
|
| 1003 |
+
|
| 1004 |
+
# Llamar a svision para describir la escena representativa
|
| 1005 |
+
scene_description = ""
|
| 1006 |
+
scene_name = f"Escena {lbl+1}"
|
| 1007 |
+
if rep:
|
| 1008 |
+
rep_full_path = out_dir / rep
|
| 1009 |
+
if rep_full_path.exists():
|
| 1010 |
+
print(f"Llamando a svision para describir {scene_id}...")
|
| 1011 |
+
try:
|
| 1012 |
+
scene_description, scene_name = describe_image_with_svision(str(rep_full_path), is_face=False)
|
| 1013 |
+
if not scene_name:
|
| 1014 |
+
scene_name = f"Escena {lbl+1}"
|
| 1015 |
+
except Exception as e:
|
| 1016 |
+
print(f"Error describiendo {scene_id}: {e}")
|
| 1017 |
+
|
| 1018 |
scene_list.append({
|
| 1019 |
"id": scene_id,
|
| 1020 |
+
"name": scene_name,
|
| 1021 |
+
"description": scene_description,
|
| 1022 |
"folder": str(out_dir),
|
| 1023 |
"num_frames": len(frame_files),
|
| 1024 |
"image_url": image_url,
|