Spaces:

VeuReu
/

engine

Running

App Files Files Community

VeuReu committed on Oct 30, 2025

Commit

937b963

verified ·

1 Parent(s): 16f87a8

Upload 2 files

Browse files

Files changed (1) hide show

api.py +122 -1

api.py CHANGED Viewed

@@ -49,6 +49,95 @@ class JobStatus(str, Enum):
 jobs: Dict[str, dict] = {}
 def normalize_face_lighting(image):
     """
     Normaliza el brillo de una imagen de cara usando técnicas combinadas:
@@ -496,9 +585,24 @@ def process_video_job(job_id: str):
                         _sh.copy2(rep_src, rep_dst)
                     except Exception:
                         pass
                 characters.append({
                     "id": char_id,
-                    "name": f"Personatge {ci+1}",
                     "folder": str(out_dir),
                     "num_faces": len(files),
                     "image_url": f"/files/{video_name}/{char_id}/representative.jpg" if rep else "",
@@ -896,8 +1000,25 @@ async def detect_scenes(
         # Representative
         rep = frame_files[0] if frame_files else None
         image_url = f"/files_scene/{video_name}/{scene_id}/{rep}" if rep else ""
         scene_list.append({
             "id": scene_id,
             "folder": str(out_dir),
             "num_frames": len(frame_files),
             "image_url": image_url,

 jobs: Dict[str, dict] = {}
def _svision_mentions(pattern: str, text: str) -> bool:
    """Return True if *pattern* occurs in *text* at least once un-negated.

    An occurrence counts as negated when it is immediately preceded by a
    Spanish negation cue ("sin", "ni", "no lleva/tiene/usa/presenta"), e.g.
    "sin gafas" or "no lleva barba".
    """
    import re

    negation = re.compile(r"\b(?:sin|ni|no\s+(?:lleva|tiene|usa|presenta))\s+$")
    return any(
        not negation.search(text[:match.start()])
        for match in re.finditer(pattern, text)
    )


def _svision_face_name(description: str) -> str:
    """Build a short label such as "Hombre con gafas, barba" from a face description."""
    import re

    text = description.lower()
    # Whole-word matching: plain substring tests would also fire inside
    # unrelated words (e.g. "chica" in "chicago", "barba" in "barbacoa").
    is_young = re.search(
        r"\b(?:j[oó]ven(?:es|cit[oa]s?)?|niñ[oa]s?|adolescentes?|chic[oa]s?)\b", text
    ) is not None
    is_female = re.search(
        r"\b(?:mujer(?:es)?|chicas?|niñas?|femenin[oa]s?)\b", text
    ) is not None

    if is_young:
        base_name = "Chica" if is_female else "Chico"
    else:
        base_name = "Mujer" if is_female else "Hombre"

    feature_patterns = (
        ("gafas", r"\bgafas?\b"),
        ("barba", r"\bbarbas?\b"),
        ("bigote", r"\bbigotes?\b"),
    )
    # The prompt asks the model to comment on these traits, so it frequently
    # reports their absence; only un-negated mentions become part of the name.
    features = [
        label for label, pattern in feature_patterns
        if _svision_mentions(pattern, text)
    ]
    if features:
        return f"{base_name} con {', '.join(features)}"
    return base_name


def _svision_scene_name(description: str) -> str:
    """Build a short scene label from the first four words of the description."""
    label = " ".join(description.split()[:4]).rstrip(" ,;:.")
    # Upper-case only the first character: str.capitalize() would also
    # lower-case the rest and mangle proper nouns.
    return label[:1].upper() + label[1:]


def describe_image_with_svision(image_path: str, is_face: bool = True) -> tuple[str, str]:
    """
    Ask the svision space to describe an image (a face or a scene).

    The configuration is read from ``config.yaml`` next to this file and
    handed to ``LLMRouter``, whose ``vision_describe`` method produces the
    description. A short display name is then derived from that text.

    Args:
        image_path: Path to the image to describe.
        is_face: True to describe a person, False to describe a scene.

    Returns:
        A tuple ``(full_description, short_name)``. ``("", "")`` is returned
        when the config file is missing, when no usable description comes
        back, or when any exception is raised; errors are printed, never
        propagated to the caller.
    """
    try:
        from pathlib import Path as _P
        import yaml
        from llm_router import LLMRouter

        # Load the router configuration.
        config_path = _P(__file__).parent / "config.yaml"
        if not config_path.exists():
            print(f"[svision] Config no encontrado: {config_path}")
            return ("", "")
        with open(config_path, 'r', encoding='utf-8') as f:
            cfg = yaml.safe_load(f) or {}
        router = LLMRouter(cfg)

        # Faces and scenes get different prompts.
        if is_face:
            context = {
                "task": "describe_person",
                "instructions": "Describe la persona en la imagen. Incluye: edad aproximada (joven/adulto), género, características físicas notables (gafas, barba, bigote, etc.), expresión y vestimenta."
            }
        else:
            context = {
                "task": "describe_scene",
                "instructions": "Describe la escena en la imagen. Incluye: tipo de locación (interior/exterior), elementos principales, ambiente, iluminación."
            }

        descriptions = router.vision_describe([str(image_path)], context=context, model="salamandra-vision")
        full_description = descriptions[0] if descriptions else ""
        # Treat a missing, non-text or whitespace-only answer as "no description"
        # so callers fall back to their default names.
        if not isinstance(full_description, str) or not full_description.strip():
            return ("", "")

        if is_face:
            short_name = _svision_face_name(full_description)
        else:
            short_name = _svision_scene_name(full_description)

        print(f"[svision] Descripción generada: {full_description[:100]}...")
        print(f"[svision] Nombre: {short_name}")
        return (full_description, short_name)
    except Exception as e:
        # Best-effort enrichment: a failure here must not abort the caller's job.
        print(f"[svision] Error al describir imagen: {e}")
        import traceback
        traceback.print_exc()
        return ("", "")
 def normalize_face_lighting(image):
     """
     Normaliza el brillo de una imagen de cara usando técnicas combinadas:
                         _sh.copy2(rep_src, rep_dst)
                     except Exception:
                         pass
+                # Llamar a svision para describir la cara representativa
+                description = ""
+                short_name = f"Personatge {ci+1}"
+                if rep:
+                    rep_full_path = out_dir / "representative.jpg"
+                    if rep_full_path.exists():
+                        print(f"[{job_id}] Llamando a svision para describir {char_id}...")
+                        try:
+                            description, short_name = describe_image_with_svision(str(rep_full_path), is_face=True)
+                            if not short_name:
+                                short_name = f"Personatge {ci+1}"
+                        except Exception as e:
+                            print(f"[{job_id}] Error describiendo {char_id}: {e}")
                 characters.append({
                     "id": char_id,
+                    "name": short_name,
+                    "description": description,
                     "folder": str(out_dir),
                     "num_faces": len(files),
                     "image_url": f"/files/{video_name}/{char_id}/representative.jpg" if rep else "",
         # Representative
         rep = frame_files[0] if frame_files else None
         image_url = f"/files_scene/{video_name}/{scene_id}/{rep}" if rep else ""
+        # Llamar a svision para describir la escena representativa
+        scene_description = ""
+        scene_name = f"Escena {lbl+1}"
+        if rep:
+            rep_full_path = out_dir / rep
+            if rep_full_path.exists():
+                print(f"Llamando a svision para describir {scene_id}...")
+                try:
+                    scene_description, scene_name = describe_image_with_svision(str(rep_full_path), is_face=False)
+                    if not scene_name:
+                        scene_name = f"Escena {lbl+1}"
+                except Exception as e:
+                    print(f"Error describiendo {scene_id}: {e}")
         scene_list.append({
             "id": scene_id,
+            "name": scene_name,
+            "description": scene_description,
             "folder": str(out_dir),
             "num_frames": len(frame_files),
             "image_url": image_url,