Spaces:

VeuReu
/

engine

Running

App Files Community

VeuReu committed on Oct 30, 2025

Commit

e9c6cef

verified ·

1 Parent(s): 5c22d4f

Upload api.py

Browse files

Files changed (1) hide show

api.py +7 -55

api.py CHANGED Viewed

@@ -51,7 +51,7 @@ jobs: Dict[str, dict] = {}
 def describe_image_with_svision(image_path: str, is_face: bool = True) -> tuple[str, str]:
     """
-    Llama al space svision para describir una imagen (cara o escena).
     Args:
         image_path: Ruta absoluta a la imagen
@@ -80,12 +80,12 @@ def describe_image_with_svision(image_path: str, is_face: bool = True) -> tuple[
         if is_face:
             context = {
                 "task": "describe_person",
-                "instructions": "Describe la persona en la imagen. Incluye: edad aproximada (joven/adulto), género, características físicas notables (gafas, barba, bigote, etc.), expresión y vestimenta."
             }
         else:
             context = {
                 "task": "describe_scene",
-                "instructions": "Describe la escena en la imagen. Incluye: tipo de locación (interior/exterior), elementos principales, ambiente, iluminación."
             }
         # Llamar a svision
@@ -95,45 +95,12 @@ def describe_image_with_svision(image_path: str, is_face: bool = True) -> tuple[
         if not full_description:
             return ("", "")
-        # Generar nombre abreviado para caras
-        if is_face:
-            # Extraer características clave para el nombre
-            desc_lower = full_description.lower()
-            # Determinar edad y género
-            is_young = any(word in desc_lower for word in ["joven", "niño", "niña", "adolescente", "chico", "chica"])
-            is_female = any(word in desc_lower for word in ["mujer", "chica", "niña", "femenin"])
-            if is_young:
-                base_name = "Chica" if is_female else "Chico"
-            else:
-                base_name = "Mujer" if is_female else "Hombre"
-            # Añadir características distintivas
-            features = []
-            if "gafa" in desc_lower:
-                features.append("gafas")
-            if "barba" in desc_lower:
-                features.append("barba")
-            if "bigote" in desc_lower:
-                features.append("bigote")
-            if features:
-                short_name = f"{base_name} con {', '.join(features)}"
-            else:
-                short_name = base_name
-        else:
-            # Para escenas, extraer primeras palabras clave
-            words = full_description.split()[:4]
-            short_name = " ".join(words).capitalize()
-        print(f"[svision] Descripción generada: {full_description[:100]}...")
-        print(f"[svision] Nombre: {short_name}")
-        return (full_description, short_name)
     except Exception as e:
-        print(f"[svision] Error al describir imagen: {e}")
         import traceback
         traceback.print_exc()
         return ("", "")
@@ -585,24 +552,9 @@ def process_video_job(job_id: str):
                         _sh.copy2(rep_src, rep_dst)
                     except Exception:
                         pass
-                # Llamar a svision para describir la cara representativa
-                description = ""
-                short_name = f"Personatge {ci+1}"
-                if rep:
-                    rep_full_path = out_dir / "representative.jpg"
-                    if rep_full_path.exists():
-                        print(f"[{job_id}] Llamando a svision para describir {char_id}...")
-                        try:
-                            description, short_name = describe_image_with_svision(str(rep_full_path), is_face=True)
-                            if not short_name:
-                                short_name = f"Personatge {ci+1}"
-                        except Exception as e:
-                            print(f"[{job_id}] Error describiendo {char_id}: {e}")
                 characters.append({
                     "id": char_id,
-                    "name": short_name,
-                    "description": description,
                     "folder": str(out_dir),
                     "num_faces": len(files),
                     "image_url": f"/files/{video_name}/{char_id}/representative.jpg" if rep else "",

 def describe_image_with_svision(image_path: str, is_face: bool = True) -> tuple[str, str]:
     """
+    Llama al space svision para describir una imagen (usado en generación de AD).
     Args:
         image_path: Ruta absoluta a la imagen
         if is_face:
             context = {
                 "task": "describe_person",
+                "instructions": "Descriu la persona en la imatge. Inclou: edat aproximada (jove/adult), gènere, característiques físiques notables (ulleres, barba, bigoti, etc.), expressió i vestimenta."
             }
         else:
             context = {
                 "task": "describe_scene",
+                "instructions": "Descriu l'escena en la imatge. Inclou: tipus de localització (interior/exterior), elements principals, ambient, il·luminació."
             }
         # Llamar a svision
         if not full_description:
             return ("", "")
+        print(f"[svision] Descripció generada: {full_description[:100]}...")
+        return (full_description, "")
     except Exception as e:
+        print(f"[svision] Error al descriure imatge: {e}")
         import traceback
         traceback.print_exc()
         return ("", "")
                         _sh.copy2(rep_src, rep_dst)
                     except Exception:
                         pass
                 characters.append({
                     "id": char_id,
+                    "name": f"Personatge {ci+1}",
                     "folder": str(out_dir),
                     "num_faces": len(files),
                     "image_url": f"/files/{video_name}/{char_id}/representative.jpg" if rep else "",