VeuReu committed on
Commit
937b963
verified
1 Parent(s): 16f87a8

Upload 2 files

Browse files
Files changed (1) hide show
  1. api.py +122 -1
api.py CHANGED
@@ -49,6 +49,95 @@ class JobStatus(str, Enum):
49
 
50
  jobs: Dict[str, dict] = {}
51
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  def normalize_face_lighting(image):
53
  """
54
  Normaliza el brillo de una imagen de cara usando técnicas combinadas:
@@ -496,9 +585,24 @@ def process_video_job(job_id: str):
496
  _sh.copy2(rep_src, rep_dst)
497
  except Exception:
498
  pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
499
  characters.append({
500
  "id": char_id,
501
- "name": f"Personatge {ci+1}",
 
502
  "folder": str(out_dir),
503
  "num_faces": len(files),
504
  "image_url": f"/files/{video_name}/{char_id}/representative.jpg" if rep else "",
@@ -896,8 +1000,25 @@ async def detect_scenes(
896
  # Representative
897
  rep = frame_files[0] if frame_files else None
898
  image_url = f"/files_scene/{video_name}/{scene_id}/{rep}" if rep else ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
899
  scene_list.append({
900
  "id": scene_id,
 
 
901
  "folder": str(out_dir),
902
  "num_frames": len(frame_files),
903
  "image_url": image_url,
 
49
 
50
  jobs: Dict[str, dict] = {}
51
 
52
# Spanish keyword stems matched as substrings of the lower-cased description.
# NOTE(review): plain substring matching cannot see negation, so a description
# such as "sin gafas" still counts as mentioning glasses.
_YOUNG_KEYWORDS = ("joven", "niño", "niña", "adolescente", "chico", "chica")
_FEMALE_KEYWORDS = ("mujer", "chica", "niña", "femenin")
# (stem searched in the description, label appended to the short name)
_FACE_FEATURES = (("gafa", "gafas"), ("barba", "barba"), ("bigote", "bigote"))


def _face_short_name(description: str) -> str:
    """Derive a short display name for a face from its free-text description."""
    desc_lower = description.lower()

    is_young = any(word in desc_lower for word in _YOUNG_KEYWORDS)
    is_female = any(word in desc_lower for word in _FEMALE_KEYWORDS)

    if is_young:
        base_name = "Chica" if is_female else "Chico"
    else:
        base_name = "Mujer" if is_female else "Hombre"

    features = [label for stem, label in _FACE_FEATURES if stem in desc_lower]
    if features:
        return f"{base_name} con {', '.join(features)}"
    return base_name


def _scene_short_name(description: str) -> str:
    """Derive a short display name for a scene: its first four words."""
    return " ".join(description.split()[:4]).capitalize()


def describe_image_with_svision(image_path: str, is_face: bool = True) -> tuple[str, str]:
    """
    Ask the svision model to describe an image (a face or a scene).

    The function is best-effort: any failure (missing dependency, missing
    config file, router error, empty answer) is logged to stdout and reported
    to the caller as a pair of empty strings instead of an exception.

    Args:
        image_path: Path to the image to describe.
        is_face: True to describe a person, False to describe a scene.

    Returns:
        A ``(full_description, short_name)`` tuple, or ``("", "")`` when no
        description could be obtained.
    """
    try:
        # Imported lazily so that a missing optional dependency degrades to
        # the ("", "") fallback instead of breaking module import.
        from pathlib import Path as _P
        import yaml
        from llm_router import LLMRouter

        # Load the router configuration that lives next to this module.
        config_path = _P(__file__).parent / "config.yaml"
        if not config_path.exists():
            print(f"[svision] Config no encontrado: {config_path}")
            return ("", "")

        with open(config_path, 'r', encoding='utf-8') as f:
            cfg = yaml.safe_load(f) or {}

        router = LLMRouter(cfg)

        # Faces and scenes are sent with different task/instruction contexts.
        if is_face:
            context = {
                "task": "describe_person",
                "instructions": "Describe la persona en la imagen. Incluye: edad aproximada (joven/adulto), género, características físicas notables (gafas, barba, bigote, etc.), expresión y vestimenta."
            }
        else:
            context = {
                "task": "describe_scene",
                "instructions": "Describe la escena en la imagen. Incluye: tipo de locación (interior/exterior), elementos principales, ambiente, iluminación."
            }

        descriptions = router.vision_describe([str(image_path)], context=context, model="salamandra-vision")
        full_description = descriptions[0] if descriptions else ""

        # Treat a missing, non-text or blank answer as "no description".
        if not isinstance(full_description, str) or not full_description.strip():
            return ("", "")

        if is_face:
            short_name = _face_short_name(full_description)
        else:
            short_name = _scene_short_name(full_description)

        print(f"[svision] Descripción generada: {full_description[:100]}...")
        print(f"[svision] Nombre: {short_name}")

        return (full_description, short_name)

    except Exception as e:
        # Deliberate catch-all: callers rely on the empty-tuple fallback.
        print(f"[svision] Error al describir imagen: {e}")
        import traceback
        traceback.print_exc()
        return ("", "")
140
+
141
  def normalize_face_lighting(image):
142
  """
143
  Normaliza el brillo de una imagen de cara usando técnicas combinadas:
 
585
  _sh.copy2(rep_src, rep_dst)
586
  except Exception:
587
  pass
588
+ # Llamar a svision para describir la cara representativa
589
+ description = ""
590
+ short_name = f"Personatge {ci+1}"
591
+ if rep:
592
+ rep_full_path = out_dir / "representative.jpg"
593
+ if rep_full_path.exists():
594
+ print(f"[{job_id}] Llamando a svision para describir {char_id}...")
595
+ try:
596
+ description, short_name = describe_image_with_svision(str(rep_full_path), is_face=True)
597
+ if not short_name:
598
+ short_name = f"Personatge {ci+1}"
599
+ except Exception as e:
600
+ print(f"[{job_id}] Error describiendo {char_id}: {e}")
601
+
602
  characters.append({
603
  "id": char_id,
604
+ "name": short_name,
605
+ "description": description,
606
  "folder": str(out_dir),
607
  "num_faces": len(files),
608
  "image_url": f"/files/{video_name}/{char_id}/representative.jpg" if rep else "",
 
1000
  # Representative
1001
  rep = frame_files[0] if frame_files else None
1002
  image_url = f"/files_scene/{video_name}/{scene_id}/{rep}" if rep else ""
1003
+
1004
+ # Llamar a svision para describir la escena representativa
1005
+ scene_description = ""
1006
+ scene_name = f"Escena {lbl+1}"
1007
+ if rep:
1008
+ rep_full_path = out_dir / rep
1009
+ if rep_full_path.exists():
1010
+ print(f"Llamando a svision para describir {scene_id}...")
1011
+ try:
1012
+ scene_description, scene_name = describe_image_with_svision(str(rep_full_path), is_face=False)
1013
+ if not scene_name:
1014
+ scene_name = f"Escena {lbl+1}"
1015
+ except Exception as e:
1016
+ print(f"Error describiendo {scene_id}: {e}")
1017
+
1018
  scene_list.append({
1019
  "id": scene_id,
1020
+ "name": scene_name,
1021
+ "description": scene_description,
1022
  "folder": str(out_dir),
1023
  "num_frames": len(frame_files),
1024
  "image_url": image_url,