Aurel-test committed on
Commit
c88ee81
·
verified ·
1 Parent(s): a44ab0f

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +1504 -0
app.py ADDED
@@ -0,0 +1,1504 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Web Demo v2 pour la base de données d'œuvres d'art - Version Sécurisée et Optimisée
4
+ Interface multi-étapes avec matching basé sur prénom, date, ville et émotions
5
+ Optimisé pour les performances avec caching et indexation
6
+ Version sécurisée avec validation des entrées et gestion d'état propre
7
+ """
8
+
9
+ import gradio as gr
10
+ import os
11
+ import sys
12
+ import logging
13
+ from logging.handlers import RotatingFileHandler
14
+ import random
15
+ import re
16
+ import json
17
+ import uuid
18
+ import time
19
+ from datetime import datetime
20
+ from typing import List, Dict, Tuple, Optional, Any, Set
21
+ from collections import Counter, defaultdict
22
+ from functools import lru_cache
23
+ from dataclasses import dataclass, field, asdict
24
+ import pandas as pd
25
+
26
# Main application logging: timestamped records at INFO level and above.
logging.basicConfig(
    level=logging.INFO,
    format="[%(asctime)s] %(levelname)s: %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)
logger = logging.getLogger(__name__)

# File names (inside the "logs" directory) of the per-session records and of
# the aggregated statistics.
SESSION_LOG_FILE = "session_logs.jsonl"
STATS_LOG_FILE = "statistics.json"

# exist_ok=True replaces the exists()/makedirs() pair, which could fail if the
# directory appeared between the check and the creation.
os.makedirs("logs", exist_ok=True)

# Dedicated logger whose records are appended to the session log file; the
# file is rotated at 10 MB and five backups are kept.
session_file_handler = RotatingFileHandler(
    filename=os.path.join("logs", SESSION_LOG_FILE),
    maxBytes=10 * 1024 * 1024,  # 10 MB
    backupCount=5,
    encoding="utf-8",
)
session_file_handler.setLevel(logging.INFO)
session_logger = logging.getLogger("session_logger")
session_logger.addHandler(session_file_handler)
session_logger.setLevel(logging.INFO)
52
+
53
+ sys.path.insert(0, os.path.join(os.path.dirname(__file__), "src"))
54
+
55
+ from art_pieces_db.database import Database
56
+ from art_pieces_db.query import TargetProfile, WeightedLeximaxOptimizer, Optimizer
57
+ from art_pieces_db.emotions import EmotionWheel
58
+ from art_pieces_db.utils import str_to_date
59
+
60
+
61
@dataclass
class ScoringWeights:
    """Single home for the scoring and selection constants (no magic numbers)."""

    # Relative weight of each criterion during pre-selection.
    PRESELECTION_NAME_WEIGHT: float = 3.0
    PRESELECTION_DATE_WEIGHT: float = 1.0
    PRESELECTION_PLACE_WEIGHT: float = 2.0
    PRESELECTION_EMOTION_WEIGHT: float = 0.0

    MIN_PRESELECTION_COUNT: int = 20
    # Number of images offered per selection.
    MAX_IMAGES_PER_SELECTION: int = 3
    # Number of rounds played before the final recommendation.
    TOTAL_ROUNDS: int = 3
74
+
75
@dataclass
class SessionState:
    """Mutable state of one visitor session."""

    # Information entered by the visitor.
    firstname: str = ""
    birthday: str = ""
    city: str = ""

    # Progress through the image-selection rounds.
    current_round: int = 0
    selected_images: List[str] = field(default_factory=list)
    current_image_ids: List[str] = field(default_factory=list)

    preselected_pieces: Optional[pd.DataFrame] = None

    # Tracking data.
    session_id: str = field(default_factory=lambda: str(uuid.uuid4()))
    session_start_time: float = field(default_factory=time.time)
    recommendation_type: str = ""  # "name_date_place" or "emotions"
    final_artwork: str = ""

    def reset(self):
        """Put every field back to its default, with a new id and start time."""
        # A freshly built instance carries exactly the default values,
        # including a newly generated session id and start timestamp.
        pristine = SessionState()
        for attr_name, attr_value in vars(pristine).items():
            setattr(self, attr_name, attr_value)

    def is_complete(self) -> bool:
        """Tell whether all selection rounds have been played."""
        return not self.current_round < ScoringWeights.TOTAL_ROUNDS
112
+
113
+
114
class SessionLogger:
    """Record finished sessions and maintain aggregated usage statistics.

    Each finished session is emitted as one JSON line through
    ``session_logger``; the aggregated counters live in a JSON file that is
    rewritten after every session.
    """

    def __init__(self):
        """Create the logs directory if needed and compute the log file paths."""
        self.logs_dir = "logs"
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(self.logs_dir, exist_ok=True)

        self.session_log_path = os.path.join(self.logs_dir, SESSION_LOG_FILE)
        self.stats_log_path = os.path.join(self.logs_dir, STATS_LOG_FILE)

    @staticmethod
    def _empty_statistics() -> dict:
        """Return the statistics structure used before any session is recorded."""
        return {
            "total_sessions": 0,
            "total_duration_seconds": 0,
            "average_duration_seconds": 0,
            "recommendation_systems_usage": {
                "name_date_place": 0,
                "emotions": 0,
            },
            "artworks_recommended": {},
            "last_updated": None,
        }

    def log_session(self, state: SessionState, recommendation_system: str):
        """Record one finished session and refresh the global statistics.

        Args:
            state: Session whose id, start time and final artwork are recorded.
            recommendation_system: Name of the system that produced the
                recommendation; stored as ``recommendation_type``.

        Failures are logged and never propagated to the caller.
        """
        session_data = {
            "session_id": state.session_id,
            "timestamp": datetime.now().isoformat(),
            "duration_seconds": round(time.time() - state.session_start_time, 2),
            "recommended_artwork": state.final_artwork,
            "recommendation_type": recommendation_system,
        }

        try:
            # session_logger's rotating file handler writes to the same path
            # as self.session_log_path. Appending to that file directly as
            # well stored every session twice, so the record is emitted once,
            # through the logger, which also honours the rotation settings.
            session_logger.info(json.dumps(session_data, ensure_ascii=False))
            logger.info(f"Session {state.session_id} logged successfully")

            self.update_statistics(session_data)
        except Exception as e:
            # Best effort: session logging must never break the application.
            logger.error(f"Error logging session: {e}")

    def update_statistics(self, session_data: dict):
        """Fold one session record into the aggregated statistics file.

        Args:
            session_data: Mapping with at least ``duration_seconds``,
                ``recommendation_type`` and ``recommended_artwork``.

        Failures are logged and never propagated to the caller.
        """
        try:
            # Start from the defaults so that a statistics file written by an
            # older version (missing some keys) can still be updated.
            stats = self._empty_statistics()
            if os.path.exists(self.stats_log_path):
                with open(self.stats_log_path, "r", encoding="utf-8") as f:
                    stats.update(json.load(f))

            stats["total_sessions"] += 1
            stats["total_duration_seconds"] += session_data["duration_seconds"]
            stats["average_duration_seconds"] = (
                stats["total_duration_seconds"] / stats["total_sessions"]
            )

            # Only the known recommendation systems are counted.
            usage = stats["recommendation_systems_usage"]
            rec_type = session_data["recommendation_type"]
            if rec_type in usage:
                usage[rec_type] += 1

            recommended = stats["artworks_recommended"]
            artwork = session_data["recommended_artwork"]
            if artwork:
                recommended[artwork] = recommended.get(artwork, 0) + 1

            if recommended:
                title, count = max(recommended.items(), key=lambda item: item[1])
                stats["most_recommended_artwork"] = {"title": title, "count": count}

            # Share of each system among all counted recommendations.
            total_recs = sum(usage.values())
            if total_recs > 0:
                stats["system_utility_percentage"] = {
                    system: (count / total_recs * 100)
                    for system, count in usage.items()
                }

            stats["last_updated"] = datetime.now().isoformat()

            # Write to a temporary file and swap it in, so a crash in the
            # middle of the write cannot leave a truncated statistics file.
            tmp_path = self.stats_log_path + ".tmp"
            with open(tmp_path, "w", encoding="utf-8") as f:
                json.dump(stats, f, indent=2, ensure_ascii=False)
            os.replace(tmp_path, self.stats_log_path)

            logger.info("Global statistics updated")
        except Exception as e:
            # Best effort: statistics must never break the application.
            logger.error(f"Error updating statistics: {e}")

    def get_statistics(self) -> dict:
        """Return the stored statistics, or an empty dict if unavailable."""
        try:
            if os.path.exists(self.stats_log_path):
                with open(self.stats_log_path, "r", encoding="utf-8") as f:
                    return json.load(f)
            return {}
        except Exception as e:
            logger.error(f"Error reading statistics: {e}")
            return {}


# Module-level session tracker instance.
session_tracker = SessionLogger()
229
+
230
+
231
class SecurityValidator:
    """Central place for the validation of file names and user input."""

    PATH_TRAVERSAL_PATTERN = re.compile(r"\.\.|\.\/")
    VALID_FILENAME_PATTERN = re.compile(r"^[\w\-\.\s]+$")
    VALID_INPUT_PATTERN = re.compile(
        r"^[\w\-\s\'\.,àâäéèêëïîôûùüÿæœçÀÂÄÉÈÊËÏÎÔÛÙÜŸÆŒÇ]+$", re.UNICODE
    )
    DATE_PATTERN = re.compile(r"^\d{1,2}/\d{1,2}$")

    @classmethod
    def validate_filename(cls, filename: str) -> bool:
        """Return True when ``filename`` is non-empty and safe to use.

        A name is rejected when it contains ".." or "./", or when its base
        name holds characters other than word characters, dashes, dots and
        whitespace.
        """
        if not filename:
            return False

        if cls.PATH_TRAVERSAL_PATTERN.search(filename):
            logger.warning(f"Tentative de path traversal détectée: {filename}")
            return False

        base_name = os.path.basename(filename)
        if not cls.VALID_FILENAME_PATTERN.match(base_name):
            logger.warning(f"Nom de fichier invalide: {filename}")
            return False

        return True

    @classmethod
    def sanitize_input(cls, input_str: str, max_length: int = 100) -> str:
        """Truncate, trim and filter a user-supplied string.

        Args:
            input_str: Raw user input; a falsy value yields "".
            max_length: Maximum number of characters kept before trimming.

        Returns:
            The trimmed input when it only holds allowed characters,
            otherwise the input with every disallowed character removed.
        """
        if not input_str:
            return ""

        truncated = input_str[:max_length].strip()
        if not truncated:
            # Whitespace-only input: nothing to filter or report.
            return ""

        if cls.VALID_INPUT_PATTERN.match(truncated):
            return truncated

        # Keep only the characters that are individually valid. Removing
        # characters can expose whitespace at either end, so trim again.
        cleaned = "".join(
            c for c in truncated if cls.VALID_INPUT_PATTERN.match(c)
        ).strip()
        logger.info(f"Input sanitized: '{truncated}' -> '{cleaned}'")
        return cleaned

    @classmethod
    def validate_date(cls, date_str: str) -> Tuple[bool, Optional[datetime]]:
        """Validate and parse a day/month string such as "15/03".

        Returns:
            ``(True, datetime)`` with the year fixed to 2000 (a leap year, so
            29/02 is accepted), or ``(False, None)`` when the string is empty,
            malformed or not a real calendar day.
        """
        if not date_str:
            return False, None

        if not cls.DATE_PATTERN.match(date_str):
            return False, None

        try:
            day, month = map(int, date_str.split("/"))
            if not (1 <= day <= 31 and 1 <= month <= 12):
                return False, None

            return True, datetime(year=2000, month=month, day=day)
        except ValueError as e:
            # Raised by datetime() for impossible days such as 31/02.
            logger.error(f"Erreur de parsing de date: {e}")
            return False, None
296
+
297
+
298
class ImageIndexer:
    """Index the image files of a directory and resolve image names to files.

    File names are normalized (lower case, extension and "_medium" suffix
    removed, spaces and underscores turned into dashes) and stored in
    ``image_lookup``. MAR-BVM names additionally get a base-key entry in
    ``mar_bvm_lookup``.
    """

    IMAGE_EXTENSIONS = (".jpg", ".png")
    COMMON_SUFFIXES = [".jpg", ".png", "_medium"]
    MAR_BVM_TEST_SUFFIXES = ["-001", "-002", "-003"]

    def __init__(self, images_dir: str):
        """Build the index of the image files found in ``images_dir``."""
        self.images_dir = os.path.abspath(images_dir)
        self.available_files = set()
        self.image_lookup = {}  # normalized name -> filename
        self.mar_bvm_lookup = {}  # MAR-BVM base key -> [filenames]
        self._build_index()

    def _strip_file_extensions(self, filename: str) -> str:
        """Return ``filename`` lower-cased, without "_medium.jpg"/".jpg"/".png"."""
        base_name = filename.lower()
        if base_name.endswith("_medium.jpg"):
            return base_name[: -len("_medium.jpg")]
        if base_name.endswith((".jpg", ".png")):
            return base_name[:-4]
        return base_name

    def _normalize_basic_patterns(self, name: str) -> str:
        """Lower-case, trim and dash-separate ``name``, dropping common suffixes."""
        normalized = name.lower().strip().rstrip(",")

        # Suffixes are tested in list order, so "x_medium.jpg" loses ".jpg"
        # first and "_medium" next.
        for suffix in self.COMMON_SUFFIXES:
            if normalized.endswith(suffix):
                normalized = normalized[: -len(suffix)]

        return re.sub(r"[\s_]+", "-", normalized)

    def _normalize_mar_bvm_format(self, name: str) -> str:
        """Turn every dot of a MAR-BVM name into a dash; other names are untouched."""
        if "mar-bvm" not in name:
            return name
        # Covers the ".0." -> "-0-" case as well: each dot becomes a dash.
        return name.replace(".", "-")

    def _normalize_name(self, name: str) -> str:
        """Return the lookup key of ``name``."""
        normalized = self._normalize_basic_patterns(name)

        if "mar-bvm" in normalized:
            normalized = self._normalize_mar_bvm_format(normalized)
        elif not normalized.startswith("20"):
            # Names starting with a year (like 2022.0.86) keep their dots.
            normalized = normalized.replace(".", "-")

        return normalized

    def _create_mar_bvm_lookups(self, normalized: str, filename: str):
        """Register ``filename`` under its MAR-BVM base key, when it has one."""
        if "mar-bvm" not in normalized:
            return

        parts = normalized.split("-")
        # A numeric component at position 5 or later (after "mar-bvm-7-2022-0")
        # qualifies the name; the base key is its first six components,
        # e.g. "mar-bvm-7-2022-0-22".
        if any(part.isdigit() for part in parts[5:]):
            base_key = "-".join(parts[:6])
            self.mar_bvm_lookup.setdefault(base_key, []).append(filename)

    def _process_image_file(self, filename: str):
        """Add one directory entry to the index if it is a safe image file."""
        if not SecurityValidator.validate_filename(filename):
            logger.warning(f"Fichier ignoré pour raison de sécurité: {filename}")
            return

        if not filename.lower().endswith(self.IMAGE_EXTENSIONS):
            return

        self.available_files.add(filename)

        base_name = self._strip_file_extensions(filename)
        normalized = self._normalize_name(base_name)
        self.image_lookup[normalized] = filename
        self._create_mar_bvm_lookups(normalized, filename)

    def _build_index(self):
        """Scan ``images_dir`` and fill the lookup tables.

        Any failure is logged and leaves the index empty; it is not raised.
        """
        try:
            # Sorted so that, when two files share a normalized key, the one
            # kept does not depend on the directory listing order.
            for filename in sorted(os.listdir(self.images_dir)):
                self._process_image_file(filename)

            logger.info(
                f"Index des images construit: {len(self.available_files)} fichiers disponibles, "
                f"{len(self.image_lookup)} entrées normalisées"
            )
        except Exception as e:
            logger.error(f"Erreur lors de la construction de l'index: {e}")
            # Reset every table, not only the file set, so that a partially
            # built index cannot be queried.
            self.available_files = set()
            self.image_lookup = {}
            self.mar_bvm_lookup = {}

    def _clean_input_name(self, image_name: str) -> str:
        """Trim ``image_name`` and drop trailing commas and dashes."""
        cleaned = image_name.strip().rstrip(",").rstrip("-").strip()
        # Remove spaces before -001, -002, etc.
        return re.sub(r"\s+(-\d)", r"\1", cleaned)

    def _normalize_mar_bvm_input(self, image_name: str) -> str:
        """Bring a MAR-BVM input name to the dashed form used by the files.

        Names without the upper-case "MAR-BVM" marker are returned unchanged.
        """
        if "MAR-BVM" not in image_name:
            return image_name

        # Restore the "7-" that is missing in names like MAR-BVM-2022-0-153.
        image_name = image_name.replace("MAR-BVM-2022-0-", "MAR-BVM-7-2022-0-")

        # Every dot becomes a dash; this covers ".0.", a trailing ".001" and
        # a trailing ".1"/".2" in a single step.
        return image_name.replace(".", "-")

    def _try_mar_bvm_lookups(self, normalized: str) -> Optional[str]:
        """Resolve a MAR-BVM key through its base key or a numbered suffix."""
        candidates = self.mar_bvm_lookup.get(normalized)
        if candidates:
            return candidates[0]

        for suffix in self.MAR_BVM_TEST_SUFFIXES:
            test_pattern = f"{normalized}{suffix}"
            if test_pattern in self.image_lookup:
                return self.image_lookup[test_pattern]

        return None

    def _try_year_format_lookup(self, image_name: str) -> Optional[str]:
        """Look up a name starting with "20", keeping its dots."""
        if not image_name.startswith("20"):
            return None

        test_name = image_name.lower().replace(" ", "-")
        return self.image_lookup.get(test_name)

    def _try_partial_matching(self, normalized: str) -> Optional[str]:
        """Return the first indexed file whose key contains ``normalized``.

        An empty query is rejected: the empty string is contained in every
        key, so it would otherwise resolve to an arbitrary image.
        """
        if not normalized:
            return None

        # A prefix match is also a containment match, so one test suffices.
        for key, filename in self.image_lookup.items():
            if normalized in key:
                return filename
        return None

    def _split_multiple_names(self, image_name: str) -> List[str]:
        """Split a field holding several names (comma, slash or " - ")."""
        if "," in image_name:
            return [name.strip() for name in image_name.split(",") if name.strip()]

        if "/" in image_name:
            return [name.strip() for name in image_name.split("/") if name.strip()]

        # Single " - " separator, e.g. "MAR-BVM-7-2022.0.81 - 2022.0.81".
        if image_name.count(" - ") == 1:
            first, second = (name.strip() for name in image_name.split(" - "))
            # When the second part merely repeats a piece of the first one,
            # only the first part is a name.
            if first.endswith(second) or second in first:
                return [first]
            return [first, second]

        return [image_name]

    def find_image(self, image_name: str) -> Optional[str]:
        """Return the file matching ``image_name``, or None.

        ``image_name`` may hold several names; they are tried in order and
        the first one that resolves wins.
        """
        if not image_name:
            return None

        for name in self._split_multiple_names(image_name):
            result = self._find_single_image(name)
            if result:
                return result

        return None

    def _find_single_image(self, image_name: str) -> Optional[str]:
        """Resolve one name: exact key, MAR-BVM rules, year format, then partial."""
        cleaned_name = self._clean_input_name(image_name)
        processed_name = self._normalize_mar_bvm_input(cleaned_name)
        normalized = self._normalize_name(processed_name)

        if normalized in self.image_lookup:
            return self.image_lookup[normalized]

        if "mar-bvm" in normalized:
            result = self._try_mar_bvm_lookups(normalized)
            if result:
                return result

        result = self._try_year_format_lookup(image_name)
        if result:
            return result

        # Last resort.
        return self._try_partial_matching(normalized)

    def get_all_files(self) -> Set[str]:
        """Return a copy of the set of indexed file names."""
        return self.available_files.copy()
529
+
530
+
531
+ class ArtMatcherV2:
532
+ """Classe principale pour le matching d'œuvres d'art"""
533
+
534
def __init__(self, csv_path: str, images_dir: str):
    """Load the database, index the images and precompute the lookup tables."""
    self.db = Database(csv_path)
    self.images_dir = os.path.abspath(images_dir)
    self.emotion_wheel = EmotionWheel()
    self.weights = ScoringWeights()

    # Optimizer built with an empty profile and no weights.
    self.optimizer_helper = WeightedLeximaxOptimizer(TargetProfile(), {})

    self.image_indexer = ImageIndexer(images_dir)

    # Keep only the rows whose name_image is present and not blank.
    full_df = self.db.get_dataframe()
    has_image = (
        full_df["name_image"].notna()
        & (full_df["name_image"] != "")
        & (full_df["name_image"].str.strip() != "")
    )
    self.df_with_images = full_df[has_image].copy()

    id_column = self.df_with_images["database_id"]
    self.df_with_images["database_id_str"] = id_column.astype(str)

    # database_id (as a string) -> row label in df_with_images.
    self.id_to_index = {}
    for row_label, row in self.df_with_images.iterrows():
        self.id_to_index[str(row["database_id"])] = row_label

    self.artwork_images = self._build_artwork_image_index()

    # Database object created without running Database.__init__; it only
    # carries the filtered dataframe.
    self.temp_db_with_images = Database.__new__(Database)
    self.temp_db_with_images.dataframe = self.df_with_images

    for message in (
        f"Base de données chargée: {self.db.n_pieces()} œuvres",
        f"Œuvres avec images: {len(self.df_with_images)}",
        f"Index des images: {len(self.artwork_images)} œuvres mappées",
    ):
        logger.info(message)
567
+
568
def _sanitize_input(self, input_str: str) -> str:
    """Clean a user-supplied string through SecurityValidator.sanitize_input."""
    cleaned = SecurityValidator.sanitize_input(input_str)
    return cleaned
571
+
572
def _parse_date(self, date_str: str) -> Optional[datetime]:
    """Return the date parsed by SecurityValidator.validate_date, or None if invalid."""
    is_valid, date_obj = SecurityValidator.validate_date(date_str)
    if not is_valid:
        return None
    return date_obj
576
+
577
+ def _build_artwork_image_index(self) -> Dict[str, List[str]]:
578
+ """Construit un index artwork_id -> [image_paths] au démarrage"""
579
+ artwork_images = {}
580
+
581
+ for idx, row in self.df_with_images.iterrows():
582
+ artwork_id = str(row["database_id"])
583
+ image_paths = []
584
+
585
+ if row["name_image"] and str(row["name_image"]).strip():
586
+ # Parse the image names - handle special separators
587
+ image_string = str(row["name_image"]).strip().strip('"')
588
+
589
+ # Handle cases with " / " or " - " separators
590
+ if " / " in image_string:
591
+ # Take first part before the slash
592
+ image_string = image_string.split(" / ")[0].strip()
593
+
594
+ # Special case: if it has " - 2022" it's a separator, not part of the name
595
+ if " - 2022" in image_string:
596
+ # Take the part before " - 2022"
597
+ image_string = image_string.split(" - 2022")[0].strip()
598
+ elif " - " in image_string and "MAR-BVM-7-2022-0-" not in image_string:
599
+ # For other MAR-BVM formats with " - " separator
600
+ parts = image_string.split(" - ")
601
+ if "MAR-BVM" in parts[0]:
602
+ image_string = parts[0].strip()
603
+
604
+ # Clean up trailing " -" or spaces before "-001"
605
+ image_string = re.sub(
606
+ r"\s+-\s*$", "", image_string
607
+ ) # Remove trailing " -"
608
+ image_string = re.sub(
609
+ r"\s+(-\d)", r"\1", image_string
610
+ ) # Remove spaces before -001
611
+
612
+ # Parse comma-separated list
613
+ images = [
614
+ img.strip()
615
+ for img in re.split(r"[,/]", image_string)
616
+ if img.strip()
617
+ ]
618
+
619
+ for img_name in images:
620
+ # Find the actual file for this image name
621
+ matched_file = self.image_indexer.find_image(img_name)
622
+ if matched_file:
623
+ img_path = os.path.join(self.images_dir, matched_file)
624
+ image_paths.append(img_path)
625
+
626
+ if image_paths:
627
+ artwork_images[artwork_id] = image_paths
628
+
629
+ return artwork_images
630
+
631
def preselect_artworks(
    self, firstname: str, birthday: str, city: str
) -> pd.DataFrame:
    """Pre-select artworks from the visitor's first name, birthday and city.

    The inputs are sanitized/parsed, the artworks with images are ranked by a
    WeightedLeximaxOptimizer using the PRESELECTION_* weights, and the rows
    whose score is above (0, 0, 0) are kept. When fewer than
    MIN_PRESELECTION_COUNT rows qualify, the first MIN_PRESELECTION_COUNT
    rows of the ranking are returned instead.
    """
    logger.info("=== DÉBUT PRÉ-SÉLECTION ===")

    # Clean the free-text inputs.
    firstname = self._sanitize_input(firstname)
    city = self._sanitize_input(city)
    logger.info(
        f"Critères de pré-sélection: prénom='{firstname}', date='{birthday}', ville='{city}'"
    )

    birth_date = self._parse_date(birthday)
    if birth_date:
        logger.info(f"Date convertie: {birth_date.strftime('%d/%m')}")

    profile = TargetProfile()
    profile.set_target_name(firstname)
    profile.set_target_date(birth_date)
    profile.set_target_place(city)

    weights = dict(
        related_names=self.weights.PRESELECTION_NAME_WEIGHT,
        related_dates=self.weights.PRESELECTION_DATE_WEIGHT,
        related_places=self.weights.PRESELECTION_PLACE_WEIGHT,
        related_emotions=self.weights.PRESELECTION_EMOTION_WEIGHT,
    )
    logger.info(
        f"Poids utilisés: nom={weights['related_names']}, date={weights['related_dates']}, lieu={weights['related_places']}, émotions={weights['related_emotions']}"
    )

    ranked = WeightedLeximaxOptimizer(profile, weights).optimize_max(
        self.temp_db_with_images
    )

    # NOTE(review): relies on the score column holding tuples comparable
    # with (0, 0, 0) -- confirm against the optimizer.
    preselected = ranked[ranked["score"] > (0, 0, 0)]
    logger.info(f"Œuvres avec score > 0: {len(preselected)}")

    minimum = self.weights.MIN_PRESELECTION_COUNT
    if len(preselected) < minimum:
        preselected = ranked.head(minimum)
        logger.info(f"Ajustement au minimum requis: {len(preselected)} œuvres")

    # Log the five best candidates with the criteria that contributed.
    logger.info("Top 5 pré-sélections:")
    for rank, (_, piece) in enumerate(preselected.head(5).iterrows(), start=1):
        logger.info(
            f" {rank}. Œuvre #{piece['database_id']} - Score: {piece['score']}"
        )
        if firstname and piece["related_names"]:
            name_score = Optimizer.name_similarity(firstname, piece["related_names"])
            if name_score > 0:
                logger.info(
                    f" → Nom: {piece['related_names']} (score: {name_score:.2f})"
                )
        if birth_date and piece["related_dates"]:
            date_score = Optimizer.date_similarity(birth_date, piece["related_dates"])
            if date_score > 0:
                logger.info(
                    f" → Dates: {[d.strftime('%d/%m') for d in piece['related_dates']]} (score: {date_score:.2f})"
                )
        if city and piece["related_places"]:
            place_score = self.optimizer_helper.place_similarity(
                city, piece["related_places"]
            )
            if place_score > 0:
                logger.info(
                    f" → Lieux: {piece['related_places']} (score: {place_score:.2f})"
                )

    logger.info("=== FIN PRÉ-SÉLECTION ===")
    return preselected
709
+
710
def get_random_images_for_selection(
    self, round_num: int, already_selected: Optional[List[str]] = None
) -> List[Tuple[str, str]]:
    """Pick random artworks, one image each, for a selection round.

    Args:
        round_num: Round number, used in the log messages only.
        already_selected: Artwork ids chosen in earlier rounds; they are
            excluded from the draw.

    Returns:
        Up to MAX_IMAGES_PER_SELECTION ``(image_path, artwork_id)`` pairs.
        When fewer artworks than that are available, a fallback list of
        ``.jpg`` files from the image directory is returned, each paired
        with the placeholder id "0".
    """
    logger.info(f"=== SÉLECTION D'IMAGES POUR LE TOUR {round_num} ===")

    if already_selected:
        logger.info(f"Œuvres déjà sélectionnées à exclure: {already_selected}")

    available_artworks = list(self.artwork_images.keys())

    if already_selected:
        already_selected_set = set(already_selected)
        available_artworks = [
            a for a in available_artworks if a not in already_selected_set
        ]

    logger.info(
        f"Nombre total d'œuvres avec images disponibles: {len(available_artworks)}"
    )

    max_images = self.weights.MAX_IMAGES_PER_SELECTION

    if len(available_artworks) < max_images:
        logger.warning(
            f"Seulement {len(available_artworks)} œuvres avec images disponibles"
        )
        # Filter before truncating: slicing the first ten files and only then
        # keeping the .jpg ones could return fewer images than exist. The
        # extension test is case-insensitive, like the indexer's, and the
        # files are sorted so the fallback is reproducible.
        direct_images = [
            (os.path.join(self.images_dir, filename), "0")
            for filename in sorted(self.image_indexer.get_all_files())
            if filename.lower().endswith(".jpg")
        ]
        return direct_images[:max_images]

    # At this point at least max_images artworks are available.
    selected_artworks = random.sample(available_artworks, max_images)

    logger.info(f"Œuvres sélectionnées aléatoirement: {selected_artworks}")

    selected = []
    for artwork_id in selected_artworks:
        # One image is drawn at random among those of the artwork.
        img_path = random.choice(self.artwork_images[artwork_id])
        selected.append((img_path, artwork_id))
        if artwork_id in self.id_to_index:
            idx = self.id_to_index[artwork_id]
            artwork = self.df_with_images.loc[idx]
            logger.info(f" Image {len(selected)}: Œuvre #{artwork_id}")
            logger.info(f" Type: {artwork['art_piece_type']}")
            logger.info(f" Émotions: {artwork['related_emotions']}")

    logger.info(f"=== FIN SÉLECTION IMAGES TOUR {round_num} ===")
    return selected
766
+
767
def extract_emotions_from_image_id(self, database_id: str) -> List[str]:
    """Return the emotions stored for the artwork with this id.

    The id is resolved through the precomputed ``id_to_index`` table. An
    unknown id, or a stored value that is not a list, yields an empty list.
    """
    try:
        row_label = self.id_to_index[database_id]
    except KeyError:
        return []

    stored = self.df_with_images.loc[row_label, "related_emotions"]
    return stored if isinstance(stored, list) else []
778
+
779
+ @lru_cache(maxsize=1024)
780
+ def _cached_emotion_similarity(self, emotion1: str, emotion2: str) -> float:
781
+ """Cache les calculs de similarité émotionnelle"""
782
+ return self.emotion_wheel.calculate_emotion_similarity(emotion1, emotion2)
783
+
784
def calculate_emotion_profile(self, selected_ids: List[str]) -> Dict[str, float]:
    """Return the share of each emotion among the selected artworks.

    The emotions of every selected artwork are tallied and each count is
    divided by the total number of emotions counted. The result is an empty
    dict when no emotion is found.
    """
    logger.info("=== CALCUL DU PROFIL ÉMOTIONNEL ===")
    logger.info(f"Images sélectionnées: {selected_ids}")

    tally = Counter()
    for db_id in selected_ids:
        emotions = self.extract_emotions_from_image_id(db_id)
        logger.info(f" Image {db_id}: émotions = {emotions}")
        tally.update(emotions)

    total = sum(tally.values())
    if not total > 0:
        emotion_profile = {}
        logger.info("Aucune émotion trouvée dans les images sélectionnées")
    else:
        emotion_profile = {}
        for emotion, count in tally.items():
            emotion_profile[emotion] = count / total
        logger.info(f"Profil émotionnel calculé: {emotion_profile}")

    logger.info("=== FIN CALCUL PROFIL ÉMOTIONNEL ===")
    return emotion_profile
810
+
811
def _get_artwork_image(self, artwork) -> Optional[str]:
    """Return the first indexed image path for an artwork.

    Args:
        artwork: Any object indexable by ``"database_id"``.

    Returns:
        The first entry registered in ``self.artwork_images`` for that ID
        (the ID is compared as a string), or ``None`` when the ID is not in
        the index.
    """
    lookup_key = str(artwork["database_id"])
    if lookup_key not in self.artwork_images:
        return None
    return self.artwork_images[lookup_key][0]
820
+
821
def _match_info_for(self, piece) -> Dict:
    """Assemble the display metadata returned alongside a matched artwork."""
    return {
        "title": f"Œuvre #{piece['database_id']}",
        "type": piece["art_piece_type"],
        "place": piece["art_piece_place"],
        "emotions": piece["related_emotions"],
        "explanation": piece["explanation"],
    }


def _find_exact_match(
    self, preselected, firstname: str, birthday: str, birth_date, city: str
) -> Optional[Tuple[Optional[str], str, Dict]]:
    """Return the result for the first pre-selected piece matching exactly.

    Pieces are scanned in order; for each one the first name is tested first
    (score >= 0.95), then the birth date (score == 1.0), then the city
    (score == 1.0). Returns ``None`` when no piece qualifies.
    """
    for _, piece in preselected.iterrows():
        if firstname and piece["related_names"]:
            name_score = Optimizer.name_similarity(firstname, piece["related_names"])
            if name_score >= 0.95:
                logger.info(
                    f"🎯 MATCH EXACT TROUVÉ: prénom '{firstname}' → œuvre #{piece['database_id']} (score: {name_score:.2f})"
                )
                logger.info(f" Noms dans l'œuvre: {piece['related_names']}")
                return (
                    self._get_artwork_image(piece),
                    f"Prénom '{firstname}' correspond exactement",
                    self._match_info_for(piece),
                )

        if birth_date and piece["related_dates"]:
            date_score = Optimizer.date_similarity(birth_date, piece["related_dates"])
            if date_score == 1.0:
                logger.info(
                    f"🎯 MATCH EXACT TROUVÉ: date '{birthday}' → œuvre #{piece['database_id']}"
                )
                logger.info(
                    f" Dates dans l'œuvre: {[d.strftime('%d/%m/%Y') for d in piece['related_dates']]}"
                )
                return (
                    self._get_artwork_image(piece),
                    f"Date d'anniversaire {birthday} correspond exactement",
                    self._match_info_for(piece),
                )

        if city and piece["related_places"]:
            place_score = self.optimizer_helper.place_similarity(
                city, piece["related_places"]
            )
            if place_score == 1.0:
                logger.info(
                    f"🎯 MATCH EXACT TROUVÉ: ville '{city}' → œuvre #{piece['database_id']}"
                )
                logger.info(f" Lieux dans l'œuvre: {piece['related_places']}")
                return (
                    self._get_artwork_image(piece),
                    f"Ville '{city}' correspond exactement",
                    self._match_info_for(piece),
                )

    return None


def _emotion_score(self, emotion_profile: Dict[str, float], piece_emotions) -> float:
    """Score how well a piece's emotions fit the user's emotion profile.

    For every profile emotion, the best similarity against any of the
    piece's emotions (never below 0) is weighted by the profile share; the
    sum is divided by the number of emotions on the piece. Returns 0 when
    either side is empty.
    """
    if not (emotion_profile and piece_emotions):
        return 0

    score = 0
    for user_emotion, weight in emotion_profile.items():
        # The leading 0 keeps the floor the original running maximum had.
        closest = max(
            [0]
            + [
                self._cached_emotion_similarity(user_emotion, piece_emotion)
                for piece_emotion in piece_emotions
            ]
        )
        score += closest * weight

    # piece_emotions is non-empty here (checked above), so this cannot divide by 0.
    return score / len(piece_emotions)


def find_best_match(
    self, firstname: str, birthday: str, city: str, selected_image_ids: List[str]
) -> Tuple[Optional[str], str, Dict]:
    """Find the artwork that best matches the user.

    Matching hierarchy:
      1. An exact match on first name, birth date or city wins immediately.
      2. If the pre-selection contains pieces with a score above
         ``(0, 0, 0)``, emotions decide among those pieces.
      3. Otherwise emotions decide among all artworks that have images.
      4. Remaining ties are broken by the object types the user selected,
         then at random.

    Artworks the user already picked during the rounds are excluded from
    steps 2-4.

    Args:
        firstname: User's first name (sanitised before use).
        birthday: Birthday string as typed by the user.
        city: User's city (sanitised before use).
        selected_image_ids: IDs of the artworks picked during the rounds.

    Returns:
        ``(image_path, reason, info)``. ``image_path`` may be ``None`` when
        the matched piece has no indexed image; when no candidate remains the
        result is ``(None, "Aucune correspondance trouvée", {})``.
    """
    firstname = self._sanitize_input(firstname)
    city = self._sanitize_input(city)
    birth_date = self._parse_date(birthday)

    logger.info(
        f"Recherche de correspondance pour: {firstname}, {birthday}, {city}"
    )

    preselected = self.preselect_artworks(firstname, birthday, city)

    logger.info("=== DÉTECTION DE MATCH EXACT ===")
    exact = self._find_exact_match(preselected, firstname, birthday, birth_date, city)
    if exact is not None:
        return exact

    logger.info("Aucun match exact trouvé, passage à la sélection par émotions")

    emotion_profile = self.calculate_emotion_profile(selected_image_ids)

    logger.info("=== STRATÉGIE DE MATCHING ===")
    valid_preselection = preselected[preselected["score"] > (0, 0, 0)]

    if len(valid_preselection) > 0:
        logger.info(
            f"📋 CAS A: {len(valid_preselection)} œuvres pré-sélectionnées - utilisation des émotions pour départager"
        )
        candidates = valid_preselection
    else:
        logger.info(
            f"📋 CAS B: Aucune pré-sélection valide - recherche par émotions sur {len(self.df_with_images)} œuvres"
        )
        candidates = self.df_with_images

    # Never recommend an artwork the user has already picked.
    selected_artwork_ids = set(selected_image_ids)
    candidates = candidates[
        ~candidates["database_id"].astype(str).isin(selected_artwork_ids)
    ]
    logger.info(
        f"Après exclusion des œuvres déjà sélectionnées {selected_artwork_ids}: {len(candidates)} candidats restants"
    )

    logger.info("=== CALCUL DES SCORES ÉMOTIONNELS ===")
    best_matches = []
    best_emotion_score = -1

    for _, piece in candidates.iterrows():
        emotion_score = self._emotion_score(emotion_profile, piece["related_emotions"])

        if emotion_score > best_emotion_score:
            best_emotion_score = emotion_score
            best_matches = [piece]
            logger.info(
                f" Nouveau meilleur score émotionnel: {emotion_score:.3f} - Œuvre #{piece['database_id']}"
            )
        elif emotion_score == best_emotion_score and emotion_score > 0:
            # Ties are only collected for positive scores; at score 0 the
            # first candidate seen stays the sole best match.
            best_matches.append(piece)
            logger.info(
                f" Score égal au meilleur: {emotion_score:.3f} - Œuvre #{piece['database_id']}"
            )

    logger.info(
        f"Nombre de meilleures correspondances: {len(best_matches)} avec score {best_emotion_score:.3f}"
    )

    if not best_matches:
        logger.info("Aucune correspondance trouvée")
        return None, "Aucune correspondance trouvée", {}

    if len(best_matches) == 1:
        best_match = best_matches[0]
        match_reason = "Meilleure correspondance émotionnelle"
    else:
        logger.info("=== DÉPARTAGE PAR TYPE D'OBJET ===")
        # How often each object type appears among the user's own picks.
        selected_types_counter = Counter(
            self.df_with_images.loc[self.id_to_index[img_id], "art_piece_type"]
            for img_id in selected_image_ids
            if img_id in self.id_to_index
        )

        type_scores = [
            selected_types_counter.get(piece["art_piece_type"], 0)
            for piece in best_matches
        ]
        best_type_score = max(type_scores)
        type_scored_matches = [
            piece
            for piece, type_score in zip(best_matches, type_scores)
            if type_score == best_type_score
        ]

        if len(type_scored_matches) > 1:
            logger.info(
                f" {len(type_scored_matches)} œuvres avec le même score de type ({best_type_score}) - sélection aléatoire"
            )
            best_match = random.choice(type_scored_matches)
            match_reason = (
                "Sélection aléatoire parmi les meilleures correspondances"
            )
        else:
            best_match = type_scored_matches[0]
            match_reason = f"Type d'objet '{best_match['art_piece_type']}' préféré"
            logger.info(
                f" Type '{best_match['art_piece_type']}' sélectionné avec score {best_type_score}"
            )

    reasons = []
    if len(valid_preselection) > 0:
        if firstname and best_match["related_names"]:
            name_score = Optimizer.name_similarity(
                firstname, best_match["related_names"]
            )
            if name_score > 0:
                reasons.append(f"prénom '{firstname}' trouvé")

        if birth_date and best_match["related_dates"]:
            date_score = Optimizer.date_similarity(
                birth_date, best_match["related_dates"]
            )
            if date_score > 0:
                reasons.append(
                    f"date {'exacte' if date_score == 1.0 else 'partielle'}"
                )

        if city and best_match["related_places"]:
            place_score = self.optimizer_helper.place_similarity(
                city, best_match["related_places"]
            )
            if place_score > 0:
                reasons.append(f"ville '{city}' trouvée")

    if best_emotion_score > 0:
        reasons.append(
            f"correspondance émotionnelle (score: {best_emotion_score:.2f})"
        )

    if not reasons:
        reasons.append(match_reason)

    final_reason = " ; ".join(reasons)

    logger.info(f"\n🏆 RÉSULTAT FINAL: Œuvre #{best_match['database_id']}")
    logger.info(f" Raison: {final_reason}")
    logger.info(f" Type: {best_match['art_piece_type']}")
    logger.info(f" Lieu: {best_match['art_piece_place']}")

    match_image = self._get_artwork_image(best_match)
    match_info = self._match_info_for(best_match)

    return match_image, final_reason, match_info
1073
+
1074
+
1075
# Data sources, resolved relative to the working directory.
csv_path = "PP1-Collection_Database_new-cleaned.csv"
images_dir = "pictures_data"

# Missing inputs are only logged here; the matcher is still constructed below.
for _required_path, _missing_label in (
    (csv_path, "Fichier CSV introuvable"),
    (images_dir, "Répertoire images introuvable"),
):
    if not os.path.exists(_required_path):
        logger.error(f"{_missing_label}: {_required_path}")

# Single shared matcher instance used by all UI callbacks.
matcher = ArtMatcherV2(csv_path, images_dir)
1084
+
1085
+
1086
def process_user_info(firstname: str, birthday: str, city: str, state: SessionState):
    """Validate the personal-details form and store the values in the session.

    The first name and city are sanitised; all three values are written to
    ``state`` before any validation takes place.

    Returns:
        ``(info_update, selection_update, results_update, message, state)``.
        On a validation failure the info section stays visible; on success
        the selection section is shown instead.
    """
    firstname = SecurityValidator.sanitize_input(firstname)
    city = SecurityValidator.sanitize_input(city)

    state.firstname = firstname
    state.birthday = birthday
    state.city = city

    def _stay_on_form(problem: str):
        # Keep the form visible and report what is wrong with the input.
        return (
            gr.update(visible=True),
            gr.update(visible=False),
            gr.update(visible=False),
            problem,
            state,
        )

    if not (firstname and birthday):
        return _stay_on_form(
            "Veuillez remplir au moins votre prénom et date de naissance."
        )

    date_ok, _ = SecurityValidator.validate_date(birthday)
    if not date_ok:
        return _stay_on_form("Format de date invalide. Utilisez JJ/MM (ex: 15/03)")

    return (
        gr.update(visible=False),
        gr.update(visible=True),
        gr.update(visible=False),
        "Informations enregistrées ! Passons à la sélection d'images.",
        state,
    )
1121
+
1122
+
1123
def load_images_for_round(round_num: int, state: SessionState):
    """Load the candidate images for one selection round.

    Args:
        round_num: Zero-based index of the round being prepared.
        state: Current session; ``state.current_image_ids`` is updated to
            the IDs of the images returned.

    Returns:
        ``(images, ids, message, state)``. When fewer than
        ``ScoringWeights.MAX_IMAGES_PER_SELECTION`` images are available,
        ``images`` is three ``None`` placeholders and ``ids`` is empty.
    """
    images_data = matcher.get_random_images_for_selection(
        round_num, state.selected_images
    )

    if len(images_data) < ScoringWeights.MAX_IMAGES_PER_SELECTION:
        logger.warning(f"Seulement {len(images_data)} images disponibles")
        # Fix: drop the previous round's IDs. Otherwise a later click would
        # silently select an artwork that is no longer displayed.
        state.current_image_ids = []
        return (
            [None, None, None],
            [],
            f"Pas assez d'images disponibles (seulement {len(images_data)} trouvées)",
            state,
        )

    # Each entry is an (image_path, artwork_id) pair.
    images = [entry[0] for entry in images_data]
    ids = [entry[1] for entry in images_data]

    state.current_image_ids = ids

    return (
        images,
        ids,
        f"Tour {round_num + 1}/{ScoringWeights.TOTAL_ROUNDS} : Sélectionnez l'image qui vous attire le plus",
        state,
    )
1149
+
1150
+
1151
def select_image(choice: Optional[int], state: SessionState):
    """Record the image chosen in the current round and prepare the next step.

    Args:
        choice: Zero-based index of the chosen image, or ``None`` when
            nothing was selected.
        state: Current session; the chosen ID is appended to
            ``selected_images`` and ``current_round`` is incremented.

    Returns:
        A 6-tuple ``(img1, img2, img3, image_choice, message, state)`` when
        the choice is missing or invalid; otherwise an 8-tuple with two extra
        visibility updates for the selection and loading sections.
    """

    def _unchanged(problem: str):
        # Leave every component as it is and only report the problem.
        return (
            gr.update(),
            gr.update(),
            gr.update(),
            gr.update(),
            problem,
            state,
        )

    if choice is None:
        return _unchanged("Veuillez sélectionner une image")

    if not state.current_image_ids or len(state.current_image_ids) <= choice:
        return _unchanged("Erreur: image non trouvée")

    selected_id = state.current_image_ids[choice]
    state.selected_images.append(selected_id)
    state.current_round += 1

    logger.info(
        f"Tour {state.current_round}: Image {choice+1} sélectionnée (ID: {selected_id})"
    )

    if state.current_round >= ScoringWeights.TOTAL_ROUNDS:
        # Every round is done: switch from the selection view to the loader.
        return (
            gr.update(),  # img1
            gr.update(),  # img2
            gr.update(),  # img3
            gr.update(),  # image_choice
            "",  # empty status_message
            state,
            gr.update(visible=False),  # hide selection_section
            gr.update(visible=True),  # show loading_section
        )

    next_images, _next_ids, message, state = load_images_for_round(
        state.current_round, state
    )
    return (
        gr.update(value=next_images[0]),
        gr.update(value=next_images[1]),
        gr.update(value=next_images[2]),
        gr.update(value=None),
        message,
        state,
        gr.update(visible=True),  # keep selection_section visible
        gr.update(visible=False),  # keep loading_section hidden
    )
1208
+
1209
+
1210
def show_results(state: SessionState):
    """Compute the final match and build the updates for the results view.

    When the session is not complete, the selection section is shown again
    and the result outputs are blanked. Otherwise the matcher is queried,
    the outcome is stored on ``state`` and logged through
    ``session_tracker``, and the results section is shown.

    Returns:
        ``(info_update, selection_update, loading_update, results_update,
        image, title, explanation)``.
    """
    if not state.is_complete():
        return (
            gr.update(visible=False),  # info_section
            gr.update(visible=True),  # selection_section
            gr.update(visible=False),  # loading_section
            gr.update(visible=False),  # results_section
            None,
            "",
            "",
        )

    match_image, reason, info = matcher.find_best_match(
        state.firstname,
        state.birthday,
        state.city,
        state.selected_images,
    )

    if not match_image:
        # Nothing found - the session is logged all the same.
        state.final_artwork = "Aucune œuvre trouvée"
        state.recommendation_type = "none"
        session_tracker.log_session(state, "none")

        title = "Œuvre non trouvée"
        explanation = "Désolé, aucune œuvre correspondante n'a pu être trouvée."
    else:
        # Exact name/date/place matches are distinguished from emotion-based
        # ones through the wording of the reason text.
        recommendation_type = (
            "name_date_place"
            if "correspond exactement" in reason.lower()
            else "emotions"
        )

        state.final_artwork = info.get("title", "Œuvre inconnue")
        state.recommendation_type = recommendation_type
        session_tracker.log_session(state, recommendation_type)

        title = info.get("title", "Œuvre non trouvée")
        explanation = f"""
**Votre œuvre correspondante a été trouvée !**

**Raison du match :** {reason}

**Détails de l'œuvre :**
- Type : {info.get('type', 'Non spécifié')}
- Lieu : {info.get('place', 'Non spécifié')}
- Émotions : {', '.join(info.get('emotions', [])) if info.get('emotions') else 'Non spécifiées'}

**Description :**
{info.get('explanation', 'Aucune description disponible')}
"""

    return (
        gr.update(visible=False),  # info_section
        gr.update(visible=False),  # selection_section
        gr.update(visible=False),  # loading_section
        gr.update(visible=True),  # results_section
        match_image,
        title,
        explanation,
    )
1276
+
1277
+
1278
+ with gr.Blocks(
1279
+ title="Art Matcher",
1280
+ theme=gr.themes.Soft(
1281
+ primary_hue="teal", secondary_hue="teal", neutral_hue="zinc"
1282
+ ),
1283
+ ) as demo:
1284
+ gr.Markdown(
1285
+ """
1286
+ # 🎨 Art Matcher
1287
+ ### Découvrez l'œuvre d'art qui vous correspond !
1288
+
1289
+ Cette application utilise vos informations personnelles et vos préférences visuelles
1290
+ pour trouver l'œuvre d'art qui vous correspond le mieux dans notre collection.
1291
+ """
1292
+ )
1293
+
1294
+ session_state = gr.State(SessionState())
1295
+
1296
+ with gr.Group(visible=True) as info_section:
1297
+ gr.Markdown("### Étape 1 : Vos informations")
1298
+ with gr.Row():
1299
+ firstname_input = gr.Textbox(
1300
+ label="Prénom", placeholder="Entrez votre prénom", max_lines=1
1301
+ )
1302
+ birthday_input = gr.Textbox(
1303
+ label="Date d'anniversaire (JJ/MM)",
1304
+ placeholder="Ex: 25/12",
1305
+ max_lines=1,
1306
+ )
1307
+ city_input = gr.Textbox(
1308
+ label="Ville de résidence", placeholder="Ex: Paris", max_lines=1
1309
+ )
1310
+
1311
+ submit_info_btn = gr.Button("Valider mes informations", variant="primary")
1312
+
1313
+ with gr.Group(visible=False) as selection_section:
1314
+ selection_title = gr.Markdown("### Étape 2 : Sélection d'images")
1315
+
1316
+ with gr.Row():
1317
+ img1 = gr.Image(label="Image 1", type="filepath", height=300)
1318
+ img2 = gr.Image(label="Image 2", type="filepath", height=300)
1319
+ img3 = gr.Image(label="Image 3", type="filepath", height=300)
1320
+
1321
+ image_choice = gr.Radio(
1322
+ choices=["Image 1", "Image 2", "Image 3"],
1323
+ label="Quelle image vous attire le plus ?",
1324
+ type="index",
1325
+ )
1326
+
1327
+ select_btn = gr.Button("Valider mon choix", variant="primary")
1328
+
1329
+ with gr.Group(visible=False) as loading_section:
1330
+ gr.Markdown("### ⏳ Analyse en cours...")
1331
+ gr.HTML("""
1332
+ <div style="text-align: center; padding: 40px;">
1333
+ <div style="display: inline-block; width: 60px; height: 60px; border: 6px solid #f3f3f3; border-top: 6px solid #14b8a6; border-radius: 50%; animation: spin 1s linear infinite;"></div>
1334
+ <style>
1335
+ @keyframes spin {
1336
+ 0% { transform: rotate(0deg); }
1337
+ 100% { transform: rotate(360deg); }
1338
+ }
1339
+ </style>
1340
+ <p style="margin-top: 20px; font-size: 18px; color: #666;">
1341
+ <strong>Traitement de vos sélections émotionnelles...</strong><br>
1342
+ <span style="font-size: 14px;">Nous analysons votre profil pour trouver l'œuvre parfaite</span>
1343
+ </p>
1344
+ </div>
1345
+ """)
1346
+
1347
+ with gr.Group(visible=False) as results_section:
1348
+ gr.Markdown("### Votre œuvre correspondante")
1349
+
1350
+ with gr.Row():
1351
+ with gr.Column(scale=1):
1352
+ result_image = gr.Image(label="Votre œuvre", height=400)
1353
+ result_title = gr.Markdown("## Titre de l'œuvre")
1354
+
1355
+ with gr.Column(scale=1):
1356
+ result_explanation = gr.Markdown("")
1357
+
1358
+ restart_btn = gr.Button("Recommencer", variant="secondary")
1359
+
1360
+ status_message = gr.Markdown("")
1361
+
1362
def on_info_submit(firstname, birthday, city, state):
    """Handle the info form: reset the session, validate, then load round 0.

    Returns the updates for the info, selection and results sections, the
    three image slots, the status message and the session state.
    """
    state.reset()

    info_vis, select_vis, results_vis, message, state = process_user_info(
        firstname, birthday, city, state
    )

    if not select_vis["visible"]:
        # Validation failed: stay on the form with empty image slots.
        return (info_vis, select_vis, results_vis, None, None, None, message, state)

    images, _ids, round_message, state = load_images_for_round(0, state)
    first, second, third = (
        images[slot] if len(images) > slot else None for slot in range(3)
    )
    return (
        info_vis,
        select_vis,
        results_vis,
        first,
        second,
        third,
        round_message,
        state,
    )
1383
+
1384
+ submit_info_btn.click(
1385
+ fn=on_info_submit,
1386
+ inputs=[firstname_input, birthday_input, city_input, session_state],
1387
+ outputs=[
1388
+ info_section,
1389
+ selection_section,
1390
+ results_section,
1391
+ img1,
1392
+ img2,
1393
+ img3,
1394
+ status_message,
1395
+ session_state,
1396
+ ],
1397
+ )
1398
+
1399
def on_image_select(choice, state):
    """Adapt the result of ``select_image`` to the click handler's outputs.

    ``select_image`` returns 8 values on a successful selection and 6 values
    when the choice is missing or invalid; in the latter case the selection
    and loading sections are left untouched.
    """
    result = select_image(choice, state)

    if len(result) == 8:
        (
            img1_update,
            img2_update,
            img3_update,
            choice_update,
            message,
            state,
            selection_vis,
            loading_vis,
        ) = result
    else:
        # 6-value format (no section switch requested).
        (img1_update, img2_update, img3_update, choice_update, message, state) = result
        selection_vis = gr.update()
        loading_vis = gr.update()

    return (
        gr.update(),  # info_section
        selection_vis,  # selection_section
        loading_vis,  # loading_section
        gr.update(),  # results_section
        img1_update,  # img1
        img2_update,  # img2
        img3_update,  # img3
        choice_update,  # image_choice
        message,  # status_message
        state,
    )
1433
+
1434
def handle_final_results(state):
    """Show the results once the session is complete; otherwise change nothing
    in the sections and blank the result outputs."""
    if not state.is_complete():
        return gr.update(), gr.update(), gr.update(), gr.update(), None, "", ""
    return show_results(state)
1439
+
1440
+ select_btn.click(
1441
+ fn=on_image_select,
1442
+ inputs=[image_choice, session_state],
1443
+ outputs=[
1444
+ info_section,
1445
+ selection_section,
1446
+ loading_section,
1447
+ results_section,
1448
+ img1,
1449
+ img2,
1450
+ img3,
1451
+ image_choice,
1452
+ status_message,
1453
+ session_state,
1454
+ ],
1455
+ ).then(
1456
+ fn=handle_final_results,
1457
+ inputs=[session_state],
1458
+ outputs=[
1459
+ info_section,
1460
+ selection_section,
1461
+ loading_section,
1462
+ results_section,
1463
+ result_image,
1464
+ result_title,
1465
+ result_explanation,
1466
+ ],
1467
+ )
1468
+
1469
def restart_app(state):
    """Reset the session and bring the UI back to the initial info form."""
    state.reset()

    # info_section visible; selection, loading and results sections hidden.
    section_updates = tuple(
        gr.update(visible=shown) for shown in (True, False, False, False)
    )
    # firstname_input, birthday_input, city_input, image_choice
    cleared_inputs = ("", "", "", None)

    return (
        *section_updates,
        *cleared_inputs,
        "Application réinitialisée. Veuillez entrer vos informations.",  # status_message
        state,
    )
1484
+
1485
+ restart_btn.click(
1486
+ fn=restart_app,
1487
+ inputs=[session_state],
1488
+ outputs=[
1489
+ info_section,
1490
+ selection_section,
1491
+ loading_section,
1492
+ results_section,
1493
+ firstname_input,
1494
+ birthday_input,
1495
+ city_input,
1496
+ image_choice,
1497
+ status_message,
1498
+ session_state,
1499
+ ],
1500
+ )
1501
+
1502
+
1503
+ if __name__ == "__main__":
1504
+ demo.launch()