Aurel-test committed on
Commit
ef788ca
·
verified ·
1 Parent(s): 0e957c6

Delete app_old.py

Browse files
Files changed (1) hide show
  1. app_old.py +0 -1286
app_old.py DELETED
@@ -1,1286 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- Web Demo v2 pour la base de données d'œuvres d'art - Version Sécurisée et Optimisée
4
- Interface multi-étapes avec matching basé sur prénom, date, ville et émotions
5
- Optimisé pour les performances avec caching et indexation
6
- Version sécurisée avec validation des entrées et gestion d'état propre
7
- """
8
-
9
- import gradio as gr
10
- import os
11
- import sys
12
- import logging
13
- import random
14
- import re
15
- from datetime import datetime
16
- from typing import List, Dict, Tuple, Optional, Any, Set
17
- from collections import Counter, defaultdict
18
- from functools import lru_cache
19
- from dataclasses import dataclass, field
20
- import pandas as pd
21
-
22
- logging.basicConfig(
23
- level=logging.INFO,
24
- format="[%(asctime)s] %(levelname)s: %(message)s",
25
- datefmt="%Y-%m-%d %H:%M:%S",
26
- )
27
- logger = logging.getLogger(__name__)
28
-
29
- sys.path.insert(0, os.path.join(os.path.dirname(__file__), "src"))
30
-
31
- from art_pieces_db.database import Database
32
- from art_pieces_db.query import TargetProfile, WeightedLeximaxOptimizer, Optimizer
33
- from art_pieces_db.emotions import EmotionWheel
34
- from art_pieces_db.utils import str_to_date
35
-
36
-
37
@dataclass
class ScoringWeights:
    """Single home for the scoring and flow constants (avoids magic numbers).

    The values are read both from instances (``self.weights.X``) and directly
    from the class (``ScoringWeights.X``), so they are kept as plain defaults.
    """

    # Per-criterion weights handed to the optimizer during pre-selection.
    # NOTE(review): how the optimizer treats a 0.0 weight is not visible here;
    # presumably it disables the emotion criterion at this stage -- confirm.
    PRESELECTION_NAME_WEIGHT: float = 3.0
    PRESELECTION_DATE_WEIGHT: float = 1.0
    PRESELECTION_PLACE_WEIGHT: float = 2.0
    PRESELECTION_EMOTION_WEIGHT: float = 0.0

    # Lower bound on the number of pre-selected artworks.
    MIN_PRESELECTION_COUNT: int = 20
    # Number of images shown per selection screen.
    MAX_IMAGES_PER_SELECTION: int = 3
    # Number of rounds played before the final recommendation.
    TOTAL_ROUNDS: int = 3
49
-
50
-
51
@dataclass
class SessionState:
    """Holds everything remembered about one visitor session."""

    # Answers typed by the visitor.
    firstname: str = ""
    birthday: str = ""
    city: str = ""

    # Progress through the image-selection rounds.
    current_round: int = 0
    selected_images: List[str] = field(default_factory=list)
    current_image_ids: List[str] = field(default_factory=list)

    # Result of the pre-selection step, if one has been computed.
    preselected_pieces: Optional[pd.DataFrame] = None

    def reset(self):
        """Put every field back to its initial value."""
        self.firstname = self.birthday = self.city = ""
        self.current_round = 0
        # Fresh list objects so nothing is shared with the previous session.
        self.selected_images, self.current_image_ids = [], []
        self.preselected_pieces = None

    def is_complete(self) -> bool:
        """Return True once all selection rounds have been played."""
        return ScoringWeights.TOTAL_ROUNDS <= self.current_round
78
-
79
-
80
class SecurityValidator:
    """Central place for the filename, free-text and date validation rules."""

    # ".." or "./" anywhere in a name is treated as a path traversal attempt.
    PATH_TRAVERSAL_PATTERN = re.compile(r"\.\.|\.\/")
    # Word characters, dashes, dots and whitespace only.
    VALID_FILENAME_PATTERN = re.compile(r"^[\w\-\.\s]+$")
    # Characters accepted in free-text inputs (names, cities).
    VALID_INPUT_PATTERN = re.compile(
        r"^[\w\-\s\'\.,àâäéèêëïîôûùüÿæœçÀÂÄÉÈÊËÏÎÔÛÙÜŸÆŒÇ]+$", re.UNICODE
    )
    # Day/month, one or two digits each.
    DATE_PATTERN = re.compile(r"^\d{1,2}/\d{1,2}$")

    @classmethod
    def validate_filename(cls, filename: str) -> bool:
        """Return True when ``filename`` is safe to use.

        A name is rejected when it is empty, contains a path traversal
        sequence, or when its base name holds a character outside
        ``VALID_FILENAME_PATTERN``. Rejections are logged as warnings.
        """
        if not filename:
            return False

        if cls.PATH_TRAVERSAL_PATTERN.search(filename):
            logger.warning(f"Tentative de path traversal détectée: {filename}")
            return False

        # Only the last path component is checked against the whitelist.
        base_name = os.path.basename(filename)
        if not cls.VALID_FILENAME_PATTERN.match(base_name):
            logger.warning(f"Nom de fichier invalide: {filename}")
            return False

        return True

    @classmethod
    def sanitize_input(cls, input_str: str, max_length: int = 100) -> str:
        """Truncate, trim and whitelist-filter a user-supplied string.

        Args:
            input_str: Raw text typed by the user (may be empty or None).
            max_length: Maximum number of characters kept before trimming.

        Returns:
            The cleaned string, or "" when nothing usable remains.
        """
        if not input_str:
            return ""

        input_str = input_str[:max_length].strip()
        if not input_str:
            return ""

        if cls.VALID_INPUT_PATTERN.match(input_str):
            return input_str

        # Keep only the characters that individually pass the whitelist, then
        # trim again: dropping a leading/trailing invalid character can expose
        # whitespace that the first strip() could not see (e.g. "! Bob").
        cleaned = "".join(
            c for c in input_str if cls.VALID_INPUT_PATTERN.match(c)
        ).strip()
        logger.info(f"Input sanitized: '{input_str}' -> '{cleaned}'")
        return cleaned

    @classmethod
    def validate_date(cls, date_str: str) -> Tuple[bool, Optional[datetime]]:
        """Validate and parse a ``DD/MM`` date.

        Returns:
            ``(True, datetime)`` on success, with the year fixed to 2000 (a
            leap year, so 29/02 is accepted); ``(False, None)`` otherwise.
        """
        if not date_str or not cls.DATE_PATTERN.match(date_str):
            return False, None

        day, month = map(int, date_str.split("/"))
        if not (1 <= day <= 31 and 1 <= month <= 12):
            return False, None

        try:
            # datetime() rejects impossible combinations such as 31/02.
            return True, datetime(year=2000, month=month, day=day)
        except ValueError as e:
            logger.error(f"Erreur de parsing de date: {e}")
            return False, None
145
-
146
-
147
class ImageIndexer:
    """Indexes the image files of a directory and resolves database image names to them.

    Names found in the CSV database do not always match the file names on
    disk, so both sides are reduced to a normalized key before comparison.
    """

    IMAGE_EXTENSIONS = (".jpg", ".png")
    COMMON_SUFFIXES = [".jpg", ".png", "_medium"]
    MAR_BVM_TEST_SUFFIXES = ["-001", "-002", "-003"]

    def __init__(self, images_dir: str):
        """Scan ``images_dir`` and build the lookup tables."""
        self.images_dir = os.path.abspath(images_dir)
        self.available_files = set()
        self.image_lookup = {}  # normalized name -> filename
        self.mar_bvm_lookup = {}  # MAR-BVM base key -> [filenames]
        self._build_index()

    def _strip_file_extensions(self, filename: str) -> str:
        """Return the lower-cased filename without its image extension."""
        base_name = filename.lower()
        if base_name.endswith("_medium.jpg"):
            return base_name[:-11]
        if base_name.endswith((".jpg", ".png")):
            return base_name[:-4]
        return base_name

    def _normalize_basic_patterns(self, name: str) -> str:
        """Lower-case, trim, drop known suffixes and turn spaces/underscores into dashes."""
        normalized = name.lower().strip().rstrip(",")

        # Suffixes are removed in list order, so "x_medium.jpg" loses ".jpg"
        # first and "_medium" afterwards.
        for suffix in self.COMMON_SUFFIXES:
            if normalized.endswith(suffix):
                normalized = normalized[: -len(suffix)]

        return re.sub(r"[\s_]+", "-", normalized)

    def _normalize_mar_bvm_format(self, name: str) -> str:
        """Replace dots with dashes in a name containing "mar-bvm"."""
        if "mar-bvm" not in name:
            return name
        return name.replace(".0.", "-0-").replace(".", "-")

    def _normalize_name(self, name: str) -> str:
        """Return the comparison key for ``name``."""
        normalized = self._normalize_basic_patterns(name)

        if "mar-bvm" in normalized:
            normalized = self._normalize_mar_bvm_format(normalized)
        elif not normalized.startswith("20"):
            # Names starting with a year (e.g. 2022.0.86) keep their dots.
            normalized = normalized.replace(".", "-")

        return normalized

    def _create_mar_bvm_lookups(self, normalized: str, filename: str):
        """Register a MAR-BVM file under its six-part base key.

        The entry is only created when a purely numeric part exists at
        position 5 or later (i.e. after "mar-bvm-7-2022-0").
        """
        if "mar-bvm" not in normalized:
            return

        parts = normalized.split("-")
        if any(part.isdigit() for part in parts[5:]):
            base_key = "-".join(parts[:6])  # e.g. mar-bvm-7-2022-0-22
            self.mar_bvm_lookup.setdefault(base_key, []).append(filename)

    def _process_image_file(self, filename: str):
        """Add one directory entry to the index if it is a safe image file."""
        if not SecurityValidator.validate_filename(filename):
            logger.warning(f"Fichier ignoré pour raison de sécurité: {filename}")
            return

        if not filename.lower().endswith(self.IMAGE_EXTENSIONS):
            return

        self.available_files.add(filename)

        normalized = self._normalize_name(self._strip_file_extensions(filename))
        self.image_lookup[normalized] = filename
        self._create_mar_bvm_lookups(normalized, filename)

    def _build_index(self):
        """Build the index from the content of ``self.images_dir``.

        A directory that cannot be listed is logged and leaves the index empty.
        """
        try:
            all_files = os.listdir(self.images_dir)
        except OSError as e:
            logger.error(f"Erreur lors de la construction de l'index: {e}")
            self.available_files = set()
            return

        for filename in all_files:
            self._process_image_file(filename)

        logger.info(
            f"Index des images construit: {len(self.available_files)} fichiers disponibles, "
            f"{len(self.image_lookup)} entrées normalisées"
        )

    def _clean_input_name(self, image_name: str) -> str:
        """Trim a database image name and remove stray trailing separators."""
        cleaned = image_name.strip().rstrip(",").rstrip("-").strip()
        # "name -001" -> "name-001"
        return re.sub(r"\s+(-\d)", r"\1", cleaned)

    def _normalize_mar_bvm_input(self, image_name: str) -> str:
        """Bring a MAR-BVM database name to the dash-separated form used on disk."""
        if "MAR-BVM" not in image_name:
            return image_name

        # Some entries omit the "7-" segment: MAR-BVM-2022-0-153.
        if "MAR-BVM-2022-0-" in image_name:
            image_name = image_name.replace("MAR-BVM-2022-0-", "MAR-BVM-7-2022-0-")

        if ".0." in image_name:
            image_name = image_name.replace(".0.", "-0-")

        # Trailing .001 / .002 -> -001 / -002
        image_name = re.sub(r"\.(\d{3})$", r"-\1", image_name)

        if image_name.endswith(".1"):
            image_name = image_name[:-2] + "-1"
        elif image_name.endswith(".2"):
            image_name = image_name[:-2] + "-2"

        return image_name.replace(".", "-")

    def _try_mar_bvm_lookups(self, normalized: str) -> Optional[str]:
        """Resolve a MAR-BVM key via the base-key table, then via numbered suffixes."""
        if normalized in self.mar_bvm_lookup and self.mar_bvm_lookup[normalized]:
            return self.mar_bvm_lookup[normalized][0]

        for suffix in self.MAR_BVM_TEST_SUFFIXES:
            test_pattern = f"{normalized}{suffix}"
            if test_pattern in self.image_lookup:
                return self.image_lookup[test_pattern]

        return None

    def _try_year_format_lookup(self, image_name: str) -> Optional[str]:
        """Look up names that start with a year, keeping their dots."""
        if not image_name.startswith("20"):
            return None

        test_name = image_name.lower().replace(" ", "-")
        return self.image_lookup.get(test_name)

    def _try_partial_matching(self, normalized: str) -> Optional[str]:
        """Return the first indexed file whose key contains ``normalized``.

        An empty key is refused: it is a substring of every key and would
        otherwise resolve to an arbitrary image.
        """
        if not normalized:
            return None

        for key, filename in self.image_lookup.items():
            if normalized in key:
                return filename
        return None

    def _split_multiple_names(self, image_name: str) -> List[str]:
        """Split a field holding several names separated by ",", "/" or a single " - "."""
        if "," in image_name:
            return [name.strip() for name in image_name.split(",") if name.strip()]

        if "/" in image_name:
            return [name.strip() for name in image_name.split("/") if name.strip()]

        # e.g. "MAR-BVM-7-2022.0.81 - 2022.0.81"
        if image_name.count(" - ") == 1:
            first, second = (name.strip() for name in image_name.split(" - "))
            # When the second part merely repeats a piece of the first, keep
            # the first only.
            if second in first:
                return [first]
            return [first, second]

        return [image_name]

    def find_image(self, image_name: str) -> Optional[str]:
        """Return the filename matching ``image_name``, or None when nothing matches.

        When the field contains several names, the first one that resolves wins.
        """
        if not image_name:
            return None

        for name in self._split_multiple_names(image_name):
            result = self._find_single_image(name)
            if result:
                return result

        return None

    def _find_single_image(self, image_name: str) -> Optional[str]:
        """Resolve one name, trying progressively looser strategies."""
        cleaned_name = self._clean_input_name(image_name)
        processed_name = self._normalize_mar_bvm_input(cleaned_name)
        normalized = self._normalize_name(processed_name)

        # Names made only of whitespace/separators clean down to "" and must
        # not be matched against anything.
        if not normalized:
            return None

        if normalized in self.image_lookup:
            return self.image_lookup[normalized]

        if "mar-bvm" in normalized:
            result = self._try_mar_bvm_lookups(normalized)
            if result:
                return result

        result = self._try_year_format_lookup(image_name)
        if result:
            return result

        # Last resort: substring match.
        return self._try_partial_matching(normalized)

    def get_all_files(self) -> Set[str]:
        """Return a copy of the set of indexed filenames."""
        return self.available_files.copy()
378
-
379
-
380
class ArtMatcherV2:
    """Matches a visitor (first name, birthday, city, picked images) to an artwork."""

    def __init__(self, csv_path: str, images_dir: str):
        """Load the database, index the images and precompute the lookup tables.

        Args:
            csv_path: Path of the CSV file handed to ``Database``.
            images_dir: Directory containing the artwork images.
        """
        self.db = Database(csv_path)
        self.images_dir = os.path.abspath(images_dir)
        self.emotion_wheel = EmotionWheel()
        self.weights = ScoringWeights()

        # Only used for its place_similarity() helper.
        self.optimizer_helper = WeightedLeximaxOptimizer(TargetProfile(), {})

        self.image_indexer = ImageIndexer(images_dir)

        # Per-instance memo for emotion similarities. A functools.lru_cache on
        # the method would key on ``self`` and keep every instance alive.
        self._emotion_similarity_cache: Dict[Tuple[str, str], float] = {}

        df = self.db.get_dataframe()
        self.df_with_images = df[
            df["name_image"].notna()
            & (df["name_image"] != "")
            & (df["name_image"].str.strip() != "")
        ].copy()

        self.df_with_images["database_id_str"] = self.df_with_images[
            "database_id"
        ].astype(str)
        self.id_to_index = {
            str(row["database_id"]): idx for idx, row in self.df_with_images.iterrows()
        }

        self.artwork_images = self._build_artwork_image_index()

        # Database-like object restricted to the artworks that have an image;
        # __init__ is bypassed so the CSV is not loaded a second time.
        self.temp_db_with_images = Database.__new__(Database)
        self.temp_db_with_images.dataframe = self.df_with_images

        logger.info(f"Base de données chargée: {self.db.n_pieces()} œuvres")
        logger.info(f"Œuvres avec images: {len(self.df_with_images)}")
        logger.info(f"Index des images: {len(self.artwork_images)} œuvres mappées")

    def _sanitize_input(self, input_str: str) -> str:
        """Clean a user-supplied string via ``SecurityValidator``."""
        return SecurityValidator.sanitize_input(input_str)

    def _parse_date(self, date_str: str) -> Optional[datetime]:
        """Return the parsed date, or None when ``date_str`` is invalid."""
        is_valid, date_obj = SecurityValidator.validate_date(date_str)
        return date_obj if is_valid else None

    @staticmethod
    def _parse_image_names(raw_value: Any) -> List[str]:
        """Split the ``name_image`` cell of one row into individual image names."""
        if not (raw_value and str(raw_value).strip()):
            return []

        image_string = str(raw_value).strip().strip('"')

        # "a / b": only the part before the slash is kept.
        if " / " in image_string:
            image_string = image_string.split(" / ")[0].strip()

        if " - 2022" in image_string:
            # " - 2022..." is a separator followed by a repeated id, not part
            # of the name.
            image_string = image_string.split(" - 2022")[0].strip()
        elif " - " in image_string and "MAR-BVM-7-2022-0-" not in image_string:
            parts = image_string.split(" - ")
            if "MAR-BVM" in parts[0]:
                image_string = parts[0].strip()

        image_string = re.sub(r"\s+-\s*$", "", image_string)  # trailing " -"
        image_string = re.sub(r"\s+(-\d)", r"\1", image_string)  # "x -001" -> "x-001"

        return [
            img.strip() for img in re.split(r"[,/]", image_string) if img.strip()
        ]

    def _build_artwork_image_index(self) -> Dict[str, List[str]]:
        """Build the ``artwork id -> [image paths]`` table.

        Artworks for which no image file can be resolved are left out.
        """
        artwork_images = {}

        for _, row in self.df_with_images.iterrows():
            image_paths = []
            for img_name in self._parse_image_names(row["name_image"]):
                matched_file = self.image_indexer.find_image(img_name)
                if matched_file:
                    image_paths.append(os.path.join(self.images_dir, matched_file))

            if image_paths:
                artwork_images[str(row["database_id"])] = image_paths

        return artwork_images

    def preselect_artworks(
        self, firstname: str, birthday: str, city: str
    ) -> pd.DataFrame:
        """Pre-select artworks using the hierarchy first name > date > city.

        Returns:
            The optimizer rows whose score is above ``(0, 0, 0)``; when fewer
            than ``MIN_PRESELECTION_COUNT`` qualify, the first
            ``MIN_PRESELECTION_COUNT`` rows of the optimizer result instead.
        """
        logger.info("=== DÉBUT PRÉ-SÉLECTION ===")

        firstname = self._sanitize_input(firstname)
        city = self._sanitize_input(city)

        logger.info(
            f"Critères de pré-sélection: prénom='{firstname}', date='{birthday}', ville='{city}'"
        )

        birth_date = self._parse_date(birthday)
        if birth_date:
            logger.info(f"Date convertie: {birth_date.strftime('%d/%m')}")

        profile = TargetProfile()
        profile.set_target_name(firstname)
        profile.set_target_date(birth_date)
        profile.set_target_place(city)

        weights = {
            "related_names": self.weights.PRESELECTION_NAME_WEIGHT,
            "related_dates": self.weights.PRESELECTION_DATE_WEIGHT,
            "related_places": self.weights.PRESELECTION_PLACE_WEIGHT,
            "related_emotions": self.weights.PRESELECTION_EMOTION_WEIGHT,
        }

        logger.info(
            f"Poids utilisés: nom={weights['related_names']}, date={weights['related_dates']}, lieu={weights['related_places']}, émotions={weights['related_emotions']}"
        )

        optimizer = WeightedLeximaxOptimizer(profile, weights)
        result = optimizer.optimize_max(self.temp_db_with_images)

        # NOTE(review): assumes the "score" column supports comparison with a
        # 3-tuple -- depends on the optimizer's score type, confirm.
        preselected = result[result["score"] > (0, 0, 0)]
        logger.info(f"Œuvres avec score > 0: {len(preselected)}")

        if len(preselected) < self.weights.MIN_PRESELECTION_COUNT:
            preselected = result.head(self.weights.MIN_PRESELECTION_COUNT)
            logger.info(f"Ajustement au minimum requis: {len(preselected)} œuvres")

        logger.info("Top 5 pré-sélections:")
        for rank, (_, piece) in enumerate(preselected.head(5).iterrows(), start=1):
            logger.info(
                f" {rank}. Œuvre #{piece['database_id']} - Score: {piece['score']}"
            )
            if firstname and piece["related_names"]:
                name_score = Optimizer.name_similarity(
                    firstname, piece["related_names"]
                )
                if name_score > 0:
                    logger.info(
                        f" → Nom: {piece['related_names']} (score: {name_score:.2f})"
                    )
            if birth_date and piece["related_dates"]:
                date_score = Optimizer.date_similarity(
                    birth_date, piece["related_dates"]
                )
                if date_score > 0:
                    logger.info(
                        f" → Dates: {[d.strftime('%d/%m') for d in piece['related_dates']]} (score: {date_score:.2f})"
                    )
            if city and piece["related_places"]:
                place_score = self.optimizer_helper.place_similarity(
                    city, piece["related_places"]
                )
                if place_score > 0:
                    logger.info(
                        f" → Lieux: {piece['related_places']} (score: {place_score:.2f})"
                    )

        logger.info("=== FIN PRÉ-SÉLECTION ===")
        return preselected

    def get_random_images_for_selection(
        self, round_num: int, already_selected: Optional[List[str]] = None
    ) -> List[Tuple[str, str]]:
        """Pick random images for one selection round.

        Args:
            round_num: Round number, used for logging only.
            already_selected: Artwork ids to exclude from the draw.

        Returns:
            Up to ``MAX_IMAGES_PER_SELECTION`` ``(image_path, artwork_id)``
            pairs. When too few artworks remain, raw ``.jpg`` files from the
            index are returned with the placeholder id ``"0"``.
        """
        logger.info(f"=== SÉLECTION D'IMAGES POUR LE TOUR {round_num} ===")

        available_artworks = list(self.artwork_images)

        if already_selected:
            logger.info(f"Œuvres déjà sélectionnées à exclure: {already_selected}")
            already_selected_set = set(already_selected)
            available_artworks = [
                a for a in available_artworks if a not in already_selected_set
            ]

        logger.info(
            f"Nombre total d'œuvres avec images disponibles: {len(available_artworks)}"
        )

        max_images = self.weights.MAX_IMAGES_PER_SELECTION
        if len(available_artworks) < max_images:
            logger.warning(
                f"Seulement {len(available_artworks)} œuvres avec images disponibles"
            )
            direct_images = [
                (os.path.join(self.images_dir, filename), "0")
                for filename in list(self.image_indexer.get_all_files())[:10]
                if filename.endswith(".jpg")
            ]
            return direct_images[:max_images]

        selected_artworks = random.sample(available_artworks, max_images)

        logger.info(f"Œuvres sélectionnées aléatoirement: {selected_artworks}")

        selected = []
        for artwork_id in selected_artworks:
            # One image is drawn at random when an artwork has several.
            img_path = random.choice(self.artwork_images[artwork_id])
            selected.append((img_path, artwork_id))
            if artwork_id in self.id_to_index:
                artwork = self.df_with_images.loc[self.id_to_index[artwork_id]]
                logger.info(f" Image {len(selected)}: Œuvre #{artwork_id}")
                logger.info(f" Type: {artwork['art_piece_type']}")
                logger.info(f" Émotions: {artwork['related_emotions']}")

        logger.info(f"=== FIN SÉLECTION IMAGES TOUR {round_num} ===")
        return selected

    def extract_emotions_from_image_id(self, database_id: str) -> List[str]:
        """Return the emotions of an artwork, or [] when the id is unknown.

        A ``related_emotions`` value that is not a list also yields [].
        """
        if database_id in self.id_to_index:
            idx = self.id_to_index[database_id]
            emotions = self.df_with_images.loc[idx, "related_emotions"]
            if isinstance(emotions, list):
                return emotions
        return []

    def _cached_emotion_similarity(self, emotion1: str, emotion2: str) -> float:
        """Return the emotion-wheel similarity of two emotions, memoized per instance."""
        key = (emotion1, emotion2)
        try:
            return self._emotion_similarity_cache[key]
        except KeyError:
            value = self.emotion_wheel.calculate_emotion_similarity(emotion1, emotion2)
            self._emotion_similarity_cache[key] = value
            return value

    def calculate_emotion_profile(self, selected_ids: List[str]) -> Dict[str, float]:
        """Return the relative frequency of each emotion across the selected artworks.

        The result is empty when none of the selected artworks has emotions.
        """
        logger.info("=== CALCUL DU PROFIL ÉMOTIONNEL ===")
        logger.info(f"Images sélectionnées: {selected_ids}")

        emotion_counter = Counter()

        for db_id in selected_ids:
            emotions = self.extract_emotions_from_image_id(db_id)
            logger.info(f" Image {db_id}: émotions = {emotions}")
            emotion_counter.update(emotions)

        total = sum(emotion_counter.values())
        if total > 0:
            emotion_profile = {
                emotion: count / total for emotion, count in emotion_counter.items()
            }
            logger.info(f"Profil émotionnel calculé: {emotion_profile}")
        else:
            emotion_profile = {}
            logger.info("Aucune émotion trouvée dans les images sélectionnées")

        logger.info("=== FIN CALCUL PROFIL ÉMOTIONNEL ===")
        return emotion_profile

    def _get_artwork_image(self, artwork) -> Optional[str]:
        """Return the first indexed image path of ``artwork``, or None."""
        paths = self.artwork_images.get(str(artwork["database_id"]))
        return paths[0] if paths else None

    @staticmethod
    def _build_match_info(piece) -> Dict:
        """Build the result dictionary describing a matched artwork."""
        return {
            "title": f"Œuvre #{piece['database_id']}",
            "type": piece["art_piece_type"],
            "place": piece["art_piece_place"],
            "emotions": piece["related_emotions"],
            "explanation": piece["explanation"],
        }

    def _find_exact_match(
        self, preselected, firstname, birthday, birth_date, city
    ) -> Optional[Tuple[Optional[str], str, Dict]]:
        """Return the first pre-selected artwork matching name, date or city exactly."""
        logger.info("=== DÉTECTION DE MATCH EXACT ===")
        for _, piece in preselected.iterrows():
            if firstname and piece["related_names"]:
                name_score = Optimizer.name_similarity(
                    firstname, piece["related_names"]
                )
                if name_score >= 0.95:
                    logger.info(
                        f"🎯 MATCH EXACT TROUVÉ: prénom '{firstname}' → œuvre #{piece['database_id']} (score: {name_score:.2f})"
                    )
                    logger.info(f" Noms dans l'œuvre: {piece['related_names']}")
                    return (
                        self._get_artwork_image(piece),
                        f"Prénom '{firstname}' correspond exactement",
                        self._build_match_info(piece),
                    )

            if birth_date and piece["related_dates"]:
                date_score = Optimizer.date_similarity(
                    birth_date, piece["related_dates"]
                )
                if date_score == 1.0:
                    logger.info(
                        f"🎯 MATCH EXACT TROUVÉ: date '{birthday}' → œuvre #{piece['database_id']}"
                    )
                    logger.info(
                        f" Dates dans l'œuvre: {[d.strftime('%d/%m/%Y') for d in piece['related_dates']]}"
                    )
                    return (
                        self._get_artwork_image(piece),
                        f"Date d'anniversaire {birthday} correspond exactement",
                        self._build_match_info(piece),
                    )

            if city and piece["related_places"]:
                place_score = self.optimizer_helper.place_similarity(
                    city, piece["related_places"]
                )
                if place_score == 1.0:
                    logger.info(
                        f"🎯 MATCH EXACT TROUVÉ: ville '{city}' → œuvre #{piece['database_id']}"
                    )
                    logger.info(f" Lieux dans l'œuvre: {piece['related_places']}")
                    return (
                        self._get_artwork_image(piece),
                        f"Ville '{city}' correspond exactement",
                        self._build_match_info(piece),
                    )

        return None

    def _score_emotions(self, piece_emotions, emotion_profile: Dict[str, float]):
        """Score an artwork's emotions against the visitor's emotion profile.

        For every profile emotion the best similarity with any artwork emotion
        is weighted by the profile frequency; the sum is divided by the number
        of artwork emotions.
        """
        if not (emotion_profile and piece_emotions):
            return 0

        score = 0
        for user_emotion, weight in emotion_profile.items():
            best_similarity = 0
            for piece_emotion in piece_emotions:
                similarity = self._cached_emotion_similarity(
                    user_emotion, piece_emotion
                )
                if similarity > best_similarity:
                    best_similarity = similarity
            score += best_similarity * weight

        return score / len(piece_emotions)

    def _break_tie_by_type(self, best_matches, selected_image_ids: List[str]):
        """Choose among equally scored artworks using the visitor's preferred types.

        Returns:
            ``(artwork, reason)``; a random pick is made when the type count
            does not separate the candidates.
        """
        logger.info("=== DÉPARTAGE PAR TYPE D'OBJET ===")
        selected_types_counter = Counter(
            self.df_with_images.loc[self.id_to_index[img_id], "art_piece_type"]
            for img_id in selected_image_ids
            if img_id in self.id_to_index
        )

        type_scored_matches = []
        best_type_score = -1
        for piece in best_matches:
            type_score = selected_types_counter.get(piece["art_piece_type"], 0)
            if type_score > best_type_score:
                best_type_score = type_score
                type_scored_matches = [piece]
            elif type_score == best_type_score:
                type_scored_matches.append(piece)

        if len(type_scored_matches) > 1:
            logger.info(
                f" {len(type_scored_matches)} œuvres avec le même score de type ({best_type_score}) - sélection aléatoire"
            )
            return (
                random.choice(type_scored_matches),
                "Sélection aléatoire parmi les meilleures correspondances",
            )

        best_match = type_scored_matches[0]
        logger.info(
            f" Type '{best_match['art_piece_type']}' sélectionné avec score {best_type_score}"
        )
        return best_match, f"Type d'objet '{best_match['art_piece_type']}' préféré"

    def find_best_match(
        self, firstname: str, birthday: str, city: str, selected_image_ids: List[str]
    ) -> Tuple[Optional[str], str, Dict]:
        """Find the artwork that best fits the visitor.

        Strategy, in order:
        1. An exact name/date/city match wins immediately.
        2. If a pre-selection exists, emotions rank its artworks.
        3. Otherwise emotions rank every artwork that has an image.
        4. Ties are broken by the artwork type the visitor picked most.

        Returns:
            ``(image_path_or_None, reason, info_dict)``; ``(None, message, {})``
            when no candidate is left.
        """
        firstname = self._sanitize_input(firstname)
        city = self._sanitize_input(city)
        birth_date = self._parse_date(birthday)

        logger.info(
            f"Recherche de correspondance pour: {firstname}, {birthday}, {city}"
        )

        preselected = self.preselect_artworks(firstname, birthday, city)

        exact_match = self._find_exact_match(
            preselected, firstname, birthday, birth_date, city
        )
        if exact_match is not None:
            return exact_match

        logger.info("Aucun match exact trouvé, passage à la sélection par émotions")

        emotion_profile = self.calculate_emotion_profile(selected_image_ids)

        logger.info("=== STRATÉGIE DE MATCHING ===")
        valid_preselection = preselected[preselected["score"] > (0, 0, 0)]

        if len(valid_preselection) > 0:
            logger.info(
                f"📋 CAS A: {len(valid_preselection)} œuvres pré-sélectionnées - utilisation des émotions pour départager"
            )
            candidates = valid_preselection
        else:
            logger.info(
                f"📋 CAS B: Aucune pré-sélection valide - recherche par émotions sur {len(self.df_with_images)} œuvres"
            )
            candidates = self.df_with_images

        # Never recommend an artwork the visitor has already picked.
        selected_artwork_ids = set(selected_image_ids)
        candidates = candidates[
            ~candidates["database_id"].astype(str).isin(selected_artwork_ids)
        ]
        logger.info(
            f"Après exclusion des œuvres déjà sélectionnées {selected_artwork_ids}: {len(candidates)} candidats restants"
        )

        logger.info("=== CALCUL DES SCORES ÉMOTIONNELS ===")
        best_matches = []
        best_emotion_score = -1

        for _, piece in candidates.iterrows():
            emotion_score = self._score_emotions(
                piece["related_emotions"], emotion_profile
            )

            if emotion_score > best_emotion_score:
                best_emotion_score = emotion_score
                best_matches = [piece]
                logger.info(
                    f" Nouveau meilleur score émotionnel: {emotion_score:.3f} - Œuvre #{piece['database_id']}"
                )
            elif emotion_score == best_emotion_score and emotion_score > 0:
                # Zero-score ties are not collected: the first candidate stays.
                best_matches.append(piece)
                logger.info(
                    f" Score égal au meilleur: {emotion_score:.3f} - Œuvre #{piece['database_id']}"
                )

        logger.info(
            f"Nombre de meilleures correspondances: {len(best_matches)} avec score {best_emotion_score:.3f}"
        )

        if not best_matches:
            logger.info("Aucune correspondance trouvée")
            return None, "Aucune correspondance trouvée", {}

        if len(best_matches) > 1:
            best_match, match_reason = self._break_tie_by_type(
                best_matches, selected_image_ids
            )
        else:
            best_match = best_matches[0]
            match_reason = "Meilleure correspondance émotionnelle"

        reasons = []
        if len(valid_preselection) > 0:
            if firstname and best_match["related_names"]:
                name_score = Optimizer.name_similarity(
                    firstname, best_match["related_names"]
                )
                if name_score > 0:
                    reasons.append(f"prénom '{firstname}' trouvé")

            if birth_date and best_match["related_dates"]:
                date_score = Optimizer.date_similarity(
                    birth_date, best_match["related_dates"]
                )
                if date_score > 0:
                    reasons.append(
                        f"date {'exacte' if date_score == 1.0 else 'partielle'}"
                    )

            if city and best_match["related_places"]:
                place_score = self.optimizer_helper.place_similarity(
                    city, best_match["related_places"]
                )
                if place_score > 0:
                    reasons.append(f"ville '{city}' trouvée")

        if best_emotion_score > 0:
            reasons.append(
                f"correspondance émotionnelle (score: {best_emotion_score:.2f})"
            )

        if not reasons:
            reasons.append(match_reason)

        final_reason = " ; ".join(reasons)

        logger.info(f"\n🏆 RÉSULTAT FINAL: Œuvre #{best_match['database_id']}")
        logger.info(f" Raison: {final_reason}")
        logger.info(f" Type: {best_match['art_piece_type']}")
        logger.info(f" Lieu: {best_match['art_piece_place']}")

        return (
            self._get_artwork_image(best_match),
            final_reason,
            self._build_match_info(best_match),
        )
922
-
923
-
924
# Input locations for the collection data. Both are relative paths, so they
# resolve against the process's current working directory.
csv_path = "PP1-Collection_Database_new-cleaned.csv"
images_dir = "pictures_data"

# Missing inputs are only logged here; execution continues and the matcher is
# still constructed with the same paths.
if not os.path.exists(csv_path):
    logger.error(f"Fichier CSV introuvable: {csv_path}")
if not os.path.exists(images_dir):
    logger.error(f"Répertoire images introuvable: {images_dir}")

# Module-level matcher instance used by the callback functions below.
matcher = ArtMatcherV2(csv_path, images_dir)
933
-
934
-
935
def process_user_info(firstname: str, birthday: str, city: str, state: SessionState):
    """Record the visitor's details on the session and choose the next screen.

    The first name and city go through ``SecurityValidator.sanitize_input``;
    the birthday is stored as received and then checked with
    ``SecurityValidator.validate_date``. All three values are written to
    ``state`` before any check runs, so they are stored even when the
    submission is rejected.

    Returns:
        A 5-tuple ``(info_update, selection_update, results_update, message,
        state)``: three ``gr.update`` visibility values, a French status
        message, and the same ``state`` object that was passed in.
    """
    clean_firstname = SecurityValidator.sanitize_input(firstname)
    clean_city = SecurityValidator.sanitize_input(city)

    state.firstname = clean_firstname
    state.birthday = birthday
    state.city = clean_city

    def screen(show_info: bool, status: str):
        # Either the info form or the selection step is visible, never both;
        # the results section stays hidden at this stage.
        return (
            gr.update(visible=show_info),
            gr.update(visible=not show_info),
            gr.update(visible=False),
            status,
            state,
        )

    # First name and birthday are mandatory; the city may be left empty.
    if not (clean_firstname and birthday):
        return screen(
            True, "Veuillez remplir au moins votre prénom et date de naissance."
        )

    date_ok, _details = SecurityValidator.validate_date(birthday)
    if not date_ok:
        return screen(True, "Format de date invalide. Utilisez JJ/MM (ex: 15/03)")

    return screen(
        False, "Informations enregistrées ! Passons à la sélection d'images."
    )
970
-
971
-
972
def load_images_for_round(round_num: int, state: SessionState):
    """Fetch the candidate images offered in one selection round.

    Args:
        round_num: Zero-based round index (displayed as ``round_num + 1``).
        state: Session state. ``selected_images`` is passed to the matcher,
            and ``current_image_ids`` is overwritten with the ids on offer.

    Returns:
        ``(images, ids, message, state)``. When fewer than
        ``ScoringWeights.MAX_IMAGES_PER_SELECTION`` images are available,
        ``images`` is three ``None`` placeholders, ``ids`` is empty, the
        message reports the shortfall and ``state.current_image_ids`` is
        cleared.
    """
    images_data = matcher.get_random_images_for_selection(
        round_num, state.selected_images
    )

    if len(images_data) < ScoringWeights.MAX_IMAGES_PER_SELECTION:
        logger.warning(f"Seulement {len(images_data)} images disponibles")
        # Forget the previous round's ids: nothing is displayed now, so a
        # later selection must not resolve to an image the user cannot see.
        state.current_image_ids = []
        return (
            [None, None, None],
            [],
            f"Pas assez d'images disponibles (seulement {len(images_data)} trouvées)",
            state,
        )

    # Each entry carries the image at index 0 and its id at index 1.
    images = [entry[0] for entry in images_data]
    ids = [entry[1] for entry in images_data]

    state.current_image_ids = ids

    return (
        images,
        ids,
        f"Tour {round_num + 1}/{ScoringWeights.TOTAL_ROUNDS} : Sélectionnez l'image qui vous attire le plus",
        state,
    )
998
-
999
-
1000
def select_image(choice: Optional[int], state: SessionState):
    """Register the user's pick for the current round and advance the session.

    Args:
        choice: Index of the chosen image within ``state.current_image_ids``,
            or ``None`` when nothing was selected.
        state: Session state; on success the chosen id is appended to
            ``selected_images`` and ``current_round`` is incremented.

    Returns:
        A 6-tuple ``(img1_update, img2_update, img3_update, choice_update,
        message, state)``. The four updates are empty ``gr.update()`` values
        unless a new round of images was loaded.
    """

    def unchanged(status: str):
        # Leave all four widgets untouched and only report a status message.
        return (gr.update(), gr.update(), gr.update(), gr.update(), status, state)

    if choice is None:
        return unchanged("Veuillez sélectionner une image")

    offered_ids = state.current_image_ids
    if not offered_ids or choice >= len(offered_ids):
        return unchanged("Erreur: image non trouvée")

    selected_id = offered_ids[choice]
    state.selected_images.append(selected_id)
    state.current_round += 1

    logger.info(
        f"Tour {state.current_round}: Image {choice+1} sélectionnée (ID: {selected_id})"
    )

    # All rounds played: keep the widgets as they are and announce the result.
    if state.current_round >= ScoringWeights.TOTAL_ROUNDS:
        return unchanged(
            "Sélection terminée ! Calcul de votre œuvre correspondante..."
        )

    new_images, _new_ids, message, state = load_images_for_round(
        state.current_round, state
    )
    first, second, third = new_images[0], new_images[1], new_images[2]
    return (
        gr.update(value=first),
        gr.update(value=second),
        gr.update(value=third),
        gr.update(value=None),  # clear the radio selection for the next round
        message,
        state,
    )
1052
-
1053
-
1054
def show_results(state: SessionState):
    """Compute the final match and build the values for the results screen.

    When ``state.is_complete()`` is false the selection section is kept
    visible and empty result values are returned. Otherwise
    ``matcher.find_best_match`` is called with the stored first name,
    birthday, city and selected image ids.

    The details text is shown whenever the matcher returned a non-empty
    ``info`` dict, even if no image accompanies the match. The emotions field
    is accepted either as a single string or as a collection of values.

    Returns:
        A 6-tuple ``(info_update, selection_update, results_update, image,
        title, explanation)``.
    """
    if not state.is_complete():
        return (
            gr.update(visible=False),
            gr.update(visible=True),
            gr.update(visible=False),
            None,
            "",
            "",
        )

    match_image, reason, info = matcher.find_best_match(
        state.firstname,
        state.birthday,
        state.city,
        state.selected_images,
    )

    if info:
        # str.join on a plain string would interleave ", " between its
        # characters, so strings are used as-is and collections are joined.
        emotions = info.get("emotions")
        if isinstance(emotions, str):
            emotions_text = emotions.strip()
        elif isinstance(emotions, (list, tuple, set, frozenset)):
            emotions_text = ", ".join(str(emotion) for emotion in emotions)
        else:
            emotions_text = ""

        # Built line by line so the Markdown never depends on how the source
        # file happens to be indented.
        explanation = "\n".join(
            [
                "**Votre œuvre correspondante a été trouvée !**",
                "",
                f"**Raison du match :** {reason}",
                "",
                "**Détails de l'œuvre :**",
                f"- Type : {info.get('type', 'Non spécifié')}",
                f"- Lieu : {info.get('place', 'Non spécifié')}",
                f"- Émotions : {emotions_text or 'Non spécifiées'}",
                "",
                "**Description :**",
                f"{info.get('explanation', 'Aucune description disponible')}",
            ]
        )
    else:
        explanation = "Désolé, aucune œuvre correspondante n'a pu être trouvée."

    return (
        gr.update(visible=False),
        gr.update(visible=False),
        gr.update(visible=True),
        match_image,
        info.get("title", "Œuvre non trouvée"),
        explanation,
    )
1098
-
1099
-
1100
# UI definition: three sections (info form, image selection, results) whose
# visibility is toggled by the callbacks wired below.
with gr.Blocks(
    title="Art Matcher",
    theme=gr.themes.Soft(
        primary_hue="teal", secondary_hue="teal", neutral_hue="zinc"
    ),
) as demo:
    gr.Markdown(
        """
    # 🎨 Art Matcher
    ### Découvrez l'œuvre d'art qui vous correspond !

    Cette application utilise vos informations personnelles et vos préférences visuelles
    pour trouver l'œuvre d'art qui vous correspond le mieux dans notre collection.
    """
    )

    # State holder passed as input and output to every callback below.
    session_state = gr.State(SessionState())

    # Step 1: personal details form (the only section visible at start).
    with gr.Group(visible=True) as info_section:
        gr.Markdown("### Étape 1 : Vos informations")
        with gr.Row():
            firstname_input = gr.Textbox(
                label="Prénom", placeholder="Entrez votre prénom", max_lines=1
            )
            birthday_input = gr.Textbox(
                label="Date d'anniversaire (JJ/MM)",
                placeholder="Ex: 25/12",
                max_lines=1,
            )
            city_input = gr.Textbox(
                label="Ville de résidence", placeholder="Ex: Paris", max_lines=1
            )

        submit_info_btn = gr.Button("Valider mes informations", variant="primary")

    # Step 2: three candidate images and a radio button to pick one.
    with gr.Group(visible=False) as selection_section:
        selection_title = gr.Markdown("### Étape 2 : Sélection d'images")

        with gr.Row():
            img1 = gr.Image(label="Image 1", type="filepath", height=300)
            img2 = gr.Image(label="Image 2", type="filepath", height=300)
            img3 = gr.Image(label="Image 3", type="filepath", height=300)

        # type="index": the callback receives the position of the chosen
        # option, which select_image uses to index state.current_image_ids.
        image_choice = gr.Radio(
            choices=["Image 1", "Image 2", "Image 3"],
            label="Quelle image vous attire le plus ?",
            type="index",
        )

        select_btn = gr.Button("Valider mon choix", variant="primary")

    # Step 3: final recommendation.
    with gr.Group(visible=False) as results_section:
        gr.Markdown("### Votre œuvre correspondante")

        with gr.Row():
            with gr.Column(scale=1):
                result_image = gr.Image(label="Votre œuvre", height=400)
                result_title = gr.Markdown("## Titre de l'œuvre")

            with gr.Column(scale=1):
                result_explanation = gr.Markdown("")

        restart_btn = gr.Button("Recommencer", variant="secondary")

    # Shared status line written by every callback.
    status_message = gr.Markdown("")

    def on_info_submit(firstname, birthday, city, state):
        """Validate the form and, on success, load the first round of images.

        Returns 8 values in the order of the ``outputs`` list of
        ``submit_info_btn.click``: three section updates, three image values,
        the status message and the state.
        """
        # Start from a clean session on every submission.
        state.reset()

        info_vis, select_vis, results_vis, message, state = process_user_info(
            firstname, birthday, city, state
        )

        # process_user_info makes the selection section visible only when the
        # input passed validation, so its update doubles as a success flag.
        if select_vis["visible"]:
            images, ids, round_message, state = load_images_for_round(0, state)
            return (
                info_vis,
                select_vis,
                results_vis,
                images[0] if len(images) > 0 else None,
                images[1] if len(images) > 1 else None,
                images[2] if len(images) > 2 else None,
                round_message,
                state,
            )
        else:
            return (info_vis, select_vis, results_vis, None, None, None, message, state)

    submit_info_btn.click(
        fn=on_info_submit,
        inputs=[firstname_input, birthday_input, city_input, session_state],
        outputs=[
            info_section,
            selection_section,
            results_section,
            img1,
            img2,
            img3,
            status_message,
            session_state,
        ],
    )

    def on_image_select(choice, state):
        """Forward the pick to ``select_image`` and leave the sections as-is.

        Returns 9 values in the order of the ``outputs`` list of
        ``select_btn.click``.
        """
        (img1_update, img2_update, img3_update, choice_update, message, state) = (
            select_image(choice, state)
        )

        return (
            gr.update(),  # info_section
            gr.update(),  # selection_section
            gr.update(),  # results_section
            img1_update,  # img1
            img2_update,  # img2
            img3_update,  # img3
            choice_update,  # image_choice
            message,  # status_message
            state,
        )

    def handle_final_results(state):
        """Show the results once the session is complete; otherwise change nothing."""
        if state.is_complete():
            return show_results(state)
        else:
            return gr.update(), gr.update(), gr.update(), None, "", ""

    # Two chained steps per click: record the selection, then check whether
    # the session is complete and, if so, display the results.
    select_btn.click(
        fn=on_image_select,
        inputs=[image_choice, session_state],
        outputs=[
            info_section,
            selection_section,
            results_section,
            img1,
            img2,
            img3,
            image_choice,
            status_message,
            session_state,
        ],
    ).then(
        fn=handle_final_results,
        inputs=[session_state],
        outputs=[
            info_section,
            selection_section,
            results_section,
            result_image,
            result_title,
            result_explanation,
        ],
    )

    def restart_app(state):
        """Reset the session and return to the empty info form.

        Returns 9 values in the order of the ``outputs`` list of
        ``restart_btn.click``: section visibilities, cleared text inputs,
        cleared radio choice, status message and state.
        """
        state.reset()

        return (
            gr.update(visible=True),
            gr.update(visible=False),
            gr.update(visible=False),
            "",
            "",
            "",
            None,
            "Application réinitialisée. Veuillez entrer vos informations.",
            state,
        )

    restart_btn.click(
        fn=restart_app,
        inputs=[session_state],
        outputs=[
            info_section,
            selection_section,
            results_section,
            firstname_input,
            birthday_input,
            city_input,
            image_choice,
            status_message,
            session_state,
        ],
    )
1283
-
1284
-
1285
# Launch the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()