Aurel-test committed on
Commit
6d62fc9
·
verified ·
1 Parent(s): 3ecc91d

Delete app_old.py

Browse files
Files changed (1) hide show
  1. app_old.py +0 -1573
app_old.py DELETED
@@ -1,1573 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- Web Demo v2 pour la base de données d'œuvres d'art - Version Sécurisée et Optimisée
4
- Interface multi-étapes avec matching basé sur prénom, date, ville et émotions
5
- Optimisé pour les performances avec caching et indexation
6
- Version sécurisée avec validation des entrées et gestion d'état propre
7
- """
8
-
9
- import gradio as gr
10
- import os
11
- import sys
12
- import logging
13
- from logging.handlers import RotatingFileHandler
14
- import random
15
- import re
16
- import json
17
- import uuid
18
- import time
19
- from datetime import datetime
20
- from typing import List, Dict, Tuple, Optional, Any, Set
21
- from collections import Counter, defaultdict
22
- from functools import lru_cache
23
- from dataclasses import dataclass, field, asdict
24
- from pathlib import Path
25
- import pandas as pd
26
-
27
# Main logging configuration
logging.basicConfig(
    level=logging.INFO,
    format="[%(asctime)s] %(levelname)s: %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)
logger = logging.getLogger(__name__)

# Optional dependency used to persist logs when running on HF Spaces
try:
    from huggingface_hub import CommitScheduler
    HF_HUB_AVAILABLE = True
except ImportError:
    HF_HUB_AVAILABLE = False
    logger.warning("huggingface_hub non installé - Les logs ne seront pas sauvegardés dans un dataset HF")

# Session logging configuration
SESSION_LOG_FILE = "session_logs.jsonl"
STATS_LOG_FILE = "statistics.json"

# HF dataset used for persistence (override through the environment)
HF_DATASET_ID = os.environ.get("HF_DATASET_ID", "ClickMons/art-matcher-logs")  # replace with your own dataset
HF_TOKEN = os.environ.get("HF_TOKEN", None)  # HF token used for authentication
LOGS_UPLOAD_INTERVAL = 1  # upload every minute

# Local rotating file handler for the session logs.
# exist_ok=True avoids the check-then-create race of a separate os.path.exists() test.
os.makedirs("logs", exist_ok=True)

session_file_handler = RotatingFileHandler(
    filename=os.path.join("logs", SESSION_LOG_FILE),
    maxBytes=10 * 1024 * 1024,  # 10MB
    backupCount=5,
    encoding='utf-8',
)
session_file_handler.setLevel(logging.INFO)
session_logger = logging.getLogger('session_logger')
session_logger.addHandler(session_file_handler)
session_logger.setLevel(logging.INFO)

# Make the bundled "src" directory importable for the project packages
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "src"))
68
-
69
- from art_pieces_db.database import Database
70
- from art_pieces_db.query import TargetProfile, WeightedLeximaxOptimizer, Optimizer
71
- from art_pieces_db.emotions import EmotionWheel
72
- from art_pieces_db.utils import str_to_date
73
-
74
-
75
@dataclass
class ScoringWeights:
    """Centralizes every scoring constant so no magic numbers are scattered in the code."""

    # Per-criterion weights used when building the pre-selection weight mapping
    PRESELECTION_NAME_WEIGHT: float = 3.0
    PRESELECTION_DATE_WEIGHT: float = 1.0
    PRESELECTION_PLACE_WEIGHT: float = 2.0
    PRESELECTION_EMOTION_WEIGHT: float = 0.0

    # Lower bound on the number of pre-selected pieces
    MIN_PRESELECTION_COUNT: int = 20
    MAX_IMAGES_PER_SELECTION: int = 3  # number of images per selection
    TOTAL_ROUNDS: int = 3  # number of rounds before the final recommendation
87
-
88
-
89
@dataclass
class SessionState:
    """Holds the state tracked for a single visitor session."""

    # Answers typed in by the visitor
    firstname: str = ""
    birthday: str = ""
    city: str = ""

    # Progress through the image-selection rounds
    current_round: int = 0
    selected_images: List[str] = field(default_factory=list)
    current_image_ids: List[str] = field(default_factory=list)

    preselected_pieces: Optional[pd.DataFrame] = None

    # Tracking information
    session_id: str = field(default_factory=lambda: str(uuid.uuid4()))
    session_start_time: float = field(default_factory=time.time)
    recommendation_type: str = ""  # "name_date_place" or "emotions"
    final_artwork: str = ""

    def reset(self):
        """Restore every field to its default, with a new session id and start time."""
        pristine = SessionState()
        for attr_name, attr_value in vars(pristine).items():
            setattr(self, attr_name, attr_value)

    def is_complete(self) -> bool:
        """Tell whether all selection rounds have been played."""
        return ScoringWeights.TOTAL_ROUNDS <= self.current_round
126
-
127
-
128
class SessionLogger:
    """Record finished sessions and aggregated statistics, optionally synced to a HF dataset.

    Each finished session is appended as one JSON line; the statistics live in
    a single JSON file that is re-read and rewritten for every session. Every
    file access goes through one lock (the CommitScheduler's lock when a
    scheduler is active, a private lock otherwise) so concurrent sessions
    cannot interleave a read-modify-write cycle and lose updates.
    """

    def __init__(self):
        """Pick the log directory and, when configured, start the HF CommitScheduler."""
        import threading  # local import: only needed for the fallback lock

        # Environment detection
        is_hf_space = os.environ.get('SPACE_ID') or os.environ.get('SPACE_HOST')

        self.scheduler = None
        self.use_hf_dataset = False

        if is_hf_space and HF_HUB_AVAILABLE and HF_TOKEN:
            # On HF Spaces with huggingface_hub installed
            self.logs_dir = Path("hf_logs_data")
            self.logs_dir.mkdir(exist_ok=True)

            # Start the CommitScheduler for automatic uploads
            try:
                self.scheduler = CommitScheduler(
                    repo_id=HF_DATASET_ID,
                    repo_type="dataset",
                    folder_path=self.logs_dir,
                    path_in_repo="logs",
                    every=LOGS_UPLOAD_INTERVAL,
                    token=HF_TOKEN,
                )
                logger.info(f"CommitScheduler initialisé - Sauvegarde dans {HF_DATASET_ID} toutes les {LOGS_UPLOAD_INTERVAL} minutes")
                self.use_hf_dataset = True
            except Exception as e:
                # Best effort: fall back to plain local files
                logger.error(f"Impossible d'initialiser CommitScheduler: {e}")
        else:
            # Local development, or HF persistence not configured
            self.logs_dir = Path("logs")
            self.logs_dir.mkdir(exist_ok=True)

            if is_hf_space:
                logger.warning("Sur HF Spaces mais CommitScheduler non configuré - Les logs seront éphémères")
                logger.info("Pour activer la persistance, configurez HF_DATASET_ID et HF_TOKEN dans les secrets du Space")

        # One lock for every file access; reuse the scheduler's lock when it
        # exists so uploads never see a partially written file.
        self._lock = self.scheduler.lock if self.scheduler else threading.Lock()

        # Log file locations
        self.session_log_path = self.logs_dir / SESSION_LOG_FILE
        self.stats_log_path = self.logs_dir / STATS_LOG_FILE

        if self.use_hf_dataset:
            # Timestamped session file per instance to avoid conflicts
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            self.session_log_path = self.logs_dir / f"sessions_{timestamp}.jsonl"
            self.stats_log_path = self.logs_dir / "statistics_latest.json"

    @staticmethod
    def _empty_statistics() -> dict:
        """Return the statistics structure used when no statistics file exists yet."""
        return {
            "total_sessions": 0,
            "total_duration_seconds": 0,
            "average_duration_seconds": 0,
            "recommendation_systems_usage": {
                "name_date_place": 0,
                "emotions": 0,
            },
            "artworks_recommended": {},
            "last_updated": None,
        }

    def log_session(self, state: "SessionState", recommendation_system: str):
        """Append the record of a finished session and refresh the statistics.

        Args:
            state: Session whose id, start time and final artwork are recorded.
            recommendation_system: Name of the recommendation system that
                produced the result; stored as ``recommendation_type``.

        Errors are logged and swallowed so logging never breaks the caller.
        """
        session_duration = time.time() - state.session_start_time

        session_data = {
            "session_id": state.session_id,
            "timestamp": datetime.now().isoformat(),
            "duration_seconds": round(session_duration, 2),
            "recommended_artwork": state.final_artwork,
            "recommendation_type": recommendation_system,
        }

        try:
            line = json.dumps(session_data, ensure_ascii=False)

            with self._lock:
                with open(self.session_log_path, 'a', encoding='utf-8') as f:
                    f.write(line + '\n')

            logger.info(f"Session {state.session_id} logged successfully")
            session_logger.info(line)

            # The lock is released here: update_statistics takes it again itself
            self.update_statistics(session_data)

            if self.use_hf_dataset:
                logger.info(f"Session sauvegardée - Upload automatique vers {HF_DATASET_ID} dans max {LOGS_UPLOAD_INTERVAL} minutes")

        except Exception as e:
            logger.error(f"Error logging session: {e}")

    def update_statistics(self, session_data: dict):
        """Fold one session record into the statistics file.

        Args:
            session_data: Mapping with at least ``duration_seconds``,
                ``recommendation_type`` and ``recommended_artwork``.

        The whole load/update/save cycle runs under the lock. Errors are
        logged and swallowed.
        """
        try:
            with self._lock:
                # Load the existing statistics, or start from scratch
                if os.path.exists(self.stats_log_path):
                    with open(self.stats_log_path, 'r', encoding='utf-8') as f:
                        stats = json.load(f)
                else:
                    stats = self._empty_statistics()

                stats["total_sessions"] += 1
                stats["total_duration_seconds"] += session_data["duration_seconds"]
                stats["average_duration_seconds"] = stats["total_duration_seconds"] / stats["total_sessions"]

                # Usage count per recommendation system (unknown systems are ignored)
                usage = stats["recommendation_systems_usage"]
                rec_type = session_data["recommendation_type"]
                if rec_type in usage:
                    usage[rec_type] += 1

                # Count per recommended artwork
                recommended = stats["artworks_recommended"]
                artwork = session_data["recommended_artwork"]
                if artwork:
                    recommended[artwork] = recommended.get(artwork, 0) + 1

                # Most recommended artwork
                if recommended:
                    title, count = max(recommended.items(), key=lambda item: item[1])
                    stats["most_recommended_artwork"] = {"title": title, "count": count}

                # Share of each recommendation system, in percent
                total_recs = sum(usage.values())
                if total_recs > 0:
                    stats["system_utility_percentage"] = {
                        system: (count / total_recs * 100)
                        for system, count in usage.items()
                    }

                stats["last_updated"] = datetime.now().isoformat()

                with open(self.stats_log_path, 'w', encoding='utf-8') as f:
                    json.dump(stats, f, indent=2, ensure_ascii=False)

            logger.info("Global statistics updated")

        except Exception as e:
            logger.error(f"Error updating statistics: {e}")

    def get_statistics(self) -> dict:
        """Return the stored statistics, or an empty dict when missing or unreadable."""
        try:
            with self._lock:
                if os.path.exists(self.stats_log_path):
                    with open(self.stats_log_path, 'r', encoding='utf-8') as f:
                        return json.load(f)
            return {}
        except Exception as e:
            logger.error(f"Error reading statistics: {e}")
            return {}
294
-
295
-
296
# Create the module-level session logger
session_tracker = SessionLogger()
298
-
299
-
300
class SecurityValidator:
    """Central place for the validation applied to file names and user input."""

    PATH_TRAVERSAL_PATTERN = re.compile(r"\.\.|\.\/")
    VALID_FILENAME_PATTERN = re.compile(r"^[\w\-\.\s]+$")
    VALID_INPUT_PATTERN = re.compile(
        r"^[\w\-\s\'\.,àâäéèêëïîôûùüÿæœçÀÂÄÉÈÊËÏÎÔÛÙÜŸÆŒÇ]+$", re.UNICODE
    )
    DATE_PATTERN = re.compile(r"^\d{1,2}/\d{1,2}$")

    @classmethod
    def validate_filename(cls, filename: str) -> bool:
        """Tell whether a file name is considered safe.

        A name is rejected when it is empty, contains ``..`` or ``./``, or
        when its base name holds characters outside word characters, dashes,
        dots and whitespace.
        """
        if not filename:
            return False

        # Path traversal attempts
        if cls.PATH_TRAVERSAL_PATTERN.search(filename):
            logger.warning(f"Tentative de path traversal détectée: {filename}")
            return False

        # Only the base name is checked against the allowed characters
        base_name = os.path.basename(filename)
        if not cls.VALID_FILENAME_PATTERN.match(base_name):
            logger.warning(f"Nom de fichier invalide: {filename}")
            return False

        return True

    @classmethod
    def sanitize_input(cls, input_str: str, max_length: int = 100) -> str:
        """Clean a user-supplied string.

        Args:
            input_str: Raw text; falsy values yield ``""``.
            max_length: Maximum number of characters kept.

        Returns:
            The text trimmed of surrounding whitespace, cut to ``max_length``
            and stripped of every character not allowed by
            ``VALID_INPUT_PATTERN``.
        """
        if not input_str:
            return ""

        # Strip before truncating so leading blanks do not eat into the budget
        input_str = input_str.strip()[:max_length].strip()
        if not input_str:
            return ""

        if cls.VALID_INPUT_PATTERN.match(input_str):
            return input_str

        # Keep only the valid characters; removing characters can expose
        # whitespace at the edges, hence the final strip.
        cleaned = "".join(
            c for c in input_str if cls.VALID_INPUT_PATTERN.match(c)
        ).strip()
        logger.info(f"Input sanitized: '{input_str}' -> '{cleaned}'")
        return cleaned

    @classmethod
    def validate_date(cls, date_str: str) -> Tuple[bool, Optional[datetime]]:
        """Validate and parse a ``DD/MM`` date.

        Surrounding whitespace is ignored. The date is built in year 2000, so
        29/02 is accepted.

        Returns:
            ``(True, datetime)`` on success, ``(False, None)`` otherwise.
        """
        if not date_str:
            return False, None

        date_str = date_str.strip()
        if not cls.DATE_PATTERN.match(date_str):
            return False, None

        try:
            day, month = map(int, date_str.split("/"))
            if not (1 <= day <= 31 and 1 <= month <= 12):
                return False, None

            # Raises ValueError for impossible combinations such as 31/02
            return True, datetime(year=2000, month=month, day=day)
        except ValueError as e:
            logger.error(f"Erreur de parsing de date: {e}")
            return False, None
365
-
366
-
367
class ImageIndexer:
    """Index the image files of a directory and resolve database image names to them.

    File names and database names are reduced to a common normalized form
    (lower case, dashes instead of spaces/underscores, extensions removed)
    so that the differently formatted names can be matched.
    """

    # Constants for better maintainability
    IMAGE_EXTENSIONS = (".jpg", ".png")
    COMMON_SUFFIXES = [".jpg", ".png", "_medium"]
    MAR_BVM_TEST_SUFFIXES = ["-001", "-002", "-003"]

    def __init__(self, images_dir: str):
        """Scan ``images_dir`` and build the lookup tables."""
        self.images_dir = os.path.abspath(images_dir)
        self.available_files = set()
        self.image_lookup = {}  # normalized_name -> filename
        self.mar_bvm_lookup = {}  # MAR-BVM base key -> [filenames]
        self._build_index()

    def _strip_file_extensions(self, filename: str) -> str:
        """Return the lower-cased file name without ``_medium.jpg``/``.jpg``/``.png``."""
        base_name = filename.lower()
        if base_name.endswith("_medium.jpg"):
            return base_name[:-11]
        elif base_name.endswith((".jpg", ".png")):
            return base_name[:-4]
        return base_name

    def _normalize_basic_patterns(self, name: str) -> str:
        """Lower-case, drop known suffixes and turn spaces/underscores into dashes."""
        # Remove trailing comma and normalize whitespace
        normalized = name.lower().strip().rstrip(",")

        # Suffixes are tried in order, so "x_medium.jpg" loses both parts
        for suffix in self.COMMON_SUFFIXES:
            if normalized.endswith(suffix):
                normalized = normalized[: -len(suffix)]

        return re.sub(r"[\s_]+", "-", normalized)

    def _normalize_mar_bvm_format(self, name: str) -> str:
        """Replace the dots of a MAR-BVM name with dashes; other names are returned as is."""
        if "mar-bvm" not in name:
            return name

        # Replace .0. with -0- and remaining dots with dashes
        return name.replace(".0.", "-0-").replace(".", "-")

    def _normalize_name(self, name: str) -> str:
        """Return the normalized form of ``name`` used as lookup key."""
        normalized = self._normalize_basic_patterns(name)

        if "mar-bvm" in normalized:
            normalized = self._normalize_mar_bvm_format(normalized)
        # Names starting with a year (like 2022.0.86) keep their dots
        elif not normalized.startswith("20"):
            normalized = normalized.replace(".", "-")

        return normalized

    def _create_mar_bvm_lookups(self, normalized: str, filename: str):
        """Register ``filename`` under its six-part MAR-BVM base key, when it has one."""
        if "mar-bvm" not in normalized:
            return

        parts = normalized.split("-")
        # A numeric part at position 5 or later (after mar-bvm-7-2022-0)
        if any(part.isdigit() for part in parts[5:]):
            base_key = "-".join(parts[:6])  # e.g. mar-bvm-7-2022-0-22
            self.mar_bvm_lookup.setdefault(base_key, []).append(filename)

    def _process_image_file(self, filename: str):
        """Add one directory entry to the index if it is a safe image file."""
        if not SecurityValidator.validate_filename(filename):
            logger.warning(f"Fichier ignoré pour raison de sécurité: {filename}")
            return

        if not filename.lower().endswith(self.IMAGE_EXTENSIONS):
            return

        self.available_files.add(filename)

        base_name = self._strip_file_extensions(filename)
        normalized = self._normalize_name(base_name)
        self.image_lookup[normalized] = filename
        self._create_mar_bvm_lookups(normalized, filename)

    def _build_index(self):
        """Build the index of available images; on failure the index is left empty."""
        try:
            # Sorted so that, when two files share a normalized name, the
            # winner does not depend on the directory listing order.
            for filename in sorted(os.listdir(self.images_dir)):
                self._process_image_file(filename)

            logger.info(
                f"Index des images construit: {len(self.available_files)} fichiers disponibles, "
                f"{len(self.image_lookup)} entrées normalisées"
            )
        except Exception as e:
            logger.error(f"Erreur lors de la construction de l'index: {e}")
            # Do not keep a half-built index around
            self.available_files = set()
            self.image_lookup = {}
            self.mar_bvm_lookup = {}

    def _clean_input_name(self, image_name: str) -> str:
        """Trim whitespace, a trailing comma and trailing dashes from an input name."""
        cleaned = image_name.strip().rstrip(",").rstrip("-").strip()
        # Remove spaces before -001, -002, etc.
        return re.sub(r"\s+(-\d)", r"\1", cleaned)

    def _normalize_mar_bvm_input(self, image_name: str) -> str:
        """Rewrite the known variants of a MAR-BVM input name to the dashed form."""
        if "MAR-BVM" not in image_name:
            return image_name

        # Handle missing "7-" in MAR-BVM-2022-0-153
        if "MAR-BVM-2022-0-" in image_name:
            image_name = image_name.replace("MAR-BVM-2022-0-", "MAR-BVM-7-2022-0-")

        # Convert .0. to -0-
        if ".0." in image_name:
            image_name = image_name.replace(".0.", "-0-")

        # Handle .001, .002 at the end (convert to -001, -002)
        image_name = re.sub(r"\.(\d{3})$", r"-\1", image_name)

        # Handle .1 or .2 suffix
        if image_name.endswith(".1"):
            image_name = image_name[:-2] + "-1"
        elif image_name.endswith(".2"):
            image_name = image_name[:-2] + "-2"

        # Any remaining dot becomes a dash
        return image_name.replace(".", "-")

    def _try_mar_bvm_lookups(self, normalized: str) -> Optional[str]:
        """Look a MAR-BVM name up by base key, then with the -001/-002/-003 suffixes."""
        candidates = self.mar_bvm_lookup.get(normalized)
        if candidates:
            return candidates[0]

        for suffix in self.MAR_BVM_TEST_SUFFIXES:
            test_pattern = f"{normalized}{suffix}"
            if test_pattern in self.image_lookup:
                return self.image_lookup[test_pattern]

        return None

    def _try_year_format_lookup(self, image_name: str) -> Optional[str]:
        """Look up a name starting with "20", keeping its dots."""
        if not image_name.startswith("20"):
            return None

        test_name = image_name.lower().replace(" ", "-")
        return self.image_lookup.get(test_name)

    def _try_partial_matching(self, normalized: str) -> Optional[str]:
        """Return the first indexed file whose key contains ``normalized``.

        An empty ``normalized`` is a substring of every key and would return
        an arbitrary file, so it never matches.
        """
        if not normalized:
            return None

        for key, filename in self.image_lookup.items():
            if normalized in key:
                return filename
        return None

    def _split_multiple_names(self, image_name: str) -> List[str]:
        """Split a field holding several names separated by commas, slashes or " - "."""
        # First try comma separation
        if "," in image_name:
            return [name.strip() for name in image_name.split(",") if name.strip()]

        # Then try slash separation
        if "/" in image_name:
            return [name.strip() for name in image_name.split("/") if name.strip()]

        # Handle " - " separation (for cases like "MAR-BVM-7-2022.0.81 - 2022.0.81")
        if image_name.count(" - ") == 1:
            first, second = (name.strip() for name in image_name.split(" - "))
            # The second part repeating a piece of the first is a duplicate
            if second in first:
                return [first]
            return [first, second]

        return [image_name]

    def find_image(self, image_name: str) -> Optional[str]:
        """Return the indexed file matching ``image_name``, or ``None``.

        The field may list several names; the first one that resolves wins.
        """
        if not image_name:
            return None

        for name in self._split_multiple_names(image_name):
            result = self._find_single_image(name)
            if result:
                return result

        return None

    def _find_single_image(self, image_name: str) -> Optional[str]:
        """Resolve one name: exact key, MAR-BVM variants, year format, then substring."""
        cleaned_name = self._clean_input_name(image_name)
        processed_name = self._normalize_mar_bvm_input(cleaned_name)
        normalized = self._normalize_name(processed_name)

        # Try direct lookup first
        if normalized in self.image_lookup:
            return self.image_lookup[normalized]

        # Try MAR-BVM specific lookups
        if "mar-bvm" in normalized:
            result = self._try_mar_bvm_lookups(normalized)
            if result:
                return result

        # Year format lookup works on the cleaned name so stray whitespace or
        # a trailing comma cannot defeat the "starts with 20" test
        result = self._try_year_format_lookup(cleaned_name)
        if result:
            return result

        # Try partial matching as last resort
        return self._try_partial_matching(normalized)

    def get_all_files(self) -> Set[str]:
        """Return a copy of the set of indexed file names."""
        return self.available_files.copy()
598
-
599
-
600
- class ArtMatcherV2:
601
- """Classe principale pour le matching d'œuvres d'art"""
602
-
603
def __init__(self, csv_path: str, images_dir: str):
    """Load the database, index the image directory and precompute the lookups.

    Args:
        csv_path: Path handed to ``Database``.
        images_dir: Directory containing the image files.
    """
    self.db = Database(csv_path)
    self.images_dir = os.path.abspath(images_dir)
    self.emotion_wheel = EmotionWheel()
    self.weights = ScoringWeights()

    self.optimizer_helper = WeightedLeximaxOptimizer(TargetProfile(), {})

    self.image_indexer = ImageIndexer(images_dir)

    # Keep only the rows whose image name is present and not blank
    full_df = self.db.get_dataframe()
    image_names = full_df["name_image"]
    has_image = (
        image_names.notna()
        & (image_names != "")
        & (image_names.str.strip() != "")
    )
    self.df_with_images = full_df[has_image].copy()

    self.df_with_images["database_id_str"] = self.df_with_images["database_id"].astype(str)

    # database id (as string) -> row label in df_with_images
    self.id_to_index = {}
    for row_label, row in self.df_with_images.iterrows():
        self.id_to_index[str(row["database_id"])] = row_label

    self.artwork_images = self._build_artwork_image_index()

    # Database object created without running __init__, wrapping only the
    # rows that have images
    images_only_db = Database.__new__(Database)
    images_only_db.dataframe = self.df_with_images
    self.temp_db_with_images = images_only_db

    logger.info(f"Base de données chargée: {self.db.n_pieces()} œuvres")
    logger.info(f"Œuvres avec images: {len(self.df_with_images)}")
    logger.info(f"Index des images: {len(self.artwork_images)} œuvres mappées")
636
-
637
def _sanitize_input(self, input_str: str) -> str:
    """Clean and validate a user-supplied string.

    Delegates to ``SecurityValidator.sanitize_input`` with its default
    maximum length.
    """
    return SecurityValidator.sanitize_input(input_str)
640
-
641
def _parse_date(self, date_str: str) -> Optional[datetime]:
    """Return the parsed date, or ``None`` when ``SecurityValidator`` rejects it."""
    accepted, parsed = SecurityValidator.validate_date(date_str)
    if not accepted:
        return None
    return parsed
645
-
646
def _build_artwork_image_index(self) -> Dict[str, List[str]]:
    """Build the artwork id -> list of image paths mapping.

    Each row's ``name_image`` field is cleaned of its separators, split into
    individual names and resolved through the image indexer. Artworks with no
    resolved image are left out.
    """
    index: Dict[str, List[str]] = {}

    for _, record in self.df_with_images.iterrows():
        raw_names = record["name_image"]
        if not (raw_names and str(raw_names).strip()):
            continue

        names = str(raw_names).strip().strip('"')

        # " / " separates alternatives: keep what comes before the slash
        if " / " in names:
            names = names.partition(" / ")[0].strip()

        if " - 2022" in names:
            # " - 2022" is a separator, not part of the name
            names = names.partition(" - 2022")[0].strip()
        elif " - " in names and "MAR-BVM-7-2022-0-" not in names:
            # Other MAR-BVM formats using " - " as separator
            head = names.partition(" - ")[0]
            if "MAR-BVM" in head:
                names = head.strip()

        names = re.sub(r"\s+-\s*$", "", names)  # trailing " -"
        names = re.sub(r"\s+(-\d)", r"\1", names)  # spaces before -001

        # Split on commas/slashes and resolve each non-empty name to a file
        candidates = (chunk.strip() for chunk in re.split(r"[,/]", names))
        resolved = (
            self.image_indexer.find_image(candidate)
            for candidate in candidates
            if candidate
        )
        paths = [
            os.path.join(self.images_dir, matched)
            for matched in resolved
            if matched
        ]

        if paths:
            index[str(record["database_id"])] = paths

    return index
699
-
700
def preselect_artworks(
    self, firstname: str, birthday: str, city: str
) -> pd.DataFrame:
    """Pre-select artworks following the hierarchy first name > date > city.

    The inputs are sanitized, turned into a target profile and ranked by the
    weighted leximax optimizer. Rows scoring above ``(0, 0, 0)`` are kept;
    when fewer than ``MIN_PRESELECTION_COUNT`` remain, the top
    ``MIN_PRESELECTION_COUNT`` ranked rows are returned instead.
    """
    logger.info("=== DÉBUT PRÉ-SÉLECTION ===")

    # Sanitize the free-text inputs
    firstname = self._sanitize_input(firstname)
    city = self._sanitize_input(city)

    logger.info(
        f"Critères de pré-sélection: prénom='{firstname}', date='{birthday}', ville='{city}'"
    )

    birth_date = self._parse_date(birthday)
    if birth_date:
        logger.info(f"Date convertie: {birth_date.strftime('%d/%m')}")

    profile = TargetProfile()
    profile.set_target_name(firstname)
    profile.set_target_date(birth_date)
    profile.set_target_place(city)

    settings = self.weights
    weights = {
        "related_names": settings.PRESELECTION_NAME_WEIGHT,
        "related_dates": settings.PRESELECTION_DATE_WEIGHT,
        "related_places": settings.PRESELECTION_PLACE_WEIGHT,
        "related_emotions": settings.PRESELECTION_EMOTION_WEIGHT,
    }

    logger.info(
        f"Poids utilisés: nom={weights['related_names']}, date={weights['related_dates']}, lieu={weights['related_places']}, émotions={weights['related_emotions']}"
    )

    ranked = WeightedLeximaxOptimizer(profile, weights).optimize_max(
        self.temp_db_with_images
    )

    preselected = ranked[ranked["score"] > (0, 0, 0)]
    logger.info(f"Œuvres avec score > 0: {len(preselected)}")

    minimum = settings.MIN_PRESELECTION_COUNT
    if len(preselected) < minimum:
        preselected = ranked.head(minimum)
        logger.info(f"Ajustement au minimum requis: {len(preselected)} œuvres")

    # Log the five best entries with the criteria that contributed
    logger.info("Top 5 pré-sélections:")
    for rank, (_, piece) in enumerate(preselected.head(5).iterrows(), start=1):
        logger.info(
            f" {rank}. Œuvre #{piece['database_id']} - Score: {piece['score']}"
        )

        names = piece["related_names"]
        if firstname and names:
            name_score = Optimizer.name_similarity(firstname, names)
            if name_score > 0:
                logger.info(
                    f" → Nom: {piece['related_names']} (score: {name_score:.2f})"
                )

        dates = piece["related_dates"]
        if birth_date and dates:
            date_score = Optimizer.date_similarity(birth_date, dates)
            if date_score > 0:
                logger.info(
                    f" → Dates: {[day.strftime('%d/%m') for day in piece['related_dates']]} (score: {date_score:.2f})"
                )

        places = piece["related_places"]
        if city and places:
            place_score = self.optimizer_helper.place_similarity(city, places)
            if place_score > 0:
                logger.info(
                    f" → Lieux: {piece['related_places']} (score: {place_score:.2f})"
                )

    logger.info("=== FIN PRÉ-SÉLECTION ===")
    return preselected
778
-
779
def get_random_images_for_selection(
    self, round_num: int, already_selected: Optional[List[str]] = None
) -> List[Tuple[str, str]]:
    """Pick random images, one per artwork, for a selection round.

    Args:
        round_num: Round number, used for logging only.
        already_selected: Artwork ids chosen in previous rounds; they are
            excluded from the draw.

    Returns:
        A list of ``(image_path, artwork_id)`` pairs holding
        ``MAX_IMAGES_PER_SELECTION`` entries. When fewer artworks than that
        are available, up to that many indexed ``.jpg`` files are returned
        instead, each paired with the placeholder id ``"0"``.
    """
    logger.info(f"=== SÉLECTION D'IMAGES POUR LE TOUR {round_num} ===")

    if already_selected:
        logger.info(f"Œuvres déjà sélectionnées à exclure: {already_selected}")

    # Drop the artworks picked in earlier rounds
    excluded = set(already_selected or ())
    available_artworks = [a for a in self.artwork_images if a not in excluded]

    logger.info(
        f"Nombre total d'œuvres avec images disponibles: {len(available_artworks)}"
    )

    max_images = self.weights.MAX_IMAGES_PER_SELECTION

    if len(available_artworks) < max_images:
        logger.warning(
            f"Seulement {len(available_artworks)} œuvres avec images disponibles"
        )
        # Filter on the extension BEFORE truncating, otherwise a window of
        # non-jpg files can hide the jpg files that do exist. Sorting makes
        # the fallback independent of set ordering.
        jpg_files = sorted(
            name
            for name in self.image_indexer.get_all_files()
            if name.lower().endswith(".jpg")
        )
        return [
            (os.path.join(self.images_dir, name), "0")
            for name in jpg_files[:max_images]
        ]

    selected_artworks = random.sample(available_artworks, max_images)

    logger.info(f"Œuvres sélectionnées aléatoirement: {selected_artworks}")

    selected = []
    for artwork_id in selected_artworks:
        img_path = random.choice(self.artwork_images[artwork_id])
        selected.append((img_path, artwork_id))
        if artwork_id in self.id_to_index:
            artwork = self.df_with_images.loc[self.id_to_index[artwork_id]]
            logger.info(f" Image {len(selected)}: Œuvre #{artwork_id}")
            logger.info(f" Type: {artwork['art_piece_type']}")
            logger.info(f" Émotions: {artwork['related_emotions']}")

    logger.info(f"=== FIN SÉLECTION IMAGES TOUR {round_num} ===")
    return selected
835
-
836
def extract_emotions_from_image_id(self, database_id: str) -> List[str]:
    """Return the emotions stored for an artwork id.

    The id is resolved through the precomputed ``id_to_index`` mapping. An
    unknown id, or a stored value that is not a list, yields an empty list.
    """
    try:
        row_label = self.id_to_index[database_id]
    except KeyError:
        return []

    stored = self.df_with_images.loc[row_label, "related_emotions"]
    return stored if isinstance(stored, list) else []
847
-
848
def _cached_emotion_similarity(self, emotion1: str, emotion2: str) -> float:
    """Return the similarity of two emotions, memoized per instance.

    The cache is a plain dict stored on the instance rather than
    ``functools.lru_cache`` on the method: a method-level ``lru_cache`` keys
    on ``self``, which keeps the instance alive for as long as the cache
    exists and requires the instance to be hashable.
    """
    cache = self.__dict__.setdefault("_emotion_similarity_cache", {})
    key = (emotion1, emotion2)
    try:
        return cache[key]
    except KeyError:
        pass

    # Keep the memory bound of the previous 1024-entry cache
    if len(cache) >= 1024:
        cache.clear()

    score = self.emotion_wheel.calculate_emotion_similarity(emotion1, emotion2)
    cache[key] = score
    return score
852
-
853
def calculate_emotion_profile(self, selected_ids: List[str]) -> Dict[str, float]:
    """Compute the emotion profile of the selected images.

    Returns:
        A mapping emotion -> share of all emotion occurrences found on the
        selected artworks, or an empty dict when none carries an emotion.
    """
    logger.info("=== CALCUL DU PROFIL ÉMOTIONNEL ===")
    logger.info(f"Images sélectionnées: {selected_ids}")

    tally = Counter()
    for piece_id in selected_ids:
        piece_emotions = self.extract_emotions_from_image_id(piece_id)
        logger.info(f" Image {piece_id}: émotions = {piece_emotions}")
        tally.update(piece_emotions)

    occurrences = sum(tally.values())
    if occurrences <= 0:
        emotion_profile = {}
        logger.info("Aucune émotion trouvée dans les images sélectionnées")
    else:
        emotion_profile = {}
        for emotion, count in tally.items():
            emotion_profile[emotion] = count / occurrences
        logger.info(f"Profil émotionnel calculé: {emotion_profile}")

    logger.info("=== FIN CALCUL PROFIL ÉMOTIONNEL ===")
    return emotion_profile
879
-
880
def _get_artwork_image(self, artwork) -> Optional[str]:
    """Return the first indexed image path for *artwork*, or ``None``.

    *artwork* only needs to support ``artwork["database_id"]``; the id is
    converted to ``str`` before being looked up in ``self.artwork_images``.
    """
    key = str(artwork["database_id"])
    indexed_images = self.artwork_images
    # The pre-built index lists the images of each artwork; use the first one.
    return indexed_images[key][0] if key in indexed_images else None
889
-
890
def find_best_match(
    self, firstname: str, birthday: str, city: str, selected_image_ids: List[str]
) -> Tuple[Optional[str], str, Dict]:
    """Pick the artwork that best fits the visitor.

    Decision order:
      1. Among the pre-selected artworks, the first exact hit wins at once:
         first-name similarity >= 0.95, date similarity == 1.0 or place
         similarity == 1.0 (checked in that order for each artwork).
      2. Otherwise candidates are ranked by emotional similarity against the
         profile built from the selected images. The pool is the part of the
         pre-selection whose ``score`` compares greater than ``(0, 0, 0)``,
         or every artwork of ``self.df_with_images`` when that part is empty.
         Artworks the user already selected are removed from the pool.
      3. Equal emotion scores are separated by how often the artwork type
         occurs among the selected images, then by a random draw.

    Returns:
        ``(image_path, reason, info)``. When no candidate remains the result
        is ``(None, "Aucune correspondance trouvée", {})``.
    """

    def summarize(piece) -> Dict:
        # Fields handed back to the caller for the chosen artwork.
        return {
            "title": f"Œuvre #{piece['database_id']}",
            "type": piece["art_piece_type"],
            "place": piece["art_piece_place"],
            "emotions": piece["related_emotions"],
            "explanation": piece["explanation"],
        }

    def exact_hit(piece, reason: str):
        # Result triple for an automatic winner.
        return self._get_artwork_image(piece), reason, summarize(piece)

    firstname = self._sanitize_input(firstname)
    city = self._sanitize_input(city)
    birth_date = self._parse_date(birthday)

    logger.info(f"Recherche de correspondance pour: {firstname}, {birthday}, {city}")

    preselected = self.preselect_artworks(firstname, birthday, city)

    # --- Step 1: exact match on name / date / city ---------------------------
    logger.info("=== DÉTECTION DE MATCH EXACT ===")
    for _, piece in preselected.iterrows():
        if firstname and piece["related_names"]:
            name_score = Optimizer.name_similarity(firstname, piece["related_names"])
            if name_score >= 0.95:
                logger.info(
                    f"🎯 MATCH EXACT TROUVÉ: prénom '{firstname}' → œuvre #{piece['database_id']} (score: {name_score:.2f})"
                )
                logger.info(f" Noms dans l'œuvre: {piece['related_names']}")
                return exact_hit(piece, f"Prénom '{firstname}' correspond exactement")

        if birth_date and piece["related_dates"]:
            date_score = Optimizer.date_similarity(birth_date, piece["related_dates"])
            if date_score == 1.0:
                logger.info(
                    f"🎯 MATCH EXACT TROUVÉ: date '{birthday}' → œuvre #{piece['database_id']}"
                )
                logger.info(
                    f" Dates dans l'œuvre: {[d.strftime('%d/%m/%Y') for d in piece['related_dates']]}"
                )
                return exact_hit(
                    piece, f"Date d'anniversaire {birthday} correspond exactement"
                )

        if city and piece["related_places"]:
            place_score = self.optimizer_helper.place_similarity(
                city, piece["related_places"]
            )
            if place_score == 1.0:
                logger.info(
                    f"🎯 MATCH EXACT TROUVÉ: ville '{city}' → œuvre #{piece['database_id']}"
                )
                logger.info(f" Lieux dans l'œuvre: {piece['related_places']}")
                return exact_hit(piece, f"Ville '{city}' correspond exactement")

    logger.info("Aucun match exact trouvé, passage à la sélection par émotions")

    emotion_profile = self.calculate_emotion_profile(selected_image_ids)

    # --- Step 2: choose the candidate pool ------------------------------------
    logger.info("=== STRATÉGIE DE MATCHING ===")
    valid_preselection = preselected[preselected["score"] > (0, 0, 0)]
    has_preselection = len(valid_preselection) > 0

    if has_preselection:
        logger.info(
            f"📋 CAS A: {len(valid_preselection)} œuvres pré-sélectionnées - utilisation des émotions pour départager"
        )
        candidates = valid_preselection
    else:
        logger.info(
            f"📋 CAS B: Aucune pré-sélection valide - recherche par émotions sur {len(self.df_with_images)} œuvres"
        )
        candidates = self.df_with_images

    # Never propose an artwork the user has already picked during the rounds.
    already_picked = set(selected_image_ids)
    keep_mask = ~candidates["database_id"].astype(str).isin(already_picked)
    candidates = candidates[keep_mask]
    logger.info(
        f"Après exclusion des œuvres déjà sélectionnées {already_picked}: {len(candidates)} candidats restants"
    )

    # --- Step 3: emotional scoring --------------------------------------------
    logger.info("=== CALCUL DES SCORES ÉMOTIONNELS ===")
    leaders = []
    top_emotion_score = -1

    for _, piece in candidates.iterrows():
        score = 0

        if emotion_profile and piece["related_emotions"]:
            for user_emotion, weight in emotion_profile.items():
                # Best similarity of this user emotion against the artwork's
                # emotions, floored at 0 like a running maximum starting at 0.
                closest = max(
                    [
                        0,
                        *(
                            self._cached_emotion_similarity(user_emotion, piece_emotion)
                            for piece_emotion in piece["related_emotions"]
                        ),
                    ]
                )
                score += closest * weight

            if len(piece["related_emotions"]) > 0:
                score /= len(piece["related_emotions"])

        if score > top_emotion_score:
            top_emotion_score = score
            leaders = [piece]
            logger.info(
                f" Nouveau meilleur score émotionnel: {score:.3f} - Œuvre #{piece['database_id']}"
            )
        elif score == top_emotion_score and score > 0:
            leaders.append(piece)
            logger.info(
                f" Score égal au meilleur: {score:.3f} - Œuvre #{piece['database_id']}"
            )

    logger.info(
        f"Nombre de meilleures correspondances: {len(leaders)} avec score {top_emotion_score:.3f}"
    )

    # --- Step 4: tie-break ------------------------------------------------------
    if not leaders:
        logger.info("Aucune correspondance trouvée")
        return None, "Aucune correspondance trouvée", {}

    if len(leaders) == 1:
        best_match = leaders[0]
        match_reason = "Meilleure correspondance émotionnelle"
    else:
        logger.info("=== DÉPARTAGE PAR TYPE D'OBJET ===")
        type_votes = Counter(
            self.df_with_images.loc[self.id_to_index[img_id], "art_piece_type"]
            for img_id in selected_image_ids
            if img_id in self.id_to_index
        )

        type_scores = [type_votes[piece["art_piece_type"]] for piece in leaders]
        best_type_score = max(type_scores)
        finalists = [
            piece
            for piece, type_score in zip(leaders, type_scores)
            if type_score == best_type_score
        ]

        if len(finalists) > 1:
            logger.info(
                f" {len(finalists)} œuvres avec le même score de type ({best_type_score}) - sélection aléatoire"
            )
            best_match = random.choice(finalists)
            match_reason = "Sélection aléatoire parmi les meilleures correspondances"
        else:
            best_match = finalists[0]
            match_reason = f"Type d'objet '{best_match['art_piece_type']}' préféré"
            logger.info(
                f" Type '{best_match['art_piece_type']}' sélectionné avec score {best_type_score}"
            )

    # --- Step 5: human-readable justification ---------------------------------
    reasons = []
    if has_preselection:
        if firstname and best_match["related_names"]:
            name_score = Optimizer.name_similarity(
                firstname, best_match["related_names"]
            )
            if name_score > 0:
                reasons.append(f"prénom '{firstname}' trouvé")

        if birth_date and best_match["related_dates"]:
            date_score = Optimizer.date_similarity(
                birth_date, best_match["related_dates"]
            )
            if date_score > 0:
                reasons.append(
                    f"date {'exacte' if date_score == 1.0 else 'partielle'}"
                )

        if city and best_match["related_places"]:
            place_score = self.optimizer_helper.place_similarity(
                city, best_match["related_places"]
            )
            if place_score > 0:
                reasons.append(f"ville '{city}' trouvée")

    if top_emotion_score > 0:
        reasons.append(f"correspondance émotionnelle (score: {top_emotion_score:.2f})")

    # Fall back to the tie-break explanation when nothing else applied.
    if not reasons:
        reasons.append(match_reason)

    final_reason = " ; ".join(reasons)

    logger.info(f"\n🏆 RÉSULTAT FINAL: Œuvre #{best_match['database_id']}")
    logger.info(f" Raison: {final_reason}")
    logger.info(f" Type: {best_match['art_piece_type']}")
    logger.info(f" Lieu: {best_match['art_piece_place']}")

    return self._get_artwork_image(best_match), final_reason, summarize(best_match)
1142
-
1143
-
1144
# Collection spreadsheet handed to the matcher.
csv_path = "PP1-Collection_Database_new-cleaned.csv"
if not os.path.exists(csv_path):
    logger.error(f"Fichier CSV introuvable: {csv_path}")

# Directory holding the artwork pictures.
images_dir = "pictures_data"
if not os.path.exists(images_dir):
    logger.error(f"Répertoire images introuvable: {images_dir}")

# A missing path is only logged above; the matcher is built regardless.
matcher = ArtMatcherV2(csv_path, images_dir)
1153
-
1154
-
1155
def process_user_info(firstname: str, birthday: str, city: str, state: SessionState):
    """Store the visitor's details in *state* and decide which screen comes next.

    The first name and city go through ``SecurityValidator.sanitize_input``;
    the birthday is stored as received and checked with
    ``SecurityValidator.validate_date``.

    Returns:
        A 5-tuple of three ``gr.update`` visibility changes, a status message
        and the state. The first update stays visible and the second hidden
        when the input is rejected; they are swapped on success. The third
        update is always hidden.
    """
    clean_firstname = SecurityValidator.sanitize_input(firstname)
    clean_city = SecurityValidator.sanitize_input(city)

    # The values are recorded before validation, as the session keeps them
    # even when the form is rejected.
    state.firstname, state.birthday, state.city = clean_firstname, birthday, clean_city

    def respond(advance: bool, text: str):
        # One response shape for every outcome; only the first two flags vary.
        return (
            gr.update(visible=not advance),
            gr.update(visible=advance),
            gr.update(visible=False),
            text,
            state,
        )

    if not (clean_firstname and birthday):
        return respond(
            False, "Veuillez remplir au moins votre prénom et date de naissance."
        )

    date_ok, _ = SecurityValidator.validate_date(birthday)
    if not date_ok:
        return respond(False, "Format de date invalide. Utilisez JJ/MM (ex: 15/03)")

    return respond(
        True, "Informations enregistrées ! Passons à la sélection d'images."
    )
1190
-
1191
-
1192
def load_images_for_round(round_num: int, state: SessionState):
    """Fetch the candidate images for one selection round.

    Images come from ``matcher.get_random_images_for_selection``, which also
    receives the images already chosen in this session.

    Returns:
        ``(image_paths, artwork_ids, status_text, state)``. When fewer than
        ``ScoringWeights.MAX_IMAGES_PER_SELECTION`` images are available, three
        ``None`` placeholders and an empty id list are returned instead and
        ``state.current_image_ids`` is left untouched.
    """
    picks = matcher.get_random_images_for_selection(round_num, state.selected_images)
    available = len(picks)

    if available < ScoringWeights.MAX_IMAGES_PER_SELECTION:
        logger.warning(f"Seulement {available} images disponibles")
        shortage_text = (
            f"Pas assez d'images disponibles (seulement {available} trouvées)"
        )
        return [None, None, None], [], shortage_text, state

    # Each pick is indexed as (image path, artwork id).
    image_paths = [pick[0] for pick in picks]
    artwork_ids = [pick[1] for pick in picks]

    # Remember what is on screen so the radio index can be mapped back to an id.
    state.current_image_ids = artwork_ids

    prompt = f"Tour {round_num + 1}/{ScoringWeights.TOTAL_ROUNDS} : Sélectionnez l'image qui vous attire le plus"
    return image_paths, artwork_ids, prompt, state
1218
-
1219
-
1220
def select_image(choice: Optional[int], state: SessionState):
    """Record the image picked for the current round and prepare the next step.

    Returns:
        * a 6-tuple ``(img1, img2, img3, image_choice, message, state)`` of
          no-op updates when nothing was chosen or the index does not match a
          displayed image;
        * otherwise an 8-tuple with two extra visibility updates. While rounds
          remain it carries the next three images and a cleared choice; after
          the last round the first update is hidden and the second shown.
    """

    def unchanged(text: str):
        # Nothing on screen changes; only the status message is replaced.
        return (gr.update(), gr.update(), gr.update(), gr.update(), text, state)

    if choice is None:
        return unchanged("Veuillez sélectionner une image")

    shown_ids = state.current_image_ids
    if not shown_ids or len(shown_ids) <= choice:
        return unchanged("Erreur: image non trouvée")

    picked_id = shown_ids[choice]
    state.selected_images.append(picked_id)
    state.current_round += 1

    logger.info(
        f"Tour {state.current_round}: Image {choice+1} sélectionnée (ID: {picked_id})"
    )

    if state.current_round >= ScoringWeights.TOTAL_ROUNDS:
        # Every round has been played: swap the selection view for the loader.
        return (
            gr.update(),
            gr.update(),
            gr.update(),
            gr.update(),
            "",
            state,
            gr.update(visible=False),
            gr.update(visible=True),
        )

    next_images, _next_ids, prompt, next_state = load_images_for_round(
        state.current_round, state
    )
    return (
        gr.update(value=next_images[0]),
        gr.update(value=next_images[1]),
        gr.update(value=next_images[2]),
        gr.update(value=None),  # clear the previous choice
        prompt,
        next_state,
        gr.update(visible=True),
        gr.update(visible=False),
    )
1277
-
1278
-
1279
def show_results(state: SessionState):
    """Compute the final recommendation and build the results screen.

    When the session is not complete, the selection view is shown again and
    nothing is computed. Otherwise ``matcher.find_best_match`` is called, the
    outcome is stored on *state* and logged through ``session_tracker``.

    Returns:
        A 7-tuple: four visibility updates (info, selection, loading and
        results sections), the image, the title and the Markdown explanation.
    """
    if not state.is_complete():
        return (
            gr.update(visible=False),  # info_section
            gr.update(visible=True),  # selection_section
            gr.update(visible=False),  # loading_section
            gr.update(visible=False),  # results_section
            None,
            "",
            "",
        )

    match_image, reason, info = matcher.find_best_match(
        state.firstname,
        state.birthday,
        state.city,
        state.selected_images,
    )

    if not match_image:
        # Nothing found: the session is still logged.
        state.final_artwork = "Aucune œuvre trouvée"
        state.recommendation_type = "none"
        session_tracker.log_session(state, "none")

        title = "Œuvre non trouvée"
        explanation = "Désolé, aucune œuvre correspondante n'a pu être trouvée."
    else:
        # Exact name/date/place hits are recognised by the wording of the reason.
        exact = "correspond exactement" in reason.lower()
        recommendation_type = "name_date_place" if exact else "emotions"

        state.final_artwork = info.get("title", "Œuvre inconnue")
        state.recommendation_type = recommendation_type
        session_tracker.log_session(state, recommendation_type)

        # The literal is flush-left: leading spaces would be part of the text.
        explanation = f"""
**Votre œuvre correspondante a été trouvée !**

**Raison du match :** {reason}

**Détails de l'œuvre :**
- Type : {info.get('type', 'Non spécifié')}
- Lieu : {info.get('place', 'Non spécifié')}
- Émotions : {', '.join(info.get('emotions', [])) if info.get('emotions') else 'Non spécifiées'}

**Description :**
{info.get('explanation', 'Aucune description disponible')}
"""
        title = info.get("title", "Œuvre non trouvée")

    return (
        gr.update(visible=False),  # info_section
        gr.update(visible=False),  # selection_section
        gr.update(visible=False),  # loading_section
        gr.update(visible=True),  # results_section
        match_image,
        title,
        explanation,
    )
1345
-
1346
-
1347
# Gradio interface: four mutually exclusive sections (info form, image
# selection, loading indicator, results) plus a status line, wired to the
# module-level callbacks through the small adapters defined below.
with gr.Blocks(
    title="Art Matcher",
    theme=gr.themes.Soft(
        primary_hue="teal", secondary_hue="teal", neutral_hue="zinc"
    ),
) as demo:
    gr.Markdown(
        """
# 🎨 Art Matcher
### Découvrez l'œuvre d'art qui vous correspond !

Cette application utilise vos informations personnelles et vos préférences visuelles
pour trouver l'œuvre d'art qui vous correspond le mieux dans notre collection.
"""
    )

    # Session data passed to and returned from every callback.
    session_state = gr.State(SessionState())

    # Step 1: personal details form (the only section visible at start-up).
    with gr.Group(visible=True) as info_section:
        gr.Markdown("### Étape 1 : Vos informations")
        with gr.Row():
            firstname_input = gr.Textbox(
                label="Prénom", placeholder="Entrez votre prénom", max_lines=1
            )
            birthday_input = gr.Textbox(
                label="Date d'anniversaire (JJ/MM)",
                placeholder="Ex: 25/12",
                max_lines=1,
            )
            city_input = gr.Textbox(
                label="Ville de résidence", placeholder="Ex: Paris", max_lines=1
            )

        submit_info_btn = gr.Button("Valider mes informations", variant="primary")

    # Step 2: three images and a radio whose value is the chosen index.
    with gr.Group(visible=False) as selection_section:
        selection_title = gr.Markdown("### Étape 2 : Sélection d'images")

        with gr.Row():
            img1 = gr.Image(label="Image 1", type="filepath", height=300)
            img2 = gr.Image(label="Image 2", type="filepath", height=300)
            img3 = gr.Image(label="Image 3", type="filepath", height=300)

        image_choice = gr.Radio(
            choices=["Image 1", "Image 2", "Image 3"],
            label="Quelle image vous attire le plus ?",
            type="index",
        )

        select_btn = gr.Button("Valider mon choix", variant="primary")

    # Spinner shown between the last selection and the results.
    with gr.Group(visible=False) as loading_section:
        gr.Markdown("### ⏳ Analyse en cours...")
        gr.HTML("""
<div style="text-align: center; padding: 40px;">
<div style="display: inline-block; width: 60px; height: 60px; border: 6px solid #f3f3f3; border-top: 6px solid #14b8a6; border-radius: 50%; animation: spin 1s linear infinite;"></div>
<style>
@keyframes spin {
0% { transform: rotate(0deg); }
100% { transform: rotate(360deg); }
}
</style>
<p style="margin-top: 20px; font-size: 18px; color: #666;">
<strong>Traitement de vos sélections...</strong><br>
<span style="font-size: 14px;">Nous analysons votre profil pour trouver l'œuvre parfaite</span>
</p>
</div>
""")

    # Final screen: matched artwork, its title and the explanation.
    with gr.Group(visible=False) as results_section:
        gr.Markdown("### Votre œuvre correspondante")

        with gr.Row():
            with gr.Column(scale=1):
                result_image = gr.Image(label="Votre œuvre", height=400)
                result_title = gr.Markdown("## Titre de l'œuvre")

            with gr.Column(scale=1):
                result_explanation = gr.Markdown("")

        restart_btn = gr.Button("Recommencer", variant="secondary")

    # Status line shared by all steps.
    status_message = gr.Markdown("")

    def on_info_submit(firstname, birthday, city, state):
        """Validate the form and, on success, load the first round of images.

        Returns eight values matching the ``outputs`` of
        ``submit_info_btn.click``: three section updates, three image values,
        the status message and the state.
        """
        # Start from a clean session on every submission.
        state.reset()

        info_vis, select_vis, results_vis, message, state = process_user_info(
            firstname, birthday, city, state
        )

        # The selection update is visible only when the input was accepted.
        if select_vis["visible"]:
            images, ids, round_message, state = load_images_for_round(0, state)
            return (
                info_vis,
                select_vis,
                results_vis,
                images[0] if len(images) > 0 else None,
                images[1] if len(images) > 1 else None,
                images[2] if len(images) > 2 else None,
                round_message,
                state,
            )
        else:
            return (info_vis, select_vis, results_vis, None, None, None, message, state)

    submit_info_btn.click(
        fn=on_info_submit,
        inputs=[firstname_input, birthday_input, city_input, session_state],
        outputs=[
            info_section,
            selection_section,
            results_section,
            img1,
            img2,
            img3,
            status_message,
            session_state,
        ],
    )

    def on_image_select(choice, state):
        """Adapt the result of ``select_image`` to the ten outputs of ``select_btn``.

        ``select_image`` returns either eight values (with selection/loading
        visibility updates) or six values (no visibility change).
        """
        result = select_image(choice, state)

        # select_image returned its 8-value form
        if len(result) == 8:
            (img1_update, img2_update, img3_update, choice_update, message, state,
             selection_vis, loading_vis) = result
            return (
                gr.update(),  # info_section
                selection_vis,  # selection_section
                loading_vis,  # loading_section
                gr.update(),  # results_section
                img1_update,  # img1
                img2_update,  # img2
                img3_update,  # img3
                choice_update,  # image_choice
                message,  # status_message
                state,
            )
        else:
            # 6-value form (no loading-section updates)
            (img1_update, img2_update, img3_update, choice_update, message, state) = result
            return (
                gr.update(),  # info_section
                gr.update(),  # selection_section
                gr.update(),  # loading_section
                gr.update(),  # results_section
                img1_update,  # img1
                img2_update,  # img2
                img3_update,  # img3
                choice_update,  # image_choice
                message,  # status_message
                state,
            )

    def handle_final_results(state):
        """Show the results once the session is complete, else change no section."""
        if state.is_complete():
            return show_results(state)
        else:
            return gr.update(), gr.update(), gr.update(), gr.update(), None, "", ""

    # Two-stage handler: record the choice first, then compute the results in
    # the chained step.
    select_btn.click(
        fn=on_image_select,
        inputs=[image_choice, session_state],
        outputs=[
            info_section,
            selection_section,
            loading_section,
            results_section,
            img1,
            img2,
            img3,
            image_choice,
            status_message,
            session_state,
        ],
    ).then(
        fn=handle_final_results,
        inputs=[session_state],
        outputs=[
            info_section,
            selection_section,
            loading_section,
            results_section,
            result_image,
            result_title,
            result_explanation,
        ],
    )

    def restart_app(state):
        """Reset the session and return to the empty information form."""
        state.reset()

        return (
            gr.update(visible=True),  # info_section
            gr.update(visible=False),  # selection_section
            gr.update(visible=False),  # loading_section
            gr.update(visible=False),  # results_section
            "",  # firstname_input
            "",  # birthday_input
            "",  # city_input
            None,  # image_choice
            "Application réinitialisée. Veuillez entrer vos informations.",  # status_message
            state,
        )

    restart_btn.click(
        fn=restart_app,
        inputs=[session_state],
        outputs=[
            info_section,
            selection_section,
            loading_section,
            results_section,
            firstname_input,
            birthday_input,
            city_input,
            image_choice,
            status_message,
            session_state,
        ],
    )
1570
-
1571
-
1572
# Launch the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()