Aurel-test committed on
Commit
2be7a5c
·
verified ·
1 Parent(s): c54cfe1

Delete app_old.py

Browse files
Files changed (1) hide show
  1. app_old.py +0 -1566
app_old.py DELETED
@@ -1,1566 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- Web Demo v2 pour la base de données d'œuvres d'art - Version Sécurisée et Optimisée
4
- Interface multi-étapes avec matching basé sur prénom, date, ville et émotions
5
- Optimisé pour les performances avec caching et indexation
6
- Version sécurisée avec validation des entrées et gestion d'état propre
7
- """
8
-
9
- import gradio as gr
10
- import os
11
- import sys
12
- import logging
13
- from logging.handlers import RotatingFileHandler
14
- import random
15
- import re
16
- import json
17
- import uuid
18
- import time
19
- from datetime import datetime
20
- from typing import List, Dict, Tuple, Optional, Any, Set
21
- from collections import Counter, defaultdict
22
- from functools import lru_cache
23
- from dataclasses import dataclass, field, asdict
24
- from pathlib import Path
25
- import pandas as pd
26
-
27
# Main application logging configuration.
logging.basicConfig(
    level=logging.INFO,
    format="[%(asctime)s] %(levelname)s: %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)
logger = logging.getLogger(__name__)

# Optional dependency used for persistent log storage on HF Spaces.
try:
    from huggingface_hub import CommitScheduler

    HF_HUB_AVAILABLE = True
except ImportError:
    HF_HUB_AVAILABLE = False
    logger.warning(
        "huggingface_hub non installé - Les logs ne seront pas sauvegardés dans un dataset HF"
    )

# Session logging configuration.
SESSION_LOG_FILE = "session_logs.jsonl"
STATS_LOG_FILE = "statistics.json"

# HF dataset used for persistence (override through environment variables).
HF_DATASET_ID = os.environ.get(
    "HF_DATASET_ID", "ClickMons/art-matcher-logs"
)  # Replace with your own dataset
HF_TOKEN = os.environ.get("HF_TOKEN", None)  # HF token for authentication
LOGS_UPLOAD_INTERVAL = 10  # Upload every 10 minutes

# Local rotating file handler for the session log.
# exist_ok=True avoids the check-then-create race of
# `if not os.path.exists(...): os.makedirs(...)` when several processes start
# at the same time.
os.makedirs("logs", exist_ok=True)

session_file_handler = RotatingFileHandler(
    filename=os.path.join("logs", SESSION_LOG_FILE),
    maxBytes=10 * 1024 * 1024,  # 10MB
    backupCount=5,
    encoding="utf-8",
)
session_file_handler.setLevel(logging.INFO)
session_logger = logging.getLogger("session_logger")
session_logger.addHandler(session_file_handler)
session_logger.setLevel(logging.INFO)
71
-
72
- sys.path.insert(0, os.path.join(os.path.dirname(__file__), "src"))
73
-
74
- from art_pieces_db.database import Database
75
- from art_pieces_db.query import TargetProfile, WeightedLeximaxOptimizer, Optimizer
76
- from art_pieces_db.emotions import EmotionWheel
77
- from art_pieces_db.utils import str_to_date
78
-
79
-
80
@dataclass
class ScoringWeights:
    """Central home for the scoring constants, to avoid magic numbers."""

    # Per-criterion weights (name, date, place, emotion) used for pre-selection.
    PRESELECTION_NAME_WEIGHT: float = 3.0
    PRESELECTION_DATE_WEIGHT: float = 1.0
    PRESELECTION_PLACE_WEIGHT: float = 2.0
    PRESELECTION_EMOTION_WEIGHT: float = 0.0

    MIN_PRESELECTION_COUNT: int = 20
    MAX_IMAGES_PER_SELECTION: int = 3  # number of images per selection
    TOTAL_ROUNDS: int = 3  # number of rounds before the final recommendation
92
-
93
-
94
@dataclass
class SessionState:
    """Mutable state of one user session: inputs, round progress and tracking."""

    # User-provided profile
    firstname: str = ""
    birthday: str = ""
    city: str = ""

    # Image-selection progress
    current_round: int = 0
    selected_images: List[str] = field(default_factory=list)
    current_image_ids: List[str] = field(default_factory=list)

    preselected_pieces: Optional[pd.DataFrame] = None

    # Tracking data
    session_start_time: float = field(default_factory=time.time)
    recommendation_type: str = ""  # "name_date_place" or "emotions"
    final_artwork: str = ""

    def reset(self):
        """Put every field back to its initial value and restart the timer."""
        self.firstname = self.birthday = self.city = ""
        self.recommendation_type = self.final_artwork = ""
        self.current_round = 0
        self.selected_images, self.current_image_ids = [], []
        self.preselected_pieces = None
        self.session_start_time = time.time()

    def is_complete(self) -> bool:
        """Return True once all selection rounds have been played."""
        return ScoringWeights.TOTAL_ROUNDS <= self.current_round
129
-
130
-
131
class SessionLogger:
    """Persist one JSONL record per finished session plus aggregate statistics.

    Files are written under ``art_matcher_data``. When running on a HF Space
    with ``huggingface_hub`` available, a ``CommitScheduler`` is created over
    that folder so its content is pushed to the configured dataset.
    """

    # Recommendation types that get a counter in the statistics file.
    _KNOWN_RECOMMENDATION_TYPES = ("name_date_place", "emotions", "none")

    def __init__(self):
        # Local import: the lock is only needed by this class.
        import threading

        # Detect the HF Spaces environment
        self.is_hf_space = os.environ.get("SPACE_ID") is not None

        # Data directory
        self.data_dir = Path("art_matcher_data")
        self.data_dir.mkdir(parents=True, exist_ok=True)

        # One sessions file per process instance
        self.instance_id = uuid.uuid4()
        self.sessions_file = self.data_dir / f"sessions_{self.instance_id}.jsonl"
        self.stats_file = self.data_dir / "global_statistics.json"

        # Used when no scheduler (and therefore no scheduler lock) exists, so
        # that log_session() is serialized in every configuration.
        self._fallback_lock = threading.Lock()

        # Create the CommitScheduler when running on HF Spaces
        self.scheduler = None
        if self.is_hf_space and HF_HUB_AVAILABLE:
            try:
                self.scheduler = CommitScheduler(
                    repo_id=HF_DATASET_ID,
                    repo_type="dataset",
                    folder_path=self.data_dir,
                    path_in_repo="data",
                    every=LOGS_UPLOAD_INTERVAL,
                )
                logger.info(
                    f"✅ CommitScheduler initialisé - Instance ID: {self.instance_id}"
                )
                logger.info(
                    f"Les données seront automatiquement sauvegardées dans {HF_DATASET_ID}"
                )
            except Exception as e:
                # Best effort: fall back to local-only storage.
                logger.warning(f"⚠️ CommitScheduler non disponible: {e}")
                logger.info("Les données seront stockées localement uniquement")

    def log_session(self, state: "SessionState", recommendation_system: str):
        """Record a finished session and refresh the global statistics.

        Args:
            state: Session state; ``session_start_time`` and ``final_artwork``
                are read from it.
            recommendation_system: Stored as the entry's ``recommendation_type``.
        """
        session_duration = time.time() - state.session_start_time

        entry = {
            # NOTE(review): this is the per-process instance id, so every
            # session logged by the same process shares the same value.
            "session_id": str(self.instance_id),
            "datetime": datetime.now().isoformat(),
            "duration_seconds": round(session_duration, 2),
            "recommended_artwork": state.final_artwork,
            "recommendation_type": recommendation_system,
        }

        # Hold the scheduler lock when there is one (so an upload never sees a
        # half-written file); otherwise serialize writers with our own lock.
        lock = self.scheduler.lock if self.scheduler else self._fallback_lock
        with lock:
            self._write_session(entry)
            self._update_stats(entry)

        logger.info(f"Session enregistrée - Durée: {entry['duration_seconds']}s")
        session_logger.info(json.dumps(entry, ensure_ascii=False))

    def _write_session(self, entry: dict):
        """Append one session entry to the JSONL sessions file."""
        with self.sessions_file.open("a", encoding="utf-8") as f:
            f.write(json.dumps(entry, ensure_ascii=False) + "\n")

    def _load_stats(self) -> dict:
        """Return the stored statistics, or ``{}`` when missing or unparsable."""
        if not self.stats_file.exists():
            return {}
        try:
            with self.stats_file.open("r", encoding="utf-8") as f:
                loaded = json.load(f)
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            return {}
        return loaded if isinstance(loaded, dict) else {}

    def _update_stats(self, session_entry: dict):
        """Fold one session entry into the global statistics file."""
        stats = self._load_stats()
        timestamp = session_entry["datetime"]

        # A file without the main counter is treated as empty.
        if "total_sessions" not in stats:
            stats = {}

        # Fill in whatever is missing so a partially written or older file
        # cannot raise KeyError below.
        defaults = {
            "total_sessions": 0,
            "total_duration_seconds": 0,
            "average_duration_seconds": 0,
            "artworks_recommended": {},
            "recommendation_types": {},
            "first_session": timestamp,
            "last_session": timestamp,
        }
        for key, value in defaults.items():
            stats.setdefault(key, value)
        for rec_name in self._KNOWN_RECOMMENDATION_TYPES:
            stats["recommendation_types"].setdefault(rec_name, 0)

        # Counters
        stats["total_sessions"] += 1
        stats["total_duration_seconds"] += session_entry.get("duration_seconds", 0)
        stats["average_duration_seconds"] = (
            stats["total_duration_seconds"] / stats["total_sessions"]
        )
        stats["last_session"] = timestamp

        # Recommendation types (only types already present are counted)
        rec_type = session_entry.get("recommendation_type", "none")
        if rec_type in stats["recommendation_types"]:
            stats["recommendation_types"][rec_type] += 1

        # Recommended artworks
        artwork = session_entry.get("recommended_artwork")
        if artwork and artwork != "Aucune œuvre trouvée":
            recommended = stats["artworks_recommended"]
            recommended[artwork] = recommended.get(artwork, 0) + 1

        # Most popular artwork
        if stats["artworks_recommended"]:
            title, count = max(
                stats["artworks_recommended"].items(), key=lambda x: x[1]
            )
            stats["most_popular_artwork"] = {
                "title": title,
                "count": count,
                "percentage": (count / stats["total_sessions"]) * 100,
            }

        # Usage percentages
        total = stats["total_sessions"]
        if total > 0:
            stats["recommendation_percentages"] = {
                k: (v / total) * 100 for k, v in stats["recommendation_types"].items()
            }

        stats["last_updated"] = datetime.now().isoformat()

        with self.stats_file.open("w", encoding="utf-8") as f:
            json.dump(stats, f, indent=2, ensure_ascii=False)

    def get_statistics(self) -> dict:
        """Return the global statistics, or ``{}`` when unavailable."""
        if self.stats_file.exists():
            try:
                with self.stats_file.open("r", encoding="utf-8") as f:
                    return json.load(f)
            except (OSError, ValueError) as e:
                logger.error(f"Erreur lecture stats: {e}")
        return {}
279
-
280
-
281
- # Initialiser le logger de sessions
282
- session_tracker = SessionLogger()
283
-
284
-
285
class SecurityValidator:
    """Central place for input and filename validation."""

    PATH_TRAVERSAL_PATTERN = re.compile(r"\.\.|\.\/")
    VALID_FILENAME_PATTERN = re.compile(r"^[\w\-\.\s]+$")
    VALID_INPUT_PATTERN = re.compile(
        r"^[\w\-\s\'\.,àâäéèêëïîôûùüÿæœçÀÂÄÉÈÊËÏÎÔÛÙÜŸÆŒÇ]+$", re.UNICODE
    )
    DATE_PATTERN = re.compile(r"^\d{1,2}/\d{1,2}$")

    @classmethod
    def validate_filename(cls, filename: str) -> bool:
        """Return True when *filename* looks safe to use.

        Rejects empty names, names containing ``..`` or ``./``, and names
        whose base name has characters outside ``VALID_FILENAME_PATTERN``.
        """
        if not filename:
            return False

        # Path traversal attempts
        if cls.PATH_TRAVERSAL_PATTERN.search(filename):
            logger.warning(f"Tentative de path traversal détectée: {filename}")
            return False

        # Only the base name is checked against the allowed character set
        base_name = os.path.basename(filename)
        if not cls.VALID_FILENAME_PATTERN.fullmatch(base_name):
            logger.warning(f"Nom de fichier invalide: {filename}")
            return False

        return True

    @classmethod
    def sanitize_input(cls, input_str: str, max_length: int = 100) -> str:
        """Truncate, trim and strip disallowed characters from user input.

        Args:
            input_str: Raw user input; falsy values yield ``""``.
            max_length: Maximum number of characters kept before trimming.

        Returns:
            The input unchanged when it is already valid, otherwise the input
            with every disallowed character removed.
        """
        if not input_str:
            return ""

        # Truncate first, then trim
        input_str = input_str[:max_length].strip()

        if not cls.VALID_INPUT_PATTERN.fullmatch(input_str):
            # Keep only the allowed characters. Removing characters can expose
            # whitespace at the edges (e.g. "!! Jean"), so trim again.
            cleaned = "".join(
                c for c in input_str if cls.VALID_INPUT_PATTERN.fullmatch(c)
            ).strip()
            logger.info(f"Input sanitized: '{input_str}' -> '{cleaned}'")
            return cleaned

        return input_str

    @classmethod
    def validate_date(cls, date_str: str) -> Tuple[bool, Optional[datetime]]:
        """Validate and parse a ``DD/MM`` date.

        Returns:
            ``(True, datetime)`` with the year fixed to 2000 (a leap year, so
            29/02 is accepted), or ``(False, None)`` when the input is invalid.
        """
        if not date_str:
            return False, None

        # fullmatch: with match(), the pattern's `$` would also accept a
        # trailing newline such as "15/03\n".
        if not cls.DATE_PATTERN.fullmatch(date_str):
            return False, None

        try:
            day, month = map(int, date_str.split("/"))
            if not (1 <= day <= 31 and 1 <= month <= 12):
                return False, None

            # Raises ValueError for impossible dates such as 31/02
            date_obj = datetime(year=2000, month=month, day=day)
            return True, date_obj
        except ValueError as e:
            logger.error(f"Erreur de parsing de date: {e}")
            return False, None
350
-
351
-
352
class ImageIndexer:
    """Index the image files of a directory and resolve image names to files.

    File names are normalized (lower-cased, extension and ``_medium`` removed,
    separators unified) and stored in ``image_lookup``; MAR-BVM files also get
    an entry in ``mar_bvm_lookup`` keyed by their first six dash-separated
    parts.
    """

    IMAGE_EXTENSIONS = (".jpg", ".png")
    COMMON_SUFFIXES = [".jpg", ".png", "_medium"]
    MAR_BVM_TEST_SUFFIXES = ["-001", "-002", "-003"]

    def __init__(self, images_dir: str):
        self.images_dir = os.path.abspath(images_dir)
        self.available_files = set()
        self.image_lookup = {}  # normalized_name -> filename
        self.mar_bvm_lookup = {}  # base key -> [filenames] for MAR-BVM files
        self._build_index()

    def _strip_file_extensions(self, filename: str) -> str:
        """Return *filename* lower-cased, without ``_medium.jpg``/``.jpg``/``.png``."""
        base_name = filename.lower()
        if base_name.endswith("_medium.jpg"):
            return base_name[: -len("_medium.jpg")]
        if base_name.endswith((".jpg", ".png")):
            return base_name[:-4]
        return base_name

    def _normalize_basic_patterns(self, name: str) -> str:
        """Lower-case, trim, drop known suffixes and turn spaces/underscores into dashes."""
        normalized = name.lower().strip().rstrip(",")

        for suffix in self.COMMON_SUFFIXES:
            if normalized.endswith(suffix):
                normalized = normalized[: -len(suffix)]

        return re.sub(r"[\s_]+", "-", normalized)

    def _normalize_mar_bvm_format(self, name: str) -> str:
        """Replace ``.0.`` with ``-0-`` and every other dot with a dash (MAR-BVM only)."""
        if "mar-bvm" not in name:
            return name
        return name.replace(".0.", "-0-").replace(".", "-")

    def _normalize_name(self, name: str) -> str:
        """Return the lookup key for *name*."""
        normalized = self._normalize_basic_patterns(name)

        if "mar-bvm" in normalized:
            normalized = self._normalize_mar_bvm_format(normalized)
        # Names starting with a year (like 2022.0.86) keep their dots
        elif not normalized.startswith("20"):
            normalized = normalized.replace(".", "-")

        return normalized

    def _create_mar_bvm_lookups(self, normalized: str, filename: str):
        """Register *filename* under its MAR-BVM base key, when it has one."""
        if "mar-bvm" not in normalized:
            return

        parts = normalized.split("-")
        # A numeric part at position 5 or later (after mar-bvm-7-2022-0) is required
        if any(part.isdigit() for part in parts[5:]):
            base_key = "-".join(parts[:6])  # e.g. mar-bvm-7-2022-0-22
            self.mar_bvm_lookup.setdefault(base_key, []).append(filename)

    def _process_image_file(self, filename: str):
        """Add one directory entry to the indexes if it is a valid image file."""
        if not SecurityValidator.validate_filename(filename):
            logger.warning(f"Fichier ignoré pour raison de sécurité: {filename}")
            return

        if not filename.lower().endswith(self.IMAGE_EXTENSIONS):
            return

        self.available_files.add(filename)

        base_name = self._strip_file_extensions(filename)
        normalized = self._normalize_name(base_name)
        self.image_lookup[normalized] = filename
        self._create_mar_bvm_lookups(normalized, filename)

    def _build_index(self):
        """Scan ``images_dir`` and fill the indexes.

        A directory that cannot be listed is logged and leaves every index
        empty instead of raising.
        """
        try:
            # Sorted so that key collisions, MAR-BVM candidate order and
            # partial matching do not depend on the arbitrary listdir order.
            all_files = sorted(os.listdir(self.images_dir))
        except OSError as e:
            logger.error(f"Erreur lors de la construction de l'index: {e}")
            self.available_files = set()
            self.image_lookup = {}
            self.mar_bvm_lookup = {}
            return

        for filename in all_files:
            self._process_image_file(filename)

        logger.info(
            f"Index des images construit: {len(self.available_files)} fichiers disponibles, "
            f"{len(self.image_lookup)} entrées normalisées"
        )

    def _clean_input_name(self, image_name: str) -> str:
        """Trim the name, drop a trailing comma/dash and spaces before ``-<digit>``."""
        cleaned = image_name.strip().rstrip(",").rstrip("-").strip()
        return re.sub(r"\s+(-\d)", r"\1", cleaned)

    def _normalize_mar_bvm_input(self, image_name: str) -> str:
        """Bring a MAR-BVM name from the database into the dashed file-name form."""
        if "MAR-BVM" not in image_name:
            return image_name

        # Missing "7-" as in MAR-BVM-2022-0-153
        if "MAR-BVM-2022-0-" in image_name:
            image_name = image_name.replace("MAR-BVM-2022-0-", "MAR-BVM-7-2022-0-")

        if ".0." in image_name:
            image_name = image_name.replace(".0.", "-0-")

        # Trailing .001, .002 -> -001, -002
        image_name = re.sub(r"\.(\d{3})$", r"-\1", image_name)

        # Trailing .1 or .2 -> -1 or -2
        if image_name.endswith((".1", ".2")):
            image_name = image_name[:-2] + "-" + image_name[-1]

        # Any remaining dot becomes a dash
        return image_name.replace(".", "-")

    def _try_mar_bvm_lookups(self, normalized: str) -> Optional[str]:
        """Resolve a MAR-BVM key through the base-key index or ``-00N`` suffixes."""
        candidates = self.mar_bvm_lookup.get(normalized)
        if candidates:
            return candidates[0]

        for suffix in self.MAR_BVM_TEST_SUFFIXES:
            test_pattern = f"{normalized}{suffix}"
            if test_pattern in self.image_lookup:
                return self.image_lookup[test_pattern]

        return None

    def _try_year_format_lookup(self, image_name: str) -> Optional[str]:
        """Look up names starting with ``20`` using only lower-casing and space->dash."""
        if not image_name.startswith("20"):
            return None

        test_name = image_name.lower().replace(" ", "-")
        return self.image_lookup.get(test_name)

    def _try_partial_matching(self, normalized: str) -> Optional[str]:
        """Last resort: return the first indexed file whose key contains *normalized*."""
        # An empty key is a substring of every key and would return an
        # arbitrary, unrelated image.
        if not normalized:
            return None

        for key, filename in self.image_lookup.items():
            if normalized in key:
                return filename
        return None

    def _split_multiple_names(self, image_name: str) -> List[str]:
        """Split a field holding several names separated by ``,``, ``/`` or one `` - ``."""
        if "," in image_name:
            return [name.strip() for name in image_name.split(",") if name.strip()]

        if "/" in image_name:
            return [name.strip() for name in image_name.split("/") if name.strip()]

        # Cases like "MAR-BVM-7-2022.0.81 - 2022.0.81"
        if " - " in image_name and image_name.count(" - ") == 1:
            parts = [name.strip() for name in image_name.split(" - ")]
            if len(parts) == 2:
                first, second = parts
                # Keep only the first part when the second merely repeats it
                if first.endswith(second) or second in first:
                    return [first]
                return parts

        return [image_name]

    def find_image(self, image_name: str) -> Optional[str]:
        """Return the file matching *image_name*, or None.

        The field may contain several names; the first one that resolves wins.
        """
        if not image_name:
            return None

        for name in self._split_multiple_names(image_name):
            result = self._find_single_image(name)
            if result:
                return result

        return None

    def _find_single_image(self, image_name: str) -> Optional[str]:
        """Resolve one name: exact key, MAR-BVM variants, year format, then partial match."""
        cleaned_name = self._clean_input_name(image_name)
        processed_name = self._normalize_mar_bvm_input(cleaned_name)
        normalized = self._normalize_name(processed_name)

        if normalized in self.image_lookup:
            return self.image_lookup[normalized]

        if "mar-bvm" in normalized:
            result = self._try_mar_bvm_lookups(normalized)
            if result:
                return result

        result = self._try_year_format_lookup(image_name)
        if result:
            return result

        return self._try_partial_matching(normalized)

    def get_all_files(self) -> Set[str]:
        """Return a copy of the set of indexed file names."""
        return self.available_files.copy()
583
-
584
-
585
- class ArtMatcherV2:
586
- """Classe principale pour le matching d'œuvres d'art"""
587
-
588
def __init__(self, csv_path: str, images_dir: str):
    """Load the database, index the images and precompute the lookup tables.

    Args:
        csv_path: Path passed to ``Database``.
        images_dir: Directory holding the artwork images.
    """
    self.db = Database(csv_path)
    self.images_dir = os.path.abspath(images_dir)
    self.emotion_wheel = EmotionWheel()
    self.weights = ScoringWeights()

    # Optimizer built with an empty profile and no weights; the visible code
    # only uses it for its place_similarity() method.
    self.optimizer_helper = WeightedLeximaxOptimizer(TargetProfile(), {})

    self.image_indexer = ImageIndexer(images_dir)

    # Keep only the rows whose name_image is present and not blank.
    df = self.db.get_dataframe()
    self.df_with_images = df[
        df["name_image"].notna()
        & (df["name_image"] != "")
        & (df["name_image"].str.strip() != "")
    ].copy()

    self.df_with_images["database_id_str"] = self.df_with_images[
        "database_id"
    ].astype(str)
    # database_id (as a string) -> index label of the row in df_with_images
    self.id_to_index = {
        str(row["database_id"]): idx for idx, row in self.df_with_images.iterrows()
    }

    # Must run after image_indexer and df_with_images are set: it reads both.
    self.artwork_images = self._build_artwork_image_index()

    # Database object created without running Database.__init__, then pointed
    # at the filtered dataframe.
    self.temp_db_with_images = Database.__new__(Database)
    self.temp_db_with_images.dataframe = self.df_with_images

    logger.info(f"Base de données chargée: {self.db.n_pieces()} œuvres")
    logger.info(f"Œuvres avec images: {len(self.df_with_images)}")
    logger.info(f"Index des images: {len(self.artwork_images)} œuvres mappées")
621
-
622
def _sanitize_input(self, input_str: str) -> str:
    """Clean a user input by delegating to SecurityValidator.sanitize_input."""
    return SecurityValidator.sanitize_input(input_str)
625
-
626
def _parse_date(self, date_str: str) -> Optional[datetime]:
    """Return the validated date for *date_str*, or None when it is invalid."""
    ok, parsed = SecurityValidator.validate_date(date_str)
    if not ok:
        return None
    return parsed
630
-
631
def _build_artwork_image_index(self) -> Dict[str, List[str]]:
    """Build the artwork_id -> [image paths] index from the name_image column.

    Each name_image value is cleaned of known separator patterns, split into
    individual names and resolved through the image indexer. Artworks with
    no resolved image are left out of the result.
    """
    artwork_images = {}

    for idx, row in self.df_with_images.iterrows():
        artwork_id = str(row["database_id"])
        image_paths = []

        if row["name_image"] and str(row["name_image"]).strip():
            # Parse the image names - handle special separators
            image_string = str(row["name_image"]).strip().strip('"')

            # Handle cases with " / " or " - " separators
            if " / " in image_string:
                # Take first part before the slash
                image_string = image_string.split(" / ")[0].strip()

            # Special case: if it has " - 2022" it's a separator, not part of the name
            if " - 2022" in image_string:
                # Take the part before " - 2022"
                image_string = image_string.split(" - 2022")[0].strip()
            elif " - " in image_string and "MAR-BVM-7-2022-0-" not in image_string:
                # For other MAR-BVM formats with " - " separator
                parts = image_string.split(" - ")
                if "MAR-BVM" in parts[0]:
                    image_string = parts[0].strip()

            # Clean up trailing " -" or spaces before "-001"
            image_string = re.sub(
                r"\s+-\s*$", "", image_string
            )  # Remove trailing " -"
            image_string = re.sub(
                r"\s+(-\d)", r"\1", image_string
            )  # Remove spaces before -001

            # Split on commas and slashes, dropping empty pieces
            images = [
                img.strip()
                for img in re.split(r"[,/]", image_string)
                if img.strip()
            ]

            for img_name in images:
                # Find the actual file for this image name
                matched_file = self.image_indexer.find_image(img_name)
                if matched_file:
                    img_path = os.path.join(self.images_dir, matched_file)
                    image_paths.append(img_path)

        # Only artworks with at least one resolved image are indexed
        if image_paths:
            artwork_images[artwork_id] = image_paths

    return artwork_images
684
-
685
def preselect_artworks(
    self, firstname: str, birthday: str, city: str
) -> pd.DataFrame:
    """
    Pre-select artworks following the hierarchy: first name > date > city.

    Args:
        firstname: First name entered by the user (sanitized here).
        birthday: Birthday string, parsed with ``_parse_date``.
        city: City entered by the user (sanitized here).

    Returns:
        The optimizer rows whose score is greater than ``(0, 0, 0)``, or the
        first ``MIN_PRESELECTION_COUNT`` optimizer rows when fewer than that
        many rows qualify.
    """
    logger.info("=== DÉBUT PRÉ-SÉLECTION ===")

    # Sanitize the inputs
    firstname = self._sanitize_input(firstname)
    city = self._sanitize_input(city)

    logger.info(
        f"Critères de pré-sélection: prénom='{firstname}', date='{birthday}', ville='{city}'"
    )

    # None when the birthday does not validate
    birth_date = self._parse_date(birthday)
    if birth_date:
        logger.info(f"Date convertie: {birth_date.strftime('%d/%m')}")

    profile = TargetProfile()
    profile.set_target_name(firstname)
    profile.set_target_date(birth_date)
    profile.set_target_place(city)

    weights = {
        "related_names": self.weights.PRESELECTION_NAME_WEIGHT,
        "related_dates": self.weights.PRESELECTION_DATE_WEIGHT,
        "related_places": self.weights.PRESELECTION_PLACE_WEIGHT,
        "related_emotions": self.weights.PRESELECTION_EMOTION_WEIGHT,
    }

    logger.info(
        f"Poids utilisés: nom={weights['related_names']}, date={weights['related_dates']}, lieu={weights['related_places']}, émotions={weights['related_emotions']}"
    )

    optimizer = WeightedLeximaxOptimizer(profile, weights)
    result = optimizer.optimize_max(self.temp_db_with_images)

    # NOTE(review): assumes each "score" value is a tuple comparable with a
    # 3-tuple - confirm against WeightedLeximaxOptimizer.
    preselected = result[result["score"] > (0, 0, 0)]
    logger.info(f"Œuvres avec score > 0: {len(preselected)}")

    # Too few scored rows: fall back to the first rows of the full result
    # (presumably ordered best-first by optimize_max - verify).
    if len(preselected) < self.weights.MIN_PRESELECTION_COUNT:
        preselected = result.head(self.weights.MIN_PRESELECTION_COUNT)
        logger.info(f"Ajustement au minimum requis: {len(preselected)} œuvres")

    # Diagnostic logging only: nothing below changes the returned frame.
    logger.info("Top 5 pré-sélections:")
    for i, (idx, piece) in enumerate(preselected.head(5).iterrows()):
        logger.info(
            f" {i+1}. Œuvre #{piece['database_id']} - Score: {piece['score']}"
        )
        if firstname and piece["related_names"]:
            name_score = Optimizer.name_similarity(
                firstname, piece["related_names"]
            )
            if name_score > 0:
                logger.info(
                    f" → Nom: {piece['related_names']} (score: {name_score:.2f})"
                )
        if birth_date and piece["related_dates"]:
            date_score = Optimizer.date_similarity(
                birth_date, piece["related_dates"]
            )
            if date_score > 0:
                logger.info(
                    f" → Dates: {[d.strftime('%d/%m') for d in piece['related_dates']]} (score: {date_score:.2f})"
                )
        if city and piece["related_places"]:
            place_score = self.optimizer_helper.place_similarity(
                city, piece["related_places"]
            )
            if place_score > 0:
                logger.info(
                    f" → Lieux: {piece['related_places']} (score: {place_score:.2f})"
                )

    logger.info("=== FIN PRÉ-SÉLECTION ===")
    return preselected
763
-
764
def get_random_images_for_selection(
    self, round_num: int, already_selected: Optional[List[str]] = None
) -> List[Tuple[str, str]]:
    """
    Return random images from the pre-built index for one selection round.

    Args:
        round_num: Round number, used for logging only.
        already_selected: Artwork ids picked in earlier rounds; they are
            excluded from the draw.

    Returns:
        Up to ``MAX_IMAGES_PER_SELECTION`` ``(image_path, artwork_id)`` pairs.
        When too few indexed artworks remain, raw image files are returned
        instead, each with the placeholder id ``"0"``.
    """
    logger.info(f"=== SÉLECTION D'IMAGES POUR LE TOUR {round_num} ===")

    if already_selected:
        logger.info(f"Œuvres déjà sélectionnées à exclure: {already_selected}")

    available_artworks = list(self.artwork_images.keys())

    # Exclude the artworks that were already selected
    if already_selected:
        already_selected_set = set(already_selected)
        available_artworks = [
            a for a in available_artworks if a not in already_selected_set
        ]

    logger.info(
        f"Nombre total d'œuvres avec images disponibles: {len(available_artworks)}"
    )

    max_images = self.weights.MAX_IMAGES_PER_SELECTION

    if len(available_artworks) < max_images:
        logger.warning(
            f"Seulement {len(available_artworks)} œuvres avec images disponibles"
        )
        # Fallback on raw files. Accept the same extensions as the indexer,
        # case-insensitively, and walk the files in sorted order so the
        # result does not depend on set iteration order.
        extensions = self.image_indexer.IMAGE_EXTENSIONS
        direct_images = [
            (os.path.join(self.images_dir, filename), "0")
            for filename in sorted(self.image_indexer.get_all_files())
            if filename.lower().endswith(extensions)
        ]
        return direct_images[:max_images]

    # The guard above guarantees at least max_images artworks are available
    selected_artworks = random.sample(available_artworks, max_images)

    logger.info(f"Œuvres sélectionnées aléatoirement: {selected_artworks}")

    selected = []
    for artwork_id in selected_artworks:
        # One random image per drawn artwork
        img_path = random.choice(self.artwork_images[artwork_id])
        selected.append((img_path, artwork_id))
        if artwork_id in self.id_to_index:
            idx = self.id_to_index[artwork_id]
            artwork = self.df_with_images.loc[idx]
            logger.info(f" Image {len(selected)}: Œuvre #{artwork_id}")
            logger.info(f" Type: {artwork['art_piece_type']}")
            logger.info(f" Émotions: {artwork['related_emotions']}")

    logger.info(f"=== FIN SÉLECTION IMAGES TOUR {round_num} ===")
    return selected
820
-
821
- def extract_emotions_from_image_id(self, database_id: str) -> List[str]:
822
- """
823
- Extrait les émotions associées à une œuvre via son ID
824
- Utilise l'index pré-calculé pour éviter les conversions répétées
825
- """
826
- if database_id in self.id_to_index:
827
- idx = self.id_to_index[database_id]
828
- emotions = self.df_with_images.loc[idx, "related_emotions"]
829
- if isinstance(emotions, list):
830
- return emotions
831
- return []
832
-
833
# NOTE(review): lru_cache on a method includes `self` in the cache key and
# keeps the instance referenced for as long as the entry stays cached.
@lru_cache(maxsize=1024)
def _cached_emotion_similarity(self, emotion1: str, emotion2: str) -> float:
    """Return the emotion-wheel similarity of two emotions, memoized."""
    return self.emotion_wheel.calculate_emotion_similarity(emotion1, emotion2)
837
-
838
def calculate_emotion_profile(self, selected_ids: List[str]) -> Dict[str, float]:
    """
    Build the emotion profile of the selected images.

    Each emotion's share is its number of occurrences across the selected
    artworks divided by the total number of occurrences. The result is
    empty when no emotion was found.
    """
    logger.info("=== CALCUL DU PROFIL ÉMOTIONNEL ===")
    logger.info(f"Images sélectionnées: {selected_ids}")

    tally = Counter()
    for db_id in selected_ids:
        emotions = self.extract_emotions_from_image_id(db_id)
        logger.info(f" Image {db_id}: émotions = {emotions}")
        tally.update(emotions)

    total = sum(tally.values())
    emotion_profile = {}
    if total <= 0:
        logger.info("Aucune émotion trouvée dans les images sélectionnées")
    else:
        for emotion, count in tally.items():
            emotion_profile[emotion] = count / total
        logger.info(f"Profil émotionnel calculé: {emotion_profile}")

    logger.info("=== FIN CALCUL PROFIL ÉMOTIONNEL ===")
    return emotion_profile
864
-
865
- def _get_artwork_image(self, artwork) -> Optional[str]:
866
- """Retourne le chemin de l'image pour une œuvre d'art"""
867
- artwork_id = str(artwork["database_id"])
868
-
869
- # Simply return the first image from our pre-built index
870
- if artwork_id in self.artwork_images:
871
- return self.artwork_images[artwork_id][0]
872
-
873
- return None
874
-
875
def find_best_match(
    self, firstname: str, birthday: str, city: str, selected_image_ids: List[str]
) -> Tuple[Optional[str], str, Dict]:
    """Pick the artwork that best fits a visitor.

    Resolution order:
      1. An exact hit on first name (similarity >= 0.95), birthday or city
         among the pre-selected artworks wins immediately.
      2. Otherwise, if the pre-selection holds artworks with a non-zero
         score, those are ranked by emotional similarity.
      3. With no usable pre-selection, every artwork that has an image is
         ranked by emotional similarity.
      4. Ties are broken by the artwork types the visitor picked most
         often, then at random.

    Args:
        firstname: Visitor's first name (sanitised here).
        birthday: Birthday string as typed by the visitor.
        city: Visitor's city (sanitised here).
        selected_image_ids: IDs of the images chosen during selection;
            these artworks are excluded from the emotion-based ranking.

    Returns:
        ``(image_path, reason, info)``. ``info`` has the keys ``title``,
        ``type``, ``place``, ``emotions`` and ``explanation``. When nothing
        matches, ``(None, "Aucune correspondance trouvée", {})``.
    """
    firstname = self._sanitize_input(firstname)
    city = self._sanitize_input(city)
    birth_date = self._parse_date(birthday)

    logger.info(
        f"Recherche de correspondance pour: {firstname}, {birthday}, {city}"
    )

    preselected = self.preselect_artworks(firstname, birthday, city)

    exact = self._detect_exact_match(
        preselected, firstname, birthday, birth_date, city
    )
    if exact is not None:
        return exact

    logger.info("Aucun match exact trouvé, passage à la sélection par émotions")

    emotion_profile = self.calculate_emotion_profile(selected_image_ids)

    logger.info("=== STRATÉGIE DE MATCHING ===")
    # NOTE(review): "score" presumably holds (name, date, place) tuples
    # produced by preselect_artworks — confirm against that method.
    valid_preselection = preselected[preselected["score"] > (0, 0, 0)]
    has_preselection = len(valid_preselection) > 0

    if has_preselection:
        logger.info(
            f"📋 CAS A: {len(valid_preselection)} œuvres pré-sélectionnées - utilisation des émotions pour départager"
        )
        candidates = valid_preselection
    else:
        logger.info(
            f"📋 CAS B: Aucune pré-sélection valide - recherche par émotions sur {len(self.df_with_images)} œuvres"
        )
        candidates = self.df_with_images

    # Never recommend an artwork the visitor already picked themselves.
    selected_artwork_ids = set(selected_image_ids)
    candidates = candidates[
        ~candidates["database_id"].astype(str).isin(selected_artwork_ids)
    ]
    logger.info(
        f"Après exclusion des œuvres déjà sélectionnées {selected_artwork_ids}: {len(candidates)} candidats restants"
    )

    best_matches, best_emotion_score = self._score_candidates_by_emotion(
        candidates, emotion_profile
    )

    if len(best_matches) > 1:
        best_match, match_reason = self._tie_break_by_type(
            best_matches, selected_image_ids
        )
    elif len(best_matches) == 1:
        best_match = best_matches[0]
        match_reason = "Meilleure correspondance émotionnelle"
    else:
        logger.info("Aucune correspondance trouvée")
        return None, "Aucune correspondance trouvée", {}

    final_reason = self._explain_match(
        best_match,
        firstname,
        birth_date,
        city,
        has_preselection,
        best_emotion_score,
        match_reason,
    )

    logger.info(f"\n🏆 RÉSULTAT FINAL: Œuvre #{best_match['database_id']}")
    logger.info(f" Raison: {final_reason}")
    logger.info(f" Type: {best_match['art_piece_type']}")
    logger.info(f" Lieu: {best_match['art_piece_place']}")

    match_image = self._get_artwork_image(best_match)
    return match_image, final_reason, self._artwork_summary(best_match)

def _artwork_summary(self, piece) -> Dict:
    """Build the info dict returned to the UI for one artwork row."""
    return {
        "title": f"Œuvre #{piece['database_id']}",
        "type": piece["art_piece_type"],
        "place": piece["art_piece_place"],
        "emotions": piece["related_emotions"],
        "explanation": piece["explanation"],
    }

def _detect_exact_match(self, preselected, firstname, birthday, birth_date, city):
    """Return the result tuple of the first exact name/date/city hit, else None."""
    logger.info("=== DÉTECTION DE MATCH EXACT ===")
    for _, piece in preselected.iterrows():
        if firstname and piece["related_names"]:
            name_score = Optimizer.name_similarity(
                firstname, piece["related_names"]
            )
            if name_score >= 0.95:
                logger.info(
                    f"🎯 MATCH EXACT TROUVÉ: prénom '{firstname}' → œuvre #{piece['database_id']} (score: {name_score:.2f})"
                )
                logger.info(f" Noms dans l'œuvre: {piece['related_names']}")
                return (
                    self._get_artwork_image(piece),
                    f"Prénom '{firstname}' correspond exactement",
                    self._artwork_summary(piece),
                )

        if birth_date and piece["related_dates"]:
            date_score = Optimizer.date_similarity(
                birth_date, piece["related_dates"]
            )
            if date_score == 1.0:
                logger.info(
                    f"🎯 MATCH EXACT TROUVÉ: date '{birthday}' → œuvre #{piece['database_id']}"
                )
                logger.info(
                    f" Dates dans l'œuvre: {[d.strftime('%d/%m/%Y') for d in piece['related_dates']]}"
                )
                return (
                    self._get_artwork_image(piece),
                    f"Date d'anniversaire {birthday} correspond exactement",
                    self._artwork_summary(piece),
                )

        if city and piece["related_places"]:
            place_score = self.optimizer_helper.place_similarity(
                city, piece["related_places"]
            )
            if place_score == 1.0:
                logger.info(
                    f"🎯 MATCH EXACT TROUVÉ: ville '{city}' → œuvre #{piece['database_id']}"
                )
                logger.info(f" Lieux dans l'œuvre: {piece['related_places']}")
                return (
                    self._get_artwork_image(piece),
                    f"Ville '{city}' correspond exactement",
                    self._artwork_summary(piece),
                )

    return None

def _score_candidates_by_emotion(self, candidates, emotion_profile):
    """Return (rows sharing the top emotion score, that score) for the candidates."""
    logger.info("=== CALCUL DES SCORES ÉMOTIONNELS ===")
    best_matches = []
    best_emotion_score = -1

    for _, piece in candidates.iterrows():
        emotion_score = 0
        piece_emotions = piece["related_emotions"]

        if emotion_profile and piece_emotions:
            for user_emotion, weight in emotion_profile.items():
                # Best similarity between this user emotion and any emotion
                # of the artwork, never below 0.
                best_similarity = max(
                    [
                        0,
                        *(
                            self._cached_emotion_similarity(
                                user_emotion, piece_emotion
                            )
                            for piece_emotion in piece_emotions
                        ),
                    ]
                )
                emotion_score += best_similarity * weight

            if len(piece_emotions) > 0:
                emotion_score /= len(piece_emotions)

        if emotion_score > best_emotion_score:
            best_emotion_score = emotion_score
            best_matches = [piece]
            logger.info(
                f" Nouveau meilleur score émotionnel: {emotion_score:.3f} - Œuvre #{piece['database_id']}"
            )
        elif emotion_score == best_emotion_score and emotion_score > 0:
            # Zero-score ties are not collected: the first candidate stays.
            best_matches.append(piece)
            logger.info(
                f" Score égal au meilleur: {emotion_score:.3f} - Œuvre #{piece['database_id']}"
            )

    logger.info(
        f"Nombre de meilleures correspondances: {len(best_matches)} avec score {best_emotion_score:.3f}"
    )
    return best_matches, best_emotion_score

def _tie_break_by_type(self, best_matches, selected_image_ids):
    """Return (winner, reason) among tied rows, favouring the visitor's most-picked types."""
    logger.info("=== DÉPARTAGE PAR TYPE D'OBJET ===")
    selected_types_counter = Counter(
        self.df_with_images.loc[self.id_to_index[img_id], "art_piece_type"]
        for img_id in selected_image_ids
        if img_id in self.id_to_index
    )

    type_scored_matches = []
    best_type_score = -1
    for piece in best_matches:
        type_score = selected_types_counter.get(piece["art_piece_type"], 0)
        if type_score > best_type_score:
            best_type_score = type_score
            type_scored_matches = [piece]
        elif type_score == best_type_score:
            type_scored_matches.append(piece)

    if len(type_scored_matches) > 1:
        logger.info(
            f" {len(type_scored_matches)} œuvres avec le même score de type ({best_type_score}) - sélection aléatoire"
        )
        return (
            random.choice(type_scored_matches),
            "Sélection aléatoire parmi les meilleures correspondances",
        )

    best_match = type_scored_matches[0]
    logger.info(
        f" Type '{best_match['art_piece_type']}' sélectionné avec score {best_type_score}"
    )
    return best_match, f"Type d'objet '{best_match['art_piece_type']}' préféré"

def _explain_match(
    self,
    best_match,
    firstname,
    birth_date,
    city,
    has_preselection,
    best_emotion_score,
    match_reason,
):
    """Join the criteria that contributed to the match into one reason string."""
    reasons = []
    if has_preselection:
        if firstname and best_match["related_names"]:
            name_score = Optimizer.name_similarity(
                firstname, best_match["related_names"]
            )
            if name_score > 0:
                reasons.append(f"prénom '{firstname}' trouvé")

        if birth_date and best_match["related_dates"]:
            date_score = Optimizer.date_similarity(
                birth_date, best_match["related_dates"]
            )
            if date_score > 0:
                reasons.append(
                    f"date {'exacte' if date_score == 1.0 else 'partielle'}"
                )

        if city and best_match["related_places"]:
            place_score = self.optimizer_helper.place_similarity(
                city, best_match["related_places"]
            )
            if place_score > 0:
                reasons.append(f"ville '{city}' trouvée")

    if best_emotion_score > 0:
        reasons.append("correspondance émotionnelle")

    # Fall back to the selection/tie-break reason when nothing else applies.
    if not reasons:
        reasons.append(match_reason)

    return " ; ".join(reasons)
1125
-
1126
-
1127
# Data locations, relative to the working directory.
csv_path = "PP1-Collection_Database_new-cleaned.csv"
images_dir = "pictures_data"

# Missing inputs are only reported; the matcher is still constructed below.
if not Path(csv_path).exists():
    logger.error(f"Fichier CSV introuvable: {csv_path}")
if not Path(images_dir).exists():
    logger.error(f"Répertoire images introuvable: {images_dir}")

# Single module-level matcher shared by every UI callback.
matcher = ArtMatcherV2(csv_path, images_dir)
1136
-
1137
-
1138
def process_user_info(firstname: str, birthday: str, city: str, state: SessionState):
    """Validate the visitor's details and decide which section to show next.

    The sanitised first name and city, and the birthday as typed, are stored
    on ``state`` before any validation takes place.

    Returns:
        A 5-tuple ``(info_update, selection_update, results_update, message,
        state)``. On failure the info section stays visible and ``message``
        explains the problem; on success the selection section is shown.
    """

    def _stay_on_form(problem: str):
        # Keep the info form visible and report what is wrong.
        return (
            gr.update(visible=True),
            gr.update(visible=False),
            gr.update(visible=False),
            problem,
            state,
        )

    clean_firstname = SecurityValidator.sanitize_input(firstname)
    clean_city = SecurityValidator.sanitize_input(city)

    state.firstname = clean_firstname
    state.birthday = birthday
    state.city = clean_city

    if not (clean_firstname and birthday):
        return _stay_on_form(
            "Veuillez remplir au moins votre prénom et date de naissance."
        )

    date_ok, _ = SecurityValidator.validate_date(birthday)
    if not date_ok:
        return _stay_on_form("Format de date invalide. Utilisez JJ/MM (ex: 15/03)")

    return (
        gr.update(visible=False),
        gr.update(visible=True),
        gr.update(visible=False),
        "Informations enregistrées ! Passons à la sélection d'images.",
        state,
    )
1173
-
1174
-
1175
def load_images_for_round(round_num: int, state: SessionState):
    """Fetch the images offered to the visitor for one selection round.

    Args:
        round_num: Zero-based round index (shown to the user as
            ``round_num + 1``).
        state: Current session; its ``current_image_ids`` is overwritten
            with the IDs of the images being offered.

    Returns:
        ``(images, ids, message, state)``. When fewer than
        ``ScoringWeights.MAX_IMAGES_PER_SELECTION`` images are available,
        ``images`` is three ``None`` placeholders, ``ids`` is empty and
        ``state.current_image_ids`` is left untouched.
    """
    offered = matcher.get_random_images_for_selection(
        round_num, state.selected_images
    )
    available = len(offered)

    if available < ScoringWeights.MAX_IMAGES_PER_SELECTION:
        logger.warning(f"Seulement {available} images disponibles")
        shortage = (
            f"Pas assez d'images disponibles (seulement {available} trouvées)"
        )
        return [None, None, None], [], shortage, state

    # Each entry carries the image first and its ID second.
    images = []
    ids = []
    for entry in offered:
        images.append(entry[0])
        ids.append(entry[1])

    state.current_image_ids = ids

    prompt = f"Tour {round_num + 1}/{ScoringWeights.TOTAL_ROUNDS} : Sélectionnez l'image qui vous attire le plus"
    return images, ids, prompt, state
1201
-
1202
-
1203
def select_image(choice: Optional[int], state: SessionState):
    """Record the visitor's pick for the current round and advance.

    Args:
        choice: Zero-based index of the chosen image, or ``None`` when
            nothing was selected.
        state: Current session; the chosen ID is appended to
            ``selected_images`` and ``current_round`` is incremented.

    Returns:
        On an invalid choice, a 6-tuple ``(img1, img2, img3, image_choice,
        message, state)`` of no-op updates plus an error message.
        Otherwise an 8-tuple with two extra visibility updates for the
        selection and loading sections: either the next round's images, or
        (after the last round) the loading section.
    """

    def _unchanged(problem: str):
        # Leave every component as it is and only surface the message.
        return (
            gr.update(),
            gr.update(),
            gr.update(),
            gr.update(),
            problem,
            state,
        )

    if choice is None:
        return _unchanged("Veuillez sélectionner une image")

    shown_ids = state.current_image_ids
    if not shown_ids or len(shown_ids) <= choice:
        return _unchanged("Erreur: image non trouvée")

    selected_id = shown_ids[choice]
    state.selected_images.append(selected_id)
    state.current_round += 1

    logger.info(
        f"Tour {state.current_round}: Image {choice+1} sélectionnée (ID: {selected_id})"
    )

    if state.current_round >= ScoringWeights.TOTAL_ROUNDS:
        # All rounds done: hide the selection and show the loading section.
        return (
            gr.update(),  # img1
            gr.update(),  # img2
            gr.update(),  # img3
            gr.update(),  # image_choice
            "",  # empty status_message
            state,
            gr.update(visible=False),  # hide selection_section
            gr.update(visible=True),  # show loading_section
        )

    new_images, new_ids, message, state = load_images_for_round(
        state.current_round, state
    )
    first, second, third = new_images[0], new_images[1], new_images[2]
    return (
        gr.update(value=first),
        gr.update(value=second),
        gr.update(value=third),
        gr.update(value=None),
        message,
        state,
        gr.update(visible=True),  # keep selection_section visible
        gr.update(visible=False),  # keep loading_section hidden
    )
1260
-
1261
-
1262
def show_results(state: SessionState):
    """Compute the final match for a finished session and render it.

    Returns:
        A 7-tuple ``(info_section, selection_section, loading_section,
        results_section, image, title, explanation)``. While the session is
        incomplete the selection section stays visible and the result
        fields are empty; otherwise only the results section is shown.
        The outcome is recorded on ``state`` and passed to
        ``session_tracker.log_session`` whether or not a match was found.
    """
    if not state.is_complete():
        return (
            gr.update(visible=False),  # info_section
            gr.update(visible=True),  # selection_section
            gr.update(visible=False),  # loading_section
            gr.update(visible=False),  # results_section
            None,
            "",
            "",
        )

    match_image, reason, info = matcher.find_best_match(
        state.firstname,
        state.birthday,
        state.city,
        state.selected_images,
    )

    if not match_image:
        # Nothing found: still record the session.
        state.final_artwork = "Aucune œuvre trouvée"
        state.recommendation_type = "none"
        session_tracker.log_session(state, "none")

        title = "Œuvre non trouvée"
        explanation = "Désolé, aucune œuvre correspondante n'a pu être trouvée."
    else:
        # Exact name/date/place hits are recognised by their reason text;
        # everything else counts as an emotion-based recommendation.
        is_exact = "correspond exactement" in reason.lower()
        recommendation_type = "name_date_place" if is_exact else "emotions"

        state.final_artwork = info.get("title", "Œuvre inconnue")
        state.recommendation_type = recommendation_type
        session_tracker.log_session(state, recommendation_type)

        title = info.get("title", "Œuvre non trouvée")
        explanation = f"""
**Votre œuvre correspondante a été trouvée !**

**Raison du match :** {reason}

**Détails de l'œuvre :**
- Type : {info.get('type', 'Non spécifié')}
- Lieu : {info.get('place', 'Non spécifié')}
- Émotions : {', '.join(info.get('emotions', [])) if info.get('emotions') else 'Non spécifiées'}

**Description :**
{info.get('explanation', 'Aucune description disponible')}
"""

    return (
        gr.update(visible=False),  # info_section
        gr.update(visible=False),  # selection_section
        gr.update(visible=False),  # loading_section
        gr.update(visible=True),  # results_section
        match_image,
        title,
        explanation,
    )
1328
-
1329
-
1330
# UI definition: four sections (info form, image selection, loading spinner,
# results) whose visibility is toggled by the callbacks wired below.
with gr.Blocks(
    title="Art Matcher",
    theme=gr.themes.Soft(primary_hue="teal", secondary_hue="teal", neutral_hue="zinc"),
) as demo:
    gr.Markdown(
        """
# 🎨 Art Matcher
### Découvrez l'œuvre d'art qui vous correspond !

Cette application utilise vos informations personnelles et vos préférences visuelles
pour trouver l'œuvre d'art qui vous correspond le mieux dans notre collection.
"""
    )

    # Session data carried between callbacks.
    session_state = gr.State(SessionState())

    # Step 1: personal details form (the only section visible at start).
    with gr.Group(visible=True) as info_section:
        gr.Markdown("### Étape 1 : Vos informations")
        with gr.Row():
            firstname_input = gr.Textbox(
                label="Prénom", placeholder="Entrez votre prénom", max_lines=1
            )
            birthday_input = gr.Textbox(
                label="Date d'anniversaire (JJ/MM)",
                placeholder="Ex: 25/12",
                max_lines=1,
            )
            city_input = gr.Textbox(
                label="Ville de résidence", placeholder="Ex: Paris", max_lines=1
            )

        submit_info_btn = gr.Button("Valider mes informations", variant="primary")

    # Step 2: three candidate images and a radio to pick one of them.
    with gr.Group(visible=False) as selection_section:
        selection_title = gr.Markdown("### Étape 2 : Sélection d'images")

        with gr.Row():
            img1 = gr.Image(label="Image 1", type="filepath", height=300)
            img2 = gr.Image(label="Image 2", type="filepath", height=300)
            img3 = gr.Image(label="Image 3", type="filepath", height=300)

        image_choice = gr.Radio(
            choices=["Image 1", "Image 2", "Image 3"],
            label="Quelle image vous attire le plus ?",
            type="index",  # callbacks receive the option's index, not its label
        )

        select_btn = gr.Button("Valider mon choix", variant="primary")

    # Spinner shown after the last selection round.
    with gr.Group(visible=False) as loading_section:
        gr.Markdown("### ⏳ Analyse en cours...")
        gr.HTML(
            """
<div style="text-align: center; padding: 40px;">
<div style="display: inline-block; width: 60px; height: 60px; border: 6px solid #f3f3f3; border-top: 6px solid #14b8a6; border-radius: 50%; animation: spin 1s linear infinite;"></div>
<style>
@keyframes spin {
0% { transform: rotate(0deg); }
100% { transform: rotate(360deg); }
}
</style>
<p style="margin-top: 20px; font-size: 18px; color: #666;">
<strong>Traitement de vos sélections...</strong><br>
<span style="font-size: 14px;">Nous analysons votre profil pour trouver l'œuvre parfaite</span>
</p>
</div>
"""
        )

    # Final step: matched artwork, its title and the explanation.
    with gr.Group(visible=False) as results_section:
        gr.Markdown("### Votre œuvre correspondante")

        with gr.Row():
            with gr.Column(scale=1):
                result_image = gr.Image(label="Votre œuvre", height=400)
                result_title = gr.Markdown("## Titre de l'œuvre")

            with gr.Column(scale=1):
                result_explanation = gr.Markdown("")

        restart_btn = gr.Button("Recommencer", variant="secondary")

    # Shared status line used by every callback for user feedback.
    status_message = gr.Markdown("")

    def on_info_submit(firstname, birthday, city, state):
        """Reset the session, validate the form and, if valid, load round 0.

        Returns 8 values matching the outputs of ``submit_info_btn.click``:
        three section updates, the three image values, the status message
        and the session state.
        """
        state.reset()

        info_vis, select_vis, results_vis, message, state = process_user_info(
            firstname, birthday, city, state
        )

        # The selection update is indexed like a dict here; its "visible"
        # entry is true only when validation succeeded.
        if select_vis["visible"]:
            images, ids, round_message, state = load_images_for_round(0, state)
            return (
                info_vis,
                select_vis,
                results_vis,
                images[0] if len(images) > 0 else None,
                images[1] if len(images) > 1 else None,
                images[2] if len(images) > 2 else None,
                round_message,
                state,
            )
        else:
            return (info_vis, select_vis, results_vis, None, None, None, message, state)

    submit_info_btn.click(
        fn=on_info_submit,
        inputs=[firstname_input, birthday_input, city_input, session_state],
        outputs=[
            info_section,
            selection_section,
            results_section,
            img1,
            img2,
            img3,
            status_message,
            session_state,
        ],
    )

    def on_image_select(choice, state):
        """Adapt ``select_image``'s result to the 10 outputs of ``select_btn.click``."""
        result = select_image(choice, state)

        # select_image returns 8 values on its normal paths
        if len(result) == 8:
            (
                img1_update,
                img2_update,
                img3_update,
                choice_update,
                message,
                state,
                selection_vis,
                loading_vis,
            ) = result
            return (
                gr.update(),  # info_section
                selection_vis,  # selection_section
                loading_vis,  # loading_section
                gr.update(),  # results_section
                img1_update,  # img1
                img2_update,  # img2
                img3_update,  # img3
                choice_update,  # image_choice
                message,  # status_message
                state,
            )
        else:
            # 6-value format (error paths, no section visibility updates)
            (img1_update, img2_update, img3_update, choice_update, message, state) = (
                result
            )
            return (
                gr.update(),  # info_section
                gr.update(),  # selection_section
                gr.update(),  # loading_section
                gr.update(),  # results_section
                img1_update,  # img1
                img2_update,  # img2
                img3_update,  # img3
                choice_update,  # image_choice
                message,  # status_message
                state,
            )

    def handle_final_results(state):
        """Show the results once the session is complete; otherwise change nothing."""
        if state.is_complete():
            return show_results(state)
        else:
            return gr.update(), gr.update(), gr.update(), gr.update(), None, "", ""

    # Two-stage handler: record the choice first, then (chained with .then)
    # compute and display the results if that was the last round.
    select_btn.click(
        fn=on_image_select,
        inputs=[image_choice, session_state],
        outputs=[
            info_section,
            selection_section,
            loading_section,
            results_section,
            img1,
            img2,
            img3,
            image_choice,
            status_message,
            session_state,
        ],
    ).then(
        fn=handle_final_results,
        inputs=[session_state],
        outputs=[
            info_section,
            selection_section,
            loading_section,
            results_section,
            result_image,
            result_title,
            result_explanation,
        ],
    )

    def restart_app(state):
        """Reset the session and return the UI to the empty info form."""
        state.reset()

        return (
            gr.update(visible=True),  # info_section
            gr.update(visible=False),  # selection_section
            gr.update(visible=False),  # loading_section
            gr.update(visible=False),  # results_section
            "",  # firstname_input
            "",  # birthday_input
            "",  # city_input
            None,  # image_choice
            "Application réinitialisée. Veuillez entrer vos informations.",  # status_message
            state,
        )

    restart_btn.click(
        fn=restart_app,
        inputs=[session_state],
        outputs=[
            info_section,
            selection_section,
            loading_section,
            results_section,
            firstname_input,
            birthday_input,
            city_input,
            image_choice,
            status_message,
            session_state,
        ],
    )
1563
-
1564
-
1565
# Start the Gradio server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()