hoololi commited on
Commit
9bf4d36
·
verified ·
1 Parent(s): 82cf12e

Upload game_engine.py

Browse files
Files changed (1) hide show
  1. game_engine.py +187 -182
game_engine.py CHANGED
@@ -1,5 +1,5 @@
1
  # ==========================================
2
- # game_engine.py - Calcul OCR v3.0 CLEAN
3
  # ==========================================
4
 
5
  """
@@ -21,76 +21,62 @@ import threading
21
  import queue
22
  from typing import Dict, Tuple, Optional
23
 
24
- # Auto-détection propre : GPU OU CPU uniquement
25
- ocr_module = None
26
- ocr_info = {"model_name": "Unknown", "device": "Unknown"}
 
 
 
 
 
 
 
 
 
27
 
28
- # Auto-détection adaptée ZeroGPU
29
  ocr_module = None
30
  ocr_info = {"model_name": "Unknown", "device": "Unknown"}
31
 
32
  # Debug des variables d'environnement HF
33
- import os
34
  space_id = os.getenv("SPACE_ID")
35
- space_hardware = os.getenv("SPACE_HARDWARE")
36
  hf_space = os.getenv("HF_SPACE")
37
  space_author = os.getenv("SPACE_AUTHOR_NAME")
38
- zero_gpu = os.getenv("ZERO_GPU") # Variable ZeroGPU
39
 
40
  print(f"🔍 Debug HF Env:")
41
  print(f" SPACE_ID: {space_id}")
42
  print(f" SPACE_HARDWARE: {space_hardware}")
43
  print(f" HF_SPACE: {hf_space}")
44
  print(f" SPACE_AUTHOR_NAME: {space_author}")
45
- print(f" ZERO_GPU: {zero_gpu}")
46
-
47
- # Détecter ZeroGPU (GPU dynamique HF)
48
- is_zerogpu = space_id and ("hoololi" in str(space_id)) # On est sur HF Spaces
49
 
50
- print(f"🎯 ZeroGPU détecté: {is_zerogpu}")
51
-
52
- if is_zerogpu:
53
- # On est sur ZeroGPU, forcer le mode GPU
54
- try:
55
- print("🚀 Force mode ZeroGPU - Import GPU...")
56
- # Créer un simple import qui satisfait ZeroGPU
57
- from simple_gpu import gpu_dummy_function
58
- print("✅ Simple GPU importé")
59
-
60
- # Utiliser le vrai TrOCR qu'on a chargé !
61
  from image_processing_gpu import (
62
- recognize_number_fast_with_image as gpu_recognize,
63
- create_thumbnail_fast,
64
- create_white_canvas,
65
- cleanup_memory,
66
- log_memory_usage,
67
- get_ocr_model_info
68
- )
69
-
70
- # Pas de wrapper, utiliser directement TrOCR
71
- recognize_number_fast_with_image = gpu_recognize
72
-
73
- ocr_module = "zerogpu_trocr"
74
- print("✅ Game Engine: Mode ZeroGPU - TrOCR directement utilisé")
75
-
76
- except Exception as e:
77
- print(f"❌ Erreur ZeroGPU: {e}")
78
- # Fallback CPU pur
79
- from image_processing_cpu import (
80
- recognize_number_fast_with_image,
81
- create_thumbnail_fast,
82
  create_white_canvas,
83
  cleanup_memory,
84
  log_memory_usage,
85
  get_ocr_model_info
86
  )
87
- ocr_module = "cpu"
88
- print("✅ Game Engine: Mode CPU - EasyOCR (fallback)")
89
- else:
90
- # Mode local/classique
 
 
91
  from image_processing_cpu import (
92
- recognize_number_fast_with_image,
93
- create_thumbnail_fast,
94
  create_white_canvas,
95
  cleanup_memory,
96
  log_memory_usage,
@@ -102,19 +88,19 @@ else:
102
  # Récupérer les infos du modèle sélectionné
103
  try:
104
  ocr_info = get_ocr_model_info()
105
- print(f"🎯 OCR sélectionné: {ocr_info['model_name']} sur {ocr_info['device']}")
106
  except Exception as e:
107
  print(f"⚠️ Impossible de récupérer les infos OCR: {e}")
108
  ocr_info = {"model_name": "Error", "device": "Unknown"}
109
 
110
- # Imports dataset avec gestion d'erreur
111
  try:
112
- from datasets import Dataset, load_dataset
113
- DATASET_AVAILABLE = True
114
- print("✅ Modules dataset disponibles")
115
  except ImportError as e:
116
- DATASET_AVAILABLE = False
117
- print(f"⚠️ Modules dataset non disponibles: {e}")
118
 
119
  # Nom du dataset cohérent avec le space
120
  DATASET_NAME = "hoololi/calcul_ocr_dataset"
@@ -129,21 +115,21 @@ DIFFICULTY_RANGES = {
129
 
130
  def create_result_row_with_images(i: int, image: dict | np.ndarray | Image.Image, expected: int, operation_data: tuple[int, int, str, int]) -> dict:
131
 
132
- print(f"🔍 create_result_row_with_images #{i}")
133
- print(f"🔍 Expected: {expected}")
134
- print(f"🔍 Image type: {type(image)}")
135
 
136
  # OCR optimisé avec debug
137
  recognized, optimized_image, dataset_image_data = recognize_number_fast_with_image(image, debug=True)
138
 
139
- print(f"🔍 OCR recognized: '{recognized}' (type: {type(recognized)})")
140
 
141
  try:
142
  recognized_num = int(recognized) if recognized.isdigit() else 0
143
  except:
144
  recognized_num = 0
145
 
146
- print(f"🔍 OCR parsed num: {recognized_num}")
147
 
148
  is_correct = recognized_num == expected
149
  a, b, operation, correct_result = operation_data
@@ -192,10 +178,10 @@ class MathGame:
192
  self.correct_answer = 0
193
  self.user_images = []
194
  self.expected_answers = []
195
- self.operations_history = []
196
  self.question_count = 0
197
  self.time_remaining = 30
198
- self.session_data = []
199
 
200
  # Configuration session
201
  self.duration = 30
@@ -241,35 +227,40 @@ class MathGame:
241
  """Arrête le thread de traitement"""
242
  self.processing_active = False
243
  if self.worker_thread and self.worker_thread.is_alive():
244
- print("⏹️ Arrêt du thread de traitement parallèle")
 
245
 
246
  def _process_images_worker(self) -> None:
247
  """Worker thread qui traite les images en arrière-plan"""
248
  print("🚀 Worker thread démarré")
249
  while self.processing_active:
250
  try:
251
- if not self.processing_queue.empty():
252
- question_num, image, expected, operation_data = self.processing_queue.get(timeout=1)
253
- print(f"🔄 Traitement parallèle image {question_num}...")
254
-
255
- start_time = time.time()
256
- result_data = create_result_row_with_images(question_num, image, expected, operation_data)
257
- processing_time = time.time() - start_time
258
-
259
- # Stocker le résultat
260
- self.results_cache[question_num] = result_data
261
- print(f"✅ Image {question_num} traitée en {processing_time:.1f}s (parallèle)")
262
-
263
- else:
264
- time.sleep(0.1)
265
-
266
  except queue.Empty:
267
- continue
268
  except Exception as e:
269
  print(f"❌ Erreur traitement parallèle: {e}")
 
 
 
 
 
270
 
271
  print("🛑 Worker thread terminé")
272
 
 
273
  def _add_image_to_processing_queue(self, question_num: int, image: dict | np.ndarray | Image.Image,
274
  expected: int, operation_data: tuple) -> None:
275
  """Ajoute une image à la queue de traitement"""
@@ -333,38 +324,40 @@ class MathGame:
333
  self.operation_type = operation
334
  self.difficulty = difficulty
335
 
336
- # Nettoyage
337
  if hasattr(self, 'user_images') and self.user_images:
338
- for img in self.user_images:
339
- if hasattr(img, 'close'):
 
340
  try:
341
- img.close()
342
  except:
343
  pass
344
-
345
- if hasattr(self, 'session_data') and self.session_data:
346
- for entry in self.session_data:
347
- if 'user_drawing' in entry and entry['user_drawing']:
348
- entry['user_drawing'] = None
349
- self.session_data.clear()
 
 
 
350
 
351
  # Réinit avec nettoyage parallèle
352
  self._stop_background_processing()
353
  self.results_cache.clear()
 
354
  while not self.processing_queue.empty():
355
  try:
356
  self.processing_queue.get_nowait()
 
357
  except queue.Empty:
358
  break
359
 
360
  self.is_running = True
361
  self.start_time = time.time()
362
- self.user_images = []
363
- self.expected_answers = []
364
- self.operations_history = []
365
  self.question_count = 0
366
  self.time_remaining = self.duration
367
- self.session_data = []
368
 
369
  # Reset export
370
  self.export_status = "not_exported"
@@ -382,9 +375,13 @@ class MathGame:
382
  self.correct_answer = answer
383
 
384
  # Parser l'opération pour l'historique
385
- parts = operation_str.split()
386
- a, op, b = int(parts[0]), parts[1], int(parts[2])
387
- self.operations_history.append((a, b, op, answer))
 
 
 
 
388
 
389
  # Affichage adapté selon l'opération
390
  operation_emoji = {
@@ -424,9 +421,13 @@ class MathGame:
424
  self.expected_answers.append(self.correct_answer)
425
 
426
  # Parser l'opération actuelle pour le traitement
427
- parts = self.current_operation.split()
428
- a, op, b = int(parts[0]), parts[1], int(parts[2])
429
- current_operation_data = (a, b, op, self.correct_answer)
 
 
 
 
430
 
431
  # Lancer le traitement en parallèle de l'image qu'on vient de recevoir
432
  self._add_image_to_processing_queue(self.question_count, image_data, self.correct_answer, current_operation_data)
@@ -439,9 +440,13 @@ class MathGame:
439
  self.correct_answer = answer
440
 
441
  # Parser pour l'historique
442
- parts = operation_str.split()
443
- a, op, b = int(parts[0]), parts[1], int(parts[2])
444
- self.operations_history.append((a, b, op, answer))
 
 
 
 
445
 
446
  time_remaining = max(0, self.duration - int(elapsed_time))
447
  self.time_remaining = time_remaining
@@ -469,37 +474,46 @@ class MathGame:
469
 
470
  self.is_running = False
471
 
472
- # Arrêter le traitement parallèle
473
  self._stop_background_processing()
474
-
 
475
  print("🏁 Fin de jeu - Assemblage des résultats...")
476
 
477
  if final_image is not None:
478
  self.user_images.append(final_image)
479
  self.expected_answers.append(self.correct_answer)
480
 
481
- # Traitement de la dernière image
482
- parts = self.current_operation.split()
483
- a, op, b = int(parts[0]), parts[1], int(parts[2])
484
- final_operation_data = (a, b, op, self.correct_answer)
485
-
486
- # Traiter la dernière image immédiatement (pas en parallèle)
487
- print(f"🔄 Traitement final de l'image {self.question_count}...")
488
- final_result = create_result_row_with_images(self.question_count, final_image, self.correct_answer, final_operation_data)
489
- self.results_cache[self.question_count] = final_result
490
-
491
- self.question_count += 1
492
  if len(self.operations_history) < len(self.user_images):
493
- self.operations_history.append((a, b, op, self.correct_answer))
494
-
495
- # Attendre que toutes les images soient traitées
496
- max_wait = 10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
497
  wait_start = time.time()
498
  expected_results = len(self.user_images)
499
 
500
- print(f"⏳ Attente de {expected_results} résultats...")
501
  while len(self.results_cache) < expected_results and (time.time() - wait_start) < max_wait:
502
- time.sleep(0.1)
503
 
504
  results_ready = len(self.results_cache)
505
  print(f"✅ {results_ready}/{expected_results} résultats prêts")
@@ -519,33 +533,36 @@ class MathGame:
519
  print(f"📊 Assemblage de {total_questions} résultats...")
520
 
521
  for i in range(total_questions):
522
- if i in self.results_cache:
523
- row_data = self.results_cache[i]
524
- print(f" ✅ Résultat {i} du cache parallèle")
525
- else:
526
- print(f" 🔄 Traitement fallback pour résultat {i}...")
527
- if i < len(self.operations_history):
528
  row_data = create_result_row_with_images(i, self.user_images[i], self.expected_answers[i], self.operations_history[i])
529
  else:
530
  row_data = {
531
- 'html_row': f'<tr><td>{i+1}</td><td colspan="7">Erreur traitement</td></tr>',
532
  'is_correct': False,
533
  'recognized': "0",
534
  'recognized_num': 0,
535
  'dataset_image_data': None
536
  }
 
 
537
 
538
  table_rows_html += row_data['html_row']
539
 
540
  if row_data['is_correct']:
541
  correct_answers += 1
542
 
543
- # Structure pour dataset avec debug OCR
544
- a, b, operation, correct_result = self.operations_history[i] if i < len(self.operations_history) else (0, 0, "×", 0)
545
 
 
 
 
546
  try:
547
  ocr_info_data = get_ocr_model_info()
548
- print(f"🔍 Debug OCR info: {ocr_info_data}")
549
  except Exception as e:
550
  print(f"❌ Erreur get_ocr_model_info: {e}")
551
  ocr_info_data = {"model_name": "Error", "device": "Unknown"}
@@ -555,11 +572,11 @@ class MathGame:
555
  "timestamp": session_timestamp,
556
  "question_number": i + 1,
557
  "session_duration": self.duration,
558
- "operation_type": self.operation_type,
559
- "difficulty_level": self.difficulty,
560
  "operand_a": a,
561
  "operand_b": b,
562
- "operation": operation,
563
  "correct_answer": self.expected_answers[i] if i < len(self.expected_answers) else 0,
564
  "ocr_model": ocr_info_data.get("model_name", "Unknown"),
565
  "ocr_device": ocr_info_data.get("device", "Unknown"),
@@ -567,12 +584,13 @@ class MathGame:
567
  "user_answer_parsed": row_data['recognized_num'],
568
  "is_correct": row_data['is_correct'],
569
  "total_questions": total_questions,
570
- "app_version": "3.0_calcul_ocr_parallel"
 
 
 
 
571
  }
572
 
573
- print(f"🔍 Debug entry OCR fields: ocr_model={entry['ocr_model']}, ocr_device={entry['ocr_device']}")
574
-
575
-
576
  if row_data['dataset_image_data']:
577
  entry["handwriting_image"] = row_data['dataset_image_data']["image_base64"]
578
  entry["image_width"] = int(row_data['dataset_image_data']["compressed_size"][0])
@@ -591,13 +609,19 @@ class MathGame:
591
  for entry in self.session_data:
592
  entry["session_accuracy"] = accuracy
593
 
594
- # Nettoyage mémoire
595
- for img in self.user_images:
596
- if hasattr(img, 'close'):
 
 
 
 
 
597
  try:
598
- img.close()
599
  except:
600
  pass
 
601
 
602
  gc.collect()
603
 
@@ -702,26 +726,8 @@ def export_to_clean_dataset(session_data: list[dict], dataset_name: str = None)
702
  print(f"\n🚀 === EXPORT VERS DATASET CALCUL OCR ===")
703
  print(f"📊 Dataset: {dataset_name}")
704
 
705
- # Filtrer les entrées avec images et ajouter les infos OCR globalement
706
- clean_entries = []
707
-
708
- # Récupérer une seule fois les infos OCR pour toute la session
709
- try:
710
- global_ocr_info = get_ocr_model_info()
711
- print(f"🔍 Infos OCR globales: {global_ocr_info}")
712
- except Exception as e:
713
- print(f"❌ Erreur infos OCR globales: {e}")
714
- global_ocr_info = {"model_name": "Unknown", "device": "Unknown"}
715
-
716
- for entry in session_data:
717
- if entry.get('has_image', False):
718
- # Ajouter explicitement les champs OCR manquants
719
- entry_with_ocr = entry.copy()
720
- entry_with_ocr["ocr_model"] = global_ocr_info.get("model_name", "Unknown")
721
- entry_with_ocr["ocr_device"] = global_ocr_info.get("device", "Unknown")
722
-
723
- print(f"🔍 Entry avec OCR: ocr_model={entry_with_ocr['ocr_model']}, ocr_device={entry_with_ocr['ocr_device']}")
724
- clean_entries.append(entry_with_ocr)
725
 
726
  # Créer un dataset de test avec structure forcée
727
  if len(clean_entries) == 0:
@@ -730,23 +736,22 @@ def export_to_clean_dataset(session_data: list[dict], dataset_name: str = None)
730
  # Vérifier la structure de la première entrée
731
  sample_entry = clean_entries[0]
732
  print(f"🔍 Structure première entrée: {list(sample_entry.keys())}")
733
- print(f"🔍 OCR dans entrée: ocr_model={sample_entry.get('ocr_model', 'MISSING')}, ocr_device={sample_entry.get('ocr_device', 'MISSING')}")
734
 
735
- # Charger dataset existant et combiner (IMPORTANT!)
736
  try:
737
- existing_dataset = load_dataset(dataset_name, split="train")
738
- existing_data = existing_dataset.to_list()
739
- print(f"📊 {len(existing_data)} entrées existantes trouvées")
740
 
741
- # Combiner ancien + nouveau
742
- combined_data = existing_data + clean_entries
743
- clean_dataset = Dataset.from_list(combined_data)
744
- print(f"📊 Dataset combiné: {len(existing_data)} existantes + {len(clean_entries)} nouvelles = {len(combined_data)} total")
745
 
746
  except Exception as e:
747
  print(f"📊 Dataset non trouvé, création nouveau: {e}")
748
  # Si le dataset n'existe pas, créer depuis les nouvelles entrées
749
- clean_dataset = Dataset.from_list(clean_entries)
750
  print(f"📊 Nouveau dataset créé avec {len(clean_entries)} entrées")
751
 
752
  print(f"✅ Dataset créé - Features:")
@@ -756,19 +761,19 @@ def export_to_clean_dataset(session_data: list[dict], dataset_name: str = None)
756
  # Statistiques par opération
757
  operations_count = {}
758
  for entry in clean_entries:
759
- op = entry.get('operation_type', 'unknown')
760
  operations_count[op] = operations_count.get(op, 0) + 1
761
 
762
  operations_summary = ", ".join([f"{op}: {count}" for op, count in operations_count.items()])
763
 
764
- # Push vers HuggingFace
765
- print(f"📤 Push vers {dataset_name}...")
766
- clean_dataset.push_to_hub(
767
- dataset_name,
768
- private=False,
769
- token=hf_token,
770
- commit_message=f"Add {len(clean_entries)} handwriting samples for math OCR ({operations_summary})"
771
- )
772
 
773
  cleanup_memory()
774
 
 
1
  # ==========================================
2
+ # game_engine.py - Calcul OCR v3.0 CLEAN (Modifié)
3
  # ==========================================
4
 
5
  """
 
21
  import queue
22
  from typing import Dict, Tuple, Optional
23
 
24
+ # Import des fonctions utilitaires communes
25
+ from utils import (
26
+ optimize_image_for_ocr,
27
+ prepare_image_for_dataset,
28
+ create_thumbnail_fast,
29
+ create_white_canvas,
30
+ log_memory_usage,
31
+ cleanup_memory,
32
+ decode_image_from_dataset,
33
+ validate_ocr_result,
34
+ analyze_calculation_complexity # Nouvelle importation
35
+ )
36
 
37
+ # Auto-détection propre : GPU OU CPU uniquement
38
  ocr_module = None
39
  ocr_info = {"model_name": "Unknown", "device": "Unknown"}
40
 
41
  # Debug des variables d'environnement HF
 
42
  space_id = os.getenv("SPACE_ID")
43
+ space_hardware = os.getenv("SPACE_HARDWARE")
44
  hf_space = os.getenv("HF_SPACE")
45
  space_author = os.getenv("SPACE_AUTHOR_NAME")
46
+ zero_gpu_env = os.getenv("ZERO_GPU") # Variable ZeroGPU
47
 
48
  print(f"🔍 Debug HF Env:")
49
  print(f" SPACE_ID: {space_id}")
50
  print(f" SPACE_HARDWARE: {space_hardware}")
51
  print(f" HF_SPACE: {hf_space}")
52
  print(f" SPACE_AUTHOR_NAME: {space_author}")
53
+ print(f" ZERO_GPU: {zero_gpu_env}")
 
 
 
54
 
55
+ # ==========================================
56
+ # LOGIQUE DE DÉTECTION OCR AMÉLIORÉE
57
+ # ==========================================
58
+ # Tenter d'importer le module GPU en premier
59
+ try:
60
+ import torch
61
+ # Vérifier la disponibilité CUDA ou la variable d'environnement ZeroGPU
62
+ if torch.cuda.is_available() or zero_gpu_env == "1":
 
 
 
63
  from image_processing_gpu import (
64
+ recognize_number_fast_with_image,
65
+ create_thumbnail_fast,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  create_white_canvas,
67
  cleanup_memory,
68
  log_memory_usage,
69
  get_ocr_model_info
70
  )
71
+ ocr_module = "gpu"
72
+ print("✅ Game Engine: Mode GPU/ZeroGPU détecté - TrOCR")
73
+ else:
74
+ raise ImportError("No CUDA available and ZERO_GPU not set, falling back to CPU") # Force CPU path
75
+ except ImportError as e:
76
+ print(f"⚠️ GPU/TrOCR non disponible ou erreur: {e}. Fallback CPU...")
77
  from image_processing_cpu import (
78
+ recognize_number_fast_with_image,
79
+ create_thumbnail_fast,
80
  create_white_canvas,
81
  cleanup_memory,
82
  log_memory_usage,
 
88
  # Récupérer les infos du modèle sélectionné
89
  try:
90
  ocr_info = get_ocr_model_info()
91
+ print(f"🎯 OCR sélectionné: {ocr_info.get('model_name', 'Unknown')} sur {ocr_info.get('device', 'Unknown')}")
92
  except Exception as e:
93
  print(f"⚠️ Impossible de récupérer les infos OCR: {e}")
94
  ocr_info = {"model_name": "Error", "device": "Unknown"}
95
 
96
+ # Imports dataset avec gestion d'erreur
97
  try:
98
+ from datasets import Dataset, load_dataset
99
+ DATASET_AVAILABLE = True
100
+ print("✅ Modules dataset disponibles")
101
  except ImportError as e:
102
+ DATASET_AVAILABLE = False
103
+ print(f"⚠️ Modules dataset non disponibles: {e}")
104
 
105
  # Nom du dataset cohérent avec le space
106
  DATASET_NAME = "hoololi/calcul_ocr_dataset"
 
115
 
116
  def create_result_row_with_images(i: int, image: dict | np.ndarray | Image.Image, expected: int, operation_data: tuple[int, int, str, int]) -> dict:
117
 
118
+ # print(f"🔍 create_result_row_with_images #{i}")
119
+ # print(f"🔍 Expected: {expected}")
120
+ # print(f"🔍 Image type: {type(image)}")
121
 
122
  # OCR optimisé avec debug
123
  recognized, optimized_image, dataset_image_data = recognize_number_fast_with_image(image, debug=True)
124
 
125
+ # print(f"🔍 OCR recognized: '{recognized}' (type: {type(recognized)})")
126
 
127
  try:
128
  recognized_num = int(recognized) if recognized.isdigit() else 0
129
  except:
130
  recognized_num = 0
131
 
132
+ # print(f"🔍 OCR parsed num: {recognized_num}")
133
 
134
  is_correct = recognized_num == expected
135
  a, b, operation, correct_result = operation_data
 
178
  self.correct_answer = 0
179
  self.user_images = []
180
  self.expected_answers = []
181
+ self.operations_history = [] # Stocke (a, b, op, correct_result)
182
  self.question_count = 0
183
  self.time_remaining = 30
184
+ self.session_data = [] # Données complètes de la session pour l'export
185
 
186
  # Configuration session
187
  self.duration = 30
 
227
  """Arrête le thread de traitement"""
228
  self.processing_active = False
229
  if self.worker_thread and self.worker_thread.is_alive():
230
+ # Optionnel: worker_thread.join(timeout=X) pour attendre la fin, mais peut bloquer l'UI
231
+ print("⏹️ Arrêt du thread de traitement parallèle demandé")
232
 
233
  def _process_images_worker(self) -> None:
234
  """Worker thread qui traite les images en arrière-plan"""
235
  print("🚀 Worker thread démarré")
236
  while self.processing_active:
237
  try:
238
+ # Blocage avec timeout pour permettre l'arrêt propre
239
+ question_num, image, expected, operation_data = self.processing_queue.get(timeout=0.1)
240
+ print(f"🔄 Traitement parallèle image {question_num}...")
241
+
242
+ start_time = time.time()
243
+ result_data = create_result_row_with_images(question_num, image, expected, operation_data)
244
+ processing_time = time.time() - start_time
245
+
246
+ # Stocker le résultat
247
+ self.results_cache[question_num] = result_data
248
+ print(f"✅ Image {question_num} traitée en {processing_time:.1f}s (parallèle)")
249
+ self.processing_queue.task_done() # Indiquer que la tâche est terminée
250
+
 
 
251
  except queue.Empty:
252
+ continue # Continuer si la queue est vide, ré-vérifier processing_active
253
  except Exception as e:
254
  print(f"❌ Erreur traitement parallèle: {e}")
255
+ import traceback
256
+ traceback.print_exc()
257
+ # Marquer la tâche comme faite même en cas d'erreur pour éviter le blocage
258
+ if not self.processing_queue.empty():
259
+ self.processing_queue.task_done()
260
 
261
  print("🛑 Worker thread terminé")
262
 
263
+
264
  def _add_image_to_processing_queue(self, question_num: int, image: dict | np.ndarray | Image.Image,
265
  expected: int, operation_data: tuple) -> None:
266
  """Ajoute une image à la queue de traitement"""
 
324
  self.operation_type = operation
325
  self.difficulty = difficulty
326
 
327
+ # Nettoyage des anciennes images (PIL) et données de session
328
  if hasattr(self, 'user_images') and self.user_images:
329
+ for img_data in self.user_images:
330
+ # Gradio retourne un dict, le composite peut être une PIL Image ou numpy array
331
+ if isinstance(img_data, dict) and 'composite' in img_data and hasattr(img_data['composite'], 'close'):
332
  try:
333
+ img_data['composite'].close()
334
  except:
335
  pass
336
+ elif isinstance(img_data, Image.Image) and hasattr(img_data, 'close'):
337
+ try:
338
+ img_data.close()
339
+ except:
340
+ pass
341
+ self.user_images.clear()
342
+ self.expected_answers.clear()
343
+ self.operations_history.clear()
344
+ self.session_data.clear()
345
 
346
  # Réinit avec nettoyage parallèle
347
  self._stop_background_processing()
348
  self.results_cache.clear()
349
+ # Vider la queue pour les nouvelles sessions
350
  while not self.processing_queue.empty():
351
  try:
352
  self.processing_queue.get_nowait()
353
+ self.processing_queue.task_done()
354
  except queue.Empty:
355
  break
356
 
357
  self.is_running = True
358
  self.start_time = time.time()
 
 
 
359
  self.question_count = 0
360
  self.time_remaining = self.duration
 
361
 
362
  # Reset export
363
  self.export_status = "not_exported"
 
375
  self.correct_answer = answer
376
 
377
  # Parser l'opération pour l'historique
378
+ parts = operation_str.replace(' ', '').split('×') if '×' in operation_str else \
379
+ operation_str.replace(' ', '').split('+') if '+' in operation_str else \
380
+ operation_str.replace(' ', '').split('-') if '-' in operation_str else \
381
+ operation_str.replace(' ', '').split('÷') # gérer les espaces
382
+
383
+ a, op_char, b = int(parts[0]), operation_str.split()[1], int(parts[2])
384
+ self.operations_history.append((a, b, op_char, answer))
385
 
386
  # Affichage adapté selon l'opération
387
  operation_emoji = {
 
421
  self.expected_answers.append(self.correct_answer)
422
 
423
  # Parser l'opération actuelle pour le traitement
424
+ parts = self.current_operation.replace(' ', '').split('×') if '×' in self.current_operation else \
425
+ self.current_operation.replace(' ', '').split('+') if '+' in self.current_operation else \
426
+ self.current_operation.replace(' ', '').split('-') if '-' in self.current_operation else \
427
+ self.current_operation.replace(' ', '').split('÷')
428
+
429
+ a, op_char, b = int(parts[0]), self.current_operation.split()[1], int(parts[2])
430
+ current_operation_data = (a, b, op_char, self.correct_answer)
431
 
432
  # Lancer le traitement en parallèle de l'image qu'on vient de recevoir
433
  self._add_image_to_processing_queue(self.question_count, image_data, self.correct_answer, current_operation_data)
 
440
  self.correct_answer = answer
441
 
442
  # Parser pour l'historique
443
+ parts = operation_str.replace(' ', '').split('×') if '×' in operation_str else \
444
+ operation_str.replace(' ', '').split('+') if '+' in operation_str else \
445
+ operation_str.replace(' ', '').split('-') if '-' in operation_str else \
446
+ operation_str.replace(' ', '').split('÷')
447
+
448
+ a, op_char, b = int(parts[0]), operation_str.split()[1], int(parts[2])
449
+ self.operations_history.append((a, b, op_char, answer))
450
 
451
  time_remaining = max(0, self.duration - int(elapsed_time))
452
  self.time_remaining = time_remaining
 
474
 
475
  self.is_running = False
476
 
477
+ # Arrêter le traitement parallèle et attendre qu'il se vide
478
  self._stop_background_processing()
479
+ self.processing_queue.join(timeout=5) # Attendre que toutes les tâches soient terminées (max 5s)
480
+
481
  print("🏁 Fin de jeu - Assemblage des résultats...")
482
 
483
  if final_image is not None:
484
  self.user_images.append(final_image)
485
  self.expected_answers.append(self.correct_answer)
486
 
487
+ # Traitement de la dernière image si elle n'a pas été ajoutée
 
 
 
 
 
 
 
 
 
 
488
  if len(self.operations_history) < len(self.user_images):
489
+ # Cela signifie que la dernière question n'a pas encore été historisée
490
+ # Ré-parser la dernière opération affichée pour l'historique
491
+ parts = self.current_operation.replace(' ', '').split('×') if '×' in self.current_operation else \
492
+ self.current_operation.replace(' ', '').split('+') if '+' in self.current_operation else \
493
+ self.current_operation.replace(' ', '').split('-') if '-' in self.current_operation else \
494
+ self.current_operation.replace(' ', '').split('÷')
495
+ a, op_char, b = int(parts[0]), self.current_operation.split()[1], int(parts[2])
496
+ self.operations_history.append((a, b, op_char, self.correct_answer))
497
+
498
+ # Assurer que la dernière image est traitée si ce n'est pas déjà fait
499
+ if self.question_count not in self.results_cache and len(self.user_images) > self.question_count:
500
+ print(f"🔄 Traitement final de l'image {self.question_count} (synchrone)...")
501
+ final_operation_data = self.operations_history[self.question_count]
502
+ final_result = create_result_row_with_images(self.question_count, final_image, self.correct_answer, final_operation_data)
503
+ self.results_cache[self.question_count] = final_result
504
+ self.question_count += 1
505
+ elif self.question_count in self.results_cache:
506
+ print(f"✅ Dernière image {self.question_count} déjà traitée en parallèle.")
507
+ self.question_count += 1 # Incrémenter si déjà dans le cache
508
+
509
+ # Attendre que toutes les images soient traitées (petite attente finale)
510
+ max_wait = 2 # temps max d'attente supplémentaire
511
  wait_start = time.time()
512
  expected_results = len(self.user_images)
513
 
514
+ print(f"⏳ Attente finale des {expected_results} résultats...")
515
  while len(self.results_cache) < expected_results and (time.time() - wait_start) < max_wait:
516
+ time.sleep(0.05) # Petite pause pour laisser le worker finir
517
 
518
  results_ready = len(self.results_cache)
519
  print(f"✅ {results_ready}/{expected_results} résultats prêts")
 
533
  print(f"📊 Assemblage de {total_questions} résultats...")
534
 
535
  for i in range(total_questions):
536
+ row_data = self.results_cache.get(i)
537
+ if row_data is None:
538
+ # Fallback si le résultat n'est pas dans le cache (erreur parallèle ou non traité)
539
+ print(f" ❌ Résultat {i} manquant du cache, traitement synchrone de fallback...")
540
+ if i < len(self.operations_history) and i < len(self.user_images) and i < len(self.expected_answers):
 
541
  row_data = create_result_row_with_images(i, self.user_images[i], self.expected_answers[i], self.operations_history[i])
542
  else:
543
  row_data = {
544
+ 'html_row': f'<tr><td>{i+1}</td><td colspan="7">Erreur traitement (données manquantes)</td></tr>',
545
  'is_correct': False,
546
  'recognized': "0",
547
  'recognized_num': 0,
548
  'dataset_image_data': None
549
  }
550
+ else:
551
+ print(f" ✅ Résultat {i} récupéré du cache parallèle")
552
 
553
  table_rows_html += row_data['html_row']
554
 
555
  if row_data['is_correct']:
556
  correct_answers += 1
557
 
558
+ # Structure pour dataset avec debug OCR et COMPLEXITÉ
559
+ a, b, operation, correct_result_op = self.operations_history[i] if i < len(self.operations_history) else (0, 0, "×", 0)
560
 
561
+ # Analyse de la complexité
562
+ complexity_analysis = analyze_calculation_complexity(a, b, operation)
563
+
564
  try:
565
  ocr_info_data = get_ocr_model_info()
 
566
  except Exception as e:
567
  print(f"❌ Erreur get_ocr_model_info: {e}")
568
  ocr_info_data = {"model_name": "Error", "device": "Unknown"}
 
572
  "timestamp": session_timestamp,
573
  "question_number": i + 1,
574
  "session_duration": self.duration,
575
+ "operation_type": self.operation_type, # Type d'opération de la session globale
576
+ "difficulty_level": self.difficulty, # Difficulté de la session globale
577
  "operand_a": a,
578
  "operand_b": b,
579
+ "operation": operation, # Opération spécifique de la question
580
  "correct_answer": self.expected_answers[i] if i < len(self.expected_answers) else 0,
581
  "ocr_model": ocr_info_data.get("model_name", "Unknown"),
582
  "ocr_device": ocr_info_data.get("device", "Unknown"),
 
584
  "user_answer_parsed": row_data['recognized_num'],
585
  "is_correct": row_data['is_correct'],
586
  "total_questions": total_questions,
587
+ "app_version": "3.0_calcul_ocr_parallel_v2",
588
+ # Ajout des métadonnées de complexité
589
+ "complexity_score": complexity_analysis["complexity_score"],
590
+ "difficulty_category": complexity_analysis["difficulty_category"],
591
+ "operation_specific_type": complexity_analysis["operation_type"], # Redondant mais explicite
592
  }
593
 
 
 
 
594
  if row_data['dataset_image_data']:
595
  entry["handwriting_image"] = row_data['dataset_image_data']["image_base64"]
596
  entry["image_width"] = int(row_data['dataset_image_data']["compressed_size"][0])
 
609
  for entry in self.session_data:
610
  entry["session_accuracy"] = accuracy
611
 
612
+ # Nettoyage mémoire des images PIL originales (Gradio dict ou PIL.Image)
613
+ for img_data in self.user_images:
614
+ if isinstance(img_data, dict) and 'composite' in img_data and hasattr(img_data['composite'], 'close'):
615
+ try:
616
+ img_data['composite'].close()
617
+ except:
618
+ pass
619
+ elif isinstance(img_data, Image.Image) and hasattr(img_data, 'close'):
620
  try:
621
+ img_data.close()
622
  except:
623
  pass
624
+ self.user_images.clear() # Vider la liste une fois traitée
625
 
626
  gc.collect()
627
 
 
726
  print(f"\n🚀 === EXPORT VERS DATASET CALCUL OCR ===")
727
  print(f"📊 Dataset: {dataset_name}")
728
 
729
+ # Filtrer les entrées avec images
730
+ clean_entries = [entry for entry in session_data if entry.get('has_image', False)]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
731
 
732
  # Créer un dataset de test avec structure forcée
733
  if len(clean_entries) == 0:
 
736
  # Vérifier la structure de la première entrée
737
  sample_entry = clean_entries[0]
738
  print(f"🔍 Structure première entrée: {list(sample_entry.keys())}")
 
739
 
740
+ # Charger dataset existant et combiner (IMPORTANT!)
741
  try:
742
+ existing_dataset = load_dataset(dataset_name, split="train")
743
+ existing_data = existing_dataset.to_list()
744
+ print(f"📊 {len(existing_data)} entrées existantes trouvées")
745
 
746
+ # Combiner ancien + nouveau
747
+ combined_data = existing_data + clean_entries
748
+ clean_dataset = Dataset.from_list(combined_data)
749
+ print(f"📊 Dataset combiné: {len(existing_data)} existantes + {len(clean_entries)} nouvelles = {len(combined_data)} total")
750
 
751
  except Exception as e:
752
  print(f"📊 Dataset non trouvé, création nouveau: {e}")
753
  # Si le dataset n'existe pas, créer depuis les nouvelles entrées
754
+ clean_dataset = Dataset.from_list(clean_entries)
755
  print(f"📊 Nouveau dataset créé avec {len(clean_entries)} entrées")
756
 
757
  print(f"✅ Dataset créé - Features:")
 
761
  # Statistiques par opération
762
  operations_count = {}
763
  for entry in clean_entries:
764
+ op = entry.get('operation', 'unknown') # Utiliser 'operation' qui est spécifique à la question
765
  operations_count[op] = operations_count.get(op, 0) + 1
766
 
767
  operations_summary = ", ".join([f"{op}: {count}" for op, count in operations_count.items()])
768
 
769
+ # Push vers HuggingFace
770
+ print(f"📤 Push vers {dataset_name}...")
771
+ clean_dataset.push_to_hub(
772
+ dataset_name,
773
+ private=False,
774
+ token=hf_token,
775
+ commit_message=f"Add {len(clean_entries)} handwriting samples for math OCR ({operations_summary})"
776
+ )
777
 
778
  cleanup_memory()
779