hoololi committed on
Commit
5617612
·
verified ·
1 Parent(s): ffb3108

Upload 5 files

Browse files
Files changed (5) hide show
  1. app.py +213 -0
  2. game_engine.py +724 -0
  3. image_processing_cpu.py +115 -0
  4. image_processing_gpu.py +157 -0
  5. requirements.txt +17 -0
app.py ADDED
@@ -0,0 +1,213 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ==========================================
2
+ # app.py - Calcul OCR v3.0
3
+ # ==========================================
4
+
5
+ """
6
+ Application principale - Entraînement aux calculs avec OCR
7
+ """
8
+
9
+ import gradio as gr
10
+ import warnings
11
+ import os
12
+ import gc
13
+ import numpy as np
14
+ from PIL import Image
15
+
16
+ warnings.filterwarnings("ignore")
17
+
18
+ from image_processing import init_ocr_model, create_white_canvas, cleanup_memory
19
+ from game_engine import MathGame, export_to_clean_dataset
20
+
21
# Start-up sequence: load the OCR model once, then create the game object
# used by the Gradio callbacks defined in this module.
print("🚀 Initialisation Calcul OCR v3.0...")

print("🔄 Chargement modèle OCR...")
# Only an explicit False is treated as a failure, so a loader that returns
# None keeps the previous "ready" message.
# NOTE(review): image_processing_cpu.init_ocr_model returns a bool; confirm
# what the module imported here returns.
if init_ocr_model() is False:
    print("❌ Échec du chargement du modèle OCR")
else:
    print("✅ Modèle OCR prêt")

game = MathGame()
28
+
29
def start_game_wrapper(duration: str, operation: str, difficulty: str) -> tuple:
    """Free memory, then start a new session on the module-level game.

    Returns whatever ``game.start_game`` returns for the chosen settings.
    """
    cleanup_memory()
    outputs = game.start_game(duration, operation, difficulty)
    return outputs
32
+
33
def next_question_wrapper(image_data: dict | np.ndarray | Image.Image | None) -> tuple:
    """Hand the canvas content to the module-level game and return its outputs."""
    outputs = game.next_question(image_data)
    return outputs
35
+
36
def export_current_session() -> str:
    """Export the finished session to the calcul_ocr dataset.

    Returns:
        A user-facing status message. The session is marked as exported only
        when the upload succeeded, so a failed attempt can be retried.
    """
    if not getattr(game, 'session_data', None):
        return "❌ Aucune donnée de session à exporter"

    export_info = game.get_export_status()

    if export_info["status"] == "exported":
        # Guard against a missing timestamp/result so slicing cannot fail.
        exported_at = (export_info.get('timestamp') or "")[:19].replace('T', ' ')
        previous_result = (export_info.get('result') or "")[:100]
        return f"""✅ Session déjà exportée !

📅 Exporté le: {exported_at}
📊 Résultat: {previous_result}...

💡 Jouez une nouvelle session pour contribuer davantage !"""

    if export_info["status"] == "exporting":
        return "⏳ Export en cours..."

    if not export_info["can_export"]:
        return "❌ Aucune donnée à exporter"

    game.mark_export_in_progress()

    try:
        result = export_to_clean_dataset(game.session_data)
    except Exception as e:
        game.export_status = "not_exported"
        return f"❌ Erreur export: {str(e)}"

    # export_to_clean_dataset reports its own failures as a "❌ ..." message
    # instead of raising; such a result must not lock the session as exported.
    if isinstance(result, str) and result.startswith("❌"):
        game.export_status = "not_exported"
        return result

    game.mark_export_completed(result)
    cleanup_memory()
    return result
69
+
70
# Gradio interface
# Page stylesheet: container width plus the three highlighted section classes.
APP_CSS = """
    .gradio-container { max-width: 1200px !important; }
    .config-section {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        color: white;
        padding: 15px;
        border-radius: 10px;
        margin: 10px 0;
    }
    .dataset-info {
        background: linear-gradient(135deg, #11998e 0%, #38ef7d 100%);
        color: white;
        padding: 15px;
        border-radius: 10px;
        margin: 10px 0;
    }
    .radio-group {
        background: #f8f9fa;
        padding: 10px;
        border-radius: 8px;
        margin: 5px 0;
    }
    """

# Extra <head> content injected into the page.
VIEWPORT_META = "<meta name='viewport' content='width=device-width, initial-scale=1.0'>"

# Banner shown in the question area before the first game starts.
READY_BANNER_HTML = '<div style="font-size: 2.5em; font-weight: bold; text-align: center; padding: 20px; background: linear-gradient(45deg, #667eea 0%, #764ba2 100%); color: white; border-radius: 10px;">Prêt à jouer ?</div>'

with gr.Blocks(
    title="🧮 Calcul OCR - Entraînement mathématiques",
    theme=gr.themes.Soft(),
    css=APP_CSS,
    head=VIEWPORT_META
) as demo:

    gr.Markdown(
        """
        # 🧮 Entraînement aux calculs avec OCR

        **Nouveau !** Choisissez votre configuration et entraînez-vous sur différents types de calculs !

        **Comment jouer :**
        1. **Configurez** votre session ci-dessous
        2. Cliquez sur **🚀 GO !** pour démarrer
        3. **Écrivez** ✏️ votre réponse sur le tableau
        4. Cliquez sur **➡️ NEXT !** pour la question suivante

        À la fin, vous pourrez contribuer au dataset ouvert pour améliorer l'OCR mathématique !

        ---
        """
    )

    # Session settings: duration, operation and difficulty
    with gr.Group():
        gr.Markdown("### ⚙️ Configuration de la session", elem_classes=["config-section"])

        with gr.Row():
            duration_choice = gr.Radio(
                choices=["30 secondes", "60 secondes"],
                value="30 secondes",
                label="⏱️ Durée",
                elem_classes=["radio-group"]
            )

            operation_choice = gr.Radio(
                choices=["×", "+", "-", "÷", "Aléatoire"],
                value="×",
                label="🔢 Opération",
                elem_classes=["radio-group"]
            )

            difficulty_choice = gr.Radio(
                choices=["Facile", "Difficile"],
                value="Facile",
                label="🎯 Difficulté",
                elem_classes=["radio-group"]
            )

    with gr.Row():
        with gr.Column(scale=1):
            # Current question
            question_display = gr.HTML(value=READY_BANNER_HTML)

            # GO / NEXT controls (NEXT stays disabled until a game is running)
            with gr.Row():
                go_button = gr.Button("🚀 GO !", variant="primary", size="lg")
                next_button = gr.Button("➡️ NEXT !", variant="secondary", size="lg", interactive=False)

            # Status line and timer
            status_display = gr.Markdown("### 🎯 Configurez votre session et cliquez sur GO !")
            timer_display = gr.Markdown("### ⏱️ --")

        with gr.Column(scale=1):
            # Drawing area for the handwritten answer
            canvas = gr.ImageEditor(
                label="✏️ Votre réponse",
                height=350,
                width=350,
                value=create_white_canvas(350, 350),
                brush=gr.Brush(default_size=8, default_color="#000000"),
                sources=[],
                layers=False,
                transforms=[],
                eraser=gr.Eraser(default_size=20)
            )

    # End-of-session results
    results_display = gr.HTML("")

    # Export to the dedicated dataset
    gr.Markdown("### 📤 Contribuer au dataset", elem_classes=["dataset-info"])
    export_button = gr.Button("📤 Ajouter la série au dataset calcul_ocr", variant="primary", size="lg")
    export_status = gr.Markdown("")

    # Events: GO and NEXT refresh the same seven components.
    game_outputs = [question_display, canvas, status_display, timer_display, go_button, next_button, results_display]

    go_button.click(
        fn=start_game_wrapper,
        inputs=[duration_choice, operation_choice, difficulty_choice],
        outputs=game_outputs
    )

    next_button.click(
        fn=next_question_wrapper,
        inputs=[canvas],
        outputs=game_outputs
    )

    export_button.click(
        fn=export_current_session,
        outputs=[export_status]
    )
199
+
200
if __name__ == "__main__":
    # Print a short summary of what the app offers before serving it.
    for banner_line in (
        "🚀 Lancement Calcul OCR v3.0...",
        "🎯 Dataset: calcul_ocr_dataset",
        "📊 Opérations: ×, +, -, ÷, Aléatoire",
        "⚙️ Durées: 30s, 60s",
        "🎯 Difficultés: Facile, Difficile",
    ):
        print(banner_line)

    # Listen on every interface, port 7860, without a public share link.
    launch_options = {
        "share": False,
        "show_error": True,
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_api": False,
        "favicon_path": None,
    }
    demo.launch(**launch_options)
game_engine.py ADDED
@@ -0,0 +1,724 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ==========================================
2
+ # game_engine.py - Calcul OCR v3.0
3
+ # ==========================================
4
+
5
+ """
6
+ Moteur de jeu mathématique complet
7
+ """
8
+
9
+ import random
10
+ import time
11
+ import datetime
12
+ import gradio as gr
13
+ import os
14
+ import uuid
15
+ import gc
16
+ import base64
17
+ from io import BytesIO
18
+ import numpy as np
19
+ from PIL import Image
20
+ import threading
21
+ import queue
22
+ from typing import Dict, Tuple, Optional
23
+
24
# Automatic OCR backend selection with fallbacks: GPU/TrOCR first, then
# CPU/EasyOCR, then the legacy image_processing module. The first backend
# that imports successfully provides the OCR helpers used by this module.
ocr_module = None
ocr_info = {"model_name": "Unknown", "device": "Unknown"}

# Attempt 1: GPU/TrOCR (needs torch with CUDA and the GPU module's packages)
try:
    import torch
    if not torch.cuda.is_available():
        raise ImportError("GPU non disponible")
    from image_processing_gpu import (
        recognize_number_fast_with_image,
        create_thumbnail_fast,
        create_white_canvas,
        cleanup_memory,
        log_memory_usage,
        get_ocr_model_info
    )
    ocr_module = "gpu"
    print("✅ Mode GPU détecté - TrOCR activé")
except Exception as e:
    # Any failure while loading the GPU stack (not only ImportError) falls
    # through to the CPU backend.
    print(f"⚠️ GPU/TrOCR non disponible: {e}")

    # Attempt 2: CPU/EasyOCR (fallback)
    try:
        from image_processing_cpu import (
            recognize_number_fast_with_image,
            create_thumbnail_fast,
            create_white_canvas,
            cleanup_memory,
            log_memory_usage,
            get_ocr_model_info
        )
        ocr_module = "cpu"
        print("✅ Mode CPU détecté - EasyOCR activé")
    except ImportError:

        # Attempt 3: legacy module, kept as a safety net
        try:
            from image_processing import (
                recognize_number_fast_with_image,
                create_thumbnail_fast,
                create_white_canvas,
                cleanup_memory,
                log_memory_usage
            )

            def get_ocr_model_info():
                """Placeholder info: the legacy module exposes no model details."""
                return {"model_name": "Legacy", "device": "Unknown"}

            ocr_module = "legacy"
            print("⚠️ Fallback vers image_processing.py legacy")
        except ImportError:
            # No backend at all: the game cannot work, so let the import fail.
            print("❌ ERREUR: Aucun module OCR disponible!")
            raise

# Fetch the selected model's info; a failure here is only cosmetic, so the
# "Unknown" defaults above are kept.
try:
    ocr_info = get_ocr_model_info()
    print(f"🎯 OCR sélectionné: {ocr_info['model_name']} sur {ocr_info['device']}")
except Exception:
    print("⚠️ Impossible de récupérer les infos OCR")
84
+
85
+ # Imports dataset avec gestion d'erreur
86
+ try:
87
+ from datasets import Dataset, load_dataset
88
+ DATASET_AVAILABLE = True
89
+ print("✅ Modules dataset disponibles")
90
+ except ImportError as e:
91
+ DATASET_AVAILABLE = False
92
+ print(f"⚠️ Modules dataset non disponibles: {e}")
93
+
94
+ # Nom du nouveau dataset
95
+ DATASET_NAME = "hoololi/calcul_ocr_dataset"
96
+
97
# Difficulty settings per operation, as (min, max) bounds passed to
# random.randint. For "÷" the bounds apply to the result (the quotient),
# not to the operands.
DIFFICULTY_RANGES = {
    "×": {"Facile": (2, 9), "Difficile": (4, 12)},
    "+": {"Facile": (1, 50), "Difficile": (10, 100)},
    "-": {"Facile": (1, 50), "Difficile": (10, 100)},
    "÷": {"Facile": (1, 10), "Difficile": (2, 12)},
}
116
+
117
def create_result_row_with_images(i: int, image: dict | np.ndarray | Image.Image, expected: int, operation_data: tuple[int, int, str, int]) -> dict:
    """Run OCR on one answer and build its row for the results table.

    Args:
        i: Zero-based question index (displayed as ``i + 1``).
        image: The drawing submitted for this question.
        expected: The correct answer for the question.
        operation_data: ``(a, b, operation, correct_result)`` of the question.

    Returns:
        A dict with the keys ``html_row``, ``is_correct``, ``recognized``
        (raw OCR output), ``recognized_num`` (parsed int, 0 when the OCR
        output is not a plain number) and ``dataset_image_data``.
    """
    recognized, optimized_image, dataset_image_data = recognize_number_fast_with_image(image)

    # Anything that is not a plain run of digits counts as 0. str.isdigit()
    # accepts some characters int() rejects (e.g. "²"), hence ValueError.
    try:
        recognized_num = int(recognized) if recognized.isdigit() else 0
    except (AttributeError, TypeError, ValueError):
        recognized_num = 0

    is_correct = recognized_num == expected
    a, b, operation, _ = operation_data

    status_icon = "✅" if is_correct else "❌"
    status_text = "Correct" if is_correct else "Incorrect"
    row_color = "#e8f5e8" if is_correct else "#ffe8e8"

    # Thumbnail shown in the table
    image_thumbnail = create_thumbnail_fast(optimized_image, size=(50, 50))

    # Free the intermediate image (best effort). Compare with None rather
    # than testing truthiness, which is ambiguous for array-like objects.
    if optimized_image is not None and hasattr(optimized_image, 'close'):
        try:
            optimized_image.close()
        except Exception:
            pass

    html_row = f"""
    <tr style="background-color: {row_color};">
    <td style="text-align: center; padding: 8px; border: 1px solid #ddd; color: #333;">{i+1}</td>
    <td style="text-align: center; padding: 8px; border: 1px solid #ddd; font-weight: bold; color: #333;">{a}</td>
    <td style="text-align: center; padding: 8px; border: 1px solid #ddd; font-weight: bold; color: #333;">{operation}</td>
    <td style="text-align: center; padding: 8px; border: 1px solid #ddd; font-weight: bold; color: #333;">{b}</td>
    <td style="text-align: center; padding: 8px; border: 1px solid #ddd; font-weight: bold; color: #333;">{expected}</td>
    <td style="text-align: center; padding: 8px; border: 1px solid #ddd;">{image_thumbnail}</td>
    <td style="text-align: center; padding: 8px; border: 1px solid #ddd; font-weight: bold; color: #333;">{recognized_num}</td>
    <td style="text-align: center; padding: 8px; border: 1px solid #ddd; color: #333;">{status_icon} {status_text}</td>
    </tr>
    """

    return {
        'html_row': html_row,
        'is_correct': is_correct,
        'recognized': recognized,
        'recognized_num': recognized_num,
        'dataset_image_data': dataset_image_data
    }
162
+
163
+
164
+ class MathGame:
165
+ """Moteur de jeu mathématique avec traitement parallèle"""
166
+
167
+ def __init__(self):
168
+ self.is_running = False
169
+ self.start_time = 0
170
+ self.current_operation = ""
171
+ self.correct_answer = 0
172
+ self.user_images = []
173
+ self.expected_answers = []
174
+ self.operations_history = []
175
+ self.question_count = 0
176
+ self.time_remaining = 30
177
+ self.session_data = []
178
+
179
+ # Configuration session
180
+ self.duration = 30
181
+ self.operation_type = "×"
182
+ self.difficulty = "Facile"
183
+
184
+ # Gestion export
185
+ self.export_status = "not_exported"
186
+ self.export_timestamp = None
187
+ self.export_result = None
188
+
189
+ # NOUVEAU: Traitement parallèle
190
+ self.processing_queue = queue.Queue()
191
+ self.results_cache: Dict[int, dict] = {} # {question_number: result_data}
192
+ self.worker_thread: Optional[threading.Thread] = None
193
+ self.processing_active = False
194
+
195
+ def _start_background_processing(self) -> None:
196
+ """Démarre le thread de traitement en arrière-plan"""
197
+ if self.worker_thread is None or not self.worker_thread.is_alive():
198
+ self.processing_active = True
199
+ self.worker_thread = threading.Thread(target=self._process_images_worker, daemon=True)
200
+ self.worker_thread.start()
201
+ print("🔄 Thread de traitement parallèle démarré")
202
+
203
+ def _stop_background_processing(self) -> None:
204
+ """Arrête le thread de traitement"""
205
+ self.processing_active = False
206
+ if self.worker_thread and self.worker_thread.is_alive():
207
+ print("⏹️ Arrêt du thread de traitement parallèle")
208
+
209
+ def _process_images_worker(self) -> None:
210
+ """Worker thread qui traite les images en arrière-plan"""
211
+ print("🚀 Worker thread démarré")
212
+ while self.processing_active:
213
+ try:
214
+ if not self.processing_queue.empty():
215
+ question_num, image, expected, operation_data = self.processing_queue.get(timeout=1)
216
+ print(f"🔄 Traitement parallèle image {question_num}...")
217
+
218
+ start_time = time.time()
219
+ result_data = create_result_row_with_images(question_num, image, expected, operation_data)
220
+ processing_time = time.time() - start_time
221
+
222
+ # Stocker le résultat
223
+ self.results_cache[question_num] = result_data
224
+ print(f"✅ Image {question_num} traitée en {processing_time:.1f}s (parallèle)")
225
+
226
+ else:
227
+ time.sleep(0.1) # Éviter la consommation CPU excessive
228
+
229
+ except queue.Empty:
230
+ continue
231
+ except Exception as e:
232
+ print(f"❌ Erreur traitement parallèle: {e}")
233
+
234
+ print("🛑 Worker thread terminé")
235
+
236
+ def _add_image_to_processing_queue(self, question_num: int, image: dict | np.ndarray | Image.Image,
237
+ expected: int, operation_data: tuple) -> None:
238
+ """Ajoute une image à la queue de traitement"""
239
+ if image is not None:
240
+ self.processing_queue.put((question_num, image, expected, operation_data))
241
+ print(f"📝 Image {question_num} ajoutée à la queue de traitement")
242
+ return {
243
+ "status": self.export_status,
244
+ "timestamp": self.export_timestamp,
245
+ "result": self.export_result,
246
+ "can_export": self.export_status == "not_exported" and len(self.session_data) > 0
247
+ }
248
+
249
def mark_export_in_progress(self) -> None:
    """Flag the session as being exported and record the current time (ISO format)."""
    started_at = datetime.datetime.now()
    self.export_status = "exporting"
    self.export_timestamp = started_at.isoformat()
252
+
253
def mark_export_completed(self, result: str) -> None:
    """Store the export result message and flag the session as exported."""
    self.export_result = result
    self.export_status = "exported"
256
+
257
def generate_multiplication(self, difficulty: str) -> tuple[str, int]:
    """Return a random multiplication as ``("a × b", product)``.

    Both factors are drawn from the "×" range of ``difficulty``.
    """
    low, high = DIFFICULTY_RANGES["×"][difficulty]
    left = random.randint(low, high)
    right = random.randint(low, high)
    product = left * right
    return f"{left} × {right}", product
263
+
264
def generate_addition(self, difficulty: str) -> tuple[str, int]:
    """Return a random addition as ``("a + b", sum)``.

    Both terms are drawn from the "+" range of ``difficulty``.
    """
    low, high = DIFFICULTY_RANGES["+"][difficulty]
    left = random.randint(low, high)
    right = random.randint(low, high)
    total = left + right
    return f"{left} + {right}", total
270
+
271
def generate_subtraction(self, difficulty: str) -> tuple[str, int]:
    """Return a random subtraction as ``("a - b", difference)``.

    The second operand is drawn between the range minimum and the first
    operand, so the difference is never negative.
    """
    low, high = DIFFICULTY_RANGES["-"][difficulty]
    minuend = random.randint(low, high)
    subtrahend = random.randint(low, minuend)  # subtrahend <= minuend
    difference = minuend - subtrahend
    return f"{minuend} - {subtrahend}", difference
277
+
278
def generate_division(self, difficulty: str) -> tuple[str, int]:
    """Return a random exact division as ``("dividend ÷ divisor", quotient)``.

    The quotient is drawn first from the "÷" range of ``difficulty``; the
    divisor is then drawn from 2-9 ("Facile") or 2-12 (any other level), and
    the dividend is their product so the division has no remainder.
    """
    low, high = DIFFICULTY_RANGES["÷"][difficulty]
    quotient = random.randint(low, high)
    largest_divisor = 9 if difficulty == "Facile" else 12
    divisor = random.randint(2, largest_divisor)
    dividend = quotient * divisor
    return f"{dividend} ÷ {divisor}", quotient
288
+
289
def generate_operation(self, operation_type: str, difficulty: str) -> tuple[str, int]:
    """Generate an operation of the requested type and difficulty.

    "Aléatoire" picks one of the four operations at random; an unknown type
    falls back to multiplication.
    """
    if operation_type == "Aléatoire":
        # Pick a concrete operation at random, then generate it
        picked = random.choice(["×", "+", "-", "÷"])
        return self.generate_operation(picked, difficulty)

    generators = {
        "×": self.generate_multiplication,
        "+": self.generate_addition,
        "-": self.generate_subtraction,
        "÷": self.generate_division,
    }
    # Default: multiplication
    generator = generators.get(operation_type, self.generate_multiplication)
    return generator(difficulty)
306
+
307
def start_game(self, duration: str, operation: str, difficulty: str) -> tuple[str, Image.Image, str, str, gr.update, gr.update, str]:
    """Reset all session state and start a new game with the chosen settings.

    Args:
        duration: "60 secondes" selects 60 s; any other value selects 30 s.
        operation: Operation type passed to ``generate_operation``.
        difficulty: Difficulty level passed to ``generate_operation``.

    Returns:
        A 7-tuple: question HTML, blank canvas, status text, timer text,
        GO button update (disabled), NEXT button update (enabled) and an
        empty results HTML string.
    """
    # Configuration
    self.duration = 60 if duration == "60 secondes" else 30
    self.operation_type = operation
    self.difficulty = difficulty

    # Release the previous session's images. Closing is best effort, but
    # only ordinary errors are ignored (no bare except).
    for img in getattr(self, 'user_images', None) or []:
        if hasattr(img, 'close'):
            try:
                img.close()
            except Exception:
                pass

    previous_entries = getattr(self, 'session_data', None)
    if previous_entries:
        for entry in previous_entries:
            if entry.get('user_drawing'):
                entry['user_drawing'] = None
        previous_entries.clear()

    # Reset the background processing: stop the worker, drop cached results
    # and drain whatever is still queued.
    self._stop_background_processing()
    self.results_cache.clear()
    while not self.processing_queue.empty():
        try:
            self.processing_queue.get_nowait()
        except queue.Empty:
            break

    self.is_running = True
    self.start_time = time.time()
    self.user_images = []
    self.expected_answers = []
    self.operations_history = []
    self.question_count = 0
    self.time_remaining = self.duration
    self.session_data = []

    # Reset export state
    self.export_status = "not_exported"
    self.export_timestamp = None
    self.export_result = None

    # Start the background processing again
    self._start_background_processing()

    gc.collect()

    # First operation
    operation_str, answer = self.generate_operation(self.operation_type, self.difficulty)
    self.current_operation = operation_str
    self.correct_answer = answer

    # Operations are formatted as "a <op> b"; split them back for the history
    parts = operation_str.split()
    a, op, b = int(parts[0]), parts[1], int(parts[2])
    self.operations_history.append((a, b, op, answer))

    # Emoji matching the selected operation type
    operation_emoji = {
        "×": "✖️", "+": "➕", "-": "➖", "÷": "➗", "Aléatoire": "🎲"
    }
    emoji = operation_emoji.get(self.operation_type, "🔢")

    return (
        f'<div style="font-size: 3em; font-weight: bold; text-align: center; padding: 20px; background: linear-gradient(45deg, #667eea 0%, #764ba2 100%); color: white; border-radius: 10px;">{operation_str}</div>',
        create_white_canvas(),
        f"🎯 {emoji} {self.operation_type} • {self.difficulty} • Écrivez votre réponse !",
        f"⏱️ Temps restant: {self.time_remaining}s",
        gr.update(interactive=False),
        gr.update(interactive=True),
        ""
    )
386
+
387
def next_question(self, image_data: dict | np.ndarray | Image.Image | None) -> tuple[str, Image.Image, str, str, gr.update, gr.update, str]:
    """Record the answer to the current question and show the next one.

    Args:
        image_data: The drawing for the current question, or None when no
            answer was provided (the question is then skipped).

    Returns:
        A 7-tuple: question HTML, canvas value, status text, timer text,
        GO button update, NEXT button update and results HTML. When the
        time is up, the result of ``end_game`` is returned instead.
    """
    if not self.is_running:
        return (
            f'<div style="font-size: 3em; font-weight: bold; text-align: center; padding: 20px; background: linear-gradient(45deg, #667eea 0%, #764ba2 100%); color: white; border-radius: 10px;">{self.current_operation}</div>',
            image_data,
            "❌ Le jeu n'est pas en cours !",
            "⏱️ Temps: 0s",
            gr.update(interactive=True),
            gr.update(interactive=False),
            ""
        )

    elapsed_time = time.time() - self.start_time
    if elapsed_time >= self.duration:
        return self.end_game(image_data)

    if image_data is not None:
        # Keep the image for the final report AND queue it for background OCR
        self.user_images.append(image_data)
        self.expected_answers.append(self.correct_answer)

        # Operations are formatted as "a <op> b"
        parts = self.current_operation.split()
        a, op, b = int(parts[0]), parts[1], int(parts[2])
        current_operation_data = (a, b, op, self.correct_answer)

        self._add_image_to_processing_queue(self.question_count, image_data, self.correct_answer, current_operation_data)

        self.question_count += 1
    elif len(self.operations_history) > len(self.user_images):
        # Skipped question: its operation was added to the history when it
        # was generated. Drop it so the history stays aligned index by index
        # with user_images / expected_answers, which end_game zips together.
        self.operations_history.pop()

    # New operation
    operation_str, answer = self.generate_operation(self.operation_type, self.difficulty)
    self.current_operation = operation_str
    self.correct_answer = answer

    # Add it to the history
    parts = operation_str.split()
    a, op, b = int(parts[0]), parts[1], int(parts[2])
    self.operations_history.append((a, b, op, answer))

    time_remaining = max(0, self.duration - int(elapsed_time))
    self.time_remaining = time_remaining

    if time_remaining <= 0:
        # The submitted image (if any) was already recorded above; passing
        # it again would store it twice, against the new question.
        return self.end_game(None)

    # Emoji matching the selected operation type
    operation_emoji = {
        "×": "✖️", "+": "➕", "-": "➖", "÷": "➗", "Aléatoire": "🎲"
    }
    emoji = operation_emoji.get(self.operation_type, "🔢")

    return (
        f'<div style="font-size: 3em; font-weight: bold; text-align: center; padding: 20px; background: linear-gradient(45deg, #667eea 0%, #764ba2 100%); color: white; border-radius: 10px;">{operation_str}</div>',
        create_white_canvas(),
        f"🎯 {emoji} Question {self.question_count + 1} • {self.difficulty}",
        f"⏱️ Temps restant: {time_remaining}s",
        gr.update(interactive=False),
        gr.update(interactive=True),
        ""
    )
449
+
450
def end_game(self, final_image: dict | np.ndarray | Image.Image | None) -> tuple[str, Image.Image, str, str, gr.update, gr.update, str]:
    """Finish the session: score every answer and build the results view.

    Each recorded image is run through OCR, compared with the expected
    answer and turned into one entry of ``self.session_data`` (the records
    offered for export) plus one row of the HTML results table.

    Args:
        final_image: Drawing for the question on screen when the game ended,
            or None when there is nothing more to record.

    Returns:
        A 7-tuple: end banner HTML, blank canvas, status text, timer text,
        GO button update (enabled), NEXT button update (disabled) and the
        results HTML.
    """
    self.is_running = False

    if final_image is not None:
        self.user_images.append(final_image)
        self.expected_answers.append(self.correct_answer)
        self.question_count += 1
        # Add the current question to the history only if it is missing.
        if len(self.operations_history) < len(self.user_images):
            parts = self.current_operation.split()
            a, op, b = int(parts[0]), parts[1], int(parts[2])
            self.operations_history.append((a, b, op, self.correct_answer))

    correct_answers = 0
    total_questions = len(self.user_images)
    table_rows = []

    session_timestamp = datetime.datetime.now().isoformat()
    session_id = f"session_{int(datetime.datetime.now().timestamp())}_{str(uuid.uuid4())[:8]}"

    self.session_data = []
    images_saved = 0
    total_image_size_kb = 0

    # Model info is fetched once, before the loop. This statement used to
    # sit inside the dict literal below, which is a syntax error.
    model_info = get_ocr_model_info()

    print(f"🔄 Traitement de {total_questions} images...")
    start_processing = time.time()

    for i, (image, expected, operation_data) in enumerate(zip(self.user_images, self.expected_answers, self.operations_history)):
        print(f" → Image {i+1}/{total_questions}...")
        img_start = time.time()

        row_data = create_result_row_with_images(i, image, expected, operation_data)
        table_rows.append(row_data['html_row'])

        img_time = time.time() - img_start
        print(f" ✅ Traitée en {img_time:.1f}s")

        if row_data['is_correct']:
            correct_answers += 1

        a, b, operation, _ = operation_data

        # Record structure for the calcul OCR dataset
        entry = {
            "session_id": session_id,
            "timestamp": session_timestamp,
            "question_number": i + 1,

            # Session configuration
            "session_duration": self.duration,
            "operation_type": self.operation_type,
            "difficulty_level": self.difficulty,

            # Maths
            "operand_a": a,
            "operand_b": b,
            "operation": operation,
            "correct_answer": expected,

            # OCR & results
            "ocr_model": model_info.get("model_name", "Unknown"),
            "ocr_device": model_info.get("device", "Unknown"),
            "user_answer_ocr": row_data['recognized'],
            "user_answer_parsed": row_data['recognized_num'],
            "is_correct": row_data['is_correct'],

            # Metadata
            "total_questions": total_questions,
            "app_version": "3.0_calcul_ocr_parallel"
        }

        # Attach the image when the OCR step produced one
        image_data = row_data['dataset_image_data']
        if image_data:
            entry["handwriting_image"] = image_data["image_base64"]
            entry["image_width"] = int(image_data["compressed_size"][0])
            entry["image_height"] = int(image_data["compressed_size"][1])
            entry["image_size_kb"] = float(image_data["file_size_kb"])
            entry["has_image"] = True
            images_saved += 1
            total_image_size_kb += image_data["file_size_kb"]
        else:
            entry["has_image"] = False

        self.session_data.append(entry)

    processing_time = time.time() - start_processing
    print(f"⏱️ Traitement total: {processing_time:.1f}s")

    accuracy = (correct_answers / total_questions * 100) if total_questions > 0 else 0

    for entry in self.session_data:
        entry["session_accuracy"] = accuracy

    # Memory cleanup (best effort)
    for img in self.user_images:
        if hasattr(img, 'close'):
            try:
                img.close()
            except Exception:
                pass

    gc.collect()

    # Results table
    table_rows_html = "".join(table_rows)
    table_html = f"""
    <div style="overflow-x: auto; margin: 20px 0;">
    <table style="width: 100%; border-collapse: collapse; border: 2px solid #4a90e2;">
    <thead>
    <tr style="background: #4a90e2; color: white;">
    <th style="padding: 8px;">Question</th>
    <th style="padding: 8px;">A</th>
    <th style="padding: 8px;">Op</th>
    <th style="padding: 8px;">B</th>
    <th style="padding: 8px;">Réponse</th>
    <th style="padding: 8px;">Votre dessin</th>
    <th style="padding: 8px;">OCR</th>
    <th style="padding: 8px;">Statut</th>
    </tr>
    </thead>
    <tbody>
    {table_rows_html}
    </tbody>
    </table>
    </div>
    """

    # Session configuration summary for display
    config_display = f"{self.operation_type} • {self.difficulty} • {self.duration}s"
    operation_emoji = {
        "×": "✖️", "+": "➕", "-": "➖", "÷": "➗", "Aléatoire": "🎲"
    }
    emoji = operation_emoji.get(self.operation_type, "🔢")

    # The export invitation is shown only when there is something to export
    export_info = self.get_export_status()
    if export_info["can_export"]:
        export_section = f"""
        <div style="margin-top: 20px; padding: 15px; background-color: #e8f5e8; border-radius: 8px;">
        <h3 style="color: #2e7d32;">📤 Ajouter cette série au dataset ?</h3>
        <p style="color: #2e7d32;">
        ✅ {total_questions} réponses • 📊 {accuracy:.1f}% de précision<br>
        📸 {images_saved} opérations et images sauvegardées ({total_image_size_kb:.1f}KB)<br>
        ⚙️ Configuration: {config_display}
        </p>
        </div>
        """
    else:
        export_section = ""

    final_results = f"""
    <div style="margin: 20px 0;">
    <h1 style="text-align: center; color: #4a90e2;">🎉 Session terminée !</h1>
    <div style="background: linear-gradient(45deg, #667eea 0%, #764ba2 100%); color: white; padding: 20px; border-radius: 10px; margin: 20px 0;">
    <h2>📈 Résultats</h2>
    <div style="text-align: center; margin-bottom: 15px;">
    <strong>{emoji} {config_display}</strong>
    </div>
    <div style="display: flex; justify-content: space-around; flex-wrap: wrap;">
    <div style="text-align: center; margin: 10px;">
    <div style="font-size: 2em; font-weight: bold;">{total_questions}</div>
    <div>Questions</div>
    </div>
    <div style="text-align: center; margin: 10px;">
    <div style="font-size: 2em; font-weight: bold; color: #90EE90;">{correct_answers}</div>
    <div>Correctes</div>
    </div>
    <div style="text-align: center; margin: 10px;">
    <div style="font-size: 2em; font-weight: bold; color: #FFB6C1;">{total_questions - correct_answers}</div>
    <div>Incorrectes</div>
    </div>
    <div style="text-align: center; margin: 10px;">
    <div style="font-size: 2em; font-weight: bold;">{accuracy:.1f}%</div>
    <div>Précision</div>
    </div>
    </div>
    </div>
    <h2 style="color: #4a90e2;">📊 Détail des Réponses</h2>
    {table_html}
    {export_section}
    </div>
    """

    return (
        """<div style="font-size: 3em; font-weight: bold; text-align: center; padding: 20px; background: linear-gradient(45deg, #667eea 0%, #764ba2 100%); color: white; border-radius: 10px;">🏁 C'est fini !</div>""",
        create_white_canvas(),
        f"✨ Session {config_display} terminée !",
        "⏱️ Temps écoulé !",
        gr.update(interactive=True),
        gr.update(interactive=False),
        final_results
    )
645
+
646
+
647
def export_to_clean_dataset(session_data: list[dict], dataset_name: str = DATASET_NAME) -> str:
    """Append a session's image entries to the Hugging Face dataset.

    The existing "train" split is loaded, the session entries that carry an
    image are appended, and the combined dataset is pushed back to the hub.

    Args:
        session_data: Entries built at the end of a session; only those with
            ``has_image`` set are exported.
        dataset_name: Hub repository id of the target dataset.

    Returns:
        A user-facing message. Failures are reported as a message starting
        with "❌" rather than raised.
    """
    if not DATASET_AVAILABLE:
        return "❌ Modules dataset non disponibles"

    hf_token = os.getenv("HF_TOKEN") or os.getenv("tk_calcul_ocr")  # both names are supported
    if not hf_token:
        return "❌ Token HuggingFace manquant (HF_TOKEN ou tk_calcul_ocr)"

    try:
        print("\n🚀 === EXPORT VERS DATASET CALCUL OCR ===")
        print(f"📊 Dataset: {dataset_name}")

        # Keep only the entries that carry an image
        clean_entries = [entry for entry in session_data if entry.get('has_image', False)]

        print(f"✅ {len(clean_entries)} entrées avec images converties")

        if not clean_entries:
            return "❌ Aucune entrée avec image à exporter"

        # Load the existing dataset OR start a new one. Only a "not found"
        # error means the dataset does not exist yet. Any other failure
        # (network, auth, schema...) must abort the export, otherwise the
        # push below would overwrite the hub dataset with this session only.
        # NOTE(review): relies on `datasets` signalling a missing or empty
        # repository with a FileNotFoundError subclass; confirm for the
        # installed version.
        try:
            existing_dataset = load_dataset(dataset_name, split="train")
            existing_data = existing_dataset.to_list()
            print(f"📊 {len(existing_data)} entrées existantes")
        except FileNotFoundError:
            existing_data = []
            print("📊 Création nouveau dataset calcul_ocr")

        # Combine
        combined_data = existing_data + clean_entries
        clean_dataset = Dataset.from_list(combined_data)

        print("✅ Dataset créé - Features:")
        for feature_name in clean_dataset.features:
            print(f" - {feature_name}: {clean_dataset.features[feature_name]}")

        # Per-operation statistics for the commit message and the summary
        operations_count = {}
        for entry in clean_entries:
            op = entry.get('operation_type', 'unknown')
            operations_count[op] = operations_count.get(op, 0) + 1

        operations_summary = ", ".join(f"{op}: {count}" for op, count in operations_count.items())

        # Push to the Hugging Face hub
        print(f"📤 Push vers {dataset_name}...")
        clean_dataset.push_to_hub(
            dataset_name,
            private=False,
            token=hf_token,
            commit_message=f"Add {len(clean_entries)} handwriting samples for math OCR ({operations_summary})"
        )

        cleanup_memory()

        success_message = f"""✅ Session ajoutée au dataset avec succès !

📊 Dataset: {dataset_name}
📸 Images: {len(clean_entries)}
🔢 Opérations: {operations_summary}
📈 Total: {len(clean_dataset)}

🔗 Le dataset est consultable ici : https://huggingface.co/datasets/{dataset_name}"""

        return success_message

    except Exception as e:
        print(f"❌ ERREUR: {e}")
        import traceback
        traceback.print_exc()
        error_message = f"❌ Erreur: {str(e)}"
        return error_message
image_processing_cpu.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ==========================================
2
+ # image_processing_cpu.py - Version CPU avec EasyOCR
3
+ # ==========================================
4
+
5
+ """
6
+ Module de traitement d'images CPU-optimisé pour calculs mathématiques
7
+ Utilise EasyOCR pour des performances rapides sur CPU
8
+ """
9
+
10
+ import time
11
+ from utils import (
12
+ optimize_image_for_ocr,
13
+ prepare_image_for_dataset,
14
+ create_thumbnail_fast,
15
+ create_white_canvas,
16
+ log_memory_usage,
17
+ cleanup_memory,
18
+ decode_image_from_dataset,
19
+ validate_ocr_result
20
+ )
21
+
22
+ # Variables globales pour OCR EasyOCR
23
+ easyocr_reader = None
24
+ OCR_MODEL_NAME = "EasyOCR"
25
+
26
def init_ocr_model() -> bool:
    """Create the EasyOCR reader and store it in the module-level ``easyocr_reader``.

    The reader is built for English (``'en'``) with GPU usage and verbose
    output disabled.

    Returns:
        True when the reader was created, False when importing or building
        EasyOCR raised an exception (the error is printed, not re-raised).
    """
    global easyocr_reader

    loaded = False
    try:
        print("🔄 Chargement EasyOCR (CPU optimisé)...")
        # Imported lazily so the module can be imported without EasyOCR installed.
        import easyocr
        easyocr_reader = easyocr.Reader(['en'], gpu=False, verbose=False)
        print("✅ EasyOCR prêt (CPU) !")
        loaded = True
    except Exception as load_error:
        print(f"❌ Erreur lors du chargement EasyOCR: {load_error}")

    return loaded
40
+
41
def get_ocr_model_info() -> dict:
    """Return a static description of the OCR backend used by this module.

    Returns:
        A dict with the keys ``model_name``, ``device``, ``framework``,
        ``optimized_for`` and ``version``.
    """
    info = dict(
        model_name=OCR_MODEL_NAME,
        device="CPU",
        framework="EasyOCR",
        optimized_for="speed",
        version="1.7.x",
    )
    return info
50
+
51
+ def recognize_number_fast_with_image(image_dict, debug: bool = False) -> tuple[str, any, dict | None]:
52
+ """
53
+ OCR avec EasyOCR (CPU optimisé)
54
+
55
+ Args:
56
+ image_dict: Image d'entrée (format Gradio)
57
+ debug: Afficher les logs de debug
58
+
59
+ Returns:
60
+ (résultat_ocr, image_optimisée, données_dataset)
61
+ """
62
+ if image_dict is None or easyocr_reader is None:
63
+ if debug:
64
+ print(" ❌ Image manquante ou EasyOCR non initialisé")
65
+ return "0", None, None
66
+
67
+ try:
68
+ start_time = time.time()
69
+ if debug:
70
+ print(" 🔄 Début OCR EasyOCR...")
71
+
72
+ # Optimiser image (fonction commune)
73
+ optimized_image = optimize_image_for_ocr(image_dict, max_size=300)
74
+ if optimized_image is None:
75
+ if debug:
76
+ print(" ❌ Échec optimisation image")
77
+ return "0", None, None
78
+
79
+ # EasyOCR - traitement spécialisé CPU
80
+ if debug:
81
+ print(" ⚡ Lancement EasyOCR...")
82
+
83
+ import numpy as np
84
+ img_array = np.array(optimized_image)
85
+ results = easyocr_reader.readtext(img_array, detail=0, paragraph=False)
86
+
87
+ # Traitement des résultats EasyOCR
88
+ if results:
89
+ all_text = ' '.join(str(r) for r in results)
90
+ final_result = validate_ocr_result(all_text, max_length=4)
91
+ else:
92
+ final_result = "0"
93
+
94
+ # Préparer pour dataset (fonction commune)
95
+ dataset_image_data = prepare_image_for_dataset(optimized_image)
96
+
97
+ if debug:
98
+ total_time = time.time() - start_time
99
+ print(f" ✅ EasyOCR terminé en {total_time:.1f}s → '{final_result}'")
100
+
101
+ return final_result, optimized_image, dataset_image_data
102
+
103
+ except Exception as e:
104
+ print(f"❌ Erreur OCR EasyOCR: {e}")
105
+ return "0", None, None
106
+
107
def recognize_number_fast(image_dict) -> tuple[str, object]:
    """Run OCR and return the result together with the optimized image.

    Thin wrapper around ``recognize_number_fast_with_image`` that drops the
    dataset payload.

    Args:
        image_dict: Input image, forwarded unchanged.

    Returns:
        A ``(ocr_result, optimized_image)`` tuple.
    """
    result, optimized_image, _ = recognize_number_fast_with_image(image_dict)
    return result, optimized_image
111
+
112
def recognize_number(image_dict) -> str:
    """Run OCR on ``image_dict`` and return only the recognized text."""
    recognized_text, _unused_image = recognize_number_fast(image_dict)
    return recognized_text
image_processing_gpu.py ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ==========================================
2
+ # image_processing_gpu.py - Version GPU avec TrOCR
3
+ # ==========================================
4
+
5
+ """
6
+ Module de traitement d'images GPU-optimisé pour calculs mathématiques
7
+ Utilise TrOCR pour une précision maximale sur GPU
8
+ """
9
+
10
+ import time
11
+ import torch
12
+ from utils import (
13
+ optimize_image_for_ocr,
14
+ prepare_image_for_dataset,
15
+ create_thumbnail_fast,
16
+ create_white_canvas,
17
+ log_memory_usage,
18
+ cleanup_memory,
19
+ decode_image_from_dataset,
20
+ validate_ocr_result
21
+ )
22
+
23
+ # Variables globales pour OCR TrOCR
24
+ processor = None
25
+ model = None
26
+ OCR_MODEL_NAME = "TrOCR-base-handwritten"
27
+
28
def init_ocr_model() -> bool:
    """Load the TrOCR processor and model into the module-level globals.

    Both objects are loaded from the ``microsoft/trocr-base-handwritten``
    checkpoint. The model is switched to eval mode and moved to CUDA when
    ``torch.cuda.is_available()`` is true.

    Returns:
        True when loading succeeded, False when any exception was raised
        (the error is printed, not re-raised).
    """
    global processor, model

    checkpoint = 'microsoft/trocr-base-handwritten'
    ready = False
    try:
        print("🔄 Chargement TrOCR (GPU optimisé)...")
        # Imported lazily so the module can be imported without transformers.
        from transformers import TrOCRProcessor, VisionEncoderDecoderModel

        processor = TrOCRProcessor.from_pretrained(checkpoint)
        model = VisionEncoderDecoderModel.from_pretrained(checkpoint)

        # Inference only.
        model.eval()

        if not torch.cuda.is_available():
            device_info = "CPU (pas de GPU détecté)"
            print(f"⚠️ TrOCR sur CPU - {device_info}")
        else:
            model = model.cuda()
            device_info = f"GPU ({torch.cuda.get_device_name()})"
            print(f"✅ TrOCR prêt sur {device_info} !")

        ready = True
    except Exception as load_error:
        print(f"❌ Erreur lors du chargement TrOCR: {load_error}")

    return ready
55
+
56
def get_ocr_model_info() -> dict:
    """Return a description of the OCR backend used by this module.

    ``device`` is reported as "GPU" only when CUDA is available and the
    module-level ``model`` has been loaded; ``gpu_name`` is "N/A" when CUDA
    is not available.

    Returns:
        A dict with the keys ``model_name``, ``device``, ``gpu_name``,
        ``framework``, ``optimized_for`` and ``version``.
    """
    cuda_ready = torch.cuda.is_available()

    device = "CPU"
    if cuda_ready and model is not None:
        device = "GPU"

    if cuda_ready:
        gpu_name = torch.cuda.get_device_name()
    else:
        gpu_name = "N/A"

    info = dict(
        model_name=OCR_MODEL_NAME,
        device=device,
        gpu_name=gpu_name,
        framework="HuggingFace-Transformers",
        optimized_for="accuracy",
        version="microsoft/trocr-base-handwritten",
    )
    return info
69
+
70
+ def recognize_number_fast_with_image(image_dict, debug: bool = False) -> tuple[str, any, dict | None]:
71
+ """
72
+ OCR avec TrOCR (GPU optimisé)
73
+
74
+ Args:
75
+ image_dict: Image d'entrée (format Gradio)
76
+ debug: Afficher les logs de debug
77
+
78
+ Returns:
79
+ (résultat_ocr, image_optimisée, données_dataset)
80
+ """
81
+ if image_dict is None or processor is None or model is None:
82
+ if debug:
83
+ print(" ❌ Image manquante ou TrOCR non initialisé")
84
+ return "0", None, None
85
+
86
+ try:
87
+ start_time = time.time()
88
+ if debug:
89
+ print(" 🔄 Début OCR TrOCR...")
90
+
91
+ # Optimiser image (fonction commune)
92
+ optimized_image = optimize_image_for_ocr(image_dict, max_size=384) # TrOCR préfère 384x384
93
+ if optimized_image is None:
94
+ if debug:
95
+ print(" ❌ Échec optimisation image")
96
+ return "0", None, None
97
+
98
+ # TrOCR - traitement spécialisé GPU
99
+ if debug:
100
+ print(" 🤖 Lancement TrOCR...")
101
+
102
+ with torch.no_grad():
103
+ # Preprocessing
104
+ pixel_values = processor(images=optimized_image, return_tensors="pt").pixel_values
105
+
106
+ # GPU transfer si disponible
107
+ if torch.cuda.is_available():
108
+ pixel_values = pixel_values.cuda()
109
+
110
+ # Génération optimisée
111
+ generated_ids = model.generate(
112
+ pixel_values,
113
+ max_length=4, # Optimisé pour les calculs
114
+ num_beams=1, # Rapide
115
+ do_sample=False, # Déterministe
116
+ early_stopping=True, # Arrêt rapide
117
+ pad_token_id=processor.tokenizer.pad_token_id
118
+ )
119
+
120
+ # Décodage
121
+ result = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
122
+ final_result = validate_ocr_result(result, max_length=4)
123
+
124
+ # Préparer pour dataset (fonction commune)
125
+ dataset_image_data = prepare_image_for_dataset(optimized_image)
126
+
127
+ if debug:
128
+ total_time = time.time() - start_time
129
+ device = "GPU" if torch.cuda.is_available() else "CPU"
130
+ print(f" ✅ TrOCR ({device}) terminé en {total_time:.1f}s → '{final_result}'")
131
+
132
+ return final_result, optimized_image, dataset_image_data
133
+
134
+ except Exception as e:
135
+ print(f"❌ Erreur OCR TrOCR: {e}")
136
+ return "0", None, None
137
+
138
def recognize_number_fast(image_dict) -> tuple[str, object]:
    """Run OCR and return the result together with the optimized image.

    Thin wrapper around ``recognize_number_fast_with_image`` that drops the
    dataset payload.

    Args:
        image_dict: Input image, forwarded unchanged.

    Returns:
        A ``(ocr_result, optimized_image)`` tuple.
    """
    result, optimized_image, _ = recognize_number_fast_with_image(image_dict)
    return result, optimized_image
142
+
143
def recognize_number(image_dict) -> str:
    """Run OCR on ``image_dict`` and return only the recognized text."""
    recognized_text, _unused_image = recognize_number_fast(image_dict)
    return recognized_text
147
+
148
+ # Fonctions spécifiques au fine-tuning (pour plus tard)
149
def prepare_for_finetuning(dataset_path: str) -> dict:
    """Placeholder for preparing a dataset for TrOCR fine-tuning.

    Args:
        dataset_path: Currently unused.

    Returns:
        A fixed status dict; no preparation is performed yet.
    """
    # TODO: implement once HF Pro is available
    placeholder = dict(status="ready_for_implementation")
    return placeholder
153
+
154
def quantize_model() -> bool:
    """Placeholder for quantizing the TrOCR model for CPU performance.

    Returns:
        Always False: quantization is not implemented yet.
    """
    # TODO: implement quantization
    quantization_applied = False
    return quantization_applied
requirements.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Requirements unifiés avec fallbacks intelligents
2
+ gradio>=4.0.0
3
+ pillow>=9.0.0
4
+ numpy>=1.21.0
5
+ datasets>=2.10.0
6
+ huggingface_hub>=0.16.0
7
+ pandas>=1.5.0
8
+ psutil>=5.8.0
9
+
10
+ # EasyOCR (toujours installé - fonctionne partout)
11
+ easyocr>=1.7.0
12
+
13
+ # GPU/TrOCR (utilisé par image_processing_gpu.py)
14
+ # Ces packages sont installés sur toutes les plateformes sauf emscripten (voir les marqueurs ci-dessous)
15
+ torch>=2.0.0; sys_platform != "emscripten"
16
+ torchvision>=0.15.0; sys_platform != "emscripten"
17
+ transformers>=4.30.0; sys_platform != "emscripten"