Spaces:

hoololi
/

CalcTrainer

Sleeping

App Files Files Community

hoololi committed on Jun 24, 2025

Commit

5617612

verified ·

1 Parent(s): ffb3108

Upload 5 files

Browse files

Files changed (5) hide show

app.py +213 -0
game_engine.py +724 -0
image_processing_cpu.py +115 -0
image_processing_gpu.py +157 -0
requirements.txt +17 -0

app.py ADDED Viewed

	@@ -0,0 +1,213 @@

+# ==========================================
+# app.py - Calcul OCR v3.0
+# ==========================================
+"""
+Application principale - Entraînement aux calculs avec OCR
+"""
+import gradio as gr
+import warnings
+import os
+import gc
+import numpy as np
+from PIL import Image
+warnings.filterwarnings("ignore")
+from image_processing import init_ocr_model, create_white_canvas, cleanup_memory
+from game_engine import MathGame, export_to_clean_dataset
+# Import-time startup: log progress, initialise the OCR model via init_ocr_model(),
+# then create the module-level MathGame instance used by every callback below.
+print("🚀 Initialisation Calcul OCR v3.0...")
+print("🔄 Chargement modèle OCR...")
+init_ocr_model()
+print("✅ Modèle OCR prêt")
+game = MathGame()
+def start_game_wrapper(duration: str, operation: str, difficulty: str) -> tuple:
+    cleanup_memory()
+    return game.start_game(duration, operation, difficulty)
+def next_question_wrapper(image_data: dict | np.ndarray | Image.Image | None) -> tuple:
+    return game.next_question(image_data)
+def export_current_session() -> str:
+    """Export vers le nouveau dataset calcul_ocr_dataset"""
+    if not hasattr(game, 'session_data') or not game.session_data:
+        return "❌ Aucune donnée de session à exporter"
+    export_info = game.get_export_status()
+    if export_info["status"] == "exported":
+        return f"""✅ Session déjà exportée !
+📅 Exporté le: {export_info['timestamp'][:19].replace('T', ' ')}
+📊 Résultat: {export_info['result'][:100]}...
+💡 Jouez une nouvelle session pour contribuer davantage !"""
+    if export_info["status"] == "exporting":
+        return "⏳ Export en cours..."
+    if not export_info["can_export"]:
+        return "❌ Aucune donnée à exporter"
+    game.mark_export_in_progress()
+    try:
+        result = export_to_clean_dataset(game.session_data)
+        game.mark_export_completed(result)
+        cleanup_memory()
+        return result
+    except Exception as e:
+        game.export_status = "not_exported"
+        return f"❌ Erreur export: {str(e)}"
+# Gradio interface: page layout (session settings, question panel, drawing canvas,
+# results, dataset export) followed by the click-event wiring.
+with gr.Blocks(
+    title="🧮 Calcul OCR - Entraînement mathématiques",
+    theme=gr.themes.Soft(),
+    css="""
+    .gradio-container { max-width: 1200px !important; }
+    .config-section {
+        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+        color: white;
+        padding: 15px;
+        border-radius: 10px;
+        margin: 10px 0;
+    }
+    .dataset-info {
+        background: linear-gradient(135deg, #11998e 0%, #38ef7d 100%);
+        color: white;
+        padding: 15px;
+        border-radius: 10px;
+        margin: 10px 0;
+    }
+    .radio-group {
+        background: #f8f9fa;
+        padding: 10px;
+        border-radius: 8px;
+        margin: 5px 0;
+    }
+    """,
+    head="<meta name='viewport' content='width=device-width, initial-scale=1.0'>"
+) as demo:
+    gr.Markdown(
+        """
+        # 🧮 Entraînement aux calculs avec OCR
+        **Nouveau !** Choisissez votre configuration et entraînez-vous sur différents types de calculs !
+        **Comment jouer :**
+        1. **Configurez** votre session ci-dessous
+        2. Cliquez sur **🚀 GO !** pour démarrer
+        3. **Écrivez** ✏️ votre réponse sur le tableau
+        4. Cliquez sur **➡️ NEXT !** pour la question suivante
+        À la fin, vous pourrez contribuer au dataset ouvert pour améliorer l'OCR mathématique !
+        ---
+        """
+    )
+    # Session settings: duration, operation type and difficulty
+    with gr.Group():
+        gr.Markdown("### ⚙️ Configuration de la session", elem_classes=["config-section"])
+        with gr.Row():
+            duration_choice = gr.Radio(
+                choices=["30 secondes", "60 secondes"],
+                value="30 secondes",
+                label="⏱️ Durée",
+                elem_classes=["radio-group"]
+            )
+            operation_choice = gr.Radio(
+                choices=["×", "+", "-", "÷", "Aléatoire"],
+                value="×",
+                label="🔢 Opération",
+                elem_classes=["radio-group"]
+            )
+            difficulty_choice = gr.Radio(
+                choices=["Facile", "Difficile"],
+                value="Facile",
+                label="🎯 Difficulté",
+                elem_classes=["radio-group"]
+            )
+    with gr.Row():
+        with gr.Column(scale=1):
+            # Question banner
+            question_display = gr.HTML(
+                value='<div style="font-size: 2.5em; font-weight: bold; text-align: center; padding: 20px; background: linear-gradient(45deg, #667eea 0%, #764ba2 100%); color: white; border-radius: 10px;">Prêt à jouer ?</div>'
+            )
+            # Controls (NEXT starts disabled; the callbacks toggle both buttons)
+            with gr.Row():
+                go_button = gr.Button("🚀 GO !", variant="primary", size="lg")
+                next_button = gr.Button("➡️ NEXT !", variant="secondary", size="lg", interactive=False)
+            # Status and timer text
+            status_display = gr.Markdown("### 🎯 Configurez votre session et cliquez sur GO !")
+            timer_display = gr.Markdown("### ⏱️ --")
+        with gr.Column(scale=1):
+            # Drawing area: brush and eraser only (no uploads, layers or transforms)
+            canvas = gr.ImageEditor(
+                label="✏️ Votre réponse",
+                height=350,
+                width=350,
+                value=create_white_canvas(350, 350),
+                brush=gr.Brush(default_size=8, default_color="#000000"),
+                sources=[],
+                layers=False,
+                transforms=[],
+                eraser=gr.Eraser(default_size=20)
+            )
+    # Results (filled with the HTML summary when a session ends)
+    results_display = gr.HTML("")
+    # Export to the dedicated dataset
+    gr.Markdown("### 📤 Contribuer au dataset", elem_classes=["dataset-info"])
+    export_button = gr.Button("📤 Ajouter la série au dataset calcul_ocr", variant="primary", size="lg")
+    export_status = gr.Markdown("")
+    # Events: GO and NEXT both update the same seven output components
+    go_button.click(
+        fn=start_game_wrapper,
+        inputs=[duration_choice, operation_choice, difficulty_choice],
+        outputs=[question_display, canvas, status_display, timer_display, go_button, next_button, results_display]
+    )
+    next_button.click(
+        fn=next_question_wrapper,
+        inputs=[canvas],
+        outputs=[question_display, canvas, status_display, timer_display, go_button, next_button, results_display]
+    )
+    export_button.click(
+        fn=export_current_session,
+        outputs=[export_status]
+    )
+# Script entry point: print a summary of the available options, then launch the
+# Gradio app listening on all interfaces (0.0.0.0) on port 7860.
+if __name__ == "__main__":
+    print("🚀 Lancement Calcul OCR v3.0...")
+    print("🎯 Dataset: calcul_ocr_dataset")
+    print("📊 Opérations: ×, +, -, ÷, Aléatoire")
+    print("⚙️ Durées: 30s, 60s")
+    print("🎯 Difficultés: Facile, Difficile")
+    demo.launch(
+        share=False,
+        show_error=True,
+        server_name="0.0.0.0",
+        server_port=7860,
+        show_api=False,
+        favicon_path=None
+    )

game_engine.py ADDED Viewed

	@@ -0,0 +1,724 @@

+# ==========================================
+# game_engine.py - Calcul OCR v3.0
+# ==========================================
+"""
+Moteur de jeu mathématique complet
+"""
+import random
+import time
+import datetime
+import gradio as gr
+import os
+import uuid
+import gc
+import base64
+from io import BytesIO
+import numpy as np
+from PIL import Image
+import threading
+import queue
+from typing import Dict, Tuple, Optional
+# Auto-détection intelligente CPU/GPU avec fallbacks
+ocr_module = None
+ocr_info = {"model_name": "Unknown", "device": "Unknown"}
+# Tentative 1: GPU/TrOCR (si disponible et packages installés)
+try:
+    import torch
+    if torch.cuda.is_available():
+        from image_processing_gpu import (
+            recognize_number_fast_with_image,
+            create_thumbnail_fast,
+            create_white_canvas,
+            cleanup_memory,
+            log_memory_usage,
+            get_ocr_model_info
+        )
+        ocr_module = "gpu"
+        print("✅ Mode GPU détecté - TrOCR activé")
+    else:
+        raise ImportError("GPU non disponible")
+except (ImportError, Exception) as e:
+    print(f"⚠️ GPU/TrOCR non disponible: {e}")
+    # Tentative 2: CPU/EasyOCR (fallback)
+    try:
+        from image_processing_cpu import (
+            recognize_number_fast_with_image,
+            create_thumbnail_fast,
+            create_white_canvas,
+            cleanup_memory,
+            log_memory_usage,
+            get_ocr_model_info
+        )
+        ocr_module = "cpu"
+        print("✅ Mode CPU détecté - EasyOCR activé")
+    except ImportError:
+        # Tentative 3: Fallback vers ancien fichier (sécurité)
+        try:
+            from image_processing import (
+                recognize_number_fast_with_image,
+                create_thumbnail_fast,
+                create_white_canvas,
+                cleanup_memory,
+                log_memory_usage
+            )
+            def get_ocr_model_info():
+                return {"model_name": "Legacy", "device": "Unknown"}
+            ocr_module = "legacy"
+            print("⚠️ Fallback vers image_processing.py legacy")
+        except ImportError:
+            print("❌ ERREUR: Aucun module OCR disponible!")
+            raise
+# Récupérer les infos du modèle sélectionné
+try:
+    ocr_info = get_ocr_model_info()
+    print(f"🎯 OCR sélectionné: {ocr_info['model_name']} sur {ocr_info['device']}")
+except:
+    print("⚠️ Impossible de récupérer les infos OCR")
+# Optional `datasets` import: DATASET_AVAILABLE records whether it succeeded so
+# the export code can refuse to run instead of crashing at import time.
+try:
+    from datasets import Dataset, load_dataset
+    DATASET_AVAILABLE = True
+    print("✅ Modules dataset disponibles")
+except ImportError as e:
+    DATASET_AVAILABLE = False
+    print(f"⚠️ Modules dataset non disponibles: {e}")
+# Name of the new dataset (default target of export_to_clean_dataset)
+DATASET_NAME = "hoololi/calcul_ocr_dataset"
+# Difficulty configuration per operation: operation symbol -> difficulty label
+# -> inclusive (min, max) bounds passed to random.randint by the generators.
+DIFFICULTY_RANGES = {
+    "×": {
+        "Facile": (2, 9),
+        "Difficile": (4, 12)
+    },
+    "+": {
+        "Facile": (1, 50),
+        "Difficile": (10, 100)
+    },
+    "-": {
+        "Facile": (1, 50),
+        "Difficile": (10, 100)
+    },
+    "÷": {
+        "Facile": (1, 10),    # Bounds of the quotient (the result), not the operands
+        "Difficile": (2, 12)  # Bounds of the quotient (the result), not the operands
+    }
+}
+def create_result_row_with_images(i: int, image: dict | np.ndarray | Image.Image, expected: int, operation_data: tuple[int, int, str, int]) -> dict:
+    # OCR optimisé
+    recognized, optimized_image, dataset_image_data = recognize_number_fast_with_image(image)
+    try:
+        recognized_num = int(recognized) if recognized.isdigit() else 0
+    except:
+        recognized_num = 0
+    is_correct = recognized_num == expected
+    a, b, operation, correct_result = operation_data
+    status_icon = "✅" if is_correct else "❌"
+    status_text = "Correct" if is_correct else "Incorrect"
+    row_color = "#e8f5e8" if is_correct else "#ffe8e8"
+    # Miniature
+    image_thumbnail = create_thumbnail_fast(optimized_image, size=(50, 50))
+    # Libérer mémoire
+    if optimized_image and hasattr(optimized_image, 'close'):
+        try:
+            optimized_image.close()
+        except:
+            pass
+    return {
+        'html_row': f"""
+            <tr style="background-color: {row_color};">
+                <td style="text-align: center; padding: 8px; border: 1px solid #ddd; color: #333;">{i+1}</td>
+                <td style="text-align: center; padding: 8px; border: 1px solid #ddd; font-weight: bold; color: #333;">{a}</td>
+                <td style="text-align: center; padding: 8px; border: 1px solid #ddd; font-weight: bold; color: #333;">{operation}</td>
+                <td style="text-align: center; padding: 8px; border: 1px solid #ddd; font-weight: bold; color: #333;">{b}</td>
+                <td style="text-align: center; padding: 8px; border: 1px solid #ddd; font-weight: bold; color: #333;">{expected}</td>
+                <td style="text-align: center; padding: 8px; border: 1px solid #ddd;">{image_thumbnail}</td>
+                <td style="text-align: center; padding: 8px; border: 1px solid #ddd; font-weight: bold; color: #333;">{recognized_num}</td>
+                <td style="text-align: center; padding: 8px; border: 1px solid #ddd; color: #333;">{status_icon} {status_text}</td>
+            </tr>
+        """,
+        'is_correct': is_correct,
+        'recognized': recognized,
+        'recognized_num': recognized_num,
+        'dataset_image_data': dataset_image_data
+    }
+class MathGame:
+    """Moteur de jeu mathématique avec traitement parallèle"""
+    def __init__(self):
+        """Initialise an idle game: no session running, default 30 s multiplication/easy settings."""
+        self.is_running = False
+        self.start_time = 0
+        self.current_operation = ""
+        self.correct_answer = 0
+        self.user_images = []
+        self.expected_answers = []
+        self.operations_history = []
+        self.question_count = 0
+        self.time_remaining = 30
+        self.session_data = []
+        # Session settings
+        self.duration = 30
+        self.operation_type = "×"
+        self.difficulty = "Facile"
+        # Export bookkeeping ("not_exported" / "exporting" / "exported")
+        self.export_status = "not_exported"
+        self.export_timestamp = None
+        self.export_result = None
+        # Background processing: queue of pending images, results, worker thread
+        self.processing_queue = queue.Queue()
+        self.results_cache: Dict[int, dict] = {}  # {question_number: result_data}
+        self.worker_thread: Optional[threading.Thread] = None
+        self.processing_active = False
+    def _start_background_processing(self) -> None:
+        """Démarre le thread de traitement en arrière-plan"""
+        if self.worker_thread is None or not self.worker_thread.is_alive():
+            self.processing_active = True
+            self.worker_thread = threading.Thread(target=self._process_images_worker, daemon=True)
+            self.worker_thread.start()
+            print("🔄 Thread de traitement parallèle démarré")
+    def _stop_background_processing(self) -> None:
+        """Ask the worker thread to stop by clearing ``processing_active``.
+
+        This only flips the flag and logs; it does not join the thread, so the
+        worker may still be alive when this method returns.
+        """
+        self.processing_active = False
+        if self.worker_thread and self.worker_thread.is_alive():
+            print("⏹️ Arrêt du thread de traitement parallèle")
+    def _process_images_worker(self) -> None:
+        """Worker thread qui traite les images en arrière-plan"""
+        print("🚀 Worker thread démarré")
+        while self.processing_active:
+            try:
+                if not self.processing_queue.empty():
+                    question_num, image, expected, operation_data = self.processing_queue.get(timeout=1)
+                    print(f"🔄 Traitement parallèle image {question_num}...")
+                    start_time = time.time()
+                    result_data = create_result_row_with_images(question_num, image, expected, operation_data)
+                    processing_time = time.time() - start_time
+                    # Stocker le résultat
+                    self.results_cache[question_num] = result_data
+                    print(f"✅ Image {question_num} traitée en {processing_time:.1f}s (parallèle)")
+                else:
+                    time.sleep(0.1)  # Éviter la consommation CPU excessive
+            except queue.Empty:
+                continue
+            except Exception as e:
+                print(f"❌ Erreur traitement parallèle: {e}")
+        print("🛑 Worker thread terminé")
+    def _add_image_to_processing_queue(self, question_num: int, image: dict | np.ndarray | Image.Image,
+                                     expected: int, operation_data: tuple) -> None:
+        """Ajoute une image à la queue de traitement"""
+        if image is not None:
+            self.processing_queue.put((question_num, image, expected, operation_data))
+            print(f"📝 Image {question_num} ajoutée à la queue de traitement")
+        return {
+            "status": self.export_status,
+            "timestamp": self.export_timestamp,
+            "result": self.export_result,
+            "can_export": self.export_status == "not_exported" and len(self.session_data) > 0
+        }
+    def mark_export_in_progress(self) -> None:
+        """Set the export status to "exporting" and record the current time (ISO 8601 string)."""
+        self.export_status = "exporting"
+        self.export_timestamp = datetime.datetime.now().isoformat()
+    def mark_export_completed(self, result: str) -> None:
+        """Set the export status to "exported" and keep ``result`` as the export outcome message."""
+        self.export_status = "exported"
+        self.export_result = result
+    def generate_multiplication(self, difficulty: str) -> tuple[str, int]:
+        """Génère une multiplication"""
+        min_val, max_val = DIFFICULTY_RANGES["×"][difficulty]
+        a = random.randint(min_val, max_val)
+        b = random.randint(min_val, max_val)
+        return f"{a} × {b}", a * b
+    def generate_addition(self, difficulty: str) -> tuple[str, int]:
+        """Génère une addition"""
+        min_val, max_val = DIFFICULTY_RANGES["+"][difficulty]
+        a = random.randint(min_val, max_val)
+        b = random.randint(min_val, max_val)
+        return f"{a} + {b}", a + b
+    def generate_subtraction(self, difficulty: str) -> tuple[str, int]:
+        """Génère une soustraction (résultat toujours positif)"""
+        min_val, max_val = DIFFICULTY_RANGES["-"][difficulty]
+        a = random.randint(min_val, max_val)
+        b = random.randint(min_val, a)  # b <= a pour éviter les négatifs
+        return f"{a} - {b}", a - b
+    def generate_division(self, difficulty: str) -> tuple[str, int]:
+        """Génère une division exacte"""
+        min_result, max_result = DIFFICULTY_RANGES["÷"][difficulty]
+        result = random.randint(min_result, max_result)
+        if difficulty == "Facile":
+            divisor = random.randint(2, 9)
+        else:
+            divisor = random.randint(2, 12)
+        dividend = result * divisor
+        return f"{dividend} ÷ {divisor}", result
+    def generate_operation(self, operation_type: str, difficulty: str) -> tuple[str, int]:
+        """Génère une opération selon le type et la difficulté"""
+        if operation_type == "×":
+            return self.generate_multiplication(difficulty)
+        elif operation_type == "+":
+            return self.generate_addition(difficulty)
+        elif operation_type == "-":
+            return self.generate_subtraction(difficulty)
+        elif operation_type == "÷":
+            return self.generate_division(difficulty)
+        elif operation_type == "Aléatoire":
+            # Choisir aléatoirement une opération
+            random_op = random.choice(["×", "+", "-", "÷"])
+            return self.generate_operation(random_op, difficulty)
+        else:
+            # Par défaut, multiplication
+            return self.generate_multiplication(difficulty)
+    def start_game(self, duration: str, operation: str, difficulty: str) -> tuple[str, Image.Image, str, str, gr.update, gr.update, str]:
+        """Start a new session with the chosen settings.
+
+        Releases the images and data of any previous session, resets all
+        counters and the export state, restarts the background worker and
+        generates the first question.
+
+        Returns a 7-tuple: question HTML, a blank canvas, status text, timer
+        text, an update disabling one button and one enabling another (GO and
+        NEXT respectively in app.py's wiring), and an empty results string.
+        """
+        # log_memory_usage("avant nettoyage start_game")  # DEBUG: disabled
+        # Settings: any duration label other than "60 secondes" means 30 s
+        self.duration = 60 if duration == "60 secondes" else 30
+        self.operation_type = operation
+        self.difficulty = difficulty
+        # Cleanup: close images from the previous session (best effort)
+        if hasattr(self, 'user_images') and self.user_images:
+            for img in self.user_images:
+                if hasattr(img, 'close'):
+                    try:
+                        img.close()
+                    except:
+                        pass
+        if hasattr(self, 'session_data') and self.session_data:
+            for entry in self.session_data:
+                if 'user_drawing' in entry and entry['user_drawing']:
+                    entry['user_drawing'] = None
+            self.session_data.clear()
+        # Reset the background-processing state: stop the worker, drop cached
+        # results and drain anything still waiting in the queue
+        self._stop_background_processing()
+        self.results_cache.clear()
+        while not self.processing_queue.empty():
+            try:
+                self.processing_queue.get_nowait()
+            except queue.Empty:
+                break
+        self.is_running = True
+        self.start_time = time.time()
+        self.user_images = []
+        self.expected_answers = []
+        self.operations_history = []
+        self.question_count = 0
+        self.time_remaining = self.duration
+        self.session_data = []
+        # Reset export state
+        self.export_status = "not_exported"
+        self.export_timestamp = None
+        self.export_result = None
+        # Start background processing
+        self._start_background_processing()
+        gc.collect()
+        # log_memory_usage("après nettoyage start_game")  # DEBUG: disabled
+        # First question
+        operation_str, answer = self.generate_operation(self.operation_type, self.difficulty)
+        self.current_operation = operation_str
+        self.correct_answer = answer
+        # Parse "a op b" back into its parts for the history
+        parts = operation_str.split()
+        a, op, b = int(parts[0]), parts[1], int(parts[2])
+        self.operations_history.append((a, b, op, answer))
+        # Emoji matching the selected operation type (🔢 when unknown)
+        operation_emoji = {
+            "×": "✖️", "+": "➕", "-": "➖", "÷": "➗", "Aléatoire": "🎲"
+        }
+        emoji = operation_emoji.get(self.operation_type, "🔢")
+        return (
+            f'<div style="font-size: 3em; font-weight: bold; text-align: center; padding: 20px; background: linear-gradient(45deg, #667eea 0%, #764ba2 100%); color: white; border-radius: 10px;">{operation_str}</div>',
+            create_white_canvas(),
+            f"🎯 {emoji} {self.operation_type} • {self.difficulty} • Écrivez votre réponse !",
+            f"⏱️ Temps restant: {self.time_remaining}s",
+            gr.update(interactive=False),
+            gr.update(interactive=True),
+            ""
+        )
+    def next_question(self, image_data: dict | np.ndarray | Image.Image | None) -> tuple[str, Image.Image, str, str, gr.update, gr.update, str]:
+        """Record the answer drawn for the current question and show the next one.
+
+        If no game is running, the current state is returned with an error
+        status. If the session time has elapsed, the game is ended via
+        ``end_game``. Otherwise the image (when not None) is stored and queued
+        for background processing, and a new question is generated.
+
+        Returns the same 7-tuple layout as ``start_game``.
+        """
+        if not self.is_running:
+            return (
+                f'<div style="font-size: 3em; font-weight: bold; text-align: center; padding: 20px; background: linear-gradient(45deg, #667eea 0%, #764ba2 100%); color: white; border-radius: 10px;">{self.current_operation}</div>',
+                image_data,
+                "❌ Le jeu n'est pas en cours !",
+                "⏱️ Temps: 0s",
+                gr.update(interactive=True),
+                gr.update(interactive=False),
+                ""
+            )
+        elapsed_time = time.time() - self.start_time
+        if elapsed_time >= self.duration:
+            return self.end_game(image_data)
+        if image_data is not None:
+            # Keep the image in the session list AND hand it to the background worker
+            self.user_images.append(image_data)
+            self.expected_answers.append(self.correct_answer)
+            # Parse the current "a op b" string for the worker
+            parts = self.current_operation.split()
+            a, op, b = int(parts[0]), parts[1], int(parts[2])
+            current_operation_data = (a, b, op, self.correct_answer)
+            # Queue the image we just received for background processing
+            self._add_image_to_processing_queue(self.question_count, image_data, self.correct_answer, current_operation_data)
+            self.question_count += 1
+        # New question
+        operation_str, answer = self.generate_operation(self.operation_type, self.difficulty)
+        self.current_operation = operation_str
+        self.correct_answer = answer
+        # Parse for the history
+        # NOTE(review): the history grows on every call, but user_images only when
+        # image_data is not None; the two lists are zipped in end_game — confirm
+        # image_data can never be None here.
+        parts = operation_str.split()
+        a, op, b = int(parts[0]), parts[1], int(parts[2])
+        self.operations_history.append((a, b, op, answer))
+        time_remaining = max(0, self.duration - int(elapsed_time))
+        self.time_remaining = time_remaining
+        if time_remaining <= 0:
+            return self.end_game(image_data)
+        # Emoji matching the selected operation type (🔢 when unknown)
+        operation_emoji = {
+            "×": "✖️", "+": "➕", "-": "➖", "÷": "➗", "Aléatoire": "🎲"
+        }
+        emoji = operation_emoji.get(self.operation_type, "🔢")
+        return (
+            f'<div style="font-size: 3em; font-weight: bold; text-align: center; padding: 20px; background: linear-gradient(45deg, #667eea 0%, #764ba2 100%); color: white; border-radius: 10px;">{operation_str}</div>',
+            create_white_canvas(),
+            f"🎯 {emoji} Question {self.question_count + 1} • {self.difficulty}",
+            f"⏱️ Temps restant: {time_remaining}s",
+            gr.update(interactive=False),
+            gr.update(interactive=True),
+            ""
+        )
+    def end_game(self, final_image: dict | np.ndarray | Image.Image | None) -> tuple[str, Image.Image, str, str, gr.update, gr.update, str]:
+        self.is_running = False
+        # log_memory_usage("début end_game")  # DEBUG: Désactivé
+        if final_image is not None:
+            self.user_images.append(final_image)
+            self.expected_answers.append(self.correct_answer)
+            self.question_count += 1
+            if len(self.operations_history) < len(self.user_images):
+                parts = self.current_operation.split()
+                a, op, b = int(parts[0]), parts[1], int(parts[2])
+                self.operations_history.append((a, b, op, self.correct_answer))
+        correct_answers = 0
+        total_questions = len(self.user_images)
+        table_rows_html = ""
+        session_timestamp = datetime.datetime.now().isoformat()
+        session_id = f"session_{int(datetime.datetime.now().timestamp())}_{str(uuid.uuid4())[:8]}"
+        self.session_data = []
+        images_saved = 0
+        total_image_size_kb = 0
+        # Traitement optimisé avec DEBUG
+        print(f"🔄 Traitement de {total_questions} images...")
+        start_processing = time.time()
+        for i, (image, expected, operation_data) in enumerate(zip(self.user_images, self.expected_answers, self.operations_history)):
+            print(f"  → Image {i+1}/{total_questions}...")
+            img_start = time.time()
+            row_data = create_result_row_with_images(i, image, expected, operation_data)
+            table_rows_html += row_data['html_row']
+            img_time = time.time() - img_start
+            print(f"    ✅ Traitée en {img_time:.1f}s")
+            if row_data['is_correct']:
+                correct_answers += 1
+            # Structure pour NOUVEAU DATASET CALCUL OCR
+            a, b, operation, correct_result = operation_data
+            entry = {
+                "session_id": session_id,
+                "timestamp": session_timestamp,
+                "question_number": i + 1,
+                # Configuration session
+                "session_duration": self.duration,
+                "operation_type": self.operation_type,
+                "difficulty_level": self.difficulty,
+                # Mathématiques
+                "operand_a": a,
+                "operand_b": b,
+                "operation": operation,
+                "correct_answer": expected,
+                # OCR & Résultats avec détection automatique du modèle
+                ocr_info = get_ocr_model_info()
+                "ocr_model": ocr_info.get("model_name", "Unknown"),
+                "ocr_device": ocr_info.get("device", "Unknown"),
+                "user_answer_ocr": row_data['recognized'],
+                "user_answer_parsed": row_data['recognized_num'],
+                "is_correct": row_data['is_correct'],
+                # Métadonnées
+                "total_questions": total_questions,
+                "app_version": "3.0_calcul_ocr_parallel"  # Mis à jour pour le parallélisme
+            }
+            # Ajouter image si disponible
+            if row_data['dataset_image_data']:
+                entry["handwriting_image"] = row_data['dataset_image_data']["image_base64"]
+                entry["image_width"] = int(row_data['dataset_image_data']["compressed_size"][0])
+                entry["image_height"] = int(row_data['dataset_image_data']["compressed_size"][1])
+                entry["image_size_kb"] = float(row_data['dataset_image_data']["file_size_kb"])
+                entry["has_image"] = True
+                images_saved += 1
+                total_image_size_kb += row_data['dataset_image_data']["file_size_kb"]
+            else:
+                entry["has_image"] = False
+            self.session_data.append(entry)
+        processing_time = time.time() - start_processing
+        print(f"⏱️ Traitement total: {processing_time:.1f}s")
+        accuracy = (correct_answers / total_questions * 100) if total_questions > 0 else 0
+        for entry in self.session_data:
+            entry["session_accuracy"] = accuracy
+        # Nettoyage mémoire
+        for img in self.user_images:
+            if hasattr(img, 'close'):
+                try:
+                    img.close()
+                except:
+                    pass
+        gc.collect()
+        # log_memory_usage("après nettoyage end_game")  # DEBUG: Désactivé
+        # HTML résultats
+        table_html = f"""
+        <div style="overflow-x: auto; margin: 20px 0;">
+            <table style="width: 100%; border-collapse: collapse; border: 2px solid #4a90e2;">
+                <thead>
+                    <tr style="background: #4a90e2; color: white;">
+                        <th style="padding: 8px;">Question</th>
+                        <th style="padding: 8px;">A</th>
+                        <th style="padding: 8px;">Op</th>
+                        <th style="padding: 8px;">B</th>
+                        <th style="padding: 8px;">Réponse</th>
+                        <th style="padding: 8px;">Votre dessin</th>
+                        <th style="padding: 8px;">OCR</th>
+                        <th style="padding: 8px;">Statut</th>
+                    </tr>
+                </thead>
+                <tbody>
+                    {table_rows_html}
+                </tbody>
+            </table>
+        </div>
+        """
+        # Configuration session pour affichage
+        config_display = f"{self.operation_type} • {self.difficulty} • {self.duration}s"
+        operation_emoji = {
+            "×": "✖️", "+": "➕", "-": "➖", "÷": "➗", "Aléatoire": "🎲"
+        }
+        emoji = operation_emoji.get(self.operation_type, "🔢")
+        export_info = self.get_export_status()
+        if export_info["can_export"]:
+            export_section = f"""
+            <div style="margin-top: 20px; padding: 15px; background-color: #e8f5e8; border-radius: 8px;">
+                <h3 style="color: #2e7d32;">📤 Ajouter cette série au dataset ?</h3>
+                <p style="color: #2e7d32;">
+                    ✅ {total_questions} réponses • 📊 {accuracy:.1f}% de précision<br>
+                    📸 {images_saved} opérations et images sauvegardées ({total_image_size_kb:.1f}KB)<br>
+                    ⚙️ Configuration: {config_display}
+                </p>
+            </div>
+            """
+        else:
+            export_section = ""
+        final_results = f"""
+        <div style="margin: 20px 0;">
+            <h1 style="text-align: center; color: #4a90e2;">🎉 Session terminée !</h1>
+            <div style="background: linear-gradient(45deg, #667eea 0%, #764ba2 100%); color: white; padding: 20px; border-radius: 10px; margin: 20px 0;">
+                <h2>📈 Résultats</h2>
+                <div style="text-align: center; margin-bottom: 15px;">
+                    <strong>{emoji} {config_display}</strong>
+                </div>
+                <div style="display: flex; justify-content: space-around; flex-wrap: wrap;">
+                    <div style="text-align: center; margin: 10px;">
+                        <div style="font-size: 2em; font-weight: bold;">{total_questions}</div>
+                        <div>Questions</div>
+                    </div>
+                    <div style="text-align: center; margin: 10px;">
+                        <div style="font-size: 2em; font-weight: bold; color: #90EE90;">{correct_answers}</div>
+                        <div>Correctes</div>
+                    </div>
+                    <div style="text-align: center; margin: 10px;">
+                        <div style="font-size: 2em; font-weight: bold; color: #FFB6C1;">{total_questions - correct_answers}</div>
+                        <div>Incorrectes</div>
+                    </div>
+                    <div style="text-align: center; margin: 10px;">
+                        <div style="font-size: 2em; font-weight: bold;">{accuracy:.1f}%</div>
+                        <div>Précision</div>
+                    </div>
+                </div>
+            </div>
+            <h2 style="color: #4a90e2;">📊 Détail des Réponses</h2>
+            {table_html}
+            {export_section}
+        </div>
+        """
+        return (
+            """<div style="font-size: 3em; font-weight: bold; text-align: center; padding: 20px; background: linear-gradient(45deg, #667eea 0%, #764ba2 100%); color: white; border-radius: 10px;">🏁 C'est fini !</div>""",
+            create_white_canvas(),
+            f"✨ Session {config_display} terminée !",
+            "⏱️ Temps écoulé !",
+            gr.update(interactive=True),
+            gr.update(interactive=False),
+            final_results
+        )
+def export_to_clean_dataset(session_data: list[dict], dataset_name: str = DATASET_NAME) -> str:
+    """Export vers le nouveau dataset calcul_ocr_dataset"""
+    if not DATASET_AVAILABLE:
+        return "❌ Modules dataset non disponibles"
+    hf_token = os.getenv("HF_TOKEN") or os.getenv("tk_calcul_ocr")  # Support des deux noms
+    if not hf_token:
+        return "❌ Token HuggingFace manquant (HF_TOKEN ou tk_calcul_ocr)"
+    try:
+        print(f"\n🚀 === EXPORT VERS DATASET CALCUL OCR ===")
+        print(f"📊 Dataset: {dataset_name}")
+        # Filtrer les entrées avec images
+        clean_entries = []
+        for entry in session_data:
+            if entry.get('has_image', False):
+                clean_entries.append(entry)
+        print(f"✅ {len(clean_entries)} entrées avec images converties")
+        if len(clean_entries) == 0:
+            return "❌ Aucune entrée avec image à exporter"
+        # Charger dataset existant OU créer nouveau
+        try:
+            existing_dataset = load_dataset(dataset_name, split="train")
+            existing_data = existing_dataset.to_list()
+            print(f"📊 {len(existing_data)} entrées existantes")
+        except:
+            existing_data = []
+            print("📊 Création nouveau dataset calcul_ocr")
+        # Combiner
+        combined_data = existing_data + clean_entries
+        clean_dataset = Dataset.from_list(combined_data)
+        print(f"✅ Dataset créé - Features:")
+        for feature_name in clean_dataset.features:
+            print(f"  - {feature_name}: {clean_dataset.features[feature_name]}")
+        # Statistiques par opération
+        operations_count = {}
+        for entry in clean_entries:
+            op = entry.get('operation_type', 'unknown')
+            operations_count[op] = operations_count.get(op, 0) + 1
+        operations_summary = ", ".join([f"{op}: {count}" for op, count in operations_count.items()])
+        # Push vers HuggingFace
+        print(f"📤 Push vers {dataset_name}...")
+        clean_dataset.push_to_hub(
+            dataset_name,
+            private=False,
+            token=hf_token,
+            commit_message=f"Add {len(clean_entries)} handwriting samples for math OCR ({operations_summary})"
+        )
+        cleanup_memory()
+        success_message = f"""✅ Session ajoutée au dataset avec succès !
+📊 Dataset: {dataset_name}
+📸 Images: {len(clean_entries)}
+🔢 Opérations: {operations_summary}
+📈 Total: {len(clean_dataset)}
+🔗 Le dataset est consultable ici : https://huggingface.co/datasets/{dataset_name}"""
+        return success_message
+    except Exception as e:
+        print(f"❌ ERREUR: {e}")
+        import traceback
+        traceback.print_exc()
+        error_message = f"❌ Erreur: {str(e)}"
+        return error_message

image_processing_cpu.py ADDED Viewed

	@@ -0,0 +1,115 @@

+# ==========================================
+# image_processing_cpu.py - Version CPU avec EasyOCR
+# ==========================================
+"""
+Module de traitement d'images CPU-optimisé pour calculs mathématiques
+Utilise EasyOCR pour des performances rapides sur CPU
+"""
+import time
+from utils import (
+    optimize_image_for_ocr,
+    prepare_image_for_dataset,
+    create_thumbnail_fast,
+    create_white_canvas,
+    log_memory_usage,
+    cleanup_memory,
+    decode_image_from_dataset,
+    validate_ocr_result
+)
+# Module-level state for the EasyOCR backend
+easyocr_reader = None  # assigned by init_ocr_model(); stays None until then
+OCR_MODEL_NAME = "EasyOCR"
+def init_ocr_model() -> bool:
+    """Initialise EasyOCR (optimisé CPU)"""
+    global easyocr_reader
+    try:
+        print("🔄 Chargement EasyOCR (CPU optimisé)...")
+        import easyocr
+        easyocr_reader = easyocr.Reader(['en'], gpu=False, verbose=False)
+        print("✅ EasyOCR prêt (CPU) !")
+        return True
+    except Exception as e:
+        print(f"❌ Erreur lors du chargement EasyOCR: {e}")
+        return False
+def get_ocr_model_info() -> dict:
+    """Retourne les informations du modèle OCR utilisé"""
+    return {
+        "model_name": OCR_MODEL_NAME,
+        "device": "CPU",
+        "framework": "EasyOCR",
+        "optimized_for": "speed",
+        "version": "1.7.x"
+    }
+def recognize_number_fast_with_image(image_dict, debug: bool = False) -> tuple[str, any, dict | None]:
+    """
+    OCR avec EasyOCR (CPU optimisé)
+    Args:
+        image_dict: Image d'entrée (format Gradio)
+        debug: Afficher les logs de debug
+    Returns:
+        (résultat_ocr, image_optimisée, données_dataset)
+    """
+    if image_dict is None or easyocr_reader is None:
+        if debug:
+            print("  ❌ Image manquante ou EasyOCR non initialisé")
+        return "0", None, None
+    try:
+        start_time = time.time()
+        if debug:
+            print("  🔄 Début OCR EasyOCR...")
+        # Optimiser image (fonction commune)
+        optimized_image = optimize_image_for_ocr(image_dict, max_size=300)
+        if optimized_image is None:
+            if debug:
+                print("  ❌ Échec optimisation image")
+            return "0", None, None
+        # EasyOCR - traitement spécialisé CPU
+        if debug:
+            print("  ⚡ Lancement EasyOCR...")
+        import numpy as np
+        img_array = np.array(optimized_image)
+        results = easyocr_reader.readtext(img_array, detail=0, paragraph=False)
+        # Traitement des résultats EasyOCR
+        if results:
+            all_text = ' '.join(str(r) for r in results)
+            final_result = validate_ocr_result(all_text, max_length=4)
+        else:
+            final_result = "0"
+        # Préparer pour dataset (fonction commune)
+        dataset_image_data = prepare_image_for_dataset(optimized_image)
+        if debug:
+            total_time = time.time() - start_time
+            print(f"  ✅ EasyOCR terminé en {total_time:.1f}s → '{final_result}'")
+        return final_result, optimized_image, dataset_image_data
+    except Exception as e:
+        print(f"❌ Erreur OCR EasyOCR: {e}")
+        return "0", None, None
+def recognize_number_fast(image_dict) -> tuple[str, any]:
+    """Version rapide standard"""
+    result, optimized_image, _ = recognize_number_fast_with_image(image_dict)
+    return result, optimized_image
+def recognize_number(image_dict) -> str:
+    """Interface standard"""
+    result, _ = recognize_number_fast(image_dict)
+    return result

image_processing_gpu.py ADDED Viewed

	@@ -0,0 +1,157 @@

+# ==========================================
+# image_processing_gpu.py - Version GPU avec TrOCR
+# ==========================================
+"""
+Module de traitement d'images GPU-optimisé pour calculs mathématiques
+Utilise TrOCR pour une précision maximale sur GPU
+"""
+import time
+import torch
+from utils import (
+    optimize_image_for_ocr,
+    prepare_image_for_dataset,
+    create_thumbnail_fast,
+    create_white_canvas,
+    log_memory_usage,
+    cleanup_memory,
+    decode_image_from_dataset,
+    validate_ocr_result
+)
+# Module-level state for the TrOCR backend; processor and model are
+# assigned by init_ocr_model() and stay None until then
+processor = None
+model = None
+OCR_MODEL_NAME = "TrOCR-base-handwritten"
+def init_ocr_model() -> bool:
+    """Initialise TrOCR (optimisé GPU)"""
+    global processor, model
+    try:
+        print("🔄 Chargement TrOCR (GPU optimisé)...")
+        from transformers import TrOCRProcessor, VisionEncoderDecoderModel
+        processor = TrOCRProcessor.from_pretrained('microsoft/trocr-base-handwritten')
+        model = VisionEncoderDecoderModel.from_pretrained('microsoft/trocr-base-handwritten')
+        # Optimisations GPU
+        model.eval()
+        if torch.cuda.is_available():
+            model = model.cuda()
+            device_info = f"GPU ({torch.cuda.get_device_name()})"
+            print(f"✅ TrOCR prêt sur {device_info} !")
+        else:
+            device_info = "CPU (pas de GPU détecté)"
+            print(f"⚠️ TrOCR sur CPU - {device_info}")
+        return True
+    except Exception as e:
+        print(f"❌ Erreur lors du chargement TrOCR: {e}")
+        return False
+def get_ocr_model_info() -> dict:
+    """Retourne les informations du modèle OCR utilisé"""
+    device = "GPU" if torch.cuda.is_available() and model is not None else "CPU"
+    gpu_name = torch.cuda.get_device_name() if torch.cuda.is_available() else "N/A"
+    return {
+        "model_name": OCR_MODEL_NAME,
+        "device": device,
+        "gpu_name": gpu_name,
+        "framework": "HuggingFace-Transformers",
+        "optimized_for": "accuracy",
+        "version": "microsoft/trocr-base-handwritten"
+    }
+def recognize_number_fast_with_image(image_dict, debug: bool = False) -> tuple[str, any, dict | None]:
+    """
+    OCR avec TrOCR (GPU optimisé)
+    Args:
+        image_dict: Image d'entrée (format Gradio)
+        debug: Afficher les logs de debug
+    Returns:
+        (résultat_ocr, image_optimisée, données_dataset)
+    """
+    if image_dict is None or processor is None or model is None:
+        if debug:
+            print("  ❌ Image manquante ou TrOCR non initialisé")
+        return "0", None, None
+    try:
+        start_time = time.time()
+        if debug:
+            print("  🔄 Début OCR TrOCR...")
+        # Optimiser image (fonction commune)
+        optimized_image = optimize_image_for_ocr(image_dict, max_size=384)  # TrOCR préfère 384x384
+        if optimized_image is None:
+            if debug:
+                print("  ❌ Échec optimisation image")
+            return "0", None, None
+        # TrOCR - traitement spécialisé GPU
+        if debug:
+            print("  🤖 Lancement TrOCR...")
+        with torch.no_grad():
+            # Preprocessing
+            pixel_values = processor(images=optimized_image, return_tensors="pt").pixel_values
+            # GPU transfer si disponible
+            if torch.cuda.is_available():
+                pixel_values = pixel_values.cuda()
+            # Génération optimisée
+            generated_ids = model.generate(
+                pixel_values,
+                max_length=4,           # Optimisé pour les calculs
+                num_beams=1,            # Rapide
+                do_sample=False,        # Déterministe
+                early_stopping=True,    # Arrêt rapide
+                pad_token_id=processor.tokenizer.pad_token_id
+            )
+            # Décodage
+            result = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+            final_result = validate_ocr_result(result, max_length=4)
+        # Préparer pour dataset (fonction commune)
+        dataset_image_data = prepare_image_for_dataset(optimized_image)
+        if debug:
+            total_time = time.time() - start_time
+            device = "GPU" if torch.cuda.is_available() else "CPU"
+            print(f"  ✅ TrOCR ({device}) terminé en {total_time:.1f}s → '{final_result}'")
+        return final_result, optimized_image, dataset_image_data
+    except Exception as e:
+        print(f"❌ Erreur OCR TrOCR: {e}")
+        return "0", None, None
+def recognize_number_fast(image_dict) -> tuple[str, any]:
+    """Version rapide standard"""
+    result, optimized_image, _ = recognize_number_fast_with_image(image_dict)
+    return result, optimized_image
+def recognize_number(image_dict) -> str:
+    """Interface standard"""
+    result, _ = recognize_number_fast(image_dict)
+    return result
+# Fonctions spécifiques au fine-tuning (pour plus tard)
+def prepare_for_finetuning(dataset_path: str) -> dict:
+    """Prépare le dataset pour le fine-tuning TrOCR"""
+    # TODO: Implémenter quand on aura HF Pro
+    return {"status": "ready_for_implementation"}
+def quantize_model() -> bool:
+    """Quantize le modèle TrOCR pour optimiser les performances CPU"""
+    # TODO: Implémenter la quantization
+    return False

requirements.txt ADDED Viewed

	@@ -0,0 +1,17 @@

+# Requirements unifiés avec fallbacks intelligents
+gradio>=4.0.0
+pillow>=9.0.0
+numpy>=1.21.0
+datasets>=2.10.0
+huggingface_hub>=0.16.0
+pandas>=1.5.0
+psutil>=5.8.0
+# EasyOCR (toujours installé - fonctionne partout)
+easyocr>=1.7.0
+# TrOCR (torch/transformers) - utilisé par image_processing_gpu.py
+# Le marqueur sys_platform ci-dessous exclut seulement emscripten : ces packages sont donc installés même sans GPU
+torch>=2.0.0; sys_platform != "emscripten"
+torchvision>=0.15.0; sys_platform != "emscripten"
+transformers>=4.30.0; sys_platform != "emscripten"