# app.py
"""Gradio front end that runs the BRVM report extraction in a background thread.

The model pipeline is loaded once at start-up. Clicking the button downloads
the PDF archive, processes every PDF, writes one JSON file per PDF, uploads
each result, and streams the names of the processed files to the status box.
"""
import json
import logging
import os
import threading
import uuid
from datetime import datetime
from pathlib import Path

import gradio as gr

from script_brvm import (
    initialize_model_pipeline,
    download_and_extract_pdfs,
    process_single_pdf,
    upload_results_to_hf_single,
)

logger = logging.getLogger(__name__)

# ---------- CONFIGURATION ----------
HF_DATASET_PDFS_REPO_ID = "lamekemal/brvm-reports-pdfs"
ZIP_FILENAME_IN_DATASET = "brvm_reports.zip"
LOCAL_PDF_FOLDER = Path("brvm_reports_extracted")
LOCAL_CACHE_DIR = Path("./hf_cache")
HF_TOKEN = os.getenv("HF_TOKEN")
HF_DATASET_JSON_REPO_ID = "lamekemal/brvm-reports-json"
LOCAL_JSON_OUTPUT_BASE_FOLDER = Path("brvm_json_outputs")

# Seconds between two looks at the progress list while a run is active.
STATUS_POLL_SECONDS = 1.0

# Set by load_model(); read by the worker thread.
extractor_pipeline = None
# Lines shown in the status box for the current run (file names, or an error).
processed_files = []

# Guards creation of the worker so two clicks cannot start two runs.
_run_lock = threading.Lock()
_active_worker = None


def load_model() -> None:
    """Initialise the extraction pipeline and store it in `extractor_pipeline`."""
    global extractor_pipeline
    extractor_pipeline = initialize_model_pipeline()


def start_background_processing(status_box) -> threading.Thread:
    """Start the extraction run in a worker thread, or join the one in progress.

    Args:
        status_box: Value supplied by the Gradio event. It is the textbox's
            current text (a plain string), not the component, so it is not
            used to report progress; progress is published through the
            module-level `processed_files` list instead.

    Returns:
        The thread executing the run. If a run is already active, that
        thread is returned and no new run is started.
    """
    global _active_worker

    def background_task():
        try:
            pdf_files = download_and_extract_pdfs(
                HF_DATASET_PDFS_REPO_ID,
                ZIP_FILENAME_IN_DATASET,
                LOCAL_PDF_FOLDER,
                LOCAL_CACHE_DIR,
            )

            # Timestamp plus a random suffix so each run has its own folder.
            run_id = (
                datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
                + "_"
                + uuid.uuid4().hex[:8]
            )
            local_output_dir = LOCAL_JSON_OUTPUT_BASE_FOLDER / run_id
            local_output_dir.mkdir(parents=True, exist_ok=True)

            for pdf_path in pdf_files:
                result = process_single_pdf(pdf_path, extractor_pipeline)

                output_json_path = local_output_dir / f"{pdf_path.stem}.json"
                with open(output_json_path, "w", encoding="utf-8") as f:
                    json.dump(result, f, ensure_ascii=False, indent=2)

                upload_results_to_hf_single(
                    result, HF_DATASET_JSON_REPO_ID, HF_TOKEN
                )
                processed_files.append(pdf_path.name)
        except Exception as exc:
            # Top-level boundary of the worker thread: without this the
            # thread would die silently and the UI would never show why.
            logger.exception("Background processing failed")
            processed_files.append(f"❌ Erreur : {exc}")

    with _run_lock:
        if _active_worker is not None and _active_worker.is_alive():
            return _active_worker
        # Fresh run: drop the names left over from the previous one.
        processed_files.clear()
        _active_worker = threading.Thread(target=background_task)
        _active_worker.start()
        return _active_worker


def launch_processing(status_box):
    """Gradio click handler: start the run and stream its progress.

    Args:
        status_box: Current text of the status textbox (unused; kept because
            the event is wired with the textbox as input).

    Yields:
        The text to display in the status box: first the launch
        confirmation, then the newline-joined list of processed file names
        each time it grows, until the worker thread finishes.
    """
    worker = start_background_processing(status_box)
    yield "✅ Traitement lancé."

    reported = 0
    while True:
        # Read liveness before the list so the final entries are never missed.
        alive = worker.is_alive()
        snapshot = list(processed_files)
        if len(snapshot) != reported:
            reported = len(snapshot)
            yield "\n".join(snapshot)
        if not alive:
            break
        worker.join(timeout=STATUS_POLL_SECONDS)


with gr.Blocks() as demo:
    gr.Markdown("# 📊 Extraction BRVM automatisée")
    gr.Markdown(
        "Le modèle est chargé au démarrage. \n"
        "Cliquez sur le bouton pour lancer le traitement des bulletins."
    )
    status_box = gr.Textbox(label="Fichiers traités", lines=20)
    launch_button = gr.Button("🚀 Lancer le traitement")
    launch_button.click(
        launch_processing, inputs=[status_box], outputs=[status_box]
    )

load_model()
# Generator handlers need the queue; enabling it explicitly keeps streaming
# working on Gradio versions where it is not on by default.
demo.queue()
demo.launch()