# brvm / app.py
# lamekemal's picture
# helioc
# ad83998
# app.py
import gradio as gr
import threading
import json
from pathlib import Path
from datetime import datetime
import uuid
import os
from script_brvm import (
initialize_model_pipeline,
download_and_extract_pdfs,
process_single_pdf,
upload_results_to_hf_single
)
# ---------- CONFIGURATION ----------
# Repo id and archive name handed to download_and_extract_pdfs() to fetch the
# input PDFs (presumably a Hugging Face dataset repo -- confirm in script_brvm).
HF_DATASET_PDFS_REPO_ID = "lamekemal/brvm-reports-pdfs"
ZIP_FILENAME_IN_DATASET = "brvm_reports.zip"
# Local folder and cache directory, also passed to download_and_extract_pdfs().
LOCAL_PDF_FOLDER = Path("brvm_reports_extracted")
LOCAL_CACHE_DIR = Path("./hf_cache")
# Read from the HF_TOKEN environment variable; None when the variable is unset.
HF_TOKEN = os.getenv("HF_TOKEN")
# Repo id handed to upload_results_to_hf_single() together with HF_TOKEN.
HF_DATASET_JSON_REPO_ID = "lamekemal/brvm-reports-json"
# Base folder under which each processing run creates its own sub-folder of JSON files.
LOCAL_JSON_OUTPUT_BASE_FOLDER = Path("brvm_json_outputs")
# Shared pipeline object: None until load_model() assigns it, then passed to
# process_single_pdf() for every PDF.
extractor_pipeline = None
# Names of the PDFs handled so far; appended to by the background task.
processed_files = []
def load_model():
    """Create the extraction pipeline and store it in the module global.

    Assigns the return value of ``initialize_model_pipeline()`` to
    ``extractor_pipeline`` so later processing can reuse it.
    """
    global extractor_pipeline
    pipeline = initialize_model_pipeline()
    extractor_pipeline = pipeline
def start_background_processing(status_box):
    """Start one processing run over all PDFs on a background thread.

    The worker calls ``download_and_extract_pdfs`` to obtain the PDF paths,
    creates a fresh run folder under ``LOCAL_JSON_OUTPUT_BASE_FOLDER`` and,
    for each PDF, runs ``process_single_pdf``, writes the result as JSON into
    the run folder, passes it to ``upload_results_to_hf_single`` and appends
    the file name to the module-level ``processed_files`` list.

    Args:
        status_box: Optional progress hook. If it exposes a callable
            ``update`` attribute, that is called with ``value=`` set to the
            newline-joined names processed so far. Any other object is
            ignored -- in particular the plain ``str`` that Gradio hands to a
            click handler for a Textbox listed in ``inputs``.

    Returns:
        The started ``threading.Thread`` so callers may ``join()`` or poll it.
    """
    import logging

    logger = logging.getLogger(__name__)

    # The click handler forwards the Textbox *value* (a str), which has no
    # ``update``; calling it unconditionally raised AttributeError in the
    # worker right after the first file. Resolve the hook once, up front.
    notify = getattr(status_box, "update", None)
    if not callable(notify):
        notify = None

    def background_task():
        pdf_files = download_and_extract_pdfs(
            HF_DATASET_PDFS_REPO_ID,
            ZIP_FILENAME_IN_DATASET,
            LOCAL_PDF_FOLDER,
            LOCAL_CACHE_DIR,
        )

        # Timestamp plus a short random suffix keeps run folders unique even
        # when two runs start within the same second.
        run_id = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + "_" + uuid.uuid4().hex[:8]
        local_output_dir = LOCAL_JSON_OUTPUT_BASE_FOLDER / run_id
        local_output_dir.mkdir(parents=True, exist_ok=True)

        for pdf_path in pdf_files:
            try:
                result = process_single_pdf(pdf_path, extractor_pipeline)
                output_json_path = local_output_dir / f"{pdf_path.stem}.json"
                with open(output_json_path, "w", encoding="utf-8") as f:
                    json.dump(result, f, ensure_ascii=False, indent=2)
                upload_results_to_hf_single(result, HF_DATASET_JSON_REPO_ID, HF_TOKEN)
            except Exception:
                # Worker-thread boundary: record the failure and keep going so
                # one bad report does not abort the rest of the batch.
                logger.exception("Failed to process %s", pdf_path)
                continue

            processed_files.append(pdf_path.name)
            if notify is not None:
                notify(value="\n".join(processed_files))

    thread = threading.Thread(target=background_task)
    thread.start()
    return thread
def launch_processing(status_box):
    """Kick off the background run and return a confirmation message.

    Args:
        status_box: Forwarded unchanged to ``start_background_processing``.

    Returns:
        The fixed confirmation string for the caller to display.
    """
    confirmation = "✅ Traitement lancé."
    start_background_processing(status_box)
    return confirmation
# Build the UI: a title, a short instruction line, a status textbox and the
# button whose click handler is launch_processing.
with gr.Blocks() as demo:
    gr.Markdown(value="# 📊 Extraction BRVM automatisée")
    gr.Markdown(
        value="Le modèle est chargé au démarrage. Cliquez sur le bouton pour lancer le traitement des bulletins."
    )
    status_box = gr.Textbox(label="Fichiers traités", lines=20)
    launch_button = gr.Button(value="🚀 Lancer le traitement")
    launch_button.click(
        fn=launch_processing,
        inputs=[status_box],
        outputs=[status_box],
    )

# Load the model first, then start serving the interface.
load_model()
demo.launch()