"""Gradio front-end for document data extraction with a self-hosted Qwen2-VL model.

The module loads the model and its processor once at import time, exposes
``process_document`` as the click handler, and builds the Gradio UI in ``demo``.
"""
import io
import logging
import os
import re
import tempfile
from typing import Optional, Tuple

import gradio as gr
import pandas as pd
import torch
from PIL import Image
from qwen_vl_utils import process_vision_info
from transformers import AutoProcessor, Qwen2VLForConditionalGeneration

logger = logging.getLogger(__name__)

# 1. Activo VLM engine configuration (self-hosted).
model_id = "Qwen/Qwen2-VL-2B-Instruct"

# Name of the single column of the result table / Excel sheet.
_COLUMN_NAME = "Donnée extraite"
# File name offered for download; the file itself lives in a per-request
# temporary directory so that concurrent requests never share a path.
_EXCEL_FILENAME = "Activo_Extraction_VLM.xlsx"
_SHEET_NAME = "Activo_Export"

# Loading is forced to float32 for stability on CPU.
model = Qwen2VLForConditionalGeneration.from_pretrained(
    model_id,
    torch_dtype=torch.float32,
    device_map="cpu",
    low_cpu_mem_usage=True,
)

# Strict pixel bounds to avoid dimension errors.
processor = AutoProcessor.from_pretrained(
    model_id,
    min_pixels=256 * 28 * 28,
    max_pixels=512 * 28 * 28,
)


def _run_inference(img: Image.Image, instruction: str) -> str:
    """Run the vision-language model on *img* and return the decoded answer."""
    # 2. Multimodal message (image first, then the text instruction).
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image", "image": img},
                {"type": "text", "text": f"Analyse ce document et extrais ces informations : {instruction}. Réponds directement avec les données."},
            ],
        }
    ]

    # Render the chat template as text; tokenisation happens in processor().
    text_prompt = processor.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

    # Extract the vision inputs; the second returned value is not used here.
    image_inputs, _ = process_vision_info(messages)

    inputs = processor(
        text=[text_prompt],
        images=image_inputs,
        padding=True,
        return_tensors="pt",
    ).to("cpu")

    # 3. Inference.
    generated_ids = model.generate(
        **inputs,
        max_new_tokens=512,
        do_sample=False,  # disable sampling for more precise output
        use_cache=True,
    )

    # Drop the leading len(in_ids) tokens of each output so that only the
    # newly generated part is decoded.
    generated_ids_trimmed = [
        out_ids[len(in_ids):]
        for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
    ]

    decoded = processor.batch_decode(
        generated_ids_trimmed,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )
    return decoded[0].strip()


def _text_to_dataframe(text: str) -> pd.DataFrame:
    """Turn each non-blank line of *text* into one row of a one-column table."""
    stripped = (raw.strip() for raw in text.split("\n"))
    rows = [{_COLUMN_NAME: line} for line in stripped if line]
    # Declaring the column keeps the header even when there are no rows.
    return pd.DataFrame(rows, columns=[_COLUMN_NAME])


def _export_excel(df: pd.DataFrame) -> str:
    """Write *df* to an .xlsx file in a fresh temporary directory; return its path."""
    # A dedicated directory per call keeps the download name stable while
    # preventing concurrent requests from overwriting each other's export.
    # NOTE: the directory is not removed here; cleanup is left to the OS /
    # deployment environment.
    export_dir = tempfile.mkdtemp(prefix="activo_export_")
    excel_path = os.path.join(export_dir, _EXCEL_FILENAME)
    with pd.ExcelWriter(excel_path, engine="openpyxl") as writer:
        df.to_excel(writer, index=False, sheet_name=_SHEET_NAME)
    return excel_path


def process_document(
    image: Optional[Image.Image], instruction: Optional[str]
) -> Tuple[Optional[pd.DataFrame], str, Optional[str]]:
    """Extract the requested information from a document image.

    Args:
        image: The document as a PIL image, or ``None`` when nothing was
            provided.
        instruction: Free-text description of what to extract.

    Returns:
        A ``(dataframe, text, excel_path)`` tuple in the order of the UI
        outputs. When no image is given or any step fails, the dataframe and
        path are ``None`` and the text carries the error message.
    """
    if image is None:
        return None, "Erreur : Veuillez fournir un document.", None

    # Avoid interpolating the literal "None" into the prompt.
    instruction = (instruction or "").strip()

    # This is the UI boundary: any failure is reported in the text box rather
    # than propagated, and logged with its traceback for diagnosis.
    try:
        img = image.convert("RGB")
        extracted_text = _run_inference(img, instruction)
        # 4. Structuring and Excel export.
        df = _text_to_dataframe(extracted_text)
        excel_path = _export_excel(df)
    except Exception as e:
        logger.exception("Document processing failed")
        return None, f"Détail technique : {str(e)}", None

    return df, extracted_text, excel_path


# 5. Activo user interface.
with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
    gr.Markdown("# 🚀 Activo VLM Engine (Souverain)")
    gr.Markdown("### Moteur Qwen2-VL optimisé pour l'extraction de documents")

    with gr.Row():
        with gr.Column(scale=1):
            img_input = gr.Image(label="Document source", type="pil")
            instruction = gr.Textbox(
                label="Instruction d'extraction",
                value="Liste tous les noms et les notes",
            )
            btn = gr.Button("DÉMARRER L'ANALYSE", variant="primary")

        with gr.Column(scale=2):
            with gr.Tabs():
                with gr.TabItem("Texte extrait"):
                    text_output = gr.Textbox(label="Réponse IA", lines=12)
                with gr.TabItem("Tableau"):
                    out_df = gr.Dataframe()
                with gr.TabItem("Export"):
                    out_file = gr.File(label="Télécharger Excel")

    gr.Markdown("© 2026 Activo Solution - Infrastructure IA sécurisée")

    # Output order must match the tuple returned by process_document.
    btn.click(
        fn=process_document,
        inputs=[img_input, instruction],
        outputs=[out_df, text_output, out_file],
    )

if __name__ == "__main__":
    demo.launch()