Spaces:
Sleeping
Sleeping
import io
import re
import tempfile

import gradio as gr
import pandas as pd
import torch
from PIL import Image
from qwen_vl_utils import process_vision_info
from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
# 1. Activo VLM engine configuration (self-hosted)
model_id = "Qwen/Qwen2-VL-2B-Instruct"

# Weights are forced to float32 on the CPU for stability.
_MODEL_LOAD_OPTIONS = {
    "torch_dtype": torch.float32,
    "device_map": "cpu",
    "low_cpu_mem_usage": True,
}
model = Qwen2VLForConditionalGeneration.from_pretrained(
    model_id, **_MODEL_LOAD_OPTIONS
)

# Strict pixel bounds to avoid dimension errors. Both bounds are expressed as
# multiples of 28 * 28 pixels (256x for the minimum, 512x for the maximum).
_PIXEL_UNIT = 28 * 28
_PROCESSOR_OPTIONS = {
    "min_pixels": 256 * _PIXEL_UNIT,
    "max_pixels": 512 * _PIXEL_UNIT,
}
processor = AutoProcessor.from_pretrained(model_id, **_PROCESSOR_OPTIONS)
def _export_to_excel(df):
    """Write *df* to a uniquely named temporary .xlsx file and return its path."""
    # A unique file per call keeps concurrent requests from overwriting each
    # other's export. delete=False because the file must outlive this function
    # so the UI can serve it for download.
    with tempfile.NamedTemporaryFile(
        prefix="Activo_Extraction_VLM_", suffix=".xlsx", delete=False
    ) as tmp:
        excel_path = tmp.name
    with pd.ExcelWriter(excel_path, engine="openpyxl") as writer:
        df.to_excel(writer, index=False, sheet_name="Activo_Export")
    return excel_path


def process_document(image, instruction):
    """Extract information from a document image with the vision-language model.

    Args:
        image: The uploaded document; must expose ``convert("RGB")`` (the UI
            passes a PIL image). ``None`` when nothing was uploaded.
        instruction: Free-text description of what to extract; it is embedded
            in the prompt sent to the model.

    Returns:
        A ``(dataframe, text, excel_path)`` tuple. On success, ``dataframe``
        holds one row per non-empty line of the model answer, ``text`` is the
        raw answer and ``excel_path`` is the path of the exported workbook.
        On failure (missing image or any processing error), ``dataframe`` and
        ``excel_path`` are ``None`` and ``text`` carries the error message.
    """
    if image is None:
        return None, "Erreur : Veuillez fournir un document.", None

    # Everything from image conversion to export runs inside the try so that
    # any failure is reported through the text output instead of escaping the
    # UI callback.
    try:
        img = image.convert("RGB")

        # 2. Build the multimodal message (image first, then the instruction).
        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "image", "image": img},
                    {"type": "text", "text": f"Analyse ce document et extrais ces informations : {instruction}. Réponds directement avec les données."},
                ],
            }
        ]

        # Render the chat template as text; tokenization happens in processor().
        text_prompt = processor.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )

        # Pull the vision inputs out of the message structure.
        image_inputs, _ = process_vision_info(messages)
        inputs = processor(
            text=[text_prompt],
            images=image_inputs,
            padding=True,
            return_tensors="pt",
        ).to("cpu")

        # 3. Inference; do_sample=False disables sampling for repeatable output.
        generated_ids = model.generate(
            **inputs,
            max_new_tokens=512,
            do_sample=False,
            use_cache=True,
        )

        # Drop the prompt tokens so that only the newly generated part is decoded.
        generated_ids_trimmed = [
            out_ids[len(in_ids):]
            for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
        ]
        extracted_text = processor.batch_decode(
            generated_ids_trimmed,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False,
        )[0].strip()

        # 4. Structuring: one row per non-empty line of the answer. Building
        # from a column dict keeps the header even when the answer is empty.
        stripped = (raw.strip() for raw in extracted_text.split("\n"))
        lines = [line for line in stripped if line]
        df = pd.DataFrame({"Donnée extraite": lines})

        # Excel export
        excel_path = _export_to_excel(df)
        return df, extracted_text, excel_path
    except Exception as e:  # UI boundary: report the failure, never crash.
        return None, f"Détail technique : {e}", None
# 5. Activo professional interface
# NOTE(review): the nesting below was reconstructed from a listing whose
# indentation was lost — confirm the layout against the running app.
with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
    gr.Markdown("# 🚀 Activo VLM Engine (Souverain)")
    gr.Markdown("### Moteur Qwen2-VL optimisé pour l'extraction de documents")
    with gr.Row():
        # Input column: source document, extraction instruction, run button.
        with gr.Column(scale=1):
            # type="pil" hands the upload to the callback as a PIL image.
            img_input = gr.Image(label="Document source", type="pil")
            instruction = gr.Textbox(
                label="Instruction d'extraction",
                value="Liste tous les noms et les notes",
            )
            btn = gr.Button("DÉMARRER L'ANALYSE", variant="primary")
        # Output column: one tab per value returned by process_document.
        with gr.Column(scale=2):
            with gr.Tabs():
                with gr.TabItem("Texte extrait"):
                    text_output = gr.Textbox(label="Réponse IA", lines=12)
                with gr.TabItem("Tableau"):
                    out_df = gr.Dataframe()
                with gr.TabItem("Export"):
                    out_file = gr.File(label="Télécharger Excel")
    gr.Markdown("© 2026 Activo Solution - Infrastructure IA sécurisée")
    # The outputs order must match the (dataframe, text, file path) tuple
    # returned by process_document.
    btn.click(
        fn=process_document,
        inputs=[img_input, instruction],
        outputs=[out_df, text_output, out_file]
    )

# Start the Gradio server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()