# activo / app.py
# dieumercimvemba's picture
# Update app.py
# d794d32 verified
import io
import re
import tempfile

import gradio as gr
import pandas as pd
import torch
from PIL import Image
from qwen_vl_utils import process_vision_info
from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
# 1. Activo VLM engine configuration (self-hosted).
model_id = "Qwen/Qwen2-VL-2B-Instruct"

# Weights are loaded in float32 and placed on the CPU (the author's note:
# forced float32 for CPU stability).
_model_load_options = dict(
    torch_dtype=torch.float32,
    device_map="cpu",
    low_cpu_mem_usage=True,
)
model = Qwen2VLForConditionalGeneration.from_pretrained(
    model_id, **_model_load_options
)

# Explicit pixel bounds for the processor (the author's note: strict bounds
# to avoid dimension errors). Both bounds are multiples of a 28x28 block.
_PIXEL_BLOCK = 28 * 28
processor = AutoProcessor.from_pretrained(
    model_id,
    min_pixels=256 * _PIXEL_BLOCK,
    max_pixels=512 * _PIXEL_BLOCK,
)
def _export_to_excel(df):
    """Write *df* to a uniquely named temporary .xlsx file and return its path.

    A fresh file per call keeps simultaneous requests from overwriting each
    other's export, which a single fixed filename in the working directory
    would allow.
    """
    buffer = io.BytesIO()
    with pd.ExcelWriter(buffer, engine="openpyxl") as writer:
        df.to_excel(writer, index=False, sheet_name="Activo_Export")

    # delete=False: the file must outlive this function so the UI can serve it.
    with tempfile.NamedTemporaryFile(
        prefix="Activo_Extraction_VLM_", suffix=".xlsx", delete=False
    ) as tmp:
        tmp.write(buffer.getvalue())
    return tmp.name


def process_document(image, instruction):
    """Extract information from a document image with the vision-language model.

    Args:
        image: The uploaded document as a PIL image, or ``None`` when nothing
            was provided.
        instruction: Free-text description of what to extract; ``None`` is
            treated as an empty instruction.

    Returns:
        A ``(dataframe, text, excel_path)`` tuple. On success, ``dataframe``
        holds one row per non-empty line of the model answer, ``text`` is the
        raw answer and ``excel_path`` points to the exported workbook. When no
        image is given or any step fails, the tuple is
        ``(None, <message>, None)``.
    """
    if image is None:
        return None, "Erreur : Veuillez fournir un document.", None

    # Everything from here on runs inside the try so that preprocessing
    # failures are reported the same way as generation failures.
    try:
        img = image.convert("RGB")
        # Avoid injecting the literal string "None" into the prompt.
        instruction = (instruction or "").strip()

        # 2. Build the multimodal chat message (image first, then the text).
        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "image", "image": img},
                    {"type": "text", "text": f"Analyse ce document et extrais ces informations : {instruction}. Réponds directement avec les données."},
                ],
            }
        ]

        # Render the chat template to a prompt string.
        text_prompt = processor.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )

        # Collect the image inputs referenced by the messages.
        image_inputs, _ = process_vision_info(messages)
        inputs = processor(
            text=[text_prompt],
            images=image_inputs,
            padding=True,
            return_tensors="pt",
        ).to("cpu")

        # 3. Inference; sampling is disabled (do_sample=False).
        generated_ids = model.generate(
            **inputs,
            max_new_tokens=512,
            do_sample=False,
            use_cache=True,
        )

        # Drop the prompt tokens so only the newly generated part is decoded.
        generated_ids_trimmed = [
            out_ids[len(in_ids):]
            for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
        ]
        extracted_text = processor.batch_decode(
            generated_ids_trimmed,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False,
        )[0].strip()

        # 4. Structure the answer: one row per non-empty line. Building the
        # frame from a column mapping keeps the header even when the answer
        # is empty.
        lines = [
            stripped
            for stripped in (raw.strip() for raw in extracted_text.split("\n"))
            if stripped
        ]
        df = pd.DataFrame({"Donnée extraite": lines})

        excel_path = _export_to_excel(df)
        return df, extracted_text, excel_path
    except Exception as e:
        # UI boundary: surface the failure as text instead of crashing.
        return None, f"Détail technique : {str(e)}", None
# 5. Activo user interface.
with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
    gr.Markdown("# 🚀 Activo VLM Engine (Souverain)")
    gr.Markdown("### Moteur Qwen2-VL optimisé pour l'extraction de documents")

    with gr.Row():
        # Input column: source document, extraction instruction, run button.
        with gr.Column(scale=1):
            img_input = gr.Image(type="pil", label="Document source")
            instruction = gr.Textbox(
                value="Liste tous les noms et les notes",
                label="Instruction d'extraction",
            )
            btn = gr.Button("DÉMARRER L'ANALYSE", variant="primary")

        # Output column: one tab per result of process_document.
        with gr.Column(scale=2), gr.Tabs():
            with gr.TabItem("Texte extrait"):
                text_output = gr.Textbox(label="Réponse IA", lines=12)
            with gr.TabItem("Tableau"):
                out_df = gr.Dataframe()
            with gr.TabItem("Export"):
                out_file = gr.File(label="Télécharger Excel")

    gr.Markdown("© 2026 Activo Solution - Infrastructure IA sécurisée")

    # Output order matches the (dataframe, text, file path) tuple returned
    # by process_document.
    btn.click(
        process_document,
        inputs=[img_input, instruction],
        outputs=[out_df, text_output, out_file],
    )

if __name__ == "__main__":
    demo.launch()