import io
import os
import re
import tempfile

import gradio as gr
import pandas as pd
import torch
from PIL import Image
from qwen_vl_utils import process_vision_info
from transformers import Qwen2VLForConditionalGeneration, AutoProcessor

# 1. Activo VLM engine configuration (self-hosted)
model_id = "Qwen/Qwen2-VL-2B-Instruct"

# Weights are loaded in float32 and placed on the CPU; float32 was chosen
# by the original author for stability on CPU.
_MODEL_LOAD_OPTIONS = {
    "torch_dtype": torch.float32,
    "device_map": "cpu",
    "low_cpu_mem_usage": True,
}
model = Qwen2VLForConditionalGeneration.from_pretrained(
    model_id, **_MODEL_LOAD_OPTIONS
)

# Strict lower/upper pixel bounds handed to the processor, intended to avoid
# dimension errors. Both bounds are written as multiples of 28*28.
_PIXEL_BOUNDS = {
    "min_pixels": 256 * 28 * 28,
    "max_pixels": 512 * 28 * 28,
}
processor = AutoProcessor.from_pretrained(model_id, **_PIXEL_BOUNDS)

def _build_messages(img, instruction):
    """Return the single-turn chat payload pairing *img* with the extraction prompt."""
    return [
        {
            "role": "user",
            "content": [
                {"type": "image", "image": img},
                {"type": "text", "text": f"Analyse ce document et extrais ces informations : {instruction}. Réponds directement avec les données."},
            ],
        }
    ]


def _generate_answer(messages):
    """Run the module-level processor and model on *messages*; return the decoded reply."""
    text_prompt = processor.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    image_inputs, _ = process_vision_info(messages)
    inputs = processor(
        text=[text_prompt],
        images=image_inputs,
        padding=True,
        return_tensors="pt",
    ).to("cpu")

    generated_ids = model.generate(
        **inputs,
        max_new_tokens=512,
        do_sample=False,  # greedy decoding: no randomness, for more precise output
        use_cache=True,
    )
    # Drop the first len(in_ids) ids of every output sequence so that only
    # the part following the prompt is decoded.
    trimmed_ids = [
        out_ids[len(in_ids):]
        for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
    ]
    decoded = processor.batch_decode(
        trimmed_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )
    return decoded[0].strip()


def _lines_to_dataframe(extracted_text):
    """Return a one-column DataFrame holding each non-blank line of *extracted_text*."""
    rows = [line.strip() for line in extracted_text.split("\n") if line.strip()]
    # Building from a dict keeps the column present even when there are no
    # rows, so the table and the Excel sheet always carry their header.
    return pd.DataFrame({"Donnée extraite": rows})


def _export_excel(df):
    """Write *df* to an .xlsx file in a fresh temporary directory; return its path."""
    # A fixed path in the working directory would be shared by every request,
    # letting overlapping requests overwrite each other's export. A per-call
    # directory isolates them while keeping the download file name stable.
    # The directory is not removed here because the caller still needs the
    # file after this function returns.
    export_dir = tempfile.mkdtemp(prefix="activo_export_")
    excel_path = os.path.join(export_dir, "Activo_Extraction_VLM.xlsx")
    with pd.ExcelWriter(excel_path, engine="openpyxl") as writer:
        df.to_excel(writer, index=False, sheet_name="Activo_Export")
    return excel_path


def process_document(image, instruction):
    """Extract information from a document image with the vision-language model.

    Args:
        image: Object exposing ``convert("RGB")`` (the UI supplies a PIL
            image), or ``None`` when nothing was provided.
        instruction: Text describing what to extract; it is interpolated
            into the prompt sent to the model.

    Returns:
        A ``(dataframe, text, excel_path)`` tuple. On success, ``dataframe``
        has one row per non-blank line of the model reply, ``text`` is the
        raw reply and ``excel_path`` points to an .xlsx export of the table.
        When no image is given, or when any step fails, the tuple is
        ``(None, message, None)`` with a user-facing message.
    """
    if image is None:
        return None, "Erreur : Veuillez fournir un document.", None

    # The whole pipeline sits inside the try so that preprocessing failures
    # are reported through the same message channel as inference failures.
    try:
        img = image.convert("RGB")
        extracted_text = _generate_answer(_build_messages(img, instruction))
        df = _lines_to_dataframe(extracted_text)
        excel_path = _export_excel(df)
    except Exception as e:  # UI boundary: surface the error instead of crashing
        return None, f"Détail technique : {str(e)}", None

    return df, extracted_text, excel_path

# 5. Activo professional interface
# Components are declared inside nested layout context managers, in the
# order written below.
with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
    gr.Markdown("# 🚀 Activo VLM Engine (Souverain)")
    gr.Markdown("### Moteur Qwen2-VL optimisé pour l'extraction de documents")
    
    with gr.Row():
        # Left column: document upload, extraction instruction and run button.
        with gr.Column(scale=1):
            # type="pil" requests the upload as a PIL image, which is what
            # process_document calls .convert("RGB") on.
            img_input = gr.Image(label="Document source", type="pil")
            instruction = gr.Textbox(
                label="Instruction d'extraction", 
                value="Liste tous les noms et les notes",
            )
            btn = gr.Button("DÉMARRER L'ANALYSE", variant="primary")
        
        # Right column: one tab per output (raw text, table, Excel download).
        with gr.Column(scale=2):
            with gr.Tabs():
                with gr.TabItem("Texte extrait"):
                    text_output = gr.Textbox(label="Réponse IA", lines=12)
                with gr.TabItem("Tableau"):
                    out_df = gr.Dataframe()
                with gr.TabItem("Export"):
                    out_file = gr.File(label="Télécharger Excel")
            
    gr.Markdown("© 2026 Activo Solution - Infrastructure IA sécurisée")

    # process_document returns (dataframe, text, file path); the outputs list
    # follows that same order.
    btn.click(
        fn=process_document, 
        inputs=[img_input, instruction], 
        outputs=[out_df, text_output, out_file]
    )

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()