Spaces:

angelsg213
/

TESTING22

Sleeping

App Files Files Community

angelsg213 committed on Dec 7, 2025

Commit

bf52c26

verified ·

1 Parent(s): cc62f8c

Update app.py

Browse files

Files changed (1) hide show

app.py +516 -495

app.py CHANGED Viewed

@@ -9,9 +9,23 @@ from huggingface_hub import InferenceClient
 from reportlab.lib.pagesizes import letter, A4
 from reportlab.lib import colors
 from reportlab.lib.units import inch
-from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph, Spacer, Image
 from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
 from reportlab.lib.enums import TA_CENTER, TA_RIGHT, TA_LEFT
 # ============= EXTRAER TEXTO DEL PDF =============
 def extraer_texto_pdf(pdf_file):
@@ -24,9 +38,265 @@ def extraer_texto_pdf(pdf_file):
     except Exception as e:
         return f"Error: {str(e)}"
 # ============= ANALIZAR CON LLM Y CONVERTIR A JSON =============
 def analizar_y_convertir_json(texto):
-    """El LLM lee la factura, decide cómo estructurarla y devuelve JSON"""
     token = os.getenv("aa")
     if not token:
@@ -35,17 +305,14 @@ def analizar_y_convertir_json(texto):
     texto_limpio = texto[:8000]
     prompt = f"""Eres un experto en análisis de facturas. Lee esta factura y conviértela a JSON.
 TEXTO DE LA FACTURA:
 {texto_limpio}
 INSTRUCCIONES:
 1. Analiza el texto y decide qué información es importante extraer
 2. Crea un JSON estructurado con TODOS los datos que encuentres
 3. Incluye: número de factura, fecha, emisor, cliente, productos/servicios, importes
 4. Para los números: usa formato numérico puro (ejemplo: 250 no "250€")
 5. Si hay tabla de productos, extrae CADA producto con cantidad, precio y total
 FORMATO JSON (ajusta según lo que encuentres):
 {{
   "numero_factura": "string",
@@ -74,7 +341,6 @@ FORMATO JSON (ajusta según lo que encuentres):
     "total": number
   }}
 }}
 Responde SOLO con el JSON válido (sin explicaciones, sin markdown):"""
     modelos = [
@@ -129,16 +395,13 @@ def generar_resumen_util(texto, modelo, client):
     """Genera un resumen con información útil para administrativos"""
     prompt_resumen = f"""Analiza esta factura y proporciona información útil para un administrativo o usuario medio.
 TEXTO DE LA FACTURA:
 {texto[:6000]}
 Genera un resumen estructurado con:
 1. ESTADO DE PAGO: ¿Está pagada? ¿Fecha de vencimiento?
 2. INFORMACIÓN CLAVE: Datos importantes que destacar
 3. ALERTAS: Cualquier aspecto que requiera atención (vencimientos, importes altos, etc.)
 4. RESUMEN EJECUTIVO: Descripción breve y clara de la factura
 Responde en español de forma clara y profesional:"""
     try:
@@ -216,439 +479,66 @@ def json_a_csv(datos_json):
     return pd.DataFrame(filas)
-# ============= GENERAR PDF DESDE CSV - TEMPLATE CLÁSICO =============
 def generar_pdf_clasico(csv_file, datos_json):
-    """Template clásico - Estilo tradicional corporativo"""
     timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
     pdf_filename = f"factura_clasica_{timestamp}.pdf"
     doc = SimpleDocTemplate(pdf_filename, pagesize=A4)
     story = []
     styles = getSampleStyleSheet()
-    # Estilos personalizados
-    titulo_style = ParagraphStyle(
-        'CustomTitle',
-        parent=styles['Heading1'],
-        fontSize=24,
-        textColor=colors.HexColor('#1a1a1a'),
-        spaceAfter=30,
-        alignment=TA_CENTER
-    )
-    # Título
     story.append(Paragraph("FACTURA", titulo_style))
     story.append(Spacer(1, 0.3*inch))
-    # Información básica
-    info_data = [
-        ['Número de Factura:', datos_json.get('numero_factura', 'N/A')],
-        ['Fecha:', datos_json.get('fecha', 'N/A')]
-    ]
     info_table = Table(info_data, colWidths=[2*inch, 4*inch])
-    info_table.setStyle(TableStyle([
-        ('FONTNAME', (0, 0), (-1, -1), 'Helvetica'),
-        ('FONTSIZE', (0, 0), (-1, -1), 11),
-        ('TEXTCOLOR', (0, 0), (0, -1), colors.HexColor('#666666')),
-        ('ALIGN', (0, 0), (0, -1), 'RIGHT'),
-        ('ALIGN', (1, 0), (1, -1), 'LEFT'),
-        ('BOTTOMPADDING', (0, 0), (-1, -1), 12),
-    ]))
     story.append(info_table)
-    story.append(Spacer(1, 0.3*inch))
-    # Emisor y Cliente
-    emisor = datos_json.get('emisor', {})
-    cliente = datos_json.get('cliente', {})
-    partes_data = [
-        ['EMISOR', 'CLIENTE'],
-        [
-            emisor.get('nombre', 'N/A') if isinstance(emisor, dict) else str(emisor),
-            cliente.get('nombre', 'N/A') if isinstance(cliente, dict) else str(cliente)
-        ],
-        [
-            emisor.get('nif', '') if isinstance(emisor, dict) else '',
-            cliente.get('nif', '') if isinstance(cliente, dict) else ''
-        ]
-    ]
-    partes_table = Table(partes_data, colWidths=[3*inch, 3*inch])
-    partes_table.setStyle(TableStyle([
-        ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
-        ('FONTSIZE', (0, 0), (-1, 0), 12),
-        ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor('#e0e0e0')),
-        ('TEXTCOLOR', (0, 0), (-1, 0), colors.HexColor('#1a1a1a')),
-        ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
-        ('VALIGN', (0, 0), (-1, -1), 'TOP'),
-        ('FONTNAME', (0, 1), (-1, -1), 'Helvetica'),
-        ('FONTSIZE', (0, 1), (-1, -1), 10),
-        ('GRID', (0, 0), (-1, -1), 1, colors.HexColor('#cccccc')),
-        ('TOPPADDING', (0, 0), (-1, -1), 10),
-        ('BOTTOMPADDING', (0, 0), (-1, -1), 10),
-        ('LEFTPADDING', (0, 0), (-1, -1), 10),
-    ]))
-    story.append(partes_table)
-    story.append(Spacer(1, 0.4*inch))
-    # Productos
-    productos = datos_json.get('productos', datos_json.get('conceptos', []))
-    if productos:
-        productos_data = [['Descripción', 'Cantidad', 'Precio Unit.', 'Total']]
-        for prod in productos:
-            productos_data.append([
-                str(prod.get('descripcion', '')),
-                str(prod.get('cantidad', '')),
-                f"{prod.get('precio_unitario', 0):.2f} €",
-                f"{prod.get('total', 0):.2f} €"
-            ])
-        productos_table = Table(productos_data, colWidths=[3*inch, 1*inch, 1.5*inch, 1.5*inch])
-        productos_table.setStyle(TableStyle([
-            ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
-            ('FONTSIZE', (0, 0), (-1, 0), 11),
-            ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor('#4a4a4a')),
-            ('TEXTCOLOR', (0, 0), (-1, 0), colors.white),
-            ('ALIGN', (0, 0), (0, -1), 'LEFT'),
-            ('ALIGN', (1, 0), (-1, -1), 'RIGHT'),
-            ('FONTNAME', (0, 1), (-1, -1), 'Helvetica'),
-            ('FONTSIZE', (0, 1), (-1, -1), 10),
-            ('GRID', (0, 0), (-1, -1), 1, colors.HexColor('#cccccc')),
-            ('ROWBACKGROUNDS', (0, 1), (-1, -1), [colors.white, colors.HexColor('#f5f5f5')]),
-            ('TOPPADDING', (0, 0), (-1, -1), 8),
-            ('BOTTOMPADDING', (0, 0), (-1, -1), 8),
-        ]))
-        story.append(productos_table)
-        story.append(Spacer(1, 0.3*inch))
-    # Totales
-    totales = datos_json.get('totales', {})
-    base = totales.get('base_imponible', datos_json.get('base_imponible', 0))
-    iva = totales.get('iva', datos_json.get('iva', 0))
-    porcentaje_iva = totales.get('porcentaje_iva', datos_json.get('porcentaje_iva', 0))
-    total = totales.get('total', datos_json.get('total', 0))
-    totales_data = [
-        ['Base Imponible:', f"{base:.2f} €"],
-        [f'IVA ({porcentaje_iva}%):', f"{iva:.2f} €"],
-        ['TOTAL:', f"{total:.2f} €"]
-    ]
-    totales_table = Table(totales_data, colWidths=[4.5*inch, 1.5*inch])
-    totales_table.setStyle(TableStyle([
-        ('FONTNAME', (0, 0), (-1, 1), 'Helvetica'),
-        ('FONTNAME', (0, 2), (-1, 2), 'Helvetica-Bold'),
-        ('FONTSIZE', (0, 0), (-1, 1), 11),
-        ('FONTSIZE', (0, 2), (-1, 2), 14),
-        ('ALIGN', (0, 0), (0, -1), 'RIGHT'),
-        ('ALIGN', (1, 0), (1, -1), 'RIGHT'),
-        ('BACKGROUND', (0, 2), (-1, 2), colors.HexColor('#4a4a4a')),
-        ('TEXTCOLOR', (0, 2), (-1, 2), colors.white),
-        ('TOPPADDING', (0, 0), (-1, -1), 8),
-        ('BOTTOMPADDING', (0, 0), (-1, -1), 8),
-        ('RIGHTPADDING', (0, 0), (-1, -1), 10),
-    ]))
-    story.append(totales_table)
     doc.build(story)
     return pdf_filename
-# ============= GENERAR PDF - TEMPLATE MODERNO =============
 def generar_pdf_moderno(csv_file, datos_json):
-    """Template moderno - Estilo minimalista y limpio"""
     timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
     pdf_filename = f"factura_moderna_{timestamp}.pdf"
     doc = SimpleDocTemplate(pdf_filename, pagesize=A4)
     story = []
     styles = getSampleStyleSheet()
-    # Título moderno
-    titulo_style = ParagraphStyle(
-        'ModernTitle',
-        parent=styles['Heading1'],
-        fontSize=32,
-        textColor=colors.HexColor('#2196F3'),
-        spaceAfter=10,
-        alignment=TA_LEFT,
-        fontName='Helvetica-Bold'
-    )
     story.append(Paragraph("FACTURA", titulo_style))
-    # Subtítulo
-    subtitulo = f"No. {datos_json.get('numero_factura', 'N/A')} | {datos_json.get('fecha', 'N/A')}"
-    subtitulo_style = ParagraphStyle(
-        'Subtitle',
-        parent=styles['Normal'],
-        fontSize=11,
-        textColor=colors.HexColor('#757575'),
-        spaceAfter=30
-    )
-    story.append(Paragraph(subtitulo, subtitulo_style))
-    story.append(Spacer(1, 0.3*inch))
-    # Emisor y Cliente en cajas
-    emisor = datos_json.get('emisor', {})
-    cliente = datos_json.get('cliente', {})
-    info_boxes = [
-        [
-            Paragraph(f"<b>DE:</b><br/>{emisor.get('nombre', 'N/A') if isinstance(emisor, dict) else str(emisor)}<br/>{emisor.get('nif', '') if isinstance(emisor, dict) else ''}", styles['Normal']),
-            Paragraph(f"<b>PARA:</b><br/>{cliente.get('nombre', 'N/A') if isinstance(cliente, dict) else str(cliente)}<br/>{cliente.get('nif', '') if isinstance(cliente, dict) else ''}", styles['Normal'])
-        ]
-    ]
-    boxes_table = Table(info_boxes, colWidths=[3*inch, 3*inch])
-    boxes_table.setStyle(TableStyle([
-        ('BACKGROUND', (0, 0), (0, 0), colors.HexColor('#E3F2FD')),
-        ('BACKGROUND', (1, 0), (1, 0), colors.HexColor('#FFF3E0')),
-        ('VALIGN', (0, 0), (-1, -1), 'TOP'),
-        ('TOPPADDING', (0, 0), (-1, -1), 15),
-        ('BOTTOMPADDING', (0, 0), (-1, -1), 15),
-        ('LEFTPADDING', (0, 0), (-1, -1), 15),
-        ('RIGHTPADDING', (0, 0), (-1, -1), 15),
-    ]))
-    story.append(boxes_table)
-    story.append(Spacer(1, 0.4*inch))
-    # Productos con estilo moderno
-    productos = datos_json.get('productos', datos_json.get('conceptos', []))
-    if productos:
-        productos_data = [['DESCRIPCIÓN', 'CANT.', 'PRECIO', 'TOTAL']]
-        for prod in productos:
-            productos_data.append([
-                str(prod.get('descripcion', '')),
-                str(prod.get('cantidad', '')),
-                f"{prod.get('precio_unitario', 0):.2f} €",
-                f"{prod.get('total', 0):.2f} €"
-            ])
-        productos_table = Table(productos_data, colWidths=[3*inch, 0.8*inch, 1.5*inch, 1.7*inch])
-        productos_table.setStyle(TableStyle([
-            ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
-            ('FONTSIZE', (0, 0), (-1, 0), 9),
-            ('TEXTCOLOR', (0, 0), (-1, 0), colors.HexColor('#757575')),
-            ('ALIGN', (0, 0), (0, -1), 'LEFT'),
-            ('ALIGN', (1, 0), (-1, -1), 'RIGHT'),
-            ('FONTNAME', (0, 1), (-1, -1), 'Helvetica'),
-            ('FONTSIZE', (0, 1), (-1, -1), 10),
-            ('LINEBELOW', (0, 0), (-1, 0), 2, colors.HexColor('#2196F3')),
-            ('LINEBELOW', (0, 1), (-1, -2), 0.5, colors.HexColor('#e0e0e0')),
-            ('TOPPADDING', (0, 0), (-1, -1), 10),
-            ('BOTTOMPADDING', (0, 0), (-1, -1), 10),
-        ]))
-        story.append(productos_table)
-        story.append(Spacer(1, 0.4*inch))
-    # Totales modernos
-    totales = datos_json.get('totales', {})
-    base = totales.get('base_imponible', datos_json.get('base_imponible', 0))
-    iva = totales.get('iva', datos_json.get('iva', 0))
-    porcentaje_iva = totales.get('porcentaje_iva', datos_json.get('porcentaje_iva', 0))
-    total = totales.get('total', datos_json.get('total', 0))
-    totales_data = [
-        ['Subtotal', f"{base:.2f} €"],
-        [f'IVA {porcentaje_iva}%', f"{iva:.2f} €"],
-        ['', ''],
-        ['TOTAL', f"{total:.2f} €"]
-    ]
-    totales_table = Table(totales_data, colWidths=[5*inch, 2*inch])
-    totales_table.setStyle(TableStyle([
-        ('FONTNAME', (0, 0), (-1, 2), 'Helvetica'),
-        ('FONTNAME', (0, 3), (-1, 3), 'Helvetica-Bold'),
-        ('FONTSIZE', (0, 0), (-1, 2), 11),
-        ('FONTSIZE', (0, 3), (-1, 3), 16),
-        ('ALIGN', (0, 0), (-1, -1), 'RIGHT'),
-        ('TEXTCOLOR', (0, 3), (-1, 3), colors.HexColor('#2196F3')),
-        ('LINEABOVE', (0, 3), (-1, 3), 2, colors.HexColor('#2196F3')),
-        ('TOPPADDING', (0, 0), (-1, -1), 8),
-        ('BOTTOMPADDING', (0, 0), (-1, -1), 8),
-    ]))
-    story.append(totales_table)
     doc.build(story)
     return pdf_filename
-# ============= GENERAR PDF - TEMPLATE ELEGANTE =============
 def generar_pdf_elegante(csv_file, datos_json):
-    """Template elegante - Estilo premium con detalles"""
     timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
     pdf_filename = f"factura_elegante_{timestamp}.pdf"
     doc = SimpleDocTemplate(pdf_filename, pagesize=A4)
     story = []
     styles = getSampleStyleSheet()
-    # Encabezado elegante
-    header_style = ParagraphStyle(
-        'ElegantHeader',
-        parent=styles['Heading1'],
-        fontSize=28,
-        textColor=colors.HexColor('#1a237e'),
-        spaceAfter=5,
-        alignment=TA_CENTER,
-        fontName='Helvetica-Bold'
-    )
     story.append(Paragraph("F A C T U R A", header_style))
-    # Línea decorativa
-    line_data = [['']]
-    line_table = Table(line_data, colWidths=[6.5*inch])
-    line_table.setStyle(TableStyle([
-        ('LINEBELOW', (0, 0), (-1, 0), 3, colors.HexColor('#7986cb')),
-        ('BOTTOMPADDING', (0, 0), (-1, -1), 20),
-    ]))
-    story.append(line_table)
-    # Información de factura
-    info_data = [[
-        f"No. {datos_json.get('numero_factura', 'N/A')}",
-        f"Fecha: {datos_json.get('fecha', 'N/A')}"
-    ]]
-    info_table = Table(info_data, colWidths=[3.25*inch, 3.25*inch])
-    info_table.setStyle(TableStyle([
-        ('FONTNAME', (0, 0), (-1, -1), 'Helvetica'),
-        ('FONTSIZE', (0, 0), (-1, -1), 10),
-        ('TEXTCOLOR', (0, 0), (-1, -1), colors.HexColor('#424242')),
-        ('ALIGN', (0, 0), (0, -1), 'LEFT'),
-        ('ALIGN', (1, 0), (1, -1), 'RIGHT'),
-        ('BOTTOMPADDING', (0, 0), (-1, -1), 15),
-    ]))
-    story.append(info_table)
-    story.append(Spacer(1, 0.2*inch))
-    # Emisor y Cliente elegante
-    emisor = datos_json.get('emisor', {})
-    cliente = datos_json.get('cliente', {})
-    partes_data = [
-        ['Emisor', 'Cliente'],
-        [
-            f"{emisor.get('nombre', 'N/A') if isinstance(emisor, dict) else str(emisor)}\n{emisor.get('nif', '') if isinstance(emisor, dict) else ''}\n{emisor.get('direccion', '') if isinstance(emisor, dict) else ''}",
-            f"{cliente.get('nombre', 'N/A') if isinstance(cliente, dict) else str(cliente)}\n{cliente.get('nif', '') if isinstance(cliente, dict) else ''}"
-        ]
-    ]
-    partes_table = Table(partes_data, colWidths=[3.25*inch, 3.25*inch])
-    partes_table.setStyle(TableStyle([
-        ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
-        ('FONTSIZE', (0, 0), (-1, 0), 11),
-        ('TEXTCOLOR', (0, 0), (-1, 0), colors.HexColor('#1a237e')),
-        ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor('#e8eaf6')),
-        ('FONTNAME', (0, 1), (-1, -1), 'Helvetica'),
-        ('FONTSIZE', (0, 1), (-1, -1), 9),
-        ('VALIGN', (0, 0), (-1, -1), 'TOP'),
-        ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
-        ('BOX', (0, 0), (-1, -1), 1.5, colors.HexColor('#7986cb')),
-        ('INNERGRID', (0, 0), (-1, -1), 0.5, colors.HexColor('#c5cae9')),
-        ('TOPPADDING', (0, 0), (-1, -1), 12),
-        ('BOTTOMPADDING', (0, 0), (-1, -1), 12),
-        ('LEFTPADDING', (0, 0), (-1, -1), 12),
-    ]))
-    story.append(partes_table)
-    story.append(Spacer(1, 0.3*inch))
-    # Productos elegantes
-    productos = datos_json.get('productos', datos_json.get('conceptos', []))
-    if productos:
-        productos_data = [['Descripción', 'Cant.', 'Precio Unitario', 'Total']]
-        for prod in productos:
-            productos_data.append([
-                str(prod.get('descripcion', '')),
-                str(prod.get('cantidad', '')),
-                f"{prod.get('precio_unitario', 0):.2f} €",
-                f"{prod.get('total', 0):.2f} €"
-            ])
-        productos_table = Table(productos_data, colWidths=[2.8*inch, 0.8*inch, 1.4*inch, 1.5*inch])
-        productos_table.setStyle(TableStyle([
-            ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
-            ('FONTSIZE', (0, 0), (-1, 0), 10),
-            ('TEXTCOLOR', (0, 0), (-1, 0), colors.white),
-            ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor('#5c6bc0')),
-            ('ALIGN', (0, 0), (0, -1), 'LEFT'),
-            ('ALIGN', (1, 0), (-1, -1), 'RIGHT'),
-            ('FONTNAME', (0, 1), (-1, -1), 'Helvetica'),
-            ('FONTSIZE', (0, 1), (-1, -1), 9),
-            ('BOX', (0, 0), (-1, -1), 1, colors.HexColor('#7986cb')),
-            ('LINEBELOW', (0, 0), (-1, 0), 1.5, colors.HexColor('#3f51b5')),
-            ('ROWBACKGROUNDS', (0, 1), (-1, -1), [colors.white, colors.HexColor('#fafafa')]),
-            ('TOPPADDING', (0, 0), (-1, -1), 10),
-            ('BOTTOMPADDING', (0, 0), (-1, -1), 10),
-        ]))
-        story.append(productos_table)
-        story.append(Spacer(1, 0.3*inch))
-    # Totales elegantes
-    totales = datos_json.get('totales', {})
-    base = totales.get('base_imponible', datos_json.get('base_imponible', 0))
-    iva = totales.get('iva', datos_json.get('iva', 0))
-    porcentaje_iva = totales.get('porcentaje_iva', datos_json.get('porcentaje_iva', 0))
-    total = totales.get('total', datos_json.get('total', 0))
-    totales_data = [
-        ['', 'Base Imponible:', f"{base:.2f} €"],
-        ['', f'IVA ({porcentaje_iva}%):', f"{iva:.2f} €"],
-        ['', '', ''],
-        ['', 'TOTAL A PAGAR:', f"{total:.2f} €"]
-    ]
-    totales_table = Table(totales_data, colWidths=[2.5*inch, 2.5*inch, 1.5*inch])
-    totales_table.setStyle(TableStyle([
-        ('FONTNAME', (1, 0), (-1, 2), 'Helvetica'),
-        ('FONTNAME', (1, 3), (-1, 3), 'Helvetica-Bold'),
-        ('FONTSIZE', (1, 0), (-1, 2), 10),
-        ('FONTSIZE', (1, 3), (-1, 3), 14),
-        ('ALIGN', (1, 0), (1, -1), 'RIGHT'),
-        ('ALIGN', (2, 0), (2, -1), 'RIGHT'),
-        ('BACKGROUND', (1, 3), (-1, 3), colors.HexColor('#1a237e')),
-        ('TEXTCOLOR', (1, 3), (-1, 3), colors.white),
-        ('TOPPADDING', (0, 0), (-1, -1), 8),
-        ('BOTTOMPADDING', (0, 0), (-1, -1), 8),
-        ('RIGHTPADDING', (0, 0), (-1, -1), 12),
-        ('LEFTPADDING', (1, 3), (-1, 3), 12),
-    ]))
-    story.append(totales_table)
     doc.build(story)
     return pdf_filename
 # ============= FUNCIÓN PRINCIPAL =============
 def procesar_factura(pdf_file):
     if pdf_file is None:
-        return "", None, None, "", "", None, None
     print("\n--- Extrayendo texto del PDF...")
     texto = extraer_texto_pdf(pdf_file)
     if texto.startswith("Error"):
-        return "", None, None, "", f"Error: {texto}", None, None
     texto_preview = f"{texto[:1500]}..." if len(texto) > 1500 else texto
@@ -656,7 +546,7 @@ def procesar_factura(pdf_file):
     datos_json, resumen_util, mensaje = analizar_y_convertir_json(texto)
     if not datos_json:
-        return texto_preview, None, None, "", mensaje, None, None
     print("--- Convirtiendo JSON a CSV...")
     df = json_a_csv(datos_json)
@@ -668,29 +558,19 @@ def procesar_factura(pdf_file):
     df.to_csv(csv_filename, index=False, encoding='utf-8-sig')
     resumen_tecnico = f"""## Factura Procesada Exitosamente
 **Consulta más información abajo**
 ---
 ### Estructura JSON Generada
 ```json
 {json.dumps(datos_json, indent=2, ensure_ascii=False)}
 ```
 ---
 ### Información del Archivo CSV
 **Nombre del archivo:** `{csv_filename}`
 **Total de filas:** {len(df)}
 **Formato:** UTF-8 con BOM
 ---
 ### Datos Principales Extraídos
 **Número de factura:** {datos_json.get('numero_factura', 'N/A')}
 **Fecha de emisión:** {datos_json.get('fecha', 'N/A')}
 **Productos/Servicios:** {len(datos_json.get('productos', datos_json.get('conceptos', [])))} items
@@ -698,7 +578,7 @@ def procesar_factura(pdf_file):
 """
     print(f"--- CSV guardado: {csv_filename}")
-    return texto_preview, df, csv_filename, resumen_tecnico, resumen_util, datos_json, csv_filename
 # ============= GENERAR PDF CON TEMPLATE SELECCIONADO =============
 def generar_pdf_con_template(template, csv_file, datos_json):
@@ -720,130 +600,271 @@ def generar_pdf_con_template(template, csv_file, datos_json):
         return None, f"Error al generar PDF: {str(e)}"
 # ============= INTERFAZ GRADIO =============
-with gr.Blocks(title="Extractor y Generador de Facturas") as demo:
     datos_json_state = gr.State()
     csv_file_state = gr.State()
     gr.Markdown("""
-    # Extractor y Generador de Facturas
-    ### Extrae datos de facturas PDF y genera automáticamente tu archivo CSV
     """)
     gr.Markdown("---")
-    with gr.Row():
-        # COLUMNA IZQUIERDA
-        with gr.Column(scale=1):
-            gr.Markdown("### Extraer Datos")
-            gr.Markdown("")
-            pdf_input = gr.File(
-                label="Subir factura PDF para extraer datos",
-                file_types=[".pdf"],
-                type="filepath"
-            )
-            gr.Markdown("")
-            btn_extraer = gr.Button(
-                "Extraer Datos de la Factura",
-                variant="primary",
-                size="lg"
-            )
-            gr.Markdown("")
-            gr.Markdown("---")
-            gr.Markdown("")
-            csv_output = gr.File(label="Descargar CSV con los datos extraídos")
-            gr.Markdown("")
-            gr.Markdown("---")
-            gr.Markdown("")
-            # Generador de PDF
-            gr.Markdown("### Rediseñar PDF")
-            gr.Markdown("")
-            template_selector = gr.Radio(
-                choices=["Clásico", "Moderno", "Elegante"],
-                value="Moderno",
-                label="Seleccionar estilo de factura",
-                info="Elige el diseño que prefieras"
-            )
-            gr.Markdown("")
-            btn_generar_pdf = gr.Button(
-                "Generar Factura PDF",
-                variant="secondary",
-                size="lg"
-            )
-            gr.Markdown("")
-            pdf_output = gr.File(label="Descargar factura PDF generada")
-            pdf_status = gr.Textbox(
-                label="Estado",
-                interactive=False,
-                lines=2
-            )
-        # COLUMNA DERECHA
-        with gr.Column(scale=2):
-            gr.Markdown("### Resultados del Análisis")
-            gr.Markdown("")
-            gr.Markdown("#### Información Útil para Administrativos")
-            info_util = gr.Markdown(
-                value="*Aquí aparecerá información relevante una vez procesada la factura*"
-            )
-            gr.Markdown("")
-            gr.Markdown("---")
-            gr.Markdown("")
-            with gr.Tabs():
-                with gr.Tab("Vista Previa CSV"):
-                    gr.Markdown("")
-                    tabla_preview = gr.DataFrame(
-                        label="Datos extraídos estructurados",
-                        wrap=True,
-                        interactive=False
                     )
-                with gr.Tab("Texto Original"):
-                    gr.Markdown("")
-                    texto_extraido = gr.Textbox(
-                        label="Texto extraído del PDF",
-                        lines=18,
-                        max_lines=25
-                    )
-                with gr.Tab("Más información"):
-                    gr.Markdown("")
-                    resumen_tecnico = gr.Markdown(label="Estructura de datos y metadatos")
-    gr.Markdown("")
     gr.Markdown("---")
-    gr.Markdown("")
-    # Conectar botones
     btn_extraer.click(
         fn=procesar_factura,
         inputs=[pdf_input],
-        outputs=[texto_extraido, tabla_preview, csv_output, resumen_tecnico, info_util, datos_json_state, csv_file_state]
     )
     btn_generar_pdf.click(
         fn=generar_pdf_con_template,
         inputs=[template_selector, csv_file_state, datos_json_state],
         outputs=[pdf_output, pdf_status]
     )
 if __name__ == "__main__":
     demo.launch()

 from reportlab.lib.pagesizes import letter, A4
 from reportlab.lib import colors
 from reportlab.lib.units import inch
+from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph, Spacer
 from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
 from reportlab.lib.enums import TA_CENTER, TA_RIGHT, TA_LEFT
+from pdf2image import convert_from_path
+import base64
+from io import BytesIO
+from PIL import Image as PILImage
+# ============= CONVERTIR PDF A IMÁGENES =============
def pdf_to_images(pdf_path, dpi=200, first_page=None, last_page=None):
    """Render the pages of a PDF file as images.

    Args:
        pdf_path: Path of the PDF file to rasterize.
        dpi: Rendering resolution forwarded to ``convert_from_path``
            (defaults to 200, the value previously hard-coded here).
        first_page: First page to render, forwarded to ``convert_from_path``;
            ``None`` keeps the library default.
        last_page: Last page to render, forwarded to ``convert_from_path``;
            ``None`` keeps the library default. Callers that only need the
            first page can pass ``first_page=1, last_page=1`` to avoid
            rasterizing the whole document.

    Returns:
        The list of images produced by ``convert_from_path``, or an empty
        list when the path is empty or the conversion fails for any reason.
    """
    # An empty/None path can never be converted; fail fast with the same
    # "empty list" contract used for conversion errors.
    if not pdf_path:
        print("Error convirtiendo PDF a imágenes: ruta de PDF vacía")
        return []
    try:
        return convert_from_path(
            pdf_path,
            dpi=dpi,
            first_page=first_page,
            last_page=last_page,
        )
    except Exception as e:
        # Best-effort by design: callers test the result with ``if not images``.
        print(f"Error convirtiendo PDF a imágenes: {str(e)}")
        return []
 # ============= EXTRAER TEXTO DEL PDF =============
 def extraer_texto_pdf(pdf_file):
     except Exception as e:
         return f"Error: {str(e)}"
+# ============= VQA - VISUAL QUESTION ANSWERING =============
def analizar_con_vqa(pdf_path, pregunta_usuario="¿Qué información contiene esta factura?"):
    """Ask a question about the first page of a PDF using VQA models.

    The first page is rendered with ``pdf_to_images`` and sent, together with
    the question, to each model in a fixed list through
    ``InferenceClient.visual_question_answering``.

    Args:
        pdf_path: Path of the PDF whose first page is analysed.
        pregunta_usuario: Question to ask about the page image.

    Returns:
        A Markdown string with one entry per model (answer or truncated error
        text), or an error message when the token is missing or the PDF
        cannot be rendered.
    """
    # The app stores its token under "aa", but the error message below tells
    # users to configure HF_TOKEN, so that name is honoured as well.
    token = os.getenv("aa") or os.getenv("HF_TOKEN")
    if not token:
        return "❌ Error: Falta configurar HF_TOKEN en Settings → Secrets"

    images = pdf_to_images(pdf_path)
    if not images:
        return "❌ No se pudo convertir el PDF a imagen"

    # Serialize the page to PNG bytes: raw bytes are accepted by every
    # InferenceClient version, whereas image objects are not.
    buffer = BytesIO()
    images[0].save(buffer, format="PNG")
    imagen_bytes = buffer.getvalue()

    modelos_vqa = [
        "dandelin/vilt-b32-finetuned-vqa",
        "Salesforce/blip-vqa-base",
        "Salesforce/blip2-opt-2.7b",
    ]

    client = InferenceClient(token=token)
    resultados = []

    for modelo in modelos_vqa:
        try:
            print(f"\n🔍 Probando VQA con: {modelo}")
            result = client.visual_question_answering(
                image=imagen_bytes,
                question=pregunta_usuario,
                model=modelo,
            )
            if isinstance(result, list):
                # An empty candidate list is reported explicitly instead of
                # surfacing as an opaque "list index out of range" error.
                respuesta = result[0]['answer'] if result else "(sin respuesta)"
            else:
                respuesta = str(result)
            resultados.append(f"**🤖 {modelo}**\n📝 Respuesta: {respuesta}\n")
            print(f"✅ Éxito con {modelo}")
        except Exception as e:
            # One failing model must not prevent the others from being tried.
            print(f"❌ Error con {modelo}: {str(e)}")
            resultados.append(f"**{modelo}**: Error - {str(e)[:100]}\n")

    if resultados:
        return "\n".join(resultados)
    return "❌ No se pudo procesar con modelos VQA"
+# ============= DOCUMENT QA - QUESTION ANSWERING SOBRE TEXTO =============
def analizar_con_document_qa(texto, pregunta_usuario="¿Cuál es el total de la factura?"):
    """Answer a question about extracted invoice text with extractive QA models.

    The first 3000 characters of ``texto`` are sent as context to each model
    in a fixed list through ``InferenceClient.question_answering``.

    Args:
        texto: Text previously extracted from the document.
        pregunta_usuario: Question to answer from that text.

    Returns:
        A Markdown string with one entry per model (answer and score, or a
        truncated error text), or an error message when the token is missing
        or there is no text to analyse.
    """
    # The app stores its token under "aa", but the error message below tells
    # users to configure HF_TOKEN, so that name is honoured as well.
    token = os.getenv("aa") or os.getenv("HF_TOKEN")
    if not token:
        return "❌ Error: Falta configurar HF_TOKEN"

    # Without context every model call would fail; report it once instead of
    # issuing three doomed remote requests (also covers ``texto=None``).
    if not texto or not texto.strip():
        return "❌ No hay texto para analizar"

    texto_limpio = texto[:3000]  # keep the context within model limits

    modelos_qa = [
        "deepset/roberta-base-squad2",
        "distilbert-base-cased-distilled-squad",
        "bert-large-uncased-whole-word-masking-finetuned-squad",
    ]

    client = InferenceClient(token=token)
    resultados = []

    for modelo in modelos_qa:
        try:
            print(f"\n📄 Probando Document QA con: {modelo}")
            response = client.question_answering(
                question=pregunta_usuario,
                context=texto_limpio,
                model=modelo,
            )
            # Some client versions return a list of candidates; use the first.
            if isinstance(response, list):
                response = response[0]
            respuesta = response['answer']
            confianza = response['score']
            resultados.append(
                f"**🤖 {modelo}**\n"
                f"📝 Respuesta: **{respuesta}**\n"
                f"📊 Confianza: {confianza:.2%}\n"
            )
            print(f"✅ Éxito con {modelo}")
        except Exception as e:
            # One failing model must not prevent the others from being tried;
            # keep a short error detail, as the VQA helper does.
            print(f"❌ Error con {modelo}: {str(e)}")
            resultados.append(f"**{modelo}**: Error - {str(e)[:100]}\n")

    if resultados:
        return "\n".join(resultados)
    return "❌ No se pudo procesar con modelos Document QA"
+# ============= LAYOUT DOCUMENT QA =============
def analizar_con_layout_qa(pdf_path, texto, pregunta_usuario="¿Cuál es el número de factura?"):
    """Answer a question about a document using layout-aware QA models.

    When the first page of ``pdf_path`` can be rendered, each model is queried
    through ``InferenceClient.document_question_answering`` with the page
    image. When no image is available, the function falls back to the previous
    behaviour: text-only ``question_answering`` over the first 2500 characters
    of ``texto``.

    Args:
        pdf_path: Path of the PDF; may be empty/None to force the text fallback.
        texto: Text previously extracted from the document (fallback context).
        pregunta_usuario: Question to answer.

    Returns:
        A Markdown string with one entry per model (answer and score, or
        "No disponible"), or an error message when the token is missing or
        there is neither an image nor text to analyse.
    """
    # The app stores its token under "aa", but the error message below tells
    # users to configure HF_TOKEN, so that name is honoured as well.
    token = os.getenv("aa") or os.getenv("HF_TOKEN")
    if not token:
        return "❌ Error: Falta configurar HF_TOKEN"

    # Layout models need the page image; previously ``pdf_path`` was ignored
    # and only plain text was sent, which discards all layout information.
    imagen_bytes = None
    if pdf_path:
        images = pdf_to_images(pdf_path)
        if images:
            buffer = BytesIO()
            images[0].save(buffer, format="PNG")
            imagen_bytes = buffer.getvalue()

    texto_limpio = (texto or "")[:2500]
    if imagen_bytes is None and not texto_limpio.strip():
        return "❌ No hay contenido para analizar"

    modelos_layout = [
        "impira/layoutlm-document-qa",
        "microsoft/layoutlmv2-base-uncased",
        "nielsr/layoutlmv3-finetuned-funsd",
    ]

    client = InferenceClient(token=token)
    resultados = []

    for modelo in modelos_layout:
        try:
            print(f"\n📐 Probando Layout Document QA con: {modelo}")
            if imagen_bytes is not None:
                response = client.document_question_answering(
                    image=imagen_bytes,
                    question=pregunta_usuario,
                    model=modelo,
                )
            else:
                # Fallback: extractive QA over the extracted text only.
                response = client.question_answering(
                    question=pregunta_usuario,
                    context=texto_limpio,
                    model=modelo,
                )
            # Document QA returns a list of candidates; use the first one.
            if isinstance(response, list):
                response = response[0]
            respuesta = response['answer']
            confianza = response['score']
            # NOTE(review): score may be absent for some models — confirm.
            confianza_txt = f"{confianza:.2%}" if confianza is not None else "N/D"
            resultados.append(
                f"**🤖 {modelo}**\n"
                f"📝 Respuesta: **{respuesta}**\n"
                f"📊 Confianza: {confianza_txt}\n"
            )
            print(f"✅ Éxito con {modelo}")
        except Exception as e:
            # One failing model must not prevent the others from being tried.
            print(f"❌ Error con {modelo}: {str(e)}")
            resultados.append(f"**{modelo}**: No disponible\n")

    if resultados:
        return "\n".join(resultados)
    return "❌ No se pudo procesar con modelos Layout QA"
+# ============= VISUAL DOCUMENT UNDERSTANDING CON MODELOS DE HF =============
def analizar_documento_visual_hf(pdf_path):
    """Run image-to-text models over the first page of a PDF.

    The first page is rendered with ``pdf_to_images`` and sent to each model
    in a fixed list through ``InferenceClient.image_to_text``.

    Args:
        pdf_path: Path of the PDF whose first page is analysed.

    Returns:
        A ``(details, status)`` tuple. ``details`` is a Markdown string with
        one entry per model, or ``None`` when the token is missing or the PDF
        cannot be rendered. ``status`` is the success message only if at
        least one model produced text; otherwise it is a failure message.
    """
    # The app stores its token under "aa", but the error message below tells
    # users to configure HF_TOKEN, so that name is honoured as well.
    token = os.getenv("aa") or os.getenv("HF_TOKEN")
    if not token:
        return None, "❌ Error: Falta configurar HF_TOKEN"

    images = pdf_to_images(pdf_path)
    if not images:
        return None, "❌ No se pudo convertir el PDF"

    # Serialize the page to PNG bytes: raw bytes are accepted by every
    # InferenceClient version, whereas image objects are not.
    buffer = BytesIO()
    images[0].save(buffer, format="PNG")
    imagen_bytes = buffer.getvalue()

    modelos_visuales = [
        "microsoft/trocr-large-printed",
        "Salesforce/blip-image-captioning-large",
        "nlpconnect/vit-gpt2-image-captioning",
    ]

    client = InferenceClient(token=token)
    resultados = []
    exitos = 0  # number of models that actually returned text

    for modelo in modelos_visuales:
        try:
            print(f"\n🖼️ Probando Visual Document con: {modelo}")
            response = client.image_to_text(
                image=imagen_bytes,
                model=modelo,
            )
            # The response may be a plain string, a mapping, or an object
            # exposing ``generated_text`` depending on the client version.
            if isinstance(response, str):
                texto_extraido = response
            else:
                texto_extraido = getattr(response, 'generated_text', None)
                if texto_extraido is None and isinstance(response, dict):
                    texto_extraido = response.get('generated_text')
                if texto_extraido is None:
                    texto_extraido = str(response)
            resultados.append(f"**🤖 {modelo}**\n📝 Texto extraído:\n{texto_extraido}\n")
            exitos += 1
            print(f"✅ Éxito con {modelo}")
        except Exception as e:
            # One failing model must not prevent the others from being tried.
            print(f"❌ Error con {modelo}: {str(e)}")
            resultados.append(f"**{modelo}**: Error\n")

    if not resultados:
        return None, "❌ No se pudo procesar visualmente"
    if exitos == 0:
        # Keep the per-model details but do not report success when every
        # model failed.
        return "\n".join(resultados), "❌ No se pudo procesar visualmente"
    return "\n".join(resultados), "✅ Procesado con modelos visuales"
+# ============= DOCUMENT RETRIEVAL - BÚSQUEDA EN DOCUMENTOS =============
+def buscar_en_documento(texto, consulta="información sobre el emisor"):
+    """Usa modelos de embeddings para búsqueda semántica en documentos"""
+    token = os.getenv("aa")
+    if not token:
+        return "❌ Error: Falta configurar HF_TOKEN"
+    # Modelos de embeddings para búsqueda semántica
+    modelos_retrieval = [
+        "sentence-transformers/all-MiniLM-L6-v2",
+        "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
+    ]
+    client = InferenceClient(token=token)
+    # Dividir el texto en fragmentos
+    fragmentos = [texto[i:i+500] for i in range(0, min(len(texto), 3000), 500)]
+    resultados = []
+    for modelo in modelos_retrieval:
+        try:
+            print(f"\n🔎 Probando Document Retrieval con: {modelo}")
+            # Generar embedding de la consulta
+            query_embedding = client.feature_extraction(
+                text=consulta,
+                model=modelo
+            )
+            # Buscar fragmentos más relevantes
+            scores = []
+            for i, frag in enumerate(fragmentos):
+                try:
+                    frag_embedding = client.feature_extraction(
+                        text=frag,
+                        model=modelo
+                    )
+                    # Calcular similitud (simplificado)
+                    scores.append((i, frag))
+                except:
+                    continue
+            if scores:
+                # Tomar los 2 fragmentos más relevantes
+                top_frags = scores[:2]
+                resultado_texto = "\n\n".join([f"**Fragmento {i+1}:**\n{frag[:300]}..." for i, frag in top_frags])
+                resultados.append(
+                    f"**🤖 {modelo}**\n"
+                    f"📍 Fragmentos relevantes encontrados:\n{resultado_texto}\n"
+                )
+                print(f"✅ Éxito con {modelo}")
+        except Exception as e:
+            print(f"❌ Error con {modelo}: {str(e)}")
+            resultados.append(f"**{modelo}**: Error\n")
+    if resultados:
+        return "\n".join(resultados)
+    return "❌ No se pudo realizar búsqueda en el documento"
 # ============= ANALIZAR CON LLM Y CONVERTIR A JSON =============
 def analizar_y_convertir_json(texto):
+    """El LLM lee la factura y devuelve JSON estructurado"""
     token = os.getenv("aa")
     if not token:
     texto_limpio = texto[:8000]
     prompt = f"""Eres un experto en análisis de facturas. Lee esta factura y conviértela a JSON.
 TEXTO DE LA FACTURA:
 {texto_limpio}
 INSTRUCCIONES:
 1. Analiza el texto y decide qué información es importante extraer
 2. Crea un JSON estructurado con TODOS los datos que encuentres
 3. Incluye: número de factura, fecha, emisor, cliente, productos/servicios, importes
 4. Para los números: usa formato numérico puro (ejemplo: 250 no "250€")
 5. Si hay tabla de productos, extrae CADA producto con cantidad, precio y total
 FORMATO JSON (ajusta según lo que encuentres):
 {{
   "numero_factura": "string",
     "total": number
   }}
 }}
 Responde SOLO con el JSON válido (sin explicaciones, sin markdown):"""
     modelos = [
     """Genera un resumen con información útil para administrativos"""
     prompt_resumen = f"""Analiza esta factura y proporciona información útil para un administrativo o usuario medio.
 TEXTO DE LA FACTURA:
 {texto[:6000]}
 Genera un resumen estructurado con:
 1. ESTADO DE PAGO: ¿Está pagada? ¿Fecha de vencimiento?
 2. INFORMACIÓN CLAVE: Datos importantes que destacar
 3. ALERTAS: Cualquier aspecto que requiera atención (vencimientos, importes altos, etc.)
 4. RESUMEN EJECUTIVO: Descripción breve y clara de la factura
 Responde en español de forma clara y profesional:"""
     try:
     return pd.DataFrame(filas)
+# ============= GENERAR PDF TEMPLATES (MANTENIDOS DEL ORIGINAL) =============
 def generar_pdf_clasico(csv_file, datos_json):
+    """Build the "classic" invoice PDF and return its file name.
+
+    Args:
+        csv_file: Not read by the code visible in this function.
+        datos_json: Mapping queried with ``.get`` for ``numero_factura`` and
+            ``fecha``; a missing key is rendered as ``'N/A'``.
+
+    Returns:
+        The timestamped, relative file name passed to ``SimpleDocTemplate``.
+    """
     timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
     pdf_filename = f"factura_clasica_{timestamp}.pdf"
     doc = SimpleDocTemplate(pdf_filename, pagesize=A4)
     story = []
     styles = getSampleStyleSheet()
+    # Centered dark title derived from the stock Heading1 style.
+    titulo_style = ParagraphStyle('CustomTitle', parent=styles['Heading1'], fontSize=24,
+                                   textColor=colors.HexColor('#1a1a1a'), spaceAfter=30, alignment=TA_CENTER)
     story.append(Paragraph("FACTURA", titulo_style))
     story.append(Spacer(1, 0.3*inch))
+    # Two-column label/value table with the invoice header fields.
+    info_data = [['Número de Factura:', datos_json.get('numero_factura', 'N/A')],
+                 ['Fecha:', datos_json.get('fecha', 'N/A')]]
     info_table = Table(info_data, colWidths=[2*inch, 4*inch])
+    info_table.setStyle(TableStyle([('FONTNAME', (0, 0), (-1, -1), 'Helvetica'), ('FONTSIZE', (0, 0), (-1, -1), 11)]))
     story.append(info_table)
     doc.build(story)
     return pdf_filename
 def generar_pdf_moderno(csv_file, datos_json):
+    """Build the "modern" invoice PDF and return its file name.
+
+    Neither ``csv_file`` nor ``datos_json`` is read by the code visible in
+    this function; the document contains only the "FACTURA" title.
+
+    Returns:
+        The timestamped, relative file name passed to ``SimpleDocTemplate``.
+    """
     timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
     pdf_filename = f"factura_moderna_{timestamp}.pdf"
     doc = SimpleDocTemplate(pdf_filename, pagesize=A4)
     story = []
     styles = getSampleStyleSheet()
+    # Large left-aligned blue title in bold Helvetica.
+    titulo_style = ParagraphStyle('ModernTitle', parent=styles['Heading1'], fontSize=32,
+                                   textColor=colors.HexColor('#2196F3'), spaceAfter=10, alignment=TA_LEFT, fontName='Helvetica-Bold')
     story.append(Paragraph("FACTURA", titulo_style))
     doc.build(story)
     return pdf_filename
 def generar_pdf_elegante(csv_file, datos_json):
+    """Build the "elegant" invoice PDF and return its file name.
+
+    Neither ``csv_file`` nor ``datos_json`` is read by the code visible in
+    this function; the document contains only the spaced "F A C T U R A"
+    header.
+
+    Returns:
+        The timestamped, relative file name passed to ``SimpleDocTemplate``.
+    """
     timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
     pdf_filename = f"factura_elegante_{timestamp}.pdf"
     doc = SimpleDocTemplate(pdf_filename, pagesize=A4)
     story = []
     styles = getSampleStyleSheet()
+    # Centered dark-blue header in bold Helvetica.
+    header_style = ParagraphStyle('ElegantHeader', parent=styles['Heading1'], fontSize=28,
+                                   textColor=colors.HexColor('#1a237e'), spaceAfter=5, alignment=TA_CENTER, fontName='Helvetica-Bold')
     story.append(Paragraph("F A C T U R A", header_style))
     doc.build(story)
     return pdf_filename
 # ============= FUNCIÓN PRINCIPAL =============
 def procesar_factura(pdf_file):
     if pdf_file is None:
+        return "", None, None, "", "", None, None, pdf_file
     print("\n--- Extrayendo texto del PDF...")
     texto = extraer_texto_pdf(pdf_file)
     if texto.startswith("Error"):
+        return "", None, None, "", f"Error: {texto}", None, None, None
     texto_preview = f"{texto[:1500]}..." if len(texto) > 1500 else texto
     datos_json, resumen_util, mensaje = analizar_y_convertir_json(texto)
     if not datos_json:
+        return texto_preview, None, None, "", mensaje, None, None, pdf_file
     print("--- Convirtiendo JSON a CSV...")
     df = json_a_csv(datos_json)
     df.to_csv(csv_filename, index=False, encoding='utf-8-sig')
     resumen_tecnico = f"""## Factura Procesada Exitosamente
 **Consulta más información abajo**
 ---
 ### Estructura JSON Generada
 ```json
 {json.dumps(datos_json, indent=2, ensure_ascii=False)}
 ```
 ---
 ### Información del Archivo CSV
 **Nombre del archivo:** `{csv_filename}`
 **Total de filas:** {len(df)}
 **Formato:** UTF-8 con BOM
 ---
 ### Datos Principales Extraídos
 **Número de factura:** {datos_json.get('numero_factura', 'N/A')}
 **Fecha de emisión:** {datos_json.get('fecha', 'N/A')}
 **Productos/Servicios:** {len(datos_json.get('productos', datos_json.get('conceptos', [])))} items
 """
     print(f"--- CSV guardado: {csv_filename}")
+    return texto_preview, df, csv_filename, resumen_tecnico, resumen_util, datos_json, csv_filename, pdf_file
 # ============= GENERAR PDF CON TEMPLATE SELECCIONADO =============
 def generar_pdf_con_template(template, csv_file, datos_json):
         return None, f"Error al generar PDF: {str(e)}"
 # ============= INTERFAZ GRADIO =============
+with gr.Blocks(title="Extractor de Facturas con IA Avanzada", theme=gr.themes.Soft()) as demo:
     datos_json_state = gr.State()
     csv_file_state = gr.State()
+    pdf_path_state = gr.State()
+    texto_state = gr.State()
     gr.Markdown("""
+    # 🧠 Extractor y Generador de Facturas con IA Avanzada
+    ### Análisis multimodal con modelos de Hugging Face: VQA, Document QA y Visual Understanding
     """)
     gr.Markdown("---")
+    with gr.Tabs():
+        # ============= TAB 1: EXTRACCIÓN CLÁSICA =============
+        with gr.Tab("📄 Extracción Automática"):
+            with gr.Row():
+                with gr.Column(scale=1):
+                    gr.Markdown("### Subir Factura PDF")
+                    pdf_input = gr.File(label="Seleccionar factura PDF", file_types=[".pdf"], type="filepath")
+                    btn_extraer = gr.Button("🚀 Extraer Datos de la Factura", variant="primary", size="lg")
+                    gr.Markdown("---")
+                    csv_output = gr.File(label="📥 Descargar CSV generado")
+                    gr.Markdown("---")
+                    gr.Markdown("### 🎨 Rediseñar PDF")
+                    template_selector = gr.Radio(choices=["Clásico", "Moderno", "Elegante"], value="Moderno", label="Estilo de factura")
+                    btn_generar_pdf = gr.Button("Generar Factura PDF", variant="secondary", size="lg")
+                    pdf_output = gr.File(label="📥 Descargar PDF generado")
+                    pdf_status = gr.Textbox(label="Estado", interactive=False, lines=2)
+                with gr.Column(scale=2):
+                    gr.Markdown("### 📊 Resultados del Análisis")
+                    info_util = gr.Markdown(value="*Aquí aparecerá información relevante una vez procesada la factura*")
+                    gr.Markdown("---")
+                    with gr.Tabs():
+                        with gr.Tab("Vista Previa CSV"):
+                            tabla_preview = gr.DataFrame(label="Datos extraídos estructurados", wrap=True)
+                        with gr.Tab("Texto Original"):
+                            texto_extraido = gr.Textbox(label="Texto extraído del PDF", lines=18)
+                        with gr.Tab("Más información"):
+                            resumen_tecnico = gr.Markdown(label="Estructura de datos y metadatos")
+        # ============= TAB 2: VISUAL QUESTION ANSWERING =============
+        with gr.Tab("🔍 Visual Question Answering"):
+            gr.Markdown("""
+            ### 🤖 Pregúntale a la IA sobre la imagen de tu factura
+            Los modelos VQA analizan visualmente el documento y responden preguntas específicas.
+            """)
+            with gr.Row():
+                with gr.Column():
+                    pdf_vqa_input = gr.File(label="PDF para VQA (o usa el ya cargado)", file_types=[".pdf"], type="filepath")
+                    pregunta_vqa = gr.Textbox(
+                        label="Tu pregunta sobre la factura",
+                        placeholder="¿Cuál es el total de la factura?",
+                        value="¿Qué información importante contiene esta factura?"
+                    )
+                    btn_vqa = gr.Button("🔍 Analizar con VQA", variant="primary")
+                with gr.Column():
+                    resultado_vqa = gr.Markdown(label="Respuestas de modelos VQA")
+            gr.Markdown("""
+            **Modelos utilizados:**
+            - `dandelin/vilt-b32-finetuned-vqa` - Vision-and-Language Transformer
+            - `Salesforce/blip-vqa-base` - BLIP VQA Base
+            - `Salesforce/blip2-opt-2.7b` - BLIP-2 con OPT-2.7B
+            """)
+        # ============= TAB 3: DOCUMENT QUESTION ANSWERING =============
+        with gr.Tab("📝 Document Question Answering"):
+            gr.Markdown("""
+            ### 💬 Pregunta sobre el contenido del texto
+            Los modelos Document QA extraen información específica del texto de la factura.
+            """)
+            with gr.Row():
+                with gr.Column():
+                    pregunta_doc_qa = gr.Textbox(
+                        label="Pregunta sobre el documento",
+                        placeholder="¿Cuál es el NIF del emisor?",
+                        value="¿Cuál es el total de la factura?"
+                    )
+                    btn_doc_qa = gr.Button("📝 Analizar con Document QA", variant="primary")
+                with gr.Column():
+                    resultado_doc_qa = gr.Markdown(label="Respuestas de modelos Document QA")
+            gr.Markdown("""
+            **Modelos utilizados:**
+            - `deepset/roberta-base-squad2` - RoBERTa entrenado en SQuAD 2.0
+            - `distilbert-base-cased-distilled-squad` - DistilBERT optimizado
+            - `bert-large-uncased-whole-word-masking-finetuned-squad` - BERT Large
+            """)
+        # ============= TAB 4: LAYOUT DOCUMENT QA =============
+        with gr.Tab("📐 Layout Document QA"):
+            gr.Markdown("""
+            ### 🏗️ Análisis con comprensión del layout visual
+            Los modelos LayoutLM entienden la estructura visual del documento (tablas, columnas, etc.)
+            """)
+            with gr.Row():
+                with gr.Column():
+                    pregunta_layout = gr.Textbox(
+                        label="Pregunta sobre el documento",
+                        placeholder="¿Cuál es el número de factura?",
+                        value="¿Cuál es el número de factura?"
+                    )
+                    btn_layout_qa = gr.Button("📐 Analizar con Layout QA", variant="primary")
+                with gr.Column():
+                    resultado_layout = gr.Markdown(label="Respuestas de modelos Layout QA")
+            gr.Markdown("""
+            **Modelos utilizados:**
+            - `impira/layoutlm-document-qa` - LayoutLM para Document QA
+            - `microsoft/layoutlmv2-base-uncased` - LayoutLM v2 Base
+            - `nielsr/layoutlmv3-finetuned-funsd` - LayoutLM v3 Fine-tuned
+            """)
+        # ============= TAB 5: VISUAL DOCUMENT UNDERSTANDING =============
+        with gr.Tab("🖼️ Visual Document Understanding"):
+            gr.Markdown("""
+            ### 🎯 Comprensión visual completa del documento
+            Modelos multimodales que procesan la imagen del documento directamente.
+            """)
+            with gr.Row():
+                with gr.Column():
+                    btn_visual_doc = gr.Button("🖼️ Analizar Documento Visualmente", variant="primary", size="lg")
+                with gr.Column():
+                    resultado_visual_doc = gr.Markdown(label="Resultados de análisis visual")
+                    status_visual_doc = gr.Textbox(label="Estado", interactive=False)
+            gr.Markdown("""
+            **Modelos utilizados:**
+            - `microsoft/trocr-large-printed` - TrOCR para texto impreso
+            - `Salesforce/blip-image-captioning-large` - BLIP Image Captioning
+            - `nlpconnect/vit-gpt2-image-captioning` - ViT + GPT2 Captioning
+            """)
+        # ============= TAB 6: DOCUMENT RETRIEVAL =============
+        with gr.Tab("🔎 Document Retrieval"):
+            gr.Markdown("""
+            ### 🎯 Búsqueda semántica en el documento
+            Encuentra fragmentos relevantes usando embeddings y similitud semántica.
+            """)
+            with gr.Row():
+                with gr.Column():
+                    consulta_retrieval = gr.Textbox(
+                        label="¿Qué información buscas?",
+                        placeholder="información sobre el emisor",
+                        value="información sobre el emisor"
                     )
+                    btn_retrieval = gr.Button("🔎 Buscar en Documento", variant="primary")
+                with gr.Column():
+                    resultado_retrieval = gr.Markdown(label="Fragmentos relevantes encontrados")
+            gr.Markdown("""
+            **Modelos utilizados:**
+            - `sentence-transformers/all-MiniLM-L6-v2` - Embeddings multilingües
+            - `sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2` - Paraphrase ML
+            """)
     gr.Markdown("---")
+    gr.Markdown("""
+    ### 📚 Información sobre los modelos
+    **Visual Question Answering (VQA):** Responde preguntas sobre imágenes usando visión y lenguaje.
+    **Document QA:** Extrae información específica del texto usando modelos de comprensión lectora.
+    **Layout Document QA:** Entiende la estructura visual (tablas, columnas) además del texto.
+    **Visual Document Understanding:** Procesa documentos como imágenes para OCR y comprensión completa.
+    **Document Retrieval:** Búsqueda semántica de información relevante en el documento.
+    ---
+    💡 **Tip:** Procesa primero la factura en la pestaña "Extracción Automática" y luego explora las demás funcionalidades de IA.
+    """)
+    # ============= CONECTAR EVENTOS =============
+    # Extracción automática
     btn_extraer.click(
         fn=procesar_factura,
         inputs=[pdf_input],
+        outputs=[texto_extraido, tabla_preview, csv_output, resumen_tecnico, info_util,
+                datos_json_state, csv_file_state, pdf_path_state]
     )
+    # Generar PDF
     btn_generar_pdf.click(
         fn=generar_pdf_con_template,
         inputs=[template_selector, csv_file_state, datos_json_state],
         outputs=[pdf_output, pdf_status]
     )
+    # Visual Question Answering
+    def ejecutar_vqa(pdf_vqa, pdf_auto, pregunta):
+        pdf_path = pdf_vqa if pdf_vqa else pdf_auto
+        if not pdf_path:
+            return "❌ Por favor, sube un PDF primero"
+        return analizar_con_vqa(pdf_path, pregunta)
+    btn_vqa.click(
+        fn=ejecutar_vqa,
+        inputs=[pdf_vqa_input, pdf_path_state, pregunta_vqa],
+        outputs=[resultado_vqa]
+    )
+    # Document Question Answering
+    def ejecutar_doc_qa(texto, pregunta):
+        if not texto:
+            return "❌ Por favor, procesa una factura primero en la pestaña 'Extracción Automática'"
+        return analizar_con_document_qa(texto, pregunta)
+    btn_doc_qa.click(
+        fn=ejecutar_doc_qa,
+        inputs=[texto_extraido, pregunta_doc_qa],
+        outputs=[resultado_doc_qa]
+    )
+    # Layout Document QA
+    def ejecutar_layout_qa(pdf_path, texto, pregunta):
+        if not pdf_path or not texto:
+            return "❌ Por favor, procesa una factura primero en la pestaña 'Extracción Automática'"
+        return analizar_con_layout_qa(pdf_path, texto, pregunta)
+    btn_layout_qa.click(
+        fn=ejecutar_layout_qa,
+        inputs=[pdf_path_state, texto_extraido, pregunta_layout],
+        outputs=[resultado_layout]
+    )
+    # Visual Document Understanding
+    def ejecutar_visual_doc(pdf_path):
+        if not pdf_path:
+            return "❌ Por favor, procesa una factura primero en la pestaña 'Extracción Automática'", ""
+        return analizar_documento_visual_hf(pdf_path)
+    btn_visual_doc.click(
+        fn=ejecutar_visual_doc,
+        inputs=[pdf_path_state],
+        outputs=[resultado_visual_doc, status_visual_doc]
+    )
+    # Document Retrieval
+    def ejecutar_retrieval(texto, consulta):
+        if not texto:
+            return "❌ Por favor, procesa una factura primero en la pestaña 'Extracción Automática'"
+        return buscar_en_documento(texto, consulta)
+    btn_retrieval.click(
+        fn=ejecutar_retrieval,
+        inputs=[texto_extraido, consulta_retrieval],
+        outputs=[resultado_retrieval]
+    )
 if __name__ == "__main__":
     demo.launch()