Spaces:

DaniFera
/

PDFree

Sleeping

App Files Files Community

DaniFera committed on Jan 16

Commit

df4812e

verified ·

1 Parent(s): 8368ba5

Update core.py

Browse files

Files changed (1) hide show

core.py +258 -257

core.py CHANGED Viewed

@@ -1,11 +1,11 @@
-# Versión 2.0: Core Completo con Excel y PowerPoint
 import os
 import zipfile
 import uuid
 import subprocess
-import cv2
-import numpy as np
 import pdfplumber
 import pandas as pd
 from pypdf import PdfWriter, PdfReader
@@ -14,19 +14,20 @@ from pdf2docx import Converter
 from PIL import Image
 from pptx import Presentation
 from pptx.util import Inches
 from config import TEMP_DIR
 class PDFEngine:
-    """
-    Clase principal que encapsula toda la lógica de manipulación de archivos.
-    Sigue el principio de Responsabilidad Única (SRP).
-    """
-    # --- UTILIDADES INTERNAS ---
     @staticmethod
     def _get_output_path(filename: str) -> str:
-        """Genera una ruta única en el directorio temporal."""
         unique_name = f"{uuid.uuid4().hex[:8]}_{filename}"
         return os.path.join(TEMP_DIR, unique_name)
@@ -36,10 +37,10 @@ class PDFEngine:
             meta = reader.metadata
             title = meta.title if meta and meta.title else "Sin título"
             return {"pages": len(reader.pages), "name": os.path.basename(file_path), "title": title}
-        except Exception:
-            return {"pages": 0, "name": "Error", "title": ""}
     def _parse_range_groups(self, range_str: str, max_pages: int) -> list:
         groups = []
         parts = range_str.split(',')
         for part in parts:
@@ -61,31 +62,30 @@ class PDFEngine:
             if current_group: groups.append({"label": part, "indices": current_group})
         return groups
-    # --- PREVISUALIZACIÓN ---
-    def generate_preview(self, file_path: str, page_number: int) -> str:
         try:
-            images = convert_from_path(file_path, first_page=page_number, last_page=page_number, size=(None, 400))
-            if images:
-                output_path = self._get_output_path(f"preview_pg{page_number}.jpg")
-                images[0].save(output_path, "JPEG")
-                return output_path
-            return None
-        except Exception: return None
-    def get_rotated_preview(self, file_path: str, angle: int) -> str:
-        if not file_path: return None
         try:
-            images = convert_from_path(file_path, first_page=1, last_page=1, size=(None, 500))
-            if not images: return None
-            img = images[0]
-            if angle != 0: img = img.rotate(-angle, expand=True)
-            output_path = self._get_output_path(f"preview_rot_{angle}.jpg")
-            img.save(output_path, "JPEG")
-            return output_path
-        except Exception: return None
     def get_preview_indices_from_string(self, range_str: str, max_pages: int) -> list:
         key_pages = []
         parts = range_str.split(',')
         for part in parts:
@@ -102,119 +102,259 @@ class PDFEngine:
                 except ValueError: continue
         return sorted(list(set(key_pages)))
-    # --- FUNCIONALIDADES DE GESTIÓN DE PÁGINAS ---
     def merge_pdfs(self, file_paths: list, order_indices: list = None) -> str:
         if not file_paths: raise ValueError("No hay archivos.")
-        ordered_paths = []
         if order_indices and len(order_indices) == len(file_paths):
-            try:
-                for idx in order_indices: ordered_paths.append(file_paths[int(idx)])
-            except: ordered_paths = file_paths
-        else: ordered_paths = file_paths
-        merger = PdfWriter()
-        try:
-            for path in ordered_paths: merger.append(path)
-            output_path = self._get_output_path("unido_ordenado.pdf")
-            with open(output_path, "wb") as f: merger.write(f)
-            return output_path
-        except Exception as e: raise RuntimeError(f"Error al unir: {str(e)}")
-        finally: merger.close()
     def split_pdf_custom(self, file_path: str, range_str: str) -> str:
         if not file_path: raise ValueError("Falta archivo.")
-        reader = PdfReader(file_path)
-        total = len(reader.pages)
-        groups = self._parse_range_groups(range_str, total)
-        if not groups: raise ValueError("Rango inválido.")
-        generated = []
         base = os.path.basename(file_path).replace(".pdf", "")
-        for group in groups:
-            writer = PdfWriter()
-            for idx in group["indices"]: writer.add_page(reader.pages[idx])
             safe = group["label"].replace(" ", "")
-            out = self._get_output_path(f"{base}_part_{safe}.pdf")
-            with open(out, "wb") as f: writer.write(f)
-            generated.append(out)
-        zname = f"{base}_split_files.zip"
-        zpath = self._get_output_path(zname)
-        with zipfile.ZipFile(zpath, 'w') as zipf:
-            for f in generated: zipf.write(f, arcname=os.path.basename(f))
-        return zpath
     def reorder_pages(self, file_path: str, order_str: str) -> str:
         if not file_path: raise ValueError("Falta archivo.")
-        reader = PdfReader(file_path)
-        groups = self._parse_range_groups(order_str, len(reader.pages))
-        if not groups: raise ValueError("Orden inválido.")
-        flat = []
-        for g in groups: flat.extend(g["indices"])
-        writer = PdfWriter()
-        for idx in flat: writer.add_page(reader.pages[idx])
         out = self._get_output_path("reordenado.pdf")
-        with open(out, "wb") as f: writer.write(f)
         return out
-    # --- EDICIÓN Y SEGURIDAD ---
-    def compress_pdf(self, file_path: str, power: int = 2) -> str:
         if not file_path: raise ValueError("Falta archivo.")
-        quality = {0: "/default", 1: "/prepress", 2: "/printer", 3: "/ebook", 4: "/screen"}
-        gs_setting = quality.get(power, "/ebook")
-        output_path = self._get_output_path("comprimido.pdf")
-        cmd = ["gs", "-sDEVICE=pdfwrite", "-dCompatibilityLevel=1.4", f"-dPDFSETTINGS={gs_setting}", "-dNOPAUSE", "-dQUIET", "-dBATCH", f"-sOutputFile={output_path}", file_path]
         try:
             subprocess.run(cmd, check=True)
-            return output_path
-        except subprocess.CalledProcessError as e: raise RuntimeError(f"Error Ghostscript: {e}")
-        except FileNotFoundError: raise RuntimeError("Falta Ghostscript (packages.txt).")
     def protect_pdf(self, file_path: str, password: str) -> str:
         if not file_path or not password: raise ValueError("Faltan datos.")
         try:
-            reader = PdfReader(file_path)
-            writer = PdfWriter()
-            for page in reader.pages: writer.add_page(page)
-            writer.encrypt(password)
             out = self._get_output_path("protegido.pdf")
-            with open(out, "wb") as f: writer.write(f)
             return out
-        except Exception as e: raise RuntimeError(f"Error proteger: {e}")
     def rotate_pdf(self, file_path: str, angle: int) -> str:
         if not file_path: raise ValueError("Falta archivo.")
         try:
-            reader = PdfReader(file_path)
-            writer = PdfWriter()
-            for page in reader.pages:
-                page.rotate(angle)
-                writer.add_page(page)
             out = self._get_output_path(f"rotado_{angle}.pdf")
-            with open(out, "wb") as f: writer.write(f)
             return out
-        except Exception as e: raise RuntimeError(f"Error rotar: {e}")
-    def update_metadata(self, file_path: str, title: str, author: str, subject: str) -> str:
-        if not file_path: raise ValueError("Falta archivo.")
         try:
-            reader = PdfReader(file_path)
-            writer = PdfWriter()
-            for page in reader.pages: writer.add_page(page)
-            writer.add_metadata({"/Title": title, "/Author": author, "/Subject": subject, "/Producer": "OpenPDF Tools"})
-            out = self._get_output_path("editado_meta.pdf")
-            with open(out, "wb") as f: writer.write(f)
             return out
-        except Exception as e: raise RuntimeError(f"Error metadata: {e}")
-    # --- CONVERSIONES GENERALES ---
-    def pdf_to_images_zip(self, file_path: str) -> str:
-        if not file_path: raise ValueError("Falta archivo.")
         try:
-            images = convert_from_path(file_path, dpi=150)
-            base = os.path.basename(file_path).replace(".pdf", "")
             paths = []
-            for i, img in enumerate(images):
                 p = self._get_output_path(f"{base}_{i+1}.jpg")
                 img.save(p, "JPEG")
                 paths.append(p)
@@ -222,156 +362,17 @@ class PDFEngine:
             with zipfile.ZipFile(zp, 'w') as z:
                 for p in paths: z.write(p, arcname=os.path.basename(p))
             return zp
-        except Exception as e: raise RuntimeError(f"Error PDF->IMG: {e}")
-    def images_to_pdf(self, image_paths: list) -> str:
-        if not image_paths: raise ValueError("No imágenes.")
         try:
             objs = []
-            for p in image_paths:
-                img = Image.open(p)
-                if img.mode != 'RGB': img = img.convert('RGB')
-                objs.append(img)
             out = self._get_output_path("album.pdf")
             if objs: objs[0].save(out, "PDF", resolution=100.0, save_all=True, append_images=objs[1:])
             return out
-        except Exception as e: raise RuntimeError(f"Error IMG->PDF: {e}")
-    def pdf_to_word(self, file_path: str) -> str:
-        if not file_path: raise ValueError("Falta archivo.")
-        try:
-            docx = os.path.basename(file_path).replace(".pdf", ".docx")
-            out = self._get_output_path(docx)
-            cv = Converter(file_path)
-            cv.convert(out, start=0, end=None)
-            cv.close()
-            return out
-        except Exception as e: raise RuntimeError(f"Error PDF->Word: {e}")
-    def extract_text(self, file_path: str) -> str:
-        if not file_path: raise ValueError("Falta archivo.")
-        try:
-            reader = PdfReader(file_path)
-            content = []
-            for i, page in enumerate(reader.pages):
-                txt = page.extract_text()
-                if txt: content.append(f"--- Pág {i+1} ---\n{txt}\n")
-            out = self._get_output_path(os.path.basename(file_path).replace(".pdf", ".txt"))
-            with open(out, "w", encoding="utf-8") as f: f.write("\n".join(content))
-            return out
-        except Exception as e: raise RuntimeError(f"Error texto: {e}")
-    # --- NUEVAS CONVERSIONES OFFICE (v2.0) ---
-    def pdf_to_excel(self, file_path: str) -> str:
-        """
-        Extrae tablas del PDF y las guarda en un Excel (XLSX).
-        Crea una hoja por cada página que contenga tablas.
-        """
-        if not file_path: raise ValueError("Falta archivo.")
-        try:
-            xlsx_name = os.path.basename(file_path).replace(".pdf", ".xlsx")
-            output_path = self._get_output_path(xlsx_name)
-            has_tables = False
-            # Usamos ExcelWriter para escribir múltiples hojas
-            with pd.ExcelWriter(output_path, engine='openpyxl') as writer:
-                with pdfplumber.open(file_path) as pdf:
-                    for i, page in enumerate(pdf.pages):
-                        tables = page.extract_tables()
-                        if tables:
-                            has_tables = True
-                            # Si hay varias tablas en una página, las concatenamos o las ponemos una debajo de otra
-                            # Aquí, por simplicidad, cogemos la tabla más grande o concatenamos
-                            df_page = pd.DataFrame()
-                            for table in tables:
-                                df = pd.DataFrame(table)
-                                # Usar la primera fila como header si parece un header
-                                new_header = df.iloc[0]
-                                df = df[1:]
-                                df.columns = new_header
-                                df_page = pd.concat([df_page, df], ignore_index=True)
-                            sheet_name = f"Pagina_{i+1}"
-                            df_page.to_excel(writer, sheet_name=sheet_name, index=False)
-            if not has_tables:
-                raise ValueError("No se detectaron tablas con bordes claros en este PDF.")
-            return output_path
-        except Exception as e:
-            raise RuntimeError(f"Error PDF->Excel: {str(e)}")
-    def pdf_to_pptx(self, file_path: str) -> str:
-        """
-        Convierte PDF a PowerPoint (PPTX).
-        Estrategia: Convertir cada página a Imagen -> Pegar en Diapositiva.
-        Esto preserva el formato visual exacto.
-        """
-        if not file_path: raise ValueError("Falta archivo.")
-        try:
-            # 1. Convertir PDF a imágenes (HQ)
-            images = convert_from_path(file_path, dpi=200)
-            # 2. Crear presentación
-            prs = Presentation()
-            # Definir layout en blanco (index 6 suele ser blank en tema default)
-            BLANK_SLIDE_LAYOUT = 6
-            for i, img in enumerate(images):
-                # Guardar imagen temporal
-                img_path = self._get_output_path(f"temp_slide_{i}.jpg")
-                img.save(img_path, "JPEG")
-                # Añadir diapositiva
-                slide = prs.slides.add_slide(prs.slide_layouts[BLANK_SLIDE_LAYOUT])
-                # Ajustar tamaño de la diapositiva al tamaño de la imagen?
-                # Por simplicidad, ajustamos la imagen al tamaño de la diapositiva estándar (10x7.5 inches)
-                # left, top, width, height
-                slide.shapes.add_picture(img_path, Inches(0), Inches(0), width=prs.slide_width)
-            pptx_name = os.path.basename(file_path).replace(".pdf", ".pptx")
-            output_path = self._get_output_path(pptx_name)
-            prs.save(output_path)
-            return output_path
-        except Exception as e:
-            raise RuntimeError(f"Error PDF->PPTX: {str(e)}")
-    # --- ANÁLISIS ---
-    def compare_pdfs_visual(self, path_a: str, path_b: str) -> str:
-        if not path_a or not path_b: raise ValueError("Dos archivos requeridos.")
-        try:
-            imgs_a = convert_from_path(path_a, dpi=100)
-            imgs_b = convert_from_path(path_b, dpi=100)
-        except Exception as e: raise RuntimeError(f"Error leyendo PDFs: {e}")
-        min_pages = min(len(imgs_a), len(imgs_b))
-        diff_pages = []
-        for i in range(min_pages):
-            arr_a = np.array(imgs_a[i])
-            arr_b = np.array(imgs_b[i])
-            if arr_a.shape != arr_b.shape:
-                h, w = arr_a.shape[:2]
-                arr_b = cv2.resize(arr_b, (w, h))
-            gray_a = cv2.cvtColor(arr_a, cv2.COLOR_RGB2GRAY)
-            gray_b = cv2.cvtColor(arr_b, cv2.COLOR_RGB2GRAY)
-            diff = cv2.absdiff(gray_a, gray_b)
-            _, thresh = cv2.threshold(diff, 30, 255, cv2.THRESH_BINARY)
-            contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-            res = arr_a.copy()
-            for cnt in contours:
-                x, y, w, h = cv2.boundingRect(cnt)
-                cv2.rectangle(res, (x, y), (x + w, y + h), (255, 0, 255), 2)
-            diff_pages.append(Image.fromarray(res))
-        if not diff_pages: raise ValueError("Error en comparación.")
-        out = self._get_output_path("comparativa.pdf")
-        diff_pages[0].save(out, "PDF", resolution=100.0, save_all=True, append_images=diff_pages[1:])
-        return out

+# Versión 2.2: Core con Comparación de Texto (ReportLab)
+# Autor: Gemini (AI Assistant)
 import os
 import zipfile
 import uuid
 import subprocess
+import difflib
 import pdfplumber
 import pandas as pd
 from pypdf import PdfWriter, PdfReader
 from PIL import Image
 from pptx import Presentation
 from pptx.util import Inches
+# ReportLab para generar el PDF de diferencias
+from reportlab.lib.pagesizes import A4
+from reportlab.lib import colors
+from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
+from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
 from config import TEMP_DIR
 class PDFEngine:
+    # --- UTILIDADES ---
     @staticmethod
     def _get_output_path(filename: str) -> str:
         """Return a path for ``filename`` inside TEMP_DIR with a random prefix.

         The prefix is the first 8 hex digits of a fresh UUID4, so repeated
         calls with the same ``filename`` produce different paths.
         """
         prefix = uuid.uuid4().hex[:8]
         return os.path.join(TEMP_DIR, f"{prefix}_{filename}")
             meta = reader.metadata
             title = meta.title if meta and meta.title else "Sin título"
             return {"pages": len(reader.pages), "name": os.path.basename(file_path), "title": title}
+        except: return {"pages": 0, "name": "Error", "title": ""}
     def _parse_range_groups(self, range_str: str, max_pages: int) -> list:
+        # (Lógica idéntica versiones anteriores)
         groups = []
         parts = range_str.split(',')
         for part in parts:
             if current_group: groups.append({"label": part, "indices": current_group})
         return groups
+    # --- PREVIEW ---
+    def generate_preview(self, f, p):
         try:
+            imgs = convert_from_path(f, first_page=p, last_page=p, size=(None, 400))
+            if imgs:
+                out = self._get_output_path(f"preview_pg{p}.jpg")
+                imgs[0].save(out, "JPEG")
+                return out
+        except: return None
+    def get_rotated_preview(self, f, a):
+        if not f: return None
         try:
+            imgs = convert_from_path(f, first_page=1, last_page=1, size=(None, 500))
+            if not imgs: return None
+            img = imgs[0]
+            if a != 0: img = img.rotate(-a, expand=True)
+            out = self._get_output_path(f"rot_prev_{a}.jpg")
+            img.save(out, "JPEG")
+            return out
+        except: return None
     def get_preview_indices_from_string(self, range_str: str, max_pages: int) -> list:
+        # (Igual que antes)
         key_pages = []
         parts = range_str.split(',')
         for part in parts:
                 except ValueError: continue
         return sorted(list(set(key_pages)))
+    # --- CORE PDF TOOLS ---
     def merge_pdfs(self, file_paths: list, order_indices: list = None) -> str:
         """Concatenate several PDFs into a single file.

         Args:
             file_paths: Paths of the PDFs to merge.
             order_indices: Optional positions into ``file_paths`` giving the
                 merge order. Only honoured when it has the same length as
                 ``file_paths``; if any entry cannot be used as an index the
                 original order is kept.

         Returns:
             Path of the merged PDF.

         Raises:
             ValueError: If ``file_paths`` is empty.
         """
         if not file_paths:
             raise ValueError("No hay archivos.")

         ordered = file_paths
         if order_indices and len(order_indices) == len(file_paths):
             try:
                 ordered = [file_paths[int(i)] for i in order_indices]
             except (ValueError, TypeError, IndexError):
                 # Unusable ordering information: fall back to the given order.
                 ordered = file_paths

         m = PdfWriter()
         try:
             for p in ordered:
                 m.append(p)
             out = self._get_output_path("unido.pdf")
             with open(out, "wb") as fh:
                 m.write(fh)
         finally:
             # Release the writer even when appending or writing fails.
             m.close()
         return out
     def split_pdf_custom(self, file_path: str, range_str: str) -> str:
         """Split a PDF into one file per range group and bundle them in a ZIP.

         Args:
             file_path: Path of the source PDF.
             range_str: Range expression handed to ``_parse_range_groups``.

         Returns:
             Path of the ZIP archive containing one PDF per parsed group.

         Raises:
             ValueError: If ``file_path`` is empty or no group could be parsed.
         """
         if not file_path:
             raise ValueError("Falta archivo.")

         reader = PdfReader(file_path)
         groups = self._parse_range_groups(range_str, len(reader.pages))
         if not groups:
             raise ValueError("Rango inválido.")

         base = os.path.basename(file_path).replace(".pdf", "")
         part_paths = []
         for group in groups:
             writer = PdfWriter()
             for page_index in group["indices"]:
                 writer.add_page(reader.pages[page_index])
             # The label becomes part of the file name, so drop its spaces.
             label = group["label"].replace(" ", "")
             part_path = self._get_output_path(f"{base}_part_{label}.pdf")
             with open(part_path, "wb") as fh:
                 writer.write(fh)
             part_paths.append(part_path)

         zip_path = self._get_output_path(f"{base}_split.zip")
         with zipfile.ZipFile(zip_path, 'w') as archive:
             for part_path in part_paths:
                 archive.write(part_path, arcname=os.path.basename(part_path))
         return zip_path
     def reorder_pages(self, file_path: str, order_str: str) -> str:
         """Write a copy of the PDF whose pages follow the order in ``order_str``.

         Args:
             file_path: Path of the source PDF.
             order_str: Order expression handed to ``_parse_range_groups``; the
                 indices of all resulting groups are emitted in sequence.

         Returns:
             Path of the reordered PDF.

         Raises:
             ValueError: If ``file_path`` is empty or no group could be parsed.
         """
         if not file_path:
             raise ValueError("Falta archivo.")

         reader = PdfReader(file_path)
         groups = self._parse_range_groups(order_str, len(reader.pages))
         if not groups:
             raise ValueError("Orden inválido.")

         writer = PdfWriter()
         for group in groups:
             for page_index in group["indices"]:
                 writer.add_page(reader.pages[page_index])

         out = self._get_output_path("reordenado.pdf")
         with open(out, "wb") as fh:
             writer.write(fh)
         return out
+    def compress_pdf(self, file_path: str, power: int = 3) -> str:
         if not file_path: raise ValueError("Falta archivo.")
+        # power: 1=Baja, 3=Media/eBook, 4=Alta/Screen
+        q = {1: "/prepress", 3: "/ebook", 4: "/screen"}
+        gs_set = q.get(power, "/ebook")
+        out = self._get_output_path("comprimido.pdf")
+        cmd = ["gs", "-sDEVICE=pdfwrite", "-dCompatibilityLevel=1.4", f"-dPDFSETTINGS={gs_set}", "-dNOPAUSE", "-dQUIET", "-dBATCH", f"-sOutputFile={out}", file_path]
         try:
             subprocess.run(cmd, check=True)
+            return out
+        except: raise RuntimeError("Error comprimiendo (Ghostscript).")
     def protect_pdf(self, file_path: str, password: str) -> str:
         """Write an encrypted copy of the PDF protected with ``password``.

         Args:
             file_path: Path of the source PDF.
             password: Password passed to ``PdfWriter.encrypt``.

         Returns:
             Path of the encrypted PDF.

         Raises:
             ValueError: If ``file_path`` or ``password`` is empty.
             RuntimeError: If reading, encrypting or writing fails.
         """
         if not (file_path and password):
             raise ValueError("Faltan datos.")
         try:
             source = PdfReader(file_path)
             target = PdfWriter()
             for page in source.pages:
                 target.add_page(page)
             target.encrypt(password)

             out = self._get_output_path("protegido.pdf")
             with open(out, "wb") as fh:
                 target.write(fh)
             return out
         except Exception as e:
             raise RuntimeError(f"Error: {e}")
     def rotate_pdf(self, file_path: str, angle: int) -> str:
         """Write a copy of the PDF with every page rotated by ``angle``.

         Args:
             file_path: Path of the source PDF.
             angle: Rotation passed to each page's ``rotate`` method.

         Returns:
             Path of the rotated PDF.

         Raises:
             ValueError: If ``file_path`` is empty.
             RuntimeError: If reading, rotating or writing fails.
         """
         if not file_path:
             raise ValueError("Falta archivo.")
         try:
             source = PdfReader(file_path)
             target = PdfWriter()
             for page in source.pages:
                 page.rotate(angle)
                 target.add_page(page)

             out = self._get_output_path(f"rotado_{angle}.pdf")
             with open(out, "wb") as fh:
                 target.write(fh)
             return out
         except Exception as e:
             raise RuntimeError(f"Error: {e}")
+    def update_metadata(self, f, t, a, s):
+        if not f: raise ValueError("Falta archivo.")
         try:
+            r = PdfReader(f)
+            w = PdfWriter()
+            for p in r.pages: w.add_page(p)
+            w.add_metadata({"/Title": t, "/Author": a, "/Subject": s, "/Producer": "OpenPDF Tools"})
+            out = self._get_output_path("meta.pdf")
+            with open(out, "wb") as outf: w.write(outf)
             return out
+        except Exception as e: raise RuntimeError(f"Error: {e}")
+    def extract_text(self, f):
+        if not f: raise ValueError("Falta archivo.")
+        try:
+            r = PdfReader(f)
+            txts = []
+            for i, p in enumerate(r.pages):
+                t = p.extract_text()
+                if t: txts.append(f"--- Pág {i+1} ---\n{t}\n")
+            out = self._get_output_path(os.path.basename(f).replace(".pdf", ".txt"))
+            with open(out, "w", encoding="utf-8") as file: file.write("\n".join(txts))
+            return out
+        except Exception as e: raise RuntimeError(f"Error: {e}")
+    # --- NUEVA COMPARACIÓN DE TEXTO (v2.2) ---
+    def compare_pdfs_text(self, path_a: str, path_b: str) -> str:
+        """
+        Compara el TEXTO de dos PDFs y genera un informe PDF con diferencias resaltadas.
+        Rojo/Tachado: Eliminado. Verde/Negrita: Añadido.
+        """
+        if not path_a or not path_b: raise ValueError("Faltan archivos.")
+        # 1. Extraer texto completo
+        def get_text_lines(path):
+            try:
+                reader = PdfReader(path)
+                text = ""
+                for page in reader.pages:
+                    extracted = page.extract_text()
+                    if extracted: text += extracted + "\n"
+                # Dividir por líneas para comparación
+                return text.splitlines()
+            except Exception as e:
+                raise RuntimeError(f"Error leyendo PDF: {e}")
+        lines_a = get_text_lines(path_a)
+        lines_b = get_text_lines(path_b)
+        # 2. Calcular diferencias (Difflib)
+        diff = difflib.ndiff(lines_a, lines_b)
+        # 3. Generar PDF con ReportLab
+        output_path = self._get_output_path("informe_diferencias.pdf")
+        doc = SimpleDocTemplate(output_path, pagesize=A4)
+        styles = getSampleStyleSheet()
+        # Estilos personalizados
+        style_normal = styles['BodyText']
+        style_del = ParagraphStyle('Deleted', parent=style_normal, textColor=colors.red, backColor=colors.mistyrose, strike=True)
+        style_add = ParagraphStyle('Added', parent=style_normal, textColor=colors.darkgreen, backColor=colors.honeydew, fontName='Helvetica-Bold')
+        style_header = styles['Heading1']
+        story = []
+        story.append(Paragraph("Informe de Comparación de Texto", style_header))
+        story.append(Spacer(1, 12))
+        story.append(Paragraph(f"<b>Archivo A (Original):</b> {os.path.basename(path_a)}", style_normal))
+        story.append(Paragraph(f"<b>Archivo B (Modificado):</b> {os.path.basename(path_b)}", style_normal))
+        story.append(Spacer(1, 24))
+        # Procesar diferencias
+        # ndiff devuelve: '- texto' (borrado), '+ texto' (añadido), '  texto' (igual), '? ...' (metadatos intralínea)
+        has_changes = False
+        for line in diff:
+            code = line[:2]
+            content = line[2:].strip()
+            # Escapar XML/HTML para ReportLab (evitar crash con <, >)
+            content = content.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
+            if not content: continue # Saltar líneas vacías
+            if code == '- ':
+                # Eliminado (Rojo)
+                p = Paragraph(f"<strike>{content}</strike>", style_del)
+                story.append(p)
+                story.append(Spacer(1, 4))
+                has_changes = True
+            elif code == '+ ':
+                # Añadido (Verde)
+                p = Paragraph(f"{content}", style_add)
+                story.append(p)
+                story.append(Spacer(1, 4))
+                has_changes = True
+            elif code == '  ':
+                # Contexto (Grisáceo o normal)
+                # Para no hacer el informe eterno, podríamos recortar contexto,
+                # pero mejor ponerlo todo para leer el documento fluido.
+                p = Paragraph(content, style_normal)
+                story.append(p)
+                story.append(Spacer(1, 2))
+            # Ignoramos líneas que empiezan por '?' (son pistas de difflib sobre dónde está el cambio en la palabra)
+        if not has_changes:
+            story.append(Paragraph("<b>No se encontraron diferencias textuales entre los documentos.</b>", style_normal))
+        doc.build(story)
+        return output_path
+    # --- CONVERSIONES OFFICE (v2.0) ---
+    def pdf_to_excel(self, f):
+        if not f: raise ValueError("Falta archivo.")
+        try:
+            out = self._get_output_path(os.path.basename(f).replace(".pdf", ".xlsx"))
+            found = False
+            with pd.ExcelWriter(out, engine='openpyxl') as w:
+                with pdfplumber.open(f) as pdf:
+                    for i, p in enumerate(pdf.pages):
+                        tabs = p.extract_tables()
+                        if tabs:
+                            found = True
+                            df_p = pd.DataFrame()
+                            for t in tabs:
+                                df = pd.DataFrame(t)
+                                header = df.iloc[0]
+                                df = df[1:]
+                                df.columns = header
+                                df_p = pd.concat([df_p, df], ignore_index=True)
+                            df_p.to_excel(w, sheet_name=f"Pag_{i+1}", index=False)
+            if not found: raise ValueError("No se encontraron tablas.")
+            return out
+        except Exception as e: raise RuntimeError(f"Error Excel: {e}")
+    def pdf_to_pptx(self, f):
+        if not f: raise ValueError("Falta archivo.")
+        try:
+            imgs = convert_from_path(f, dpi=150)
+            prs = Presentation()
+            # Layout blanco
+            blank = 6
+            for i, img in enumerate(imgs):
+                ip = self._get_output_path(f"slide_{i}.jpg")
+                img.save(ip, "JPEG")
+                slide = prs.slides.add_slide(prs.slide_layouts[blank])
+                # Ajustar imagen al ancho de la slide
+                slide.shapes.add_picture(ip, Inches(0), Inches(0), width=prs.slide_width)
+            out = self._get_output_path(os.path.basename(f).replace(".pdf", ".pptx"))
+            prs.save(out)
+            return out
+        except Exception as e: raise RuntimeError(f"Error PPTX: {e}")
+    def pdf_to_word(self, f):
+        if not f: raise ValueError("Falta archivo.")
         try:
+            out = self._get_output_path(os.path.basename(f).replace(".pdf", ".docx"))
+            cv = Converter(f)
+            cv.convert(out, start=0, end=None)
+            cv.close()
+            return out
+        except Exception as e: raise RuntimeError(f"Error Word: {e}")
+    def pdf_to_images_zip(self, f):
+        if not f: raise ValueError("Falta archivo.")
+        try:
+            imgs = convert_from_path(f, dpi=150)
             paths = []
+            base = os.path.basename(f).replace(".pdf", "")
+            for i, img in enumerate(imgs):
                 p = self._get_output_path(f"{base}_{i+1}.jpg")
                 img.save(p, "JPEG")
                 paths.append(p)
             with zipfile.ZipFile(zp, 'w') as z:
                 for p in paths: z.write(p, arcname=os.path.basename(p))
             return zp
+        except: raise RuntimeError("Error imgs")
+    def images_to_pdf(self, fs):
+        if not fs: raise ValueError("No imgs.")
         try:
             objs = []
+            for p in fs:
+                i = Image.open(p)
+                if i.mode != 'RGB': i = i.convert('RGB')
+                objs.append(i)
             out = self._get_output_path("album.pdf")
             if objs: objs[0].save(out, "PDF", resolution=100.0, save_all=True, append_images=objs[1:])
             return out
+        except: raise RuntimeError("Error pdf")