Spaces:

DaniFera
/

PDFree

Sleeping

App Files Files Community

DaniFera committed 22 days ago

Commit

e267243

verified ·

1 Parent(s): 0833f03

Update core.py

Browse files

Files changed (1) hide show

core.py +177 -285

core.py CHANGED Viewed

@@ -1,6 +1,6 @@
-# Versión 1.9: Core Completo (Todas las herramientas integradas)
 # Autor: Gemini (AI Assistant)
-# Descripción: Motor lógico de manipulación de PDFs. Independiente de la interfaz gráfica.
 import os
 import zipfile
@@ -8,10 +8,14 @@ import uuid
 import subprocess
 import cv2
 import numpy as np
 from pypdf import PdfWriter, PdfReader
 from pdf2image import convert_from_path
 from pdf2docx import Converter
 from PIL import Image
 from config import TEMP_DIR
 class PDFEngine:
@@ -24,102 +28,66 @@ class PDFEngine:
     @staticmethod
     def _get_output_path(filename: str) -> str:
-        """Genera una ruta única en el directorio temporal para evitar colisiones."""
         unique_name = f"{uuid.uuid4().hex[:8]}_{filename}"
         return os.path.join(TEMP_DIR, unique_name)
     def get_pdf_info(self, file_path: str) -> dict:
-        """Extrae metadatos básicos para mostrar al usuario."""
         try:
             reader = PdfReader(file_path)
             meta = reader.metadata
             title = meta.title if meta and meta.title else "Sin título"
-            return {
-                "pages": len(reader.pages),
-                "name": os.path.basename(file_path),
-                "title": title
-            }
         except Exception:
             return {"pages": 0, "name": "Error", "title": ""}
     def _parse_range_groups(self, range_str: str, max_pages: int) -> list:
-        """
-        Analiza cadenas complejas como '1-3, 5' y devuelve grupos de índices.
-        Ejemplo retorno: [{'label': '1-3', 'indices': [0, 1, 2]}, {'label': '5', 'indices': [4]}]
-        """
         groups = []
         parts = range_str.split(',')
         for part in parts:
             part = part.strip()
             if not part: continue
             current_group = []
             if '-' in part:
                 try:
                     start, end = map(int, part.split('-'))
-                    # Ajustar límites y convertir a 0-based
                     start = max(1, start)
                     end = min(max_pages, end)
-                    if start <= end:
-                        current_group = list(range(start - 1, end))
                 except ValueError: continue
             else:
                 try:
                     p = int(part)
-                    if 1 <= p <= max_pages:
-                        current_group = [p - 1]
                 except ValueError: continue
-            if current_group:
-                groups.append({"label": part, "indices": current_group})
         return groups
     # --- PREVISUALIZACIÓN ---
     def generate_preview(self, file_path: str, page_number: int) -> str:
-        """Genera JPG de una página específica (page_number es 1-based)."""
         try:
-            # Usamos altura fija (400px) para rendimiento
             images = convert_from_path(file_path, first_page=page_number, last_page=page_number, size=(None, 400))
             if images:
                 output_path = self._get_output_path(f"preview_pg{page_number}.jpg")
                 images[0].save(output_path, "JPEG")
                 return output_path
             return None
-        except Exception as e:
-            print(f"Error generando preview: {e}")
-            return None
     def get_rotated_preview(self, file_path: str, angle: int) -> str:
-        """
-        Genera preview de la página 1 aplicando rotación visual.
-        Angle: 0, 90, 180, 270 (Sentido Horario).
-        """
         if not file_path: return None
         try:
-            # Extraemos con un poco más de calidad (500px) para ver bien el texto al rotar
             images = convert_from_path(file_path, first_page=1, last_page=1, size=(None, 500))
             if not images: return None
             img = images[0]
-            # Si el ángulo no es 0, rotamos.
-            # Pillow rota 'Counter-Clockwise' (antihorario), pypdf rota 'Clockwise'.
-            # Para simular lo que hará pypdf, usamos negativo (-angle).
-            if angle != 0:
-                img = img.rotate(-angle, expand=True)
             output_path = self._get_output_path(f"preview_rot_{angle}.jpg")
             img.save(output_path, "JPEG")
             return output_path
-        except Exception as e:
-            print(f"Error preview rotación: {e}")
-            return None
     def get_preview_indices_from_string(self, range_str: str, max_pages: int) -> list:
-        """Devuelve los números de página clave (inicio/fin) para visualizar rangos."""
         key_pages = []
         parts = range_str.split(',')
         for part in parts:
@@ -139,349 +107,273 @@ class PDFEngine:
     # --- FUNCIONALIDADES DE GESTIÓN DE PÁGINAS ---
     def merge_pdfs(self, file_paths: list, order_indices: list = None) -> str:
-        """Une múltiples PDFs respetando el orden indicado."""
-        if not file_paths: raise ValueError("No hay archivos para unir.")
         ordered_paths = []
         if order_indices and len(order_indices) == len(file_paths):
             try:
-                for idx in order_indices:
-                    ordered_paths.append(file_paths[int(idx)])
-            except:
-                ordered_paths = file_paths
-        else:
-            ordered_paths = file_paths
         merger = PdfWriter()
         try:
-            for path in ordered_paths:
-                merger.append(path)
             output_path = self._get_output_path("unido_ordenado.pdf")
-            with open(output_path, "wb") as f:
-                merger.write(f)
             return output_path
-        except Exception as e:
-            raise RuntimeError(f"Error al unir: {str(e)}")
-        finally:
-            merger.close()
     def split_pdf_custom(self, file_path: str, range_str: str) -> str:
-        """Divide PDF según rangos y devuelve un ZIP con los archivos."""
         if not file_path: raise ValueError("Falta archivo.")
         reader = PdfReader(file_path)
         total = len(reader.pages)
         groups = self._parse_range_groups(range_str, total)
-        if not groups: raise ValueError("Rango inválido o vacío.")
-        generated_files = []
-        base_name = os.path.basename(file_path).replace(".pdf", "")
         for group in groups:
             writer = PdfWriter()
-            for idx in group["indices"]:
-                writer.add_page(reader.pages[idx])
-            # Nombre seguro para el archivo
-            safe_label = group["label"].replace(" ", "")
-            out_name = f"{base_name}_part_{safe_label}.pdf"
-            out_path = self._get_output_path(out_name)
-            with open(out_path, "wb") as f:
-                writer.write(f)
-            generated_files.append(out_path)
-        # Crear ZIP
-        zip_name = f"{base_name}_split_files.zip"
-        zip_path = self._get_output_path(zip_name)
-        with zipfile.ZipFile(zip_path, 'w') as zipf:
-            for file in generated_files:
-                zipf.write(file, arcname=os.path.basename(file))
-        return zip_path
     def reorder_pages(self, file_path: str, order_str: str) -> str:
-        """
-        Crea un nuevo PDF con las páginas en el orden especificado.
-        order_str ejemplo: "3, 1, 2, 4-6"
-        """
         if not file_path: raise ValueError("Falta archivo.")
         reader = PdfReader(file_path)
-        total_pages = len(reader.pages)
-        # Reutilizamos el parser de rangos para obtener la lista de índices
-        groups = self._parse_range_groups(order_str, total_pages)
         if not groups: raise ValueError("Orden inválido.")
-        flat_indices = []
-        for g in groups:
-            flat_indices.extend(g["indices"])
         writer = PdfWriter()
-        for idx in flat_indices:
-            writer.add_page(reader.pages[idx])
-        output_path = self._get_output_path("reordenado.pdf")
-        with open(output_path, "wb") as f:
-            writer.write(f)
-        return output_path
     # --- EDICIÓN Y SEGURIDAD ---
     def compress_pdf(self, file_path: str, power: int = 2) -> str:
-        """
-        Comprime PDF usando Ghostscript.
-        power:
-            0: /default
-            1: /prepress
-            2: /printer
-            3: /ebook (Recomendado)
-            4: /screen
-        """
         if not file_path: raise ValueError("Falta archivo.")
-        # Mapeo de niveles de Ghostscript
-        quality = {
-            0: "/default",
-            1: "/prepress",
-            2: "/printer",
-            3: "/ebook",
-            4: "/screen"
-        }
         gs_setting = quality.get(power, "/ebook")
         output_path = self._get_output_path("comprimido.pdf")
-        # Comando Ghostscript
-        cmd = [
-            "gs",
-            "-sDEVICE=pdfwrite",
-            "-dCompatibilityLevel=1.4",
-            f"-dPDFSETTINGS={gs_setting}",
-            "-dNOPAUSE",
-            "-dQUIET",
-            "-dBATCH",
-            f"-sOutputFile={output_path}",
-            file_path
-        ]
         try:
             subprocess.run(cmd, check=True)
             return output_path
-        except subprocess.CalledProcessError as e:
-            raise RuntimeError(f"Error en compresión (Ghostscript): {e}")
-        except FileNotFoundError:
-            raise RuntimeError("Error: Ghostscript no está instalado en el sistema (packages.txt).")
     def protect_pdf(self, file_path: str, password: str) -> str:
-        """Cifra el PDF con contraseña."""
         if not file_path or not password: raise ValueError("Faltan datos.")
         try:
             reader = PdfReader(file_path)
             writer = PdfWriter()
-            for page in reader.pages:
-                writer.add_page(page)
             writer.encrypt(password)
-            output_path = self._get_output_path("protegido.pdf")
-            with open(output_path, "wb") as f:
-                writer.write(f)
-            return output_path
-        except Exception as e:
-            raise RuntimeError(f"Error al proteger: {e}")
     def rotate_pdf(self, file_path: str, angle: int) -> str:
-        """Rota permanentemente todas las páginas del PDF."""
         if not file_path: raise ValueError("Falta archivo.")
         try:
             reader = PdfReader(file_path)
             writer = PdfWriter()
             for page in reader.pages:
                 page.rotate(angle)
                 writer.add_page(page)
-            output_path = self._get_output_path(f"rotado_{angle}.pdf")
-            with open(output_path, "wb") as f:
-                writer.write(f)
-            return output_path
-        except Exception as e:
-            raise RuntimeError(f"Error al rotar: {e}")
     def update_metadata(self, file_path: str, title: str, author: str, subject: str) -> str:
-        """Actualiza título, autor y asunto."""
         if not file_path: raise ValueError("Falta archivo.")
         try:
             reader = PdfReader(file_path)
             writer = PdfWriter()
-            for page in reader.pages:
-                writer.add_page(page)
-            new_meta = {
-                "/Title": title,
-                "/Author": author,
-                "/Subject": subject,
-                "/Producer": "OpenPDF Tools (Libre)"
-            }
-            writer.add_metadata(new_meta)
-            output_path = self._get_output_path("editado_meta.pdf")
-            with open(output_path, "wb") as f:
-                writer.write(f)
-            return output_path
-        except Exception as e:
-            raise RuntimeError(f"Error actualizando metadatos: {e}")
-    # --- CONVERSIONES ---
     def pdf_to_images_zip(self, file_path: str) -> str:
-        """Convierte PDF a JPGs y devuelve ZIP."""
         if not file_path: raise ValueError("Falta archivo.")
         try:
-            # 150 DPI es buen balance calidad/velocidad para web
             images = convert_from_path(file_path, dpi=150)
-            base_name = os.path.basename(file_path).replace(".pdf", "")
-            img_paths = []
             for i, img in enumerate(images):
-                name = f"{base_name}_pag_{i+1}.jpg"
-                p = self._get_output_path(name)
                 img.save(p, "JPEG")
-                img_paths.append(p)
-            zip_path = self._get_output_path(f"{base_name}_imagenes.zip")
-            with zipfile.ZipFile(zip_path, 'w') as zipf:
-                for p in img_paths:
-                    zipf.write(p, arcname=os.path.basename(p))
-            return zip_path
-        except Exception as e:
-            raise RuntimeError(f"Error conversión PDF->IMG: {e}")
     def images_to_pdf(self, image_paths: list) -> str:
-        """Convierte lista de imágenes a un PDF."""
-        if not image_paths: raise ValueError("No hay imágenes.")
         try:
-            img_objs = []
             for p in image_paths:
                 img = Image.open(p)
-                if img.mode != 'RGB':
-                    img = img.convert('RGB')
-                img_objs.append(img)
-            output_path = self._get_output_path("album_imagenes.pdf")
-            if img_objs:
-                img_objs[0].save(
-                    output_path, "PDF",
-                    resolution=100.0,
-                    save_all=True,
-                    append_images=img_objs[1:]
-                )
-            return output_path
-        except Exception as e:
-            raise RuntimeError(f"Error conversión IMG->PDF: {e}")
     def pdf_to_word(self, file_path: str) -> str:
-        """Convierte PDF a DOCX (Beta)."""
         if not file_path: raise ValueError("Falta archivo.")
         try:
-            docx_filename = os.path.basename(file_path).replace(".pdf", ".docx")
-            output_path = self._get_output_path(docx_filename)
             cv = Converter(file_path)
-            cv.convert(output_path, start=0, end=None)
             cv.close()
-            return output_path
-        except Exception as e:
-            raise RuntimeError(f"Error al convertir a Word: {e}")
     def extract_text(self, file_path: str) -> str:
-        """Extrae texto plano a .txt."""
         if not file_path: raise ValueError("Falta archivo.")
         try:
             reader = PdfReader(file_path)
-            text_content = []
             for i, page in enumerate(reader.pages):
                 txt = page.extract_text()
-                if txt:
-                    text_content.append(f"--- Página {i+1} ---\n{txt}\n")
-            txt_filename = os.path.basename(file_path).replace(".pdf", ".txt")
-            output_path = self._get_output_path(txt_filename)
-            with open(output_path, "w", encoding="utf-8") as f:
-                f.write("\n".join(text_content))
             return output_path
         except Exception as e:
-            raise RuntimeError(f"Error extrayendo texto: {e}")
-    # --- ANÁLISIS Y COMPARACIÓN ---
-    def compare_pdfs_visual(self, path_a: str, path_b: str) -> str:
         """
-        Compara visualmente dos PDFs página por página y resalta diferencias.
-        Devuelve un PDF compuesto por imágenes de las diferencias.
         """
-        if not path_a or not path_b: raise ValueError("Se requieren dos archivos.")
         try:
             imgs_a = convert_from_path(path_a, dpi=100)
             imgs_b = convert_from_path(path_b, dpi=100)
-        except Exception as e:
-            raise RuntimeError(f"Error leyendo PDFs para comparar: {e}")
         min_pages = min(len(imgs_a), len(imgs_b))
         diff_pages = []
         for i in range(min_pages):
-            # Convertir PIL a Numpy Array (RGB)
             arr_a = np.array(imgs_a[i])
             arr_b = np.array(imgs_b[i])
-            # Asegurar mismo tamaño
             if arr_a.shape != arr_b.shape:
                 h, w = arr_a.shape[:2]
                 arr_b = cv2.resize(arr_b, (w, h))
-            # Calcular diferencia
             gray_a = cv2.cvtColor(arr_a, cv2.COLOR_RGB2GRAY)
             gray_b = cv2.cvtColor(arr_b, cv2.COLOR_RGB2GRAY)
             diff = cv2.absdiff(gray_a, gray_b)
             _, thresh = cv2.threshold(diff, 30, 255, cv2.THRESH_BINARY)
             contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-            result_img = arr_a.copy()
             for cnt in contours:
                 x, y, w, h = cv2.boundingRect(cnt)
-                cv2.rectangle(result_img, (x, y), (x + w, y + h), (255, 0, 255), 2)
-            diff_pages.append(Image.fromarray(result_img))
-        if not diff_pages:
-            raise ValueError("No se pudieron generar páginas de comparación o no hay páginas comunes.")
-        output_path = self._get_output_path("comparativa_diferencias.pdf")
-        diff_pages[0].save(
-            output_path, "PDF",
-            resolution=100.0,
-            save_all=True,
-            append_images=diff_pages[1:]
-        )
-        return output_path

+# Versión 2.0: Core Completo con Excel y PowerPoint
 # Autor: Gemini (AI Assistant)
+# Descripción: Motor lógico de manipulación de PDFs. Incluye conversión a formatos Office.
 import os
 import zipfile
 import subprocess
 import cv2
 import numpy as np
+import pdfplumber
+import pandas as pd
 from pypdf import PdfWriter, PdfReader
 from pdf2image import convert_from_path
 from pdf2docx import Converter
 from PIL import Image
+from pptx import Presentation
+from pptx.util import Inches
 from config import TEMP_DIR
 class PDFEngine:
     @staticmethod
     def _get_output_path(filename: str) -> str:
         """Return a path for ``filename`` inside ``TEMP_DIR`` with a random prefix.

         The first eight hex digits of a fresh UUID4 are prepended to the name,
         so repeated calls with the same ``filename`` give different paths.
         """
         prefix = uuid.uuid4().hex[:8]
         return os.path.join(TEMP_DIR, f"{prefix}_{filename}")
     def get_pdf_info(self, file_path: str) -> dict:
         """Return page count, file name and title of the PDF at ``file_path``.

         The title falls back to ``"Sin título"`` when the document has no
         metadata or an empty title. Any exception raised while reading is
         swallowed and reported as a placeholder dict with zero pages.
         """
         try:
             pdf = PdfReader(file_path)
             info = pdf.metadata
             if info and info.title:
                 title = info.title
             else:
                 title = "Sin título"
             page_count = len(pdf.pages)
             file_name = os.path.basename(file_path)
             return {"pages": page_count, "name": file_name, "title": title}
         except Exception:
             return {"pages": 0, "name": "Error", "title": ""}
     def _parse_range_groups(self, range_str: str, max_pages: int) -> list:
         groups = []
         parts = range_str.split(',')
         for part in parts:
             part = part.strip()
             if not part: continue
             current_group = []
             if '-' in part:
                 try:
                     start, end = map(int, part.split('-'))
                     start = max(1, start)
                     end = min(max_pages, end)
+                    if start <= end: current_group = list(range(start - 1, end))
                 except ValueError: continue
             else:
                 try:
                     p = int(part)
+                    if 1 <= p <= max_pages: current_group = [p - 1]
                 except ValueError: continue
+            if current_group: groups.append({"label": part, "indices": current_group})
         return groups
     # --- PREVISUALIZACIÓN ---
     def generate_preview(self, file_path: str, page_number: int) -> str:
         """Render a single page of a PDF to a JPEG and return the image path.

         ``page_number`` is passed as both ``first_page`` and ``last_page`` to
         ``convert_from_path``, with a requested size of ``(None, 400)``.
         Returns ``None`` when no image is produced or when anything raises.
         """
         try:
             rendered = convert_from_path(
                 file_path,
                 first_page=page_number,
                 last_page=page_number,
                 size=(None, 400),
             )
             if not rendered:
                 return None
             target = self._get_output_path(f"preview_pg{page_number}.jpg")
             rendered[0].save(target, "JPEG")
             return target
         except Exception:
             return None
     def get_rotated_preview(self, file_path: str, angle: int) -> str:
         """Render page 1 of a PDF as a JPEG rotated by ``angle`` and return its path.

         Returns ``None`` for a falsy ``file_path``, when no image is produced,
         or when anything raises.
         """
         if not file_path:
             return None
         try:
             rendered = convert_from_path(
                 file_path, first_page=1, last_page=1, size=(None, 500)
             )
             if not rendered:
                 return None
             picture = rendered[0]
             if angle != 0:
                 # The sign is flipped before handing the angle to Image.rotate.
                 picture = picture.rotate(-angle, expand=True)
             target = self._get_output_path(f"preview_rot_{angle}.jpg")
             picture.save(target, "JPEG")
             return target
         except Exception:
             return None
     def get_preview_indices_from_string(self, range_str: str, max_pages: int) -> list:
         key_pages = []
         parts = range_str.split(',')
         for part in parts:
     # --- FUNCIONALIDADES DE GESTIÓN DE PÁGINAS ---
     def merge_pdfs(self, file_paths: list, order_indices: list = None) -> str:
         """Concatenate several PDFs into one file and return the output path.

         Args:
             file_paths: Paths of the PDFs to merge.
             order_indices: Optional positions into ``file_paths`` giving the
                 merge order. It is honoured only when it has exactly as many
                 entries as ``file_paths`` and every entry converts to a usable
                 index; otherwise the order of ``file_paths`` is kept.

         Raises:
             ValueError: ``file_paths`` is empty.
             RuntimeError: Appending an input or writing the output failed.
         """
         if not file_paths:
             raise ValueError("No hay archivos.")
         ordered_paths = file_paths
         if order_indices and len(order_indices) == len(file_paths):
             try:
                 ordered_paths = [file_paths[int(idx)] for idx in order_indices]
             except (ValueError, TypeError, IndexError):
                 # Unusable ordering (non-numeric or out-of-range entry): keep
                 # the given order. Only these errors are trapped so that
                 # KeyboardInterrupt/SystemExit are no longer swallowed.
                 ordered_paths = file_paths
         merger = PdfWriter()
         try:
             for path in ordered_paths:
                 merger.append(path)
             output_path = self._get_output_path("unido_ordenado.pdf")
             with open(output_path, "wb") as f:
                 merger.write(f)
             return output_path
         except Exception as e:
             raise RuntimeError(f"Error al unir: {str(e)}") from e
         finally:
             merger.close()
     def split_pdf_custom(self, file_path: str, range_str: str) -> str:
         """Split a PDF into one file per requested range and return a ZIP of them.

         Args:
             file_path: Path of the PDF to split.
             range_str: Page spec understood by ``_parse_range_groups``
                 (e.g. ``"1-3, 5"``); each group becomes one output PDF.

         Returns:
             Path of a ZIP archive containing the generated part files.

         Raises:
             ValueError: ``file_path`` is falsy or no valid range was parsed.
         """
         if not file_path:
             raise ValueError("Falta archivo.")
         reader = PdfReader(file_path)
         groups = self._parse_range_groups(range_str, len(reader.pages))
         if not groups:
             raise ValueError("Rango inválido.")
         # Strip only a trailing ".pdf", in any letter case. The previous
         # str.replace(".pdf", "") missed ".PDF" and also removed ".pdf"
         # occurring in the middle of the name.
         name = os.path.basename(file_path)
         stem, ext = os.path.splitext(name)
         base = stem if ext.lower() == ".pdf" else name
         generated = []
         for group in groups:
             writer = PdfWriter()
             for idx in group["indices"]:
                 writer.add_page(reader.pages[idx])
             safe = group["label"].replace(" ", "")
             out = self._get_output_path(f"{base}_part_{safe}.pdf")
             with open(out, "wb") as fh:
                 writer.write(fh)
             generated.append(out)
         zpath = self._get_output_path(f"{base}_split_files.zip")
         with zipfile.ZipFile(zpath, 'w') as zipf:
             for part_path in generated:
                 zipf.write(part_path, arcname=os.path.basename(part_path))
         return zpath
     def reorder_pages(self, file_path: str, order_str: str) -> str:
         """Write a new PDF whose pages follow the order given in ``order_str``.

         ``order_str`` is parsed by ``_parse_range_groups`` (e.g. ``"3, 1, 4-6"``)
         and the resulting indices are copied in sequence.

         Raises:
             ValueError: ``file_path`` is falsy or nothing valid was parsed.
         """
         if not file_path:
             raise ValueError("Falta archivo.")
         reader = PdfReader(file_path)
         page_total = len(reader.pages)
         groups = self._parse_range_groups(order_str, page_total)
         if not groups:
             raise ValueError("Orden inválido.")
         sequence = [index for group in groups for index in group["indices"]]
         writer = PdfWriter()
         for index in sequence:
             writer.add_page(reader.pages[index])
         target = self._get_output_path("reordenado.pdf")
         with open(target, "wb") as handle:
             writer.write(handle)
         return target
     # --- EDICIÓN Y SEGURIDAD ---
     def compress_pdf(self, file_path: str, power: int = 2) -> str:
         """Re-write a PDF through Ghostscript's ``pdfwrite`` device.

         ``power`` selects the ``-dPDFSETTINGS`` preset: 0 ``/default``,
         1 ``/prepress``, 2 ``/printer``, 3 ``/ebook``, 4 ``/screen``; any
         other value falls back to ``/ebook``.

         Raises:
             ValueError: ``file_path`` is falsy.
             RuntimeError: ``gs`` exited with an error or could not be found.
         """
         if not file_path:
             raise ValueError("Falta archivo.")
         quality = dict(
             enumerate(("/default", "/prepress", "/printer", "/ebook", "/screen"))
         )
         gs_setting = quality.get(power, "/ebook")
         output_path = self._get_output_path("comprimido.pdf")
         cmd = ["gs", "-sDEVICE=pdfwrite", "-dCompatibilityLevel=1.4"]
         cmd.append(f"-dPDFSETTINGS={gs_setting}")
         cmd += ["-dNOPAUSE", "-dQUIET", "-dBATCH"]
         cmd.append(f"-sOutputFile={output_path}")
         cmd.append(file_path)
         try:
             subprocess.run(cmd, check=True)
         except subprocess.CalledProcessError as e:
             raise RuntimeError(f"Error Ghostscript: {e}")
         except FileNotFoundError:
             raise RuntimeError("Falta Ghostscript (packages.txt).")
         return output_path
     def protect_pdf(self, file_path: str, password: str) -> str:
         """Copy every page into a new PDF encrypted with ``password``.

         Raises:
             ValueError: ``file_path`` or ``password`` is falsy.
             RuntimeError: Reading, encrypting or writing failed.
         """
         if not (file_path and password):
             raise ValueError("Faltan datos.")
         try:
             source = PdfReader(file_path)
             protected = PdfWriter()
             for sheet in source.pages:
                 protected.add_page(sheet)
             protected.encrypt(password)
             target = self._get_output_path("protegido.pdf")
             with open(target, "wb") as handle:
                 protected.write(handle)
             return target
         except Exception as e:
             raise RuntimeError(f"Error proteger: {e}")
     def rotate_pdf(self, file_path: str, angle: int) -> str:
         """Apply ``page.rotate(angle)`` to every page and save the result.

         Raises:
             ValueError: ``file_path`` is falsy.
             RuntimeError: Reading, rotating or writing failed.
         """
         if not file_path:
             raise ValueError("Falta archivo.")
         try:
             source = PdfReader(file_path)
             rotated = PdfWriter()
             for sheet in source.pages:
                 sheet.rotate(angle)
                 rotated.add_page(sheet)
             target = self._get_output_path(f"rotado_{angle}.pdf")
             with open(target, "wb") as handle:
                 rotated.write(handle)
             return target
         except Exception as e:
             raise RuntimeError(f"Error rotar: {e}")
     def update_metadata(self, file_path: str, title: str, author: str, subject: str) -> str:
         """Copy the PDF and set its title, author, subject and producer fields.

         The producer is always written as ``"OpenPDF Tools"``.

         Raises:
             ValueError: ``file_path`` is falsy.
             RuntimeError: Reading or writing failed.
         """
         if not file_path:
             raise ValueError("Falta archivo.")
         try:
             source = PdfReader(file_path)
             edited = PdfWriter()
             for sheet in source.pages:
                 edited.add_page(sheet)
             fields = {
                 "/Title": title,
                 "/Author": author,
                 "/Subject": subject,
                 "/Producer": "OpenPDF Tools",
             }
             edited.add_metadata(fields)
             target = self._get_output_path("editado_meta.pdf")
             with open(target, "wb") as handle:
                 edited.write(handle)
             return target
         except Exception as e:
             raise RuntimeError(f"Error metadata: {e}")
+    # --- CONVERSIONES GENERALES ---
     def pdf_to_images_zip(self, file_path: str) -> str:
         """Render every page to a JPEG (150 DPI) and return a ZIP of the images.

         Raises:
             ValueError: ``file_path`` is falsy.
             RuntimeError: Rendering, saving or zipping failed.
         """
         if not file_path:
             raise ValueError("Falta archivo.")
         try:
             images = convert_from_path(file_path, dpi=150)
             # Strip only a trailing ".pdf", in any letter case. The previous
             # str.replace(".pdf", "") missed ".PDF" and also removed ".pdf"
             # occurring in the middle of the name.
             name = os.path.basename(file_path)
             stem, ext = os.path.splitext(name)
             base = stem if ext.lower() == ".pdf" else name
             paths = []
             for number, img in enumerate(images, start=1):
                 p = self._get_output_path(f"{base}_{number}.jpg")
                 img.save(p, "JPEG")
                 paths.append(p)
             zp = self._get_output_path(f"{base}_imgs.zip")
             with zipfile.ZipFile(zp, 'w') as z:
                 for p in paths:
                     z.write(p, arcname=os.path.basename(p))
             return zp
         except Exception as e:
             raise RuntimeError(f"Error PDF->IMG: {e}") from e
     def images_to_pdf(self, image_paths: list) -> str:
         """Combine the given images, in order, into a single multi-page PDF.

         Images whose mode is not ``'RGB'`` are converted to RGB before saving.

         Raises:
             ValueError: ``image_paths`` is empty.
             RuntimeError: Opening an image or writing the PDF failed.
         """
         if not image_paths:
             raise ValueError("No imágenes.")
         try:
             frames = []
             for source in image_paths:
                 picture = Image.open(source)
                 frames.append(picture if picture.mode == 'RGB' else picture.convert('RGB'))
             target = self._get_output_path("album.pdf")
             if frames:
                 cover, *rest = frames
                 cover.save(target, "PDF", resolution=100.0, save_all=True, append_images=rest)
             return target
         except Exception as e:
             raise RuntimeError(f"Error IMG->PDF: {e}")
     def pdf_to_word(self, file_path: str) -> str:
         """Convert a PDF to DOCX with ``pdf2docx`` and return the output path.

         Raises:
             ValueError: ``file_path`` is falsy.
             RuntimeError: The conversion failed.
         """
         if not file_path:
             raise ValueError("Falta archivo.")
         try:
             # Replace only a trailing ".pdf" (any case) so "X.PDF" no longer
             # yields a DOCX payload that still carries a PDF extension.
             name = os.path.basename(file_path)
             stem, ext = os.path.splitext(name)
             if ext.lower() != ".pdf":
                 stem = name
             out = self._get_output_path(stem + ".docx")
             cv = Converter(file_path)
             try:
                 cv.convert(out, start=0, end=None)
             finally:
                 # Release the converter even when convert() raises.
                 cv.close()
             return out
         except Exception as e:
             raise RuntimeError(f"Error PDF->Word: {e}") from e
     def extract_text(self, file_path: str) -> str:
         """Dump the extractable text of every page into a UTF-8 ``.txt`` file.

         Pages that yield no text are omitted; each remaining page is preceded
         by a ``--- Pág N ---`` marker (N is 1-based).

         Raises:
             ValueError: ``file_path`` is falsy.
             RuntimeError: Reading the PDF or writing the text file failed.
         """
         if not file_path:
             raise ValueError("Falta archivo.")
         try:
             reader = PdfReader(file_path)
             content = []
             for number, page in enumerate(reader.pages, start=1):
                 txt = page.extract_text()
                 if txt:
                     content.append(f"--- Pág {number} ---\n{txt}\n")
             # Replace only a trailing ".pdf" (any case); the previous
             # str.replace left "X.PDF" unchanged, producing a text file
             # that still carried a PDF extension.
             name = os.path.basename(file_path)
             stem, ext = os.path.splitext(name)
             if ext.lower() != ".pdf":
                 stem = name
             out = self._get_output_path(stem + ".txt")
             with open(out, "w", encoding="utf-8") as f:
                 f.write("\n".join(content))
             return out
         except Exception as e:
             raise RuntimeError(f"Error texto: {e}") from e
+    # --- NUEVAS CONVERSIONES OFFICE (v2.0) ---
+    def pdf_to_excel(self, file_path: str) -> str:
+        """
+        Extrae tablas del PDF y las guarda en un Excel (XLSX).
+        Crea una hoja por cada página que contenga tablas.
+        """
+        if not file_path: raise ValueError("Falta archivo.")
+        try:
+            xlsx_name = os.path.basename(file_path).replace(".pdf", ".xlsx")
+            output_path = self._get_output_path(xlsx_name)
+            has_tables = False
+            # Usamos ExcelWriter para escribir múltiples hojas
+            with pd.ExcelWriter(output_path, engine='openpyxl') as writer:
+                with pdfplumber.open(file_path) as pdf:
+                    for i, page in enumerate(pdf.pages):
+                        tables = page.extract_tables()
+                        if tables:
+                            has_tables = True
+                            # Si hay varias tablas en una página, las concatenamos o las ponemos una debajo de otra
+                            # Aquí, por simplicidad, cogemos la tabla más grande o concatenamos
+                            df_page = pd.DataFrame()
+                            for table in tables:
+                                df = pd.DataFrame(table)
+                                # Usar la primera fila como header si parece un header
+                                new_header = df.iloc[0]
+                                df = df[1:]
+                                df.columns = new_header
+                                df_page = pd.concat([df_page, df], ignore_index=True)
+                            sheet_name = f"Pagina_{i+1}"
+                            df_page.to_excel(writer, sheet_name=sheet_name, index=False)
+            if not has_tables:
+                raise ValueError("No se detectaron tablas con bordes claros en este PDF.")
             return output_path
         except Exception as e:
+            raise RuntimeError(f"Error PDF->Excel: {str(e)}")
+    def pdf_to_pptx(self, file_path: str) -> str:
         """
+        Convierte PDF a PowerPoint (PPTX).
+        Estrategia: Convertir cada página a Imagen -> Pegar en Diapositiva.
+        Esto preserva el formato visual exacto.
         """
+        if not file_path: raise ValueError("Falta archivo.")
+        try:
+            # 1. Convertir PDF a imágenes (HQ)
+            images = convert_from_path(file_path, dpi=200)
+            # 2. Crear presentación
+            prs = Presentation()
+            # Definir layout en blanco (index 6 suele ser blank en tema default)
+            BLANK_SLIDE_LAYOUT = 6
+            for i, img in enumerate(images):
+                # Guardar imagen temporal
+                img_path = self._get_output_path(f"temp_slide_{i}.jpg")
+                img.save(img_path, "JPEG")
+                # Añadir diapositiva
+                slide = prs.slides.add_slide(prs.slide_layouts[BLANK_SLIDE_LAYOUT])
+                # Ajustar tamaño de la diapositiva al tamaño de la imagen?
+                # Por simplicidad, ajustamos la imagen al tamaño de la diapositiva estándar (10x7.5 inches)
+                # left, top, width, height
+                slide.shapes.add_picture(img_path, Inches(0), Inches(0), width=prs.slide_width)
+            pptx_name = os.path.basename(file_path).replace(".pdf", ".pptx")
+            output_path = self._get_output_path(pptx_name)
+            prs.save(output_path)
+            return output_path
+        except Exception as e:
+            raise RuntimeError(f"Error PDF->PPTX: {str(e)}")
+    # --- ANÁLISIS ---
+    def compare_pdfs_visual(self, path_a: str, path_b: str) -> str:
+        if not path_a or not path_b: raise ValueError("Dos archivos requeridos.")
         try:
             imgs_a = convert_from_path(path_a, dpi=100)
             imgs_b = convert_from_path(path_b, dpi=100)
+        except Exception as e: raise RuntimeError(f"Error leyendo PDFs: {e}")
         min_pages = min(len(imgs_a), len(imgs_b))
         diff_pages = []
         for i in range(min_pages):
             arr_a = np.array(imgs_a[i])
             arr_b = np.array(imgs_b[i])
             if arr_a.shape != arr_b.shape:
                 h, w = arr_a.shape[:2]
                 arr_b = cv2.resize(arr_b, (w, h))
             gray_a = cv2.cvtColor(arr_a, cv2.COLOR_RGB2GRAY)
             gray_b = cv2.cvtColor(arr_b, cv2.COLOR_RGB2GRAY)
             diff = cv2.absdiff(gray_a, gray_b)
             _, thresh = cv2.threshold(diff, 30, 255, cv2.THRESH_BINARY)
             contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+            res = arr_a.copy()
             for cnt in contours:
                 x, y, w, h = cv2.boundingRect(cnt)
+                cv2.rectangle(res, (x, y), (x + w, y + h), (255, 0, 255), 2)
+            diff_pages.append(Image.fromarray(res))
+        if not diff_pages: raise ValueError("Error en comparación.")
+        out = self._get_output_path("comparativa.pdf")
+        diff_pages[0].save(out, "PDF", resolution=100.0, save_all=True, append_images=diff_pages[1:])
+        return out