DaniFera committed on
Commit
e267243
·
verified ·
1 Parent(s): 0833f03

Update core.py

Browse files
Files changed (1) hide show
  1. core.py +177 -285
core.py CHANGED
@@ -1,6 +1,6 @@
1
- # Versión 1.9: Core Completo (Todas las herramientas integradas)
2
  # Autor: Gemini (AI Assistant)
3
- # Descripción: Motor lógico de manipulación de PDFs. Independiente de la interfaz gráfica.
4
 
5
  import os
6
  import zipfile
@@ -8,10 +8,14 @@ import uuid
8
  import subprocess
9
  import cv2
10
  import numpy as np
 
 
11
  from pypdf import PdfWriter, PdfReader
12
  from pdf2image import convert_from_path
13
  from pdf2docx import Converter
14
  from PIL import Image
 
 
15
  from config import TEMP_DIR
16
 
17
  class PDFEngine:
@@ -24,102 +28,66 @@ class PDFEngine:
24
 
25
  @staticmethod
26
  def _get_output_path(filename: str) -> str:
27
- """Genera una ruta única en el directorio temporal para evitar colisiones."""
28
  unique_name = f"{uuid.uuid4().hex[:8]}_{filename}"
29
  return os.path.join(TEMP_DIR, unique_name)
30
 
31
  def get_pdf_info(self, file_path: str) -> dict:
32
- """Extrae metadatos básicos para mostrar al usuario."""
33
  try:
34
  reader = PdfReader(file_path)
35
  meta = reader.metadata
36
  title = meta.title if meta and meta.title else "Sin título"
37
- return {
38
- "pages": len(reader.pages),
39
- "name": os.path.basename(file_path),
40
- "title": title
41
- }
42
  except Exception:
43
  return {"pages": 0, "name": "Error", "title": ""}
44
 
45
  def _parse_range_groups(self, range_str: str, max_pages: int) -> list:
46
- """
47
- Analiza cadenas complejas como '1-3, 5' y devuelve grupos de índices.
48
- Ejemplo retorno: [{'label': '1-3', 'indices': [0, 1, 2]}, {'label': '5', 'indices': [4]}]
49
- """
50
  groups = []
51
  parts = range_str.split(',')
52
-
53
  for part in parts:
54
  part = part.strip()
55
  if not part: continue
56
-
57
  current_group = []
58
  if '-' in part:
59
  try:
60
  start, end = map(int, part.split('-'))
61
- # Ajustar límites y convertir a 0-based
62
  start = max(1, start)
63
  end = min(max_pages, end)
64
- if start <= end:
65
- current_group = list(range(start - 1, end))
66
  except ValueError: continue
67
  else:
68
  try:
69
  p = int(part)
70
- if 1 <= p <= max_pages:
71
- current_group = [p - 1]
72
  except ValueError: continue
73
-
74
- if current_group:
75
- groups.append({"label": part, "indices": current_group})
76
-
77
  return groups
78
 
79
  # --- PREVISUALIZACIÓN ---
80
 
81
  def generate_preview(self, file_path: str, page_number: int) -> str:
82
- """Genera JPG de una página específica (page_number es 1-based)."""
83
  try:
84
- # Usamos altura fija (400px) para rendimiento
85
  images = convert_from_path(file_path, first_page=page_number, last_page=page_number, size=(None, 400))
86
  if images:
87
  output_path = self._get_output_path(f"preview_pg{page_number}.jpg")
88
  images[0].save(output_path, "JPEG")
89
  return output_path
90
  return None
91
- except Exception as e:
92
- print(f"Error generando preview: {e}")
93
- return None
94
 
95
  def get_rotated_preview(self, file_path: str, angle: int) -> str:
96
- """
97
- Genera preview de la página 1 aplicando rotación visual.
98
- Angle: 0, 90, 180, 270 (Sentido Horario).
99
- """
100
  if not file_path: return None
101
  try:
102
- # Extraemos con un poco más de calidad (500px) para ver bien el texto al rotar
103
  images = convert_from_path(file_path, first_page=1, last_page=1, size=(None, 500))
104
  if not images: return None
105
-
106
  img = images[0]
107
-
108
- # Si el ángulo no es 0, rotamos.
109
- # Pillow rota 'Counter-Clockwise' (antihorario), pypdf rota 'Clockwise'.
110
- # Para simular lo que hará pypdf, usamos negativo (-angle).
111
- if angle != 0:
112
- img = img.rotate(-angle, expand=True)
113
-
114
  output_path = self._get_output_path(f"preview_rot_{angle}.jpg")
115
  img.save(output_path, "JPEG")
116
  return output_path
117
- except Exception as e:
118
- print(f"Error preview rotación: {e}")
119
- return None
120
 
121
  def get_preview_indices_from_string(self, range_str: str, max_pages: int) -> list:
122
- """Devuelve los números de página clave (inicio/fin) para visualizar rangos."""
123
  key_pages = []
124
  parts = range_str.split(',')
125
  for part in parts:
@@ -139,349 +107,273 @@ class PDFEngine:
139
  # --- FUNCIONALIDADES DE GESTIÓN DE PÁGINAS ---
140
 
141
  def merge_pdfs(self, file_paths: list, order_indices: list = None) -> str:
142
- """Une múltiples PDFs respetando el orden indicado."""
143
- if not file_paths: raise ValueError("No hay archivos para unir.")
144
-
145
  ordered_paths = []
146
  if order_indices and len(order_indices) == len(file_paths):
147
  try:
148
- for idx in order_indices:
149
- ordered_paths.append(file_paths[int(idx)])
150
- except:
151
- ordered_paths = file_paths
152
- else:
153
- ordered_paths = file_paths
154
-
155
  merger = PdfWriter()
156
  try:
157
- for path in ordered_paths:
158
- merger.append(path)
159
-
160
  output_path = self._get_output_path("unido_ordenado.pdf")
161
- with open(output_path, "wb") as f:
162
- merger.write(f)
163
  return output_path
164
- except Exception as e:
165
- raise RuntimeError(f"Error al unir: {str(e)}")
166
- finally:
167
- merger.close()
168
 
169
  def split_pdf_custom(self, file_path: str, range_str: str) -> str:
170
- """Divide PDF según rangos y devuelve un ZIP con los archivos."""
171
  if not file_path: raise ValueError("Falta archivo.")
172
-
173
  reader = PdfReader(file_path)
174
  total = len(reader.pages)
175
  groups = self._parse_range_groups(range_str, total)
176
-
177
- if not groups: raise ValueError("Rango inválido o vacío.")
178
-
179
- generated_files = []
180
- base_name = os.path.basename(file_path).replace(".pdf", "")
181
-
182
  for group in groups:
183
  writer = PdfWriter()
184
- for idx in group["indices"]:
185
- writer.add_page(reader.pages[idx])
186
-
187
- # Nombre seguro para el archivo
188
- safe_label = group["label"].replace(" ", "")
189
- out_name = f"{base_name}_part_{safe_label}.pdf"
190
- out_path = self._get_output_path(out_name)
191
-
192
- with open(out_path, "wb") as f:
193
- writer.write(f)
194
- generated_files.append(out_path)
195
-
196
- # Crear ZIP
197
- zip_name = f"{base_name}_split_files.zip"
198
- zip_path = self._get_output_path(zip_name)
199
-
200
- with zipfile.ZipFile(zip_path, 'w') as zipf:
201
- for file in generated_files:
202
- zipf.write(file, arcname=os.path.basename(file))
203
-
204
- return zip_path
205
 
206
  def reorder_pages(self, file_path: str, order_str: str) -> str:
207
- """
208
- Crea un nuevo PDF con las páginas en el orden especificado.
209
- order_str ejemplo: "3, 1, 2, 4-6"
210
- """
211
  if not file_path: raise ValueError("Falta archivo.")
212
-
213
  reader = PdfReader(file_path)
214
- total_pages = len(reader.pages)
215
-
216
- # Reutilizamos el parser de rangos para obtener la lista de índices
217
- groups = self._parse_range_groups(order_str, total_pages)
218
  if not groups: raise ValueError("Orden inválido.")
219
-
220
- flat_indices = []
221
- for g in groups:
222
- flat_indices.extend(g["indices"])
223
-
224
  writer = PdfWriter()
225
- for idx in flat_indices:
226
- writer.add_page(reader.pages[idx])
227
-
228
- output_path = self._get_output_path("reordenado.pdf")
229
- with open(output_path, "wb") as f:
230
- writer.write(f)
231
-
232
- return output_path
233
 
234
  # --- EDICIÓN Y SEGURIDAD ---
235
 
236
  def compress_pdf(self, file_path: str, power: int = 2) -> str:
237
- """
238
- Comprime PDF usando Ghostscript.
239
- power:
240
- 0: /default
241
- 1: /prepress
242
- 2: /printer
243
- 3: /ebook (Recomendado)
244
- 4: /screen
245
- """
246
  if not file_path: raise ValueError("Falta archivo.")
247
-
248
- # Mapeo de niveles de Ghostscript
249
- quality = {
250
- 0: "/default",
251
- 1: "/prepress",
252
- 2: "/printer",
253
- 3: "/ebook",
254
- 4: "/screen"
255
- }
256
  gs_setting = quality.get(power, "/ebook")
257
-
258
  output_path = self._get_output_path("comprimido.pdf")
259
-
260
- # Comando Ghostscript
261
- cmd = [
262
- "gs",
263
- "-sDEVICE=pdfwrite",
264
- "-dCompatibilityLevel=1.4",
265
- f"-dPDFSETTINGS={gs_setting}",
266
- "-dNOPAUSE",
267
- "-dQUIET",
268
- "-dBATCH",
269
- f"-sOutputFile={output_path}",
270
- file_path
271
- ]
272
-
273
  try:
274
  subprocess.run(cmd, check=True)
275
  return output_path
276
- except subprocess.CalledProcessError as e:
277
- raise RuntimeError(f"Error en compresión (Ghostscript): {e}")
278
- except FileNotFoundError:
279
- raise RuntimeError("Error: Ghostscript no está instalado en el sistema (packages.txt).")
280
 
281
  def protect_pdf(self, file_path: str, password: str) -> str:
282
- """Cifra el PDF con contraseña."""
283
  if not file_path or not password: raise ValueError("Faltan datos.")
284
-
285
  try:
286
  reader = PdfReader(file_path)
287
  writer = PdfWriter()
288
-
289
- for page in reader.pages:
290
- writer.add_page(page)
291
-
292
  writer.encrypt(password)
293
- output_path = self._get_output_path("protegido.pdf")
294
- with open(output_path, "wb") as f:
295
- writer.write(f)
296
- return output_path
297
- except Exception as e:
298
- raise RuntimeError(f"Error al proteger: {e}")
299
 
300
  def rotate_pdf(self, file_path: str, angle: int) -> str:
301
- """Rota permanentemente todas las páginas del PDF."""
302
  if not file_path: raise ValueError("Falta archivo.")
303
-
304
  try:
305
  reader = PdfReader(file_path)
306
  writer = PdfWriter()
307
-
308
  for page in reader.pages:
309
  page.rotate(angle)
310
  writer.add_page(page)
311
-
312
- output_path = self._get_output_path(f"rotado_{angle}.pdf")
313
- with open(output_path, "wb") as f:
314
- writer.write(f)
315
- return output_path
316
- except Exception as e:
317
- raise RuntimeError(f"Error al rotar: {e}")
318
 
319
  def update_metadata(self, file_path: str, title: str, author: str, subject: str) -> str:
320
- """Actualiza título, autor y asunto."""
321
  if not file_path: raise ValueError("Falta archivo.")
322
-
323
  try:
324
  reader = PdfReader(file_path)
325
  writer = PdfWriter()
326
-
327
- for page in reader.pages:
328
- writer.add_page(page)
329
-
330
- new_meta = {
331
- "/Title": title,
332
- "/Author": author,
333
- "/Subject": subject,
334
- "/Producer": "OpenPDF Tools (Libre)"
335
- }
336
- writer.add_metadata(new_meta)
337
-
338
- output_path = self._get_output_path("editado_meta.pdf")
339
- with open(output_path, "wb") as f:
340
- writer.write(f)
341
- return output_path
342
- except Exception as e:
343
- raise RuntimeError(f"Error actualizando metadatos: {e}")
344
 
345
- # --- CONVERSIONES ---
346
 
347
  def pdf_to_images_zip(self, file_path: str) -> str:
348
- """Convierte PDF a JPGs y devuelve ZIP."""
349
  if not file_path: raise ValueError("Falta archivo.")
350
-
351
  try:
352
- # 150 DPI es buen balance calidad/velocidad para web
353
  images = convert_from_path(file_path, dpi=150)
354
- base_name = os.path.basename(file_path).replace(".pdf", "")
355
- img_paths = []
356
-
357
  for i, img in enumerate(images):
358
- name = f"{base_name}_pag_{i+1}.jpg"
359
- p = self._get_output_path(name)
360
  img.save(p, "JPEG")
361
- img_paths.append(p)
362
-
363
- zip_path = self._get_output_path(f"{base_name}_imagenes.zip")
364
- with zipfile.ZipFile(zip_path, 'w') as zipf:
365
- for p in img_paths:
366
- zipf.write(p, arcname=os.path.basename(p))
367
-
368
- return zip_path
369
- except Exception as e:
370
- raise RuntimeError(f"Error conversión PDF->IMG: {e}")
371
 
372
  def images_to_pdf(self, image_paths: list) -> str:
373
- """Convierte lista de imágenes a un PDF."""
374
- if not image_paths: raise ValueError("No hay imágenes.")
375
-
376
  try:
377
- img_objs = []
378
  for p in image_paths:
379
  img = Image.open(p)
380
- if img.mode != 'RGB':
381
- img = img.convert('RGB')
382
- img_objs.append(img)
383
-
384
- output_path = self._get_output_path("album_imagenes.pdf")
385
-
386
- if img_objs:
387
- img_objs[0].save(
388
- output_path, "PDF",
389
- resolution=100.0,
390
- save_all=True,
391
- append_images=img_objs[1:]
392
- )
393
- return output_path
394
- except Exception as e:
395
- raise RuntimeError(f"Error conversión IMG->PDF: {e}")
396
 
397
  def pdf_to_word(self, file_path: str) -> str:
398
- """Convierte PDF a DOCX (Beta)."""
399
  if not file_path: raise ValueError("Falta archivo.")
400
-
401
  try:
402
- docx_filename = os.path.basename(file_path).replace(".pdf", ".docx")
403
- output_path = self._get_output_path(docx_filename)
404
-
405
  cv = Converter(file_path)
406
- cv.convert(output_path, start=0, end=None)
407
  cv.close()
408
- return output_path
409
- except Exception as e:
410
- raise RuntimeError(f"Error al convertir a Word: {e}")
411
 
412
  def extract_text(self, file_path: str) -> str:
413
- """Extrae texto plano a .txt."""
414
  if not file_path: raise ValueError("Falta archivo.")
415
-
416
  try:
417
  reader = PdfReader(file_path)
418
- text_content = []
419
  for i, page in enumerate(reader.pages):
420
  txt = page.extract_text()
421
- if txt:
422
- text_content.append(f"--- Página {i+1} ---\n{txt}\n")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
423
 
424
- txt_filename = os.path.basename(file_path).replace(".pdf", ".txt")
425
- output_path = self._get_output_path(txt_filename)
426
 
427
- with open(output_path, "w", encoding="utf-8") as f:
428
- f.write("\n".join(text_content))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
429
  return output_path
430
  except Exception as e:
431
- raise RuntimeError(f"Error extrayendo texto: {e}")
432
 
433
- # --- ANÁLISIS Y COMPARACIÓN ---
434
-
435
- def compare_pdfs_visual(self, path_a: str, path_b: str) -> str:
436
  """
437
- Compara visualmente dos PDFs página por página y resalta diferencias.
438
- Devuelve un PDF compuesto por imágenes de las diferencias.
 
439
  """
440
- if not path_a or not path_b: raise ValueError("Se requieren dos archivos.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
441
 
 
 
442
  try:
443
  imgs_a = convert_from_path(path_a, dpi=100)
444
  imgs_b = convert_from_path(path_b, dpi=100)
445
- except Exception as e:
446
- raise RuntimeError(f"Error leyendo PDFs para comparar: {e}")
447
 
448
  min_pages = min(len(imgs_a), len(imgs_b))
449
  diff_pages = []
450
-
451
  for i in range(min_pages):
452
- # Convertir PIL a Numpy Array (RGB)
453
  arr_a = np.array(imgs_a[i])
454
  arr_b = np.array(imgs_b[i])
455
-
456
- # Asegurar mismo tamaño
457
  if arr_a.shape != arr_b.shape:
458
  h, w = arr_a.shape[:2]
459
  arr_b = cv2.resize(arr_b, (w, h))
460
-
461
- # Calcular diferencia
462
  gray_a = cv2.cvtColor(arr_a, cv2.COLOR_RGB2GRAY)
463
  gray_b = cv2.cvtColor(arr_b, cv2.COLOR_RGB2GRAY)
464
-
465
  diff = cv2.absdiff(gray_a, gray_b)
466
  _, thresh = cv2.threshold(diff, 30, 255, cv2.THRESH_BINARY)
467
  contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
468
-
469
- result_img = arr_a.copy()
470
  for cnt in contours:
471
  x, y, w, h = cv2.boundingRect(cnt)
472
- cv2.rectangle(result_img, (x, y), (x + w, y + h), (255, 0, 255), 2)
473
-
474
- diff_pages.append(Image.fromarray(result_img))
475
-
476
- if not diff_pages:
477
- raise ValueError("No se pudieron generar páginas de comparación o no hay páginas comunes.")
478
-
479
- output_path = self._get_output_path("comparativa_diferencias.pdf")
480
- diff_pages[0].save(
481
- output_path, "PDF",
482
- resolution=100.0,
483
- save_all=True,
484
- append_images=diff_pages[1:]
485
- )
486
 
487
- return output_path
 
 
 
 
1
+ # Versión 2.0: Core Completo con Excel y PowerPoint
2
  # Autor: Gemini (AI Assistant)
3
+ # Descripción: Motor lógico de manipulación de PDFs. Incluye conversión a formatos Office.
4
 
5
  import os
6
  import zipfile
 
8
  import subprocess
9
  import cv2
10
  import numpy as np
11
+ import pdfplumber
12
+ import pandas as pd
13
  from pypdf import PdfWriter, PdfReader
14
  from pdf2image import convert_from_path
15
  from pdf2docx import Converter
16
  from PIL import Image
17
+ from pptx import Presentation
18
+ from pptx.util import Inches
19
  from config import TEMP_DIR
20
 
21
  class PDFEngine:
 
28
 
29
  @staticmethod
30
  def _get_output_path(filename: str) -> str:
31
+ """Genera una ruta única en el directorio temporal."""
32
  unique_name = f"{uuid.uuid4().hex[:8]}_{filename}"
33
  return os.path.join(TEMP_DIR, unique_name)
34
 
35
  def get_pdf_info(self, file_path: str) -> dict:
 
36
  try:
37
  reader = PdfReader(file_path)
38
  meta = reader.metadata
39
  title = meta.title if meta and meta.title else "Sin título"
40
+ return {"pages": len(reader.pages), "name": os.path.basename(file_path), "title": title}
 
 
 
 
41
  except Exception:
42
  return {"pages": 0, "name": "Error", "title": ""}
43
 
44
  def _parse_range_groups(self, range_str: str, max_pages: int) -> list:
 
 
 
 
45
  groups = []
46
  parts = range_str.split(',')
 
47
  for part in parts:
48
  part = part.strip()
49
  if not part: continue
 
50
  current_group = []
51
  if '-' in part:
52
  try:
53
  start, end = map(int, part.split('-'))
 
54
  start = max(1, start)
55
  end = min(max_pages, end)
56
+ if start <= end: current_group = list(range(start - 1, end))
 
57
  except ValueError: continue
58
  else:
59
  try:
60
  p = int(part)
61
+ if 1 <= p <= max_pages: current_group = [p - 1]
 
62
  except ValueError: continue
63
+ if current_group: groups.append({"label": part, "indices": current_group})
 
 
 
64
  return groups
65
 
66
  # --- PREVISUALIZACIÓN ---
67
 
68
  def generate_preview(self, file_path: str, page_number: int) -> str:
 
69
  try:
 
70
  images = convert_from_path(file_path, first_page=page_number, last_page=page_number, size=(None, 400))
71
  if images:
72
  output_path = self._get_output_path(f"preview_pg{page_number}.jpg")
73
  images[0].save(output_path, "JPEG")
74
  return output_path
75
  return None
76
+ except Exception: return None
 
 
77
 
78
  def get_rotated_preview(self, file_path: str, angle: int) -> str:
 
 
 
 
79
  if not file_path: return None
80
  try:
 
81
  images = convert_from_path(file_path, first_page=1, last_page=1, size=(None, 500))
82
  if not images: return None
 
83
  img = images[0]
84
+ if angle != 0: img = img.rotate(-angle, expand=True)
 
 
 
 
 
 
85
  output_path = self._get_output_path(f"preview_rot_{angle}.jpg")
86
  img.save(output_path, "JPEG")
87
  return output_path
88
+ except Exception: return None
 
 
89
 
90
  def get_preview_indices_from_string(self, range_str: str, max_pages: int) -> list:
 
91
  key_pages = []
92
  parts = range_str.split(',')
93
  for part in parts:
 
107
  # --- FUNCIONALIDADES DE GESTIÓN DE PÁGINAS ---
108
 
109
  def merge_pdfs(self, file_paths: list, order_indices: list = None) -> str:
110
+ if not file_paths: raise ValueError("No hay archivos.")
 
 
111
  ordered_paths = []
112
  if order_indices and len(order_indices) == len(file_paths):
113
  try:
114
+ for idx in order_indices: ordered_paths.append(file_paths[int(idx)])
115
+ except: ordered_paths = file_paths
116
+ else: ordered_paths = file_paths
 
 
 
 
117
  merger = PdfWriter()
118
  try:
119
+ for path in ordered_paths: merger.append(path)
 
 
120
  output_path = self._get_output_path("unido_ordenado.pdf")
121
+ with open(output_path, "wb") as f: merger.write(f)
 
122
  return output_path
123
+ except Exception as e: raise RuntimeError(f"Error al unir: {str(e)}")
124
+ finally: merger.close()
 
 
125
 
126
  def split_pdf_custom(self, file_path: str, range_str: str) -> str:
 
127
  if not file_path: raise ValueError("Falta archivo.")
 
128
  reader = PdfReader(file_path)
129
  total = len(reader.pages)
130
  groups = self._parse_range_groups(range_str, total)
131
+ if not groups: raise ValueError("Rango inválido.")
132
+ generated = []
133
+ base = os.path.basename(file_path).replace(".pdf", "")
 
 
 
134
  for group in groups:
135
  writer = PdfWriter()
136
+ for idx in group["indices"]: writer.add_page(reader.pages[idx])
137
+ safe = group["label"].replace(" ", "")
138
+ out = self._get_output_path(f"{base}_part_{safe}.pdf")
139
+ with open(out, "wb") as f: writer.write(f)
140
+ generated.append(out)
141
+ zname = f"{base}_split_files.zip"
142
+ zpath = self._get_output_path(zname)
143
+ with zipfile.ZipFile(zpath, 'w') as zipf:
144
+ for f in generated: zipf.write(f, arcname=os.path.basename(f))
145
+ return zpath
 
 
 
 
 
 
 
 
 
 
 
146
 
147
  def reorder_pages(self, file_path: str, order_str: str) -> str:
 
 
 
 
148
  if not file_path: raise ValueError("Falta archivo.")
 
149
  reader = PdfReader(file_path)
150
+ groups = self._parse_range_groups(order_str, len(reader.pages))
 
 
 
151
  if not groups: raise ValueError("Orden inválido.")
152
+ flat = []
153
+ for g in groups: flat.extend(g["indices"])
 
 
 
154
  writer = PdfWriter()
155
+ for idx in flat: writer.add_page(reader.pages[idx])
156
+ out = self._get_output_path("reordenado.pdf")
157
+ with open(out, "wb") as f: writer.write(f)
158
+ return out
 
 
 
 
159
 
160
  # --- EDICIÓN Y SEGURIDAD ---
161
 
162
  def compress_pdf(self, file_path: str, power: int = 2) -> str:
 
 
 
 
 
 
 
 
 
163
  if not file_path: raise ValueError("Falta archivo.")
164
+ quality = {0: "/default", 1: "/prepress", 2: "/printer", 3: "/ebook", 4: "/screen"}
 
 
 
 
 
 
 
 
165
  gs_setting = quality.get(power, "/ebook")
 
166
  output_path = self._get_output_path("comprimido.pdf")
167
+ cmd = ["gs", "-sDEVICE=pdfwrite", "-dCompatibilityLevel=1.4", f"-dPDFSETTINGS={gs_setting}", "-dNOPAUSE", "-dQUIET", "-dBATCH", f"-sOutputFile={output_path}", file_path]
 
 
 
 
 
 
 
 
 
 
 
 
 
168
  try:
169
  subprocess.run(cmd, check=True)
170
  return output_path
171
+ except subprocess.CalledProcessError as e: raise RuntimeError(f"Error Ghostscript: {e}")
172
+ except FileNotFoundError: raise RuntimeError("Falta Ghostscript (packages.txt).")
 
 
173
 
174
  def protect_pdf(self, file_path: str, password: str) -> str:
 
175
  if not file_path or not password: raise ValueError("Faltan datos.")
 
176
  try:
177
  reader = PdfReader(file_path)
178
  writer = PdfWriter()
179
+ for page in reader.pages: writer.add_page(page)
 
 
 
180
  writer.encrypt(password)
181
+ out = self._get_output_path("protegido.pdf")
182
+ with open(out, "wb") as f: writer.write(f)
183
+ return out
184
+ except Exception as e: raise RuntimeError(f"Error proteger: {e}")
 
 
185
 
186
  def rotate_pdf(self, file_path: str, angle: int) -> str:
 
187
  if not file_path: raise ValueError("Falta archivo.")
 
188
  try:
189
  reader = PdfReader(file_path)
190
  writer = PdfWriter()
 
191
  for page in reader.pages:
192
  page.rotate(angle)
193
  writer.add_page(page)
194
+ out = self._get_output_path(f"rotado_{angle}.pdf")
195
+ with open(out, "wb") as f: writer.write(f)
196
+ return out
197
+ except Exception as e: raise RuntimeError(f"Error rotar: {e}")
 
 
 
198
 
199
  def update_metadata(self, file_path: str, title: str, author: str, subject: str) -> str:
 
200
  if not file_path: raise ValueError("Falta archivo.")
 
201
  try:
202
  reader = PdfReader(file_path)
203
  writer = PdfWriter()
204
+ for page in reader.pages: writer.add_page(page)
205
+ writer.add_metadata({"/Title": title, "/Author": author, "/Subject": subject, "/Producer": "OpenPDF Tools"})
206
+ out = self._get_output_path("editado_meta.pdf")
207
+ with open(out, "wb") as f: writer.write(f)
208
+ return out
209
+ except Exception as e: raise RuntimeError(f"Error metadata: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
210
 
211
+ # --- CONVERSIONES GENERALES ---
212
 
213
  def pdf_to_images_zip(self, file_path: str) -> str:
 
214
  if not file_path: raise ValueError("Falta archivo.")
 
215
  try:
 
216
  images = convert_from_path(file_path, dpi=150)
217
+ base = os.path.basename(file_path).replace(".pdf", "")
218
+ paths = []
 
219
  for i, img in enumerate(images):
220
+ p = self._get_output_path(f"{base}_{i+1}.jpg")
 
221
  img.save(p, "JPEG")
222
+ paths.append(p)
223
+ zp = self._get_output_path(f"{base}_imgs.zip")
224
+ with zipfile.ZipFile(zp, 'w') as z:
225
+ for p in paths: z.write(p, arcname=os.path.basename(p))
226
+ return zp
227
+ except Exception as e: raise RuntimeError(f"Error PDF->IMG: {e}")
 
 
 
 
228
 
229
  def images_to_pdf(self, image_paths: list) -> str:
230
+ if not image_paths: raise ValueError("No imágenes.")
 
 
231
  try:
232
+ objs = []
233
  for p in image_paths:
234
  img = Image.open(p)
235
+ if img.mode != 'RGB': img = img.convert('RGB')
236
+ objs.append(img)
237
+ out = self._get_output_path("album.pdf")
238
+ if objs: objs[0].save(out, "PDF", resolution=100.0, save_all=True, append_images=objs[1:])
239
+ return out
240
+ except Exception as e: raise RuntimeError(f"Error IMG->PDF: {e}")
 
 
 
 
 
 
 
 
 
 
241
 
242
  def pdf_to_word(self, file_path: str) -> str:
 
243
  if not file_path: raise ValueError("Falta archivo.")
 
244
  try:
245
+ docx = os.path.basename(file_path).replace(".pdf", ".docx")
246
+ out = self._get_output_path(docx)
 
247
  cv = Converter(file_path)
248
+ cv.convert(out, start=0, end=None)
249
  cv.close()
250
+ return out
251
+ except Exception as e: raise RuntimeError(f"Error PDF->Word: {e}")
 
252
 
253
  def extract_text(self, file_path: str) -> str:
 
254
  if not file_path: raise ValueError("Falta archivo.")
 
255
  try:
256
  reader = PdfReader(file_path)
257
+ content = []
258
  for i, page in enumerate(reader.pages):
259
  txt = page.extract_text()
260
+ if txt: content.append(f"--- Pág {i+1} ---\n{txt}\n")
261
+ out = self._get_output_path(os.path.basename(file_path).replace(".pdf", ".txt"))
262
+ with open(out, "w", encoding="utf-8") as f: f.write("\n".join(content))
263
+ return out
264
+ except Exception as e: raise RuntimeError(f"Error texto: {e}")
265
+
266
+ # --- NUEVAS CONVERSIONES OFFICE (v2.0) ---
267
+
268
def pdf_to_excel(self, file_path: str) -> str:
    """Extract the tables of a PDF into an Excel workbook (XLSX).

    One sheet named ``Pagina_<n>`` is created for each page that contains
    at least one non-empty table. The first row of every table is used as
    its header, and the tables of a page are concatenated on that sheet.

    All tables are collected before the workbook is opened, so a PDF
    without tables produces the "no tables" message rather than a failure
    while saving a workbook that has no sheets.

    Raises:
        ValueError: If ``file_path`` is empty.
        RuntimeError: If no table is found or extraction/writing fails.
    """
    if not file_path:
        raise ValueError("Falta archivo.")

    try:
        sheets = {}
        with pdfplumber.open(file_path) as pdf:
            for i, page in enumerate(pdf.pages):
                frames = []
                for table in page.extract_tables():
                    # A table without rows has no header row to promote.
                    if not table:
                        continue
                    df = pd.DataFrame(table)
                    header = df.iloc[0]
                    df = df[1:]
                    df.columns = header
                    frames.append(df)
                if frames:
                    sheets[f"Pagina_{i+1}"] = pd.concat(frames, ignore_index=True)

        if not sheets:
            raise ValueError("No se detectaron tablas con bordes claros en este PDF.")

        # splitext strips only the last extension, so "A.PDF" becomes "A.xlsx".
        stem, _ = os.path.splitext(os.path.basename(file_path))
        output_path = self._get_output_path(f"{stem}.xlsx")
        with pd.ExcelWriter(output_path, engine='openpyxl') as writer:
            for sheet_name, df_page in sheets.items():
                df_page.to_excel(writer, sheet_name=sheet_name, index=False)

        return output_path
    except Exception as e:
        raise RuntimeError(f"Error PDF->Excel: {str(e)}") from e
308
 
309
def pdf_to_pptx(self, file_path: str) -> str:
    """Convert a PDF to PowerPoint (PPTX) with one page image per slide.

    Each page is rendered at 200 DPI, saved as a JPEG in the temporary
    directory and placed on a blank slide. The picture is scaled to fit
    entirely inside the slide, keeping its aspect ratio, and centred, so
    portrait pages are not cut off at the bottom of a landscape slide.

    Raises:
        ValueError: If ``file_path`` is empty.
        RuntimeError: If rendering or building the presentation fails.
    """
    if not file_path:
        raise ValueError("Falta archivo.")

    try:
        images = convert_from_path(file_path, dpi=200)
        prs = Presentation()

        # Layout index 6 is expected to be the blank layout of the default theme.
        blank_layout = prs.slide_layouts[6]
        slide_w, slide_h = prs.slide_width, prs.slide_height

        for i, img in enumerate(images):
            img_path = self._get_output_path(f"temp_slide_{i}.jpg")
            img.save(img_path, "JPEG")

            slide = prs.slides.add_slide(blank_layout)

            # Largest scale at which both dimensions still fit in the slide.
            img_w, img_h = img.size
            scale = min(slide_w / img_w, slide_h / img_h)
            pic_w = int(img_w * scale)
            pic_h = int(img_h * scale)
            left = (slide_w - pic_w) // 2
            top = (slide_h - pic_h) // 2
            slide.shapes.add_picture(img_path, left, top, width=pic_w, height=pic_h)

        # splitext strips only the last extension, so "A.PDF" becomes "A.pptx".
        stem, _ = os.path.splitext(os.path.basename(file_path))
        output_path = self._get_output_path(f"{stem}.pptx")
        prs.save(output_path)

        return output_path
    except Exception as e:
        raise RuntimeError(f"Error PDF->PPTX: {str(e)}") from e
347
+
348
+ # --- ANÁLISIS ---
349
 
350
def compare_pdfs_visual(self, path_a: str, path_b: str) -> str:
    """Compare two PDFs page by page and return a PDF highlighting differences.

    Both documents are rendered at 100 DPI. Only the pages present in both
    are compared. For each pair, a magenta rectangle is drawn on a copy of
    the page from ``path_a`` around every region whose grayscale difference
    exceeds the threshold of 30.

    Raises:
        ValueError: If a path is missing or there are no pages to compare.
        RuntimeError: If either PDF cannot be rendered.
    """
    if not (path_a and path_b):
        raise ValueError("Dos archivos requeridos.")
    try:
        pages_a = convert_from_path(path_a, dpi=100)
        pages_b = convert_from_path(path_b, dpi=100)
    except Exception as e:
        raise RuntimeError(f"Error leyendo PDFs: {e}")

    annotated = []
    # zip stops at the shorter document, so extra pages are ignored.
    for page_a, page_b in zip(pages_a, pages_b):
        pixels_a = np.array(page_a)
        pixels_b = np.array(page_b)

        # Resize the second page so both arrays have the same dimensions.
        if pixels_a.shape != pixels_b.shape:
            height, width = pixels_a.shape[:2]
            pixels_b = cv2.resize(pixels_b, (width, height))

        delta = cv2.absdiff(
            cv2.cvtColor(pixels_a, cv2.COLOR_RGB2GRAY),
            cv2.cvtColor(pixels_b, cv2.COLOR_RGB2GRAY),
        )
        _, mask = cv2.threshold(delta, 30, 255, cv2.THRESH_BINARY)
        regions, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        canvas = pixels_a.copy()
        for region in regions:
            x, y, box_w, box_h = cv2.boundingRect(region)
            cv2.rectangle(canvas, (x, y), (x + box_w, y + box_h), (255, 0, 255), 2)
        annotated.append(Image.fromarray(canvas))

    if not annotated:
        raise ValueError("Error en comparación.")
    target = self._get_output_path("comparativa.pdf")
    annotated[0].save(
        target,
        "PDF",
        resolution=100.0,
        save_all=True,
        append_images=annotated[1:],
    )
    return target