DaniFera committed on
Commit
38af16a
·
verified ·
1 Parent(s): e5e7fa1

Update core.py

Browse files
Files changed (1) hide show
  1. core.py +12 -35
core.py CHANGED
@@ -1,10 +1,10 @@
1
- # Versión 2.4
2
  import os
3
  import zipfile
4
  import uuid
5
  import subprocess
6
  import difflib
7
- import io
8
  import pdfplumber
9
  import pandas as pd
10
  from pypdf import PdfWriter, PdfReader, Transformation
@@ -14,14 +14,13 @@ from PIL import Image
14
  from pptx import Presentation
15
  from pptx.util import Inches
16
 
17
- # ReportLab para generar PDFs (Informes y Capas de texto)
18
  from reportlab.lib.pagesizes import A4, letter
19
  from reportlab.lib import colors
20
  from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
21
  from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
22
  from reportlab.lib.enums import TA_JUSTIFY
23
- from reportlab.pdfgen import canvas
24
- from reportlab.lib.units import inch
25
 
26
  from config import TEMP_DIR
27
 
@@ -102,29 +101,25 @@ class PDFEngine:
102
  except ValueError: continue
103
  return sorted(list(set(key_pages)))
104
 
105
- # --- CORE PDF TOOLS ---
106
-
107
  def merge_pdfs(self, file_paths: list, order_indices: list = None, use_numbering: bool = False) -> str:
108
  if not file_paths: raise ValueError("No hay archivos.")
109
 
110
- # 1. Ordenar
111
  ordered = []
112
  if order_indices and len(order_indices) == len(file_paths):
113
  try: ordered = [file_paths[int(i)] for i in order_indices]
114
  except: ordered = file_paths
115
  else: ordered = file_paths
116
 
117
- # 2. Unir
118
  m = PdfWriter()
119
  for p in ordered: m.append(p)
120
 
121
  temp_out = self._get_output_path("temp_unido.pdf")
122
  with open(temp_out, "wb") as f: m.write(f)
123
 
124
- # 3. Numerar (Si se solicitó)
125
  if use_numbering:
126
  final_out = self._add_page_numbers(temp_out)
127
- # Intentar borrar el intermedio para no acumular basura (aunque el cron lo haría)
128
  try: os.remove(temp_out)
129
  except: pass
130
  return final_out
@@ -137,25 +132,16 @@ class PDFEngine:
137
  num_pages = len(reader.pages)
138
 
139
  for i, page in enumerate(reader.pages):
140
- # Crear un PDF en memoria con solo el número de página
141
  packet = io.BytesIO()
142
- # Usamos canvas de reportlab
143
  can = canvas.Canvas(packet, pagesize=letter)
144
-
145
- # Obtener tamaño de la página original para centrar el número
146
  page_width = float(page.mediabox.width)
147
-
148
- # Dibujar texto: "Página X de Y" en el centro inferior
149
  text = f"Página {i+1} de {num_pages}"
150
  can.setFont("Helvetica", 10)
151
- can.drawCentredString(page_width / 2.0, 20, text) # 20 puntos desde abajo
152
  can.save()
153
 
154
- # Mover al inicio del buffer
155
  packet.seek(0)
156
  new_pdf = PdfReader(packet)
157
-
158
- # Fusionar la capa del número con la página original
159
  page.merge_page(new_pdf.pages[0])
160
  writer.add_page(page)
161
 
@@ -169,16 +155,14 @@ class PDFEngine:
169
  reader = PdfReader(file_path)
170
  writer = PdfWriter()
171
 
172
- # Crear la marca de agua en memoria una sola vez
173
  packet = io.BytesIO()
174
  can = canvas.Canvas(packet, pagesize=letter)
175
  can.setFont("Helvetica-Bold", 50)
176
- can.setFillColorRGB(0.5, 0.5, 0.5, 0.3) # Gris semi-transparente
177
 
178
- # Guardar estado, rotar y escribir en el centro (aprox)
179
  can.saveState()
180
- can.translate(300, 400) # Mover origen al centro aprox
181
- can.rotate(45) # Rotar 45 grados
182
  can.drawCentredString(0, 0, text)
183
  can.restoreState()
184
  can.save()
@@ -187,9 +171,7 @@ class PDFEngine:
187
  watermark_pdf = PdfReader(packet)
188
  watermark_page = watermark_pdf.pages[0]
189
 
190
- # Aplicar a todas las páginas
191
  for page in reader.pages:
192
- # Es necesario clonar el objeto watermark para cada pagina o pypdf se queja a veces
193
  page.merge_page(watermark_page)
194
  writer.add_page(page)
195
 
@@ -197,21 +179,16 @@ class PDFEngine:
197
  with open(out, "wb") as f: writer.write(f)
198
  return out
199
 
200
- # --- NUEVO: Reparador (Ghostscript) ---
201
  def repair_pdf(self, file_path: str) -> str:
202
  if not file_path: raise ValueError("Falta archivo.")
203
 
204
  out = self._get_output_path("reparado.pdf")
205
-
206
- # Comando mágico de Ghostscript para regenerar PDFs
207
- # -sDEVICE=pdfwrite : Reescribe el PDF
208
- # -dPDFSETTINGS=/default : Calidad normal
209
  cmd = [
210
  "gs",
211
- "-o", out, # Output file
212
  "-sDEVICE=pdfwrite",
213
  "-dPDFSETTINGS=/default",
214
- "-dInteract=N", # No interactivo
215
  "-dNOPAUSE", "-dQUIET", "-dBATCH",
216
  file_path
217
  ]
 
1
+ ## Versión Alfa 1.0
2
  import os
3
  import zipfile
4
  import uuid
5
  import subprocess
6
  import difflib
7
+ import io
8
  import pdfplumber
9
  import pandas as pd
10
  from pypdf import PdfWriter, PdfReader, Transformation
 
14
  from pptx import Presentation
15
  from pptx.util import Inches
16
 
 
17
  from reportlab.lib.pagesizes import A4, letter
18
  from reportlab.lib import colors
19
  from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
20
  from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
21
  from reportlab.lib.enums import TA_JUSTIFY
22
+ from reportlab.pdfgen import canvas
23
+ from reportlab.lib.units import inch
24
 
25
  from config import TEMP_DIR
26
 
 
101
  except ValueError: continue
102
  return sorted(list(set(key_pages)))
103
 
104
+ # --- HERRAMIENTAS PRINCIPALES ---
105
+
106
  def merge_pdfs(self, file_paths: list, order_indices: list = None, use_numbering: bool = False) -> str:
107
  if not file_paths: raise ValueError("No hay archivos.")
108
 
 
109
  ordered = []
110
  if order_indices and len(order_indices) == len(file_paths):
111
  try: ordered = [file_paths[int(i)] for i in order_indices]
112
  except: ordered = file_paths
113
  else: ordered = file_paths
114
 
 
115
  m = PdfWriter()
116
  for p in ordered: m.append(p)
117
 
118
  temp_out = self._get_output_path("temp_unido.pdf")
119
  with open(temp_out, "wb") as f: m.write(f)
120
 
 
121
  if use_numbering:
122
  final_out = self._add_page_numbers(temp_out)
 
123
  try: os.remove(temp_out)
124
  except: pass
125
  return final_out
 
132
  num_pages = len(reader.pages)
133
 
134
  for i, page in enumerate(reader.pages):
 
135
  packet = io.BytesIO()
 
136
  can = canvas.Canvas(packet, pagesize=letter)
 
 
137
  page_width = float(page.mediabox.width)
 
 
138
  text = f"Página {i+1} de {num_pages}"
139
  can.setFont("Helvetica", 10)
140
+ can.drawCentredString(page_width / 2.0, 20, text)
141
  can.save()
142
 
 
143
  packet.seek(0)
144
  new_pdf = PdfReader(packet)
 
 
145
  page.merge_page(new_pdf.pages[0])
146
  writer.add_page(page)
147
 
 
155
  reader = PdfReader(file_path)
156
  writer = PdfWriter()
157
 
 
158
  packet = io.BytesIO()
159
  can = canvas.Canvas(packet, pagesize=letter)
160
  can.setFont("Helvetica-Bold", 50)
161
+ can.setFillColorRGB(0.5, 0.5, 0.5, 0.3)
162
 
 
163
  can.saveState()
164
+ can.translate(300, 400)
165
+ can.rotate(45)
166
  can.drawCentredString(0, 0, text)
167
  can.restoreState()
168
  can.save()
 
171
  watermark_pdf = PdfReader(packet)
172
  watermark_page = watermark_pdf.pages[0]
173
 
 
174
  for page in reader.pages:
 
175
  page.merge_page(watermark_page)
176
  writer.add_page(page)
177
 
 
179
  with open(out, "wb") as f: writer.write(f)
180
  return out
181
 
 
182
  def repair_pdf(self, file_path: str) -> str:
183
  if not file_path: raise ValueError("Falta archivo.")
184
 
185
  out = self._get_output_path("reparado.pdf")
 
 
 
 
186
  cmd = [
187
  "gs",
188
+ "-o", out,
189
  "-sDEVICE=pdfwrite",
190
  "-dPDFSETTINGS=/default",
191
+ "-dInteract=N",
192
  "-dNOPAUSE", "-dQUIET", "-dBATCH",
193
  file_path
194
  ]