DaniFera commited on
Commit
ef5f6c7
verified
1 Parent(s): e9988f6

Update core.py

Browse files
Files changed (1) hide show
  1. core.py +78 -157
core.py CHANGED
@@ -1,15 +1,16 @@
1
- # Versión 1.5: Añadido soporte para previsualización de rotación
2
  import os
3
  import zipfile
4
  from pypdf import PdfWriter, PdfReader
5
  from pdf2image import convert_from_path
 
6
  from PIL import Image
7
  from config import TEMP_DIR
8
  import uuid
9
 
10
  class PDFEngine:
11
 
12
- # --- MÉTODOS EXISTENTES (Auxiliares) ---
13
  @staticmethod
14
  def _get_output_path(filename: str) -> str:
15
  unique_name = f"{uuid.uuid4().hex[:8]}_{filename}"
@@ -18,165 +19,85 @@ class PDFEngine:
18
  def get_pdf_info(self, file_path: str) -> dict:
19
  try:
20
  reader = PdfReader(file_path)
21
- return {"pages": len(reader.pages), "name": os.path.basename(file_path)}
22
- except Exception: return {"pages": 0, "name": "Error"}
 
 
 
 
 
 
23
 
24
- # --- NUEVO MÉTODO PARA PREVIEW DE ROTACIÓN (v1.5) ---
25
- def get_rotated_preview(self, file_path: str, angle: int) -> str:
26
- if not file_path: return None
27
- try:
28
- # Extraer p谩gina 1
29
- images = convert_from_path(file_path, first_page=1, last_page=1, size=(None, 500))
30
- if not images: return None
31
-
32
- img = images[0]
33
-
34
- # Si 谩ngulo es 0, no rotamos, solo guardamos
35
- if angle != 0:
36
- # -angle porque Pillow rota en sentido antihorario
37
- img = img.rotate(-angle, expand=True)
38
-
39
- out_path = self._get_output_path(f"preview_rot_{angle}.jpg")
40
- img.save(out_path, "JPEG")
41
- return out_path
42
- except Exception as e:
43
- print(f"Error preview: {e}")
44
- return None
45
 
46
- # --- MÉTODOS DE LÓGICA DE NEGOCIO (Ya existentes, resumidos) ---
47
- def generate_preview(self, file_path: str, page_number: int) -> str:
48
- # (C贸digo igual a v1.4)
 
 
 
 
49
  try:
50
- images = convert_from_path(file_path, first_page=page_number, last_page=page_number, size=(None, 400))
51
- if images:
52
- out = self._get_output_path(f"preview_pg{page_number}.jpg")
53
- images[0].save(out, "JPEG")
54
- return out
55
- return None
56
- except: return None
57
-
58
- def get_preview_indices_from_string(self, range_str: str, max_pages: int) -> list:
59
- # (C贸digo igual a v1.4 - Copiar implementaci贸n completa previa)
60
- key_pages = []
61
- parts = range_str.split(',')
62
- for part in parts:
63
- part = part.strip()
64
- if '-' in part:
65
- try:
66
- s, e = map(int, part.split('-'))
67
- key_pages.extend([max(1, min(s, max_pages)), max(1, min(e, max_pages))])
68
- except ValueError: continue
69
- else:
70
- try:
71
- p = int(part)
72
- if 1 <= p <= max_pages: key_pages.append(p)
73
- except ValueError: continue
74
- return sorted(list(set(key_pages)))
75
-
76
- def merge_pdfs(self, file_paths: list, order_indices: list = None) -> str:
77
- # (C贸digo igual a v1.4)
78
- if not file_paths: raise ValueError("No hay archivos.")
79
- ordered = []
80
- if order_indices and len(order_indices) == len(file_paths):
81
- ordered = [file_paths[int(i)] for i in order_indices]
82
- else: ordered = file_paths
83
- merger = PdfWriter()
84
- for path in ordered: merger.append(path)
85
- out = self._get_output_path("unido.pdf")
86
- with open(out, "wb") as f: merger.write(f)
87
- merger.close()
88
- return out
89
-
90
- def _parse_range_groups(self, range_str: str, max_pages: int) -> list:
91
- # (Necesario para split_pdf_custom - Copiar de v1.3)
92
- groups = []
93
- parts = range_str.split(',')
94
- for part in parts:
95
- part = part.strip()
96
- if not part: continue
97
- current_group = []
98
- if '-' in part:
99
- try:
100
- start, end = map(int, part.split('-'))
101
- start = max(1, start)
102
- end = min(max_pages, end)
103
- if start <= end: current_group = list(range(start - 1, end))
104
- except ValueError: continue
105
- else:
106
- try:
107
- p = int(part)
108
- if 1 <= p <= max_pages: current_group = [p - 1]
109
- except ValueError: continue
110
- if current_group: groups.append({"label": part, "indices": current_group})
111
- return groups
112
 
113
- def split_pdf_custom(self, file_path: str, range_str: str) -> str:
114
- # (C贸digo v1.3/v1.4 con ZIP)
115
  if not file_path: raise ValueError("Falta archivo.")
116
- reader = PdfReader(file_path)
117
- total = len(reader.pages)
118
- groups = self._parse_range_groups(range_str, total)
119
- if not groups: raise ValueError("Rango inv谩lido.")
120
- generated = []
121
- base = os.path.basename(file_path).replace(".pdf", "")
122
- for g in groups:
123
- w = PdfWriter()
124
- for i in g["indices"]: w.add_page(reader.pages[i])
125
- safe = g["label"].replace(" ", "")
126
- p = self._get_output_path(f"{base}_part_{safe}.pdf")
127
- with open(p, "wb") as f: w.write(f)
128
- generated.append(p)
129
- zip_p = self._get_output_path(f"{base}_split.zip")
130
- with zipfile.ZipFile(zip_p, 'w') as z:
131
- for f in generated: z.write(f, arcname=os.path.basename(f))
132
- return zip_p
133
-
134
- def protect_pdf(self, file_path: str, password: str) -> str:
135
- # (C贸digo igual a v1.4)
136
- if not file_path or not password: raise ValueError("Falta datos.")
137
- r = PdfReader(file_path)
138
- w = PdfWriter()
139
- for p in r.pages: w.add_page(p)
140
- w.encrypt(password)
141
- out = self._get_output_path("protegido.pdf")
142
- with open(out, "wb") as f: w.write(f)
143
- return out
144
 
145
- def rotate_pdf(self, file_path: str, angle: int) -> str:
146
- # (C贸digo igual a v1.4)
147
  if not file_path: raise ValueError("Falta archivo.")
148
- r = PdfReader(file_path)
149
- w = PdfWriter()
150
- for p in r.pages:
151
- p.rotate(angle)
152
- w.add_page(p)
153
- out = self._get_output_path(f"rotado_{angle}.pdf")
154
- with open(out, "wb") as f: w.write(f)
155
- return out
156
-
157
- def pdf_to_images_zip(self, file_path: str) -> str:
158
- # (C贸digo igual a v1.4)
159
- if not file_path: raise ValueError("Falta archivo")
160
- imgs = convert_from_path(file_path, dpi=150) # Bajo DPI un poco para velocidad
161
- base = os.path.basename(file_path).replace(".pdf", "")
162
- paths = []
163
- for i, img in enumerate(imgs):
164
- p = self._get_output_path(f"{base}_{i+1}.jpg")
165
- img.save(p, "JPEG")
166
- paths.append(p)
167
- zp = self._get_output_path(f"{base}_imgs.zip")
168
- with zipfile.ZipFile(zp, 'w') as z:
169
- for p in paths: z.write(p, arcname=os.path.basename(p))
170
- return zp
171
-
172
- def images_to_pdf(self, image_paths: list) -> str:
173
- # (C贸digo igual a v1.4)
174
- if not image_paths: raise ValueError("No imgs")
175
- objs = []
176
- for p in image_paths:
177
- img = Image.open(p)
178
- if img.mode != 'RGB': img = img.convert('RGB')
179
- objs.append(img)
180
- out = self._get_output_path("album.pdf")
181
- if objs: objs[0].save(out, "PDF", resolution=100.0, save_all=True, append_images=objs[1:])
182
- return out
 
1
+ # Versión 1.7: Añadido PDF a Word, Metadatos y Extracción de Texto
2
  import os
3
  import zipfile
4
  from pypdf import PdfWriter, PdfReader
5
  from pdf2image import convert_from_path
6
+ from pdf2docx import Converter
7
  from PIL import Image
8
  from config import TEMP_DIR
9
  import uuid
10
 
11
  class PDFEngine:
12
 
13
+ # --- MÉTODOS BASE (Mantener los anteriores: _get_output_path, generate_preview, etc.) ---
14
  @staticmethod
15
  def _get_output_path(filename: str) -> str:
16
  unique_name = f"{uuid.uuid4().hex[:8]}_{filename}"
 
19
  def get_pdf_info(self, file_path: str) -> dict:
20
  try:
21
  reader = PdfReader(file_path)
22
+ meta = reader.metadata
23
+ title = meta.title if meta and meta.title else "Sin t铆tulo"
24
+ return {
25
+ "pages": len(reader.pages),
26
+ "name": os.path.basename(file_path),
27
+ "title": title
28
+ }
29
+ except Exception: return {"pages": 0, "name": "Error", "title": ""}
30
 
31
+ # --- (AQUÍ DEBES MANTENER LOS MÉTODOS DE LA v1.6: merge, split, rotate, protect, images) ---
32
+ # Por brevedad, asumo que están incluidos. No los borres.
33
+ # ...
34
+ # ...
35
+ # Aquí añado SOLO lo nuevo de la v1.7:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
def pdf_to_word(self, file_path: str) -> str:
    """Convert a PDF into a DOCX file using pdf2docx.

    Args:
        file_path: Path of the PDF to convert.

    Returns:
        Path of the generated ``.docx`` file, as produced by
        ``self._get_output_path``.

    Raises:
        ValueError: If ``file_path`` is empty or ``None``.
        RuntimeError: If the conversion fails; the original exception is
            attached as ``__cause__``.
    """
    if not file_path:
        raise ValueError("Falta archivo.")

    # splitext swaps only the real extension, whatever its case, and leaves
    # any ".pdf" occurring elsewhere in the file name untouched.
    stem = os.path.splitext(os.path.basename(file_path))[0]
    output_path = self._get_output_path(f"{stem}.docx")

    try:
        cv = Converter(file_path)
        try:
            # start=0, end=None converts every page.
            cv.convert(output_path, start=0, end=None)
        finally:
            # Release the converter even when convert() raises.
            cv.close()
        return output_path
    except Exception as e:
        raise RuntimeError(f"Error en conversión a Word: {e}") from e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
 
53
def extract_text(self, file_path: str) -> str:
    """Extract the plain text of a PDF into a UTF-8 ``.txt`` file.

    Each page that yields text is written under a ``--- Página N ---``
    header; pages with no extractable text are skipped.

    Args:
        file_path: Path of the PDF to read.

    Returns:
        Path of the generated text file, as produced by
        ``self._get_output_path``.

    Raises:
        ValueError: If ``file_path`` is empty or ``None``.
        RuntimeError: If reading the PDF or writing the text file fails;
            the original exception is attached as ``__cause__``.
    """
    if not file_path:
        raise ValueError("Falta archivo.")

    try:
        reader = PdfReader(file_path)
        text_content = []
        for page_number, page in enumerate(reader.pages, start=1):
            text = page.extract_text()
            if text:
                text_content.append(f"--- Página {page_number} ---\n{text}\n")

        # splitext swaps only the real extension, whatever its case, and
        # leaves any ".pdf" occurring elsewhere in the file name untouched.
        stem = os.path.splitext(os.path.basename(file_path))[0]
        output_path = self._get_output_path(f"{stem}.txt")

        with open(output_path, "w", encoding="utf-8") as f:
            f.write("\n".join(text_content))

        return output_path
    except Exception as e:
        raise RuntimeError(f"Error extrayendo texto: {e}") from e
 
 
 
 
 
 
 
 
 
 
74
 
75
def update_metadata(self, file_path: str, title: str, author: str, subject: str) -> str:
    """Write a copy of the PDF with new document metadata.

    All pages are copied into a new document whose info dictionary gets
    ``/Title``, ``/Author``, ``/Subject`` and a fixed ``/Producer`` value.

    Args:
        file_path: Path of the source PDF.
        title: New document title.
        author: New document author.
        subject: New document subject.

    Returns:
        Path of the rewritten PDF, as produced by
        ``self._get_output_path``.

    Raises:
        ValueError: If ``file_path`` is empty or ``None``.
        RuntimeError: If reading or writing the PDF fails; the original
            exception is attached as ``__cause__``.
    """
    if not file_path:
        raise ValueError("Falta archivo.")

    try:
        reader = PdfReader(file_path)
        writer = PdfWriter()

        # Copy the pages unchanged.
        for page in reader.pages:
            writer.add_page(page)

        requested = {
            "/Title": title,
            "/Author": author,
            "/Subject": subject,
            "/Producer": "OpenPDF Tools Libres",
        }
        # None is not a text value for the info dictionary, so fields the
        # caller left as None are omitted instead of being passed on.
        # NOTE(review): empty strings are still written as-is; confirm that
        # matches what the UI expects.
        new_metadata = {
            key: value for key, value in requested.items() if value is not None
        }

        writer.add_metadata(new_metadata)

        output_path = self._get_output_path("editado_meta.pdf")
        with open(output_path, "wb") as f:
            writer.write(f)

        return output_path
    except Exception as e:
        raise RuntimeError(f"Error actualizando metadatos: {e}") from e