File size: 15,583 Bytes
b268f6c db26b09 f469263 db26b09 cbf42ed 7f9b733 db26b09 209eae7 24b9436 db26b09 24b0c95 b268f6c e5e7fa1 cbf42ed 24b9436 7f9b733 24b9436 cbf42ed 24b9436 cfdd571 01441bd cfdd571 d4dee55 233f3a3 cbf42ed 7f9b733 233f3a3 7f9b733 d4dee55 24b0c95 233f3a3 d4dee55 24b0c95 233f3a3 7f9b733 cbf42ed cfdd571 7f9b733 24b9436 7f9b733 cfdd571 e5e7fa1 7f9b733 cbf42ed e5e7fa1 7f9b733 24b0c95 7f9b733 e5e7fa1 24b0c95 7f9b733 e5e7fa1 7f9b733 e5e7fa1 cbf42ed 24b0c95 cfdd571 e5e7fa1 24b0c95 24b9436 d4dee55 24b0c95 24b9436 cbf42ed 01441bd cbf42ed e5e7fa1 0a5d77a e5e7fa1 24b9436 7db0b1a 31df732 6e5e8ef f469263 e5e7fa1 31df732 f469263 e5e7fa1 6e5e8ef db26b09 6e5e8ef db26b09 6e5e8ef f469263 6e5e8ef eb87c9f 31df732 6e5e8ef 3f3cb94 6e5e8ef 94a82f1 6e5e8ef 3f3cb94 6e5e8ef ab78612 6e5e8ef 7db0b1a 6e5e8ef 9b86a61 db26b09 6e5e8ef 6f47e9b 6e5e8ef 7db0b1a 6e5e8ef eb87c9f e5e7fa1 94a82f1 6e5e8ef eb87c9f 6e5e8ef 7db0b1a 6e5e8ef 7db0b1a 3f3cb94 cfdd571 db26b09 cfdd571 db26b09 cfdd571 e5e7fa1 6e5e8ef db26b09 b268f6c 3f3cb94 db26b09 f469263 6e5e8ef f469263 6e5e8ef b268f6c e5e7fa1 6e5e8ef e5e7fa1 6e5e8ef cfdd571 3f3cb94 cfdd571 e5e7fa1 cfdd571 e5e7fa1 cfdd571 6e5e8ef 94a82f1 6e5e8ef ab78612 cfdd571 e5e7fa1 ab78612 cfdd571 ab78612 6e5e8ef ab78612 6e5e8ef 2c18203 3f3cb94 cfdd571 6e5e8ef cfdd571 6e5e8ef ab78612 cfdd571 94a82f1 6e5e8ef b268f6c e5e7fa1 cfdd571 3f3cb94 ab78612 94a82f1 ab78612 94a82f1 cfdd571 6e5e8ef e5e7fa1 ab78612 cfdd571 ab78612 cfdd571 ab78612 cfdd571 6e5e8ef e5e7fa1 cfdd571 ab78612 cfdd571 ab78612 94a82f1 cfdd571 94a82f1 eb87c9f db26b09 cfdd571 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 |
## Versión Alfa 1.0
import gradio as gr
import pandas as pd
import config
import os
import time
import threading
import shutil
from core import PDFEngine
# Export the app's temp folder as GRADIO_TEMP_DIR; this is the same
# config.TEMP_DIR that cleanup_cron() in this file purges.
# NOTE(review): presumably this must run before the UI is built so Gradio picks
# the value up — confirm against the Gradio version in use.
os.environ["GRADIO_TEMP_DIR"] = config.TEMP_DIR
# Single module-level PDFEngine instance shared by every handler below.
engine = PDFEngine()
# --- SEGURIDAD: GARBAGE COLLECTOR ---
# NOTA PARA DUPLICADORES DEL ESPACIO:
# Esta función limpia los archivos cada minuto. Por defecto está en modo SILENCIOSO.
# Si quieres ver qué está pasando, DESCOMENTA las líneas que empiezan por "#print".
def cleanup_cron():
    """Purge stale entries from ``config.TEMP_DIR`` once a minute, forever.

    Each round sleeps 60 seconds and then deletes every file or folder found
    directly inside ``config.TEMP_DIR`` whose modification time is more than
    5 minutes old. The function never returns, so it is meant to be run in a
    background thread.

    The loop is silent by default; uncomment the ``#print`` lines to trace
    what each round does (entry names are masked in those messages).

    An entry that cannot be inspected or removed (``OSError``) is skipped and
    retried on the next round instead of aborting the whole round. Any other
    error is reported with a ``[CRITICAL]`` line and the loop carries on.
    """
    LIMIT_MINUTES = 5
    print("[INIT] 🛡️ Sistema de seguridad y auto-borrado ACTIVO.")
    while True:
        try:
            # Sleep first, inside the try, so a failing round can never turn
            # into a busy loop.
            time.sleep(60)
            cutoff = time.time() - (LIMIT_MINUTES * 60)
            #print(f"--- [SEGURIDAD] Ronda de limpieza: {time.strftime('%H:%M:%S')} ---")
            try:
                items = os.listdir(config.TEMP_DIR)
            except FileNotFoundError:
                # Temp folder has not been created yet (or was just removed).
                #print("[INFO] Carpeta temporal aún no creada.")
                continue
            #print(f"[ESTADO] Analizando {len(items)} elementos...")
            for filename in items:
                filepath = os.path.join(config.TEMP_DIR, filename)
                # Masked form of the name; only the optional debug prints
                # below make use of it.
                if len(filename) > 8:
                    masked = f"{filename[:4]}****{os.path.splitext(filename)[1]}"
                else:
                    masked = filename
                try:
                    is_expired = os.path.getmtime(filepath) < cutoff
                except OSError:
                    # Entry vanished or cannot be stat'ed: skip it rather
                    # than abandoning the rest of this round.
                    continue
                if not is_expired:
                    #print(f"✅ [VIGENTE] {masked}")
                    continue
                try:
                    if os.path.isfile(filepath):
                        os.remove(filepath)
                        #print(f"❌ [BORRADO] Archivo {masked}")
                    elif os.path.isdir(filepath):
                        shutil.rmtree(filepath)
                        #print(f"🗑️ [LIMPIEZA] Carpeta {masked} eliminada")
                except OSError:
                    # Best effort: the entry may be in use or already gone;
                    # it will be retried on the next round.
                    continue
        except Exception as e:
            # Last-resort guard so the cleanup thread never dies.
            print(f"[CRITICAL] Error en limpieza: {e}")
# Start the cleanup loop at import time in a daemon thread; being a daemon, it
# does not keep the interpreter alive when the main program exits.
threading.Thread(target=cleanup_cron, daemon=True).start()
# --- GENERADOR DE ENLACE ---
def get_duplicate_html():
    """Return the HTML banner inviting the user to duplicate this Space.

    The link targets ``https://huggingface.co/spaces/<SPACE_ID>?duplicate=true``
    when the ``SPACE_ID`` environment variable is set to a non-empty value,
    and ``https://huggingface.co/spaces?duplicate=true`` otherwise.
    """
    space_id = os.environ.get("SPACE_ID")
    target = "https://huggingface.co/spaces"
    if space_id:
        target = f"{target}/{space_id}"
    url = f"{target}?duplicate=true"
    # Assembled line by line; the result is the same text as one multi-line
    # literal that starts and ends with a newline.
    pieces = (
        "",
        '<div style="display: flex; align-items: center; gap: 10px; margin-bottom: 20px; background-color: #f9fafb; padding: 10px; border-radius: 8px; border: 1px solid #e5e7eb;">',
        f'<a href="{url}" target="_blank">',
        '<img src="https://huggingface.co/datasets/huggingface/badges/raw/main/duplicate-this-space-sm.svg" alt="Duplicate Space" style="height: 30px;">',
        '</a>',
        '<span style="font-size: 0.9em; color: #4b5563;">',
        '⚡ <b>¿Va lento o quieres seguridad adicional?</b> Haz clic para duplicar este espacio y tener tu propia instancia privada.',
        '</span>',
        '</div>',
        "",
    )
    return "\n".join(pieces)
# --- WRAPPERS ---
def update_file_list(files):
    """Build the ID/filename table and the default merge order for uploads.

    Args:
        files: Uploaded files, either path strings or objects exposing the
            path as ``.name``. May be ``None`` or empty.

    Returns:
        A ``(table, order)`` tuple. ``table`` is a DataFrame with columns
        ``ID`` (position in ``files``) and ``Archivo`` (base file name);
        ``order`` is the comma-separated list of IDs in upload order, e.g.
        ``"0,1,2"``. With no files, an empty DataFrame and ``""``.
    """
    if not files:
        return pd.DataFrame(), ""
    # getattr() lets both plain path strings and file-like uploads through;
    # basename() replaces the hand-rolled split on "/".
    names = [os.path.basename(getattr(f, "name", f)) for f in files]
    table = pd.DataFrame(list(enumerate(names)), columns=["ID", "Archivo"])
    default_order = ",".join(str(i) for i in range(len(names)))
    return table, default_order
def process_merge(files, order_str, use_numbering):
    """Gradio handler: merge the uploaded files via ``engine.merge_pdfs``.

    ``order_str`` is a comma-separated list of IDs; tokens that are not made
    of digits are ignored. Returns ``None`` when no files were uploaded,
    otherwise whatever ``engine.merge_pdfs`` returns. Any failure is
    re-raised as ``gr.Error`` carrying the original message.
    """
    if not files:
        return None
    try:
        indices = []
        for token in order_str.split(","):
            token = token.strip()
            if token.isdigit():
                indices.append(int(token))
        return engine.merge_pdfs(files, indices, use_numbering)
    except Exception as exc:
        raise gr.Error(str(exc))
def load_info(f):
    """Gradio handler: summarise the uploaded PDF.

    Returns a 3-tuple: a one-line summary (or ``None``), the page count (or
    ``0``), and a ``gr.update`` that makes the bound component visible only
    when a file is present. Name and page count come from
    ``engine.get_pdf_info``.
    """
    if f:
        info = engine.get_pdf_info(f)
        summary = f"📄 {info['name']} ({info['pages']} págs)"
        return summary, info['pages'], gr.update(visible=True)
    return None, 0, gr.update(visible=False)
def update_split_preview(f, r, t):
    """Gradio handler: build gallery items previewing the selected pages.

    Args:
        f: Uploaded PDF (falsy when nothing is loaded).
        r: Page-range string typed by the user.
        t: Value forwarded to ``engine.get_preview_indices_from_string``
            together with ``r`` (the UI passes the stored page count).

    Returns:
        ``None`` when there is no file, no range, or the range resolves to
        no pages; otherwise a list of ``(preview, "Pág N")`` tuples for at
        most the first 8 resolved pages, skipping pages whose preview is
        falsy.
    """
    if not f or not r:
        return None
    kp = engine.get_preview_indices_from_string(r, t)
    if not kp:
        return None
    gallery = []
    for p in kp[:8]:
        # Render each page exactly once and reuse the result for both the
        # "did it work" check and the gallery entry.
        preview = engine.generate_preview(f, p)
        if preview:
            gallery.append((preview, f"Pág {p}"))
    return gallery
def process_split(f, r):
    """Gradio handler: run ``engine.split_pdf_custom`` on the upload.

    Returns ``None`` when no file is given, otherwise the engine's result.
    Any failure is re-raised as ``gr.Error`` with the original message.
    """
    if f:
        try:
            result = engine.split_pdf_custom(f, r)
        except Exception as exc:
            raise gr.Error(str(exc))
        return result
    return None
def process_reorder(f, o):
    """Gradio handler: run ``engine.reorder_pages`` with order string ``o``.

    Returns ``None`` when no file is given, otherwise the engine's result.
    Any failure is re-raised as ``gr.Error`` with the original message.
    """
    if not f:
        return None
    try:
        outcome = engine.reorder_pages(f, o)
    except Exception as err:
        raise gr.Error(str(err))
    else:
        return outcome
def process_compare(fa, fb):
    """Gradio handler: run ``engine.compare_pdfs_text`` on two uploads.

    Returns ``None`` unless both files are present, otherwise the engine's
    result. Any failure is re-raised as ``gr.Error`` with the original
    message.
    """
    both_present = bool(fa) and bool(fb)
    if not both_present:
        return None
    try:
        return engine.compare_pdfs_text(fa, fb)
    except Exception as err:
        raise gr.Error(str(err))
def process_compress(f, l):
    """Gradio handler: run ``engine.compress_pdf`` at the selected level.

    The radio label ``l`` is mapped to a numeric level (1, 3 or 4); unknown
    labels fall back to 3. Returns ``None`` when no file is given, otherwise
    the engine's result. Any failure is re-raised as ``gr.Error`` with the
    original message.
    """
    if not f:
        return None
    level_by_label = {
        "Baja (Máxima calidad)": 1,
        "Media (Recomendado - eBook)": 3,
        "Alta (Pantalla - 72dpi)": 4,
    }
    try:
        # The lookup stays inside the try so a bad label is reported the
        # same way as an engine failure.
        return engine.compress_pdf(f, level_by_label.get(l, 3))
    except Exception as exc:
        raise gr.Error(str(exc))
def _angle_from_label(a_lbl):
    """Return the rotation in degrees (90/180/270) named by a radio label, else 0."""
    if not a_lbl:
        return 0
    # Same precedence as a 90 -> 180 -> 270 substring chain.
    for angle in (90, 180, 270):
        if str(angle) in a_lbl:
            return angle
    return 0


def update_rot_preview(f, a_lbl):
    """Gradio handler: preview the upload rotated by the selected angle.

    Returns ``None`` when no file is given, otherwise the result of
    ``engine.get_rotated_preview`` for the angle parsed from ``a_lbl``
    (0 when the label is empty or names no known angle).
    """
    if not f:
        return None
    return engine.get_rotated_preview(f, _angle_from_label(a_lbl))


def process_rotate(f, a_lbl):
    """Gradio handler: rotate the upload by the selected angle.

    Returns ``None`` when no file is given. For a 0-degree selection the
    input path is handed back unchanged; otherwise the result of
    ``engine.rotate_pdf`` is returned.

    Raises:
        gr.Error: if ``engine.rotate_pdf`` fails (original message kept).
    """
    if not f:
        return None
    a = _angle_from_label(a_lbl)
    if a == 0:
        # The upload may be a plain path string or an object exposing the
        # path as ``.name``; accept both instead of assuming ``.name``.
        return getattr(f, "name", f)
    try:
        return engine.rotate_pdf(f, a)
    except Exception as e:
        raise gr.Error(str(e)) from e
def process_protect(f, p):
    """Gradio handler: run ``engine.protect_pdf`` with password ``p``.

    Returns ``None`` when the file or the password is missing, otherwise the
    engine's result. Any failure is re-raised as ``gr.Error`` with the
    original message.
    """
    if f and p:
        try:
            protected = engine.protect_pdf(f, p)
        except Exception as err:
            raise gr.Error(str(err))
        return protected
    return None
def process_meta(f, t, a, s):
    """Gradio handler: run ``engine.update_metadata`` on the upload.

    ``t``, ``a`` and ``s`` are forwarded unchanged (the UI binds them to the
    title, author and subject boxes). Returns ``None`` when no file is
    given, otherwise the engine's result. Any failure is re-raised as
    ``gr.Error`` with the original message.
    """
    if not f:
        return None
    try:
        updated = engine.update_metadata(f, t, a, s)
    except Exception as exc:
        raise gr.Error(str(exc))
    else:
        return updated
def process_text(f):
    """Gradio handler: run ``engine.extract_text`` on the upload.

    Returns ``None`` when no file is given, otherwise the engine's result.
    Any failure is re-raised as ``gr.Error`` with the original message.
    """
    if f:
        try:
            extracted = engine.extract_text(f)
        except Exception as err:
            raise gr.Error(str(err))
        return extracted
    return None
def process_watermark(f, t):
    """Gradio handler: run ``engine.add_watermark`` with text ``t``.

    Returns ``None`` when the file or the text is missing, otherwise the
    engine's result. Any failure is re-raised as ``gr.Error`` with the
    original message.
    """
    missing_input = (not f) or (not t)
    if missing_input:
        return None
    try:
        return engine.add_watermark(f, t)
    except Exception as exc:
        raise gr.Error(str(exc))
def process_repair(f):
    """Gradio handler: run ``engine.repair_pdf`` on the upload.

    Returns ``None`` when no file is given, otherwise the engine's result.
    Any failure is re-raised as ``gr.Error`` with the original message.
    """
    if not f:
        return None
    try:
        repaired = engine.repair_pdf(f)
    except Exception as err:
        raise gr.Error(str(err))
    else:
        return repaired
# WRAPPERS OFFICE
def process_word(f):
    """Gradio handler: run ``engine.pdf_to_word`` on the upload.

    Returns ``None`` when no file is given, otherwise the engine's result.
    Any failure is re-raised as ``gr.Error`` with the original message.
    """
    if f:
        try:
            converted = engine.pdf_to_word(f)
        except Exception as exc:
            raise gr.Error(str(exc))
        return converted
    return None
def process_pptx(f):
    """Gradio handler: run ``engine.pdf_to_pptx`` on the upload.

    Returns ``None`` when no file is given, otherwise the engine's result.
    Any failure is re-raised as ``gr.Error`` with the original message.
    """
    if not f:
        return None
    try:
        converted = engine.pdf_to_pptx(f)
    except Exception as err:
        raise gr.Error(str(err))
    else:
        return converted
def process_p2i(f):
    """Gradio handler: run ``engine.pdf_to_images_zip`` on the upload.

    Returns ``None`` when no file is given, otherwise the engine's result.
    Any failure is re-raised as ``gr.Error`` with the original message.
    """
    if f:
        try:
            archive = engine.pdf_to_images_zip(f)
        except Exception as exc:
            raise gr.Error(str(exc))
        return archive
    return None
# --- UI LAYOUT ---
# Assemble the Gradio interface: a title, a short notice, the "duplicate this
# Space" banner, and one tab per tool. Every button/change event is bound to
# the matching handler function of this module.
# NOTE(review): this copy of the file carries no indentation, so the nesting
# of the `with` blocks below cannot be read off the text — restore it from the
# original layout before running.
with gr.Blocks(title=config.APP_TITLE, theme=gr.themes.Soft()) as demo:
gr.Markdown(f"# {config.APP_TITLE}")
gr.Markdown("""
Los archivos se procesan en memoria y se **autodestruyen tras 5 minutos**.
""")
gr.HTML(get_duplicate_html())
with gr.Tabs():
# 1. MERGE
with gr.TabItem("Unir"):
with gr.Row():
with gr.Column(scale=1):
m_files = gr.File(file_count="multiple", label="Archivos", file_types=[".pdf"])
with gr.Column(scale=2):
m_tbl = gr.Dataframe(headers=["ID", "Archivo"], interactive=False)
m_ord = gr.Textbox(label="Orden de los documentos según ID", placeholder="Ej: 0, 2, 1")
m_nums = gr.Checkbox(label="Numerar páginas (1 de X)", value=False)
m_btn = gr.Button("Unir PDF", variant="primary")
m_out = gr.File(label="Resultado")
# Refresh the ID table and pre-fill the order box whenever the uploads change.
m_files.change(update_file_list, m_files, [m_tbl, m_ord])
m_btn.click(process_merge, [m_files, m_ord, m_nums], m_out)
# 2. SPLIT / REORDER
with gr.TabItem("Dividir / Reordenar"):
# One upload box shared by the "Extraer" and "Reordenar" sub-tabs.
dr_f = gr.File(label="PDF Origen", file_types=[".pdf"])
dr_inf = gr.Markdown("")
# Page count of the loaded PDF: written by load_info, read by the preview handler.
dr_pg = gr.State(0)
with gr.Tabs():
with gr.Tab("Extraer"):
gr.Markdown("Separa páginas en un ZIP.")
with gr.Row():
with gr.Column():
s_rng = gr.Textbox(label="Rango", placeholder="Ej: 1-3, 5")
with gr.Row():
s_prv = gr.Button("Preview")
s_btn = gr.Button("Dividir (ZIP)", variant="primary")
with gr.Column():
s_gal = gr.Gallery(height=160, columns=4, object_fit="contain", label="Vista Previa")
s_out = gr.File(label="ZIP")
s_prv.click(update_split_preview, [dr_f, s_rng, dr_pg], s_gal)
s_btn.click(process_split, [dr_f, s_rng], s_out)
with gr.Tab("Reordenar"):
gr.Markdown("Crea un PDF con nuevo orden.")
with gr.Row():
with gr.Column():
r_ord = gr.Textbox(label="Nuevo Orden", placeholder="Ej: 3, 1, 2, 4-10")
r_btn = gr.Button("Reordenar", variant="primary")
with gr.Column():
r_out = gr.File(label="PDF Reordenado")
r_btn.click(process_reorder, [dr_f, r_ord], r_out)
# On upload/clear: show the file summary, store the page count, and apply
# load_info's visibility update to the ZIP output.
dr_f.change(load_info, dr_f, [dr_inf, dr_pg, s_out])
# 3. COMPRESS
with gr.TabItem("Comprimir"):
with gr.Row():
with gr.Column():
c_f = gr.File(label="PDF Original", file_types=[".pdf"])
# These labels are the keys process_compress maps to numeric levels.
c_l = gr.Radio(["Baja (Máxima calidad)", "Media (Recomendado - eBook)", "Alta (Pantalla - 72dpi)"], label="Nivel", value="Media (Recomendado - eBook)")
c_b = gr.Button("Comprimir", variant="primary")
with gr.Column():
c_out = gr.File(label="PDF Comprimido")
c_b.click(process_compress, [c_f, c_l], c_out)
# 4. CONVERT
with gr.TabItem("Convertir Formatos"):
with gr.Row():
with gr.Column():
gr.Markdown("### A Word")
w_f = gr.File(label="PDF")
w_b = gr.Button("Convertir")
w_o = gr.File(label="DOCX")
w_b.click(process_word, w_f, w_o)
with gr.Column():
gr.Markdown("### A Imágenes")
p2i_f = gr.File(label="PDF")
p2i_b = gr.Button("Extraer ZIP")
p2i_o = gr.File(label="ZIP")
p2i_b.click(process_p2i, p2i_f, p2i_o)
with gr.Column():
gr.Markdown("### A PowerPoint")
p_f = gr.File(label="PDF")
p_b = gr.Button("Convertir")
p_o = gr.File(label="PPTX")
p_b.click(process_pptx, p_f, p_o)
# 5. COMPARE
with gr.TabItem("Comparar"):
gr.Markdown("Compara el **texto** de dos versiones. Descarga un informe con cambios.")
with gr.Row():
with gr.Column():
ca = gr.File(label="Versión A (Original)", file_types=[".pdf"])
with gr.Column():
cb = gr.File(label="Versión B (Modificada)", file_types=[".pdf"])
cb_btn = gr.Button("Generar Informe", variant="primary")
co = gr.File(label="Informe PDF")
cb_btn.click(process_compare, [ca, cb], co)
# 6. EXTRAS
with gr.TabItem("Extras"):
with gr.Tab("Rotar"):
with gr.Row():
with gr.Column():
rf = gr.File(label="PDF")
# The handlers recover the angle by looking for "90"/"180"/"270" in the chosen label.
ra = gr.Radio(["0° (Original)", "90° (Derecha)", "180° (Invertir)", "270° (Izquierda)"], label="Rotación", value="0° (Original)")
rb = gr.Button("Rotar", variant="primary")
with gr.Column():
rp = gr.Image(label="Preview")
ro = gr.File(label="PDF Rotado")
# Re-render the preview when either the file or the selected angle changes.
rf.change(update_rot_preview, [rf, ra], rp)
ra.change(update_rot_preview, [rf, ra], rp)
rb.click(process_rotate, [rf, ra], ro)
with gr.Tab("Marca de Agua"):
gr.Markdown("Añade una marca de agua diagonal en todas las páginas.")
with gr.Row():
with gr.Column():
wf = gr.File(label="PDF")
wt = gr.Textbox(label="Texto Marca de Agua", placeholder="Ej: CONFIDENCIAL")
wb = gr.Button("Estampar", variant="primary")
with gr.Column():
wo = gr.File(label="PDF Marcado")
wb.click(process_watermark, [wf, wt], wo)
with gr.Tab("Reparar"):
gr.Markdown("Intenta arreglar PDFs corruptos o dañados reescribiéndolos con Ghostscript.")
with gr.Row():
with gr.Column():
repf = gr.File(label="PDF Dañado")
repb = gr.Button("Reparar", variant="primary")
with gr.Column():
repo = gr.File(label="PDF Reparado")
repb.click(process_repair, repf, repo)
with gr.Tab("Proteger"):
with gr.Row():
with gr.Column():
pf = gr.File(label="PDF")
pp = gr.Textbox(type="password", label="Pass")
pb = gr.Button("Encriptar", variant="primary")
with gr.Column():
po = gr.File(label="Protegido")
pb.click(process_protect, [pf, pp], po)
with gr.Tab("Info/Texto"):
with gr.Row():
with gr.Column():
tf = gr.File(label="PDF")
tb = gr.Button("Extraer Texto")
to = gr.File()
tb.click(process_text, tf, to)
with gr.Column():
mt = gr.Textbox(label="Título")
ma = gr.Textbox(label="Autor")
ms = gr.Textbox(label="Asunto")
mb = gr.Button("Actualizar Meta")
mo = gr.File()
# The metadata update reads its PDF from the same upload box (tf) as text extraction.
mb.click(process_meta, [tf, mt, ma, ms], mo)
if __name__ == "__main__":
    # Enable the event queue with default_concurrency_limit=2, then serve on
    # all network interfaces at port 7860.
    demo.queue(default_concurrency_limit=2).launch(
        server_name="0.0.0.0",
        server_port=7860,
    )