# PDFree / app.py
# DaniFera's picture
# Update app.py
# 0a5d77a verified
## Versión Alfa 1.0
import gradio as gr
import pandas as pd
import config
import os
import time
import threading
import shutil
from core import PDFEngine
# Point the GRADIO_TEMP_DIR environment variable at config.TEMP_DIR, the same
# folder that cleanup_cron sweeps.
os.environ["GRADIO_TEMP_DIR"] = config.TEMP_DIR
# Single module-level PDFEngine instance shared by all the wrappers in this file.
engine = PDFEngine()
# --- SECURITY: GARBAGE COLLECTOR ---
# NOTE FOR ANYONE DUPLICATING THIS SPACE:
# This routine cleans up the temp files every minute. By default it runs in SILENT mode.
# To see what it is doing, UNCOMMENT the lines that start with "#print".
def _purge_expired(directory, cutoff):
    """Delete every direct child of ``directory`` last modified before ``cutoff``.

    Args:
        directory: Folder to sweep; only its top-level entries are listed.
        cutoff: Timestamp (seconds since the epoch). Entries whose mtime is
            older are removed: files with ``os.remove``, folders with
            ``shutil.rmtree``.

    A failure on one entry is ignored so the remaining entries are still
    processed.
    """
    items = os.listdir(directory)
    #print(f"[ESTADO] Analizando {len(items)} elementos...")
    for filename in items:
        filepath = os.path.join(directory, filename)
        # Shortened name used only by the optional debug prints below, so that
        # full file names are not written to the log.
        if len(filename) > 8:
            masked = f"{filename[:4]}****{os.path.splitext(filename)[1]}"
        else:
            masked = filename
        try:
            is_expired = os.path.getmtime(filepath) < cutoff
        except OSError:
            # Entry vanished or cannot be inspected: skip it instead of
            # aborting the sweep for everything that follows.
            continue
        if not is_expired:
            #print(f"✅ [VIGENTE] {masked}")
            continue
        try:
            if os.path.isfile(filepath):
                os.remove(filepath)
                #print(f"❌ [BORRADO] Archivo {masked}")
            elif os.path.isdir(filepath):
                shutil.rmtree(filepath)
                #print(f"🗑️ [LIMPIEZA] Carpeta {masked} eliminada")
        except OSError:
            # Best effort: the entry is retried on the next round.
            pass


def cleanup_cron(limit_minutes=5, interval_seconds=60):
    """Loop forever, periodically deleting old entries from ``config.TEMP_DIR``.

    Args:
        limit_minutes: Age, in minutes, after which a file or folder is deleted.
        interval_seconds: Pause between two consecutive sweeps.

    The function never returns. Any unexpected error in a round is printed and
    the loop carries on with the next round.
    """
    print("[INIT] 🛡️ Sistema de seguridad y auto-borrado ACTIVO.")
    while True:
        try:
            time.sleep(interval_seconds)
            cutoff = time.time() - (limit_minutes * 60)
            #print(f"--- [SEGURIDAD] Ronda de limpieza: {time.strftime('%H:%M:%S')} ---")
            if not os.path.exists(config.TEMP_DIR):
                #print("[INFO] Carpeta temporal aún no creada.")
                continue
            _purge_expired(config.TEMP_DIR, cutoff)
        except Exception as e:
            print(f"[CRITICAL] Error en limpieza: {e}")
# Start cleanup_cron in a daemon thread at import time.
threading.Thread(target=cleanup_cron, daemon=True).start()
# --- LINK GENERATOR ---
def get_duplicate_html():
    """Return the HTML banner that links to the "duplicate this Space" page.

    The link points at the Space named by the SPACE_ID environment variable;
    when that variable is unset or empty the generic Spaces URL is used.
    """
    space_id = os.environ.get('SPACE_ID')
    if space_id:
        duplicate_url = f"https://huggingface.co/spaces/{space_id}?duplicate=true"
    else:
        duplicate_url = "https://huggingface.co/spaces?duplicate=true"
    banner = f"""
<div style="display: flex; align-items: center; gap: 10px; margin-bottom: 20px; background-color: #f9fafb; padding: 10px; border-radius: 8px; border: 1px solid #e5e7eb;">
<a href="{duplicate_url}" target="_blank">
<img src="https://huggingface.co/datasets/huggingface/badges/raw/main/duplicate-this-space-sm.svg" alt="Duplicate Space" style="height: 30px;">
</a>
<span style="font-size: 0.9em; color: #4b5563;">
⚡ <b>¿Va lento o quieres seguridad adicional?</b> Haz clic para duplicar este espacio y tener tu propia instancia privada.
</span>
</div>
"""
    return banner
# --- WRAPPERS ---
def update_file_list(files):
    """Build the ID/filename table and the default merge order for an upload.

    Args:
        files: Sequence of uploaded file paths; None or empty when cleared.

    Returns:
        A tuple ``(table, order)``. ``table`` is a DataFrame with columns
        "ID" (zero-based position) and "Archivo" (base name of the file);
        ``order`` is the comma-separated list of all IDs in upload order.
        With no files, an empty DataFrame and an empty string are returned.
    """
    if not files:
        return pd.DataFrame(), ""
    # os.path.basename follows the platform's separator rules, unlike a
    # hard-coded split on "/".
    rows = [[idx, os.path.basename(path)] for idx, path in enumerate(files)]
    default_order = ",".join(str(idx) for idx in range(len(rows)))
    return pd.DataFrame(rows, columns=["ID", "Archivo"]), default_order
def process_merge(files, order_str, use_numbering):
    """Merge the uploaded PDFs in the order given by ``order_str``.

    Args:
        files: Uploaded files; None or empty returns None immediately.
        order_str: Comma-separated IDs. Tokens that are not purely digits
            after stripping whitespace are dropped.
        use_numbering: Passed through to ``engine.merge_pdfs``.

    Returns:
        Whatever ``engine.merge_pdfs`` returns, or None when there are no files.

    Raises:
        gr.Error: Carrying the message of any exception raised while parsing
            the order or merging.
    """
    if not files:
        return None
    try:
        indices = []
        for token in order_str.split(","):
            cleaned = token.strip()
            if cleaned.isdigit():
                indices.append(int(cleaned))
        merged = engine.merge_pdfs(files, indices, use_numbering)
    except Exception as exc:
        raise gr.Error(str(exc))
    return merged
def load_info(f):
    """Describe the uploaded PDF for the split/reorder tab.

    Returns a 3-tuple: a summary string built from the ``name`` and ``pages``
    entries of ``engine.get_pdf_info(f)``, the page count, and a
    ``gr.update`` toggling visibility. With no file the tuple is
    ``(None, 0, gr.update(visible=False))``.
    """
    if f:
        info = engine.get_pdf_info(f)
        summary = f"📄 {info['name']} ({info['pages']} págs)"
        return summary, info['pages'], gr.update(visible=True)
    return None, 0, gr.update(visible=False)
def update_split_preview(f, r, t):
    """Build gallery items previewing the pages selected by range ``r``.

    Args:
        f: The source PDF; falsy returns None.
        r: Page-range string typed by the user; falsy returns None.
        t: Passed to ``engine.get_preview_indices_from_string`` together
            with ``r`` (the UI wires the stored page count here).

    Returns:
        A list of ``(preview, caption)`` tuples for at most the first eight
        selected pages, skipping pages whose preview is falsy; None when
        there is nothing to preview.
    """
    if not f or not r:
        return None
    pages = engine.get_preview_indices_from_string(r, t)
    if not pages:
        return None
    previews = []
    for page in pages[:8]:
        # Render each page once; the result is used both as the filter and as
        # the gallery item.
        image = engine.generate_preview(f, page)
        if image:
            previews.append((image, f"Pág {page}"))
    return previews
def process_split(f, r):
    """Split ``f`` according to range string ``r`` via ``engine.split_pdf_custom``.

    Returns None when no file is given; otherwise the engine's result.
    Any exception from the engine is re-raised as ``gr.Error`` with its message.
    """
    if not f:
        return None
    try:
        archive = engine.split_pdf_custom(f, r)
    except Exception as exc:
        raise gr.Error(str(exc))
    return archive
def process_reorder(f, o):
    """Reorder the pages of ``f`` as described by ``o`` via ``engine.reorder_pages``.

    Returns None when no file is given; otherwise the engine's result.
    Any exception from the engine is re-raised as ``gr.Error`` with its message.
    """
    if not f:
        return None
    try:
        reordered = engine.reorder_pages(f, o)
    except Exception as exc:
        raise gr.Error(str(exc))
    return reordered
def process_compare(fa, fb):
    """Compare two PDFs via ``engine.compare_pdfs_text``.

    Returns None unless both files are provided; otherwise the engine's result.
    Any exception from the engine is re-raised as ``gr.Error`` with its message.
    """
    if not (fa and fb):
        return None
    try:
        report = engine.compare_pdfs_text(fa, fb)
    except Exception as exc:
        raise gr.Error(str(exc))
    return report
def process_compress(f, l):
    """Compress ``f`` at the level chosen in the UI radio.

    The radio label ``l`` is mapped to the numeric level handed to
    ``engine.compress_pdf`` (1, 3 or 4); an unknown label falls back to 3.
    Returns None when no file is given. Any exception is re-raised as
    ``gr.Error`` with its message.
    """
    if not f:
        return None
    level_by_label = {
        "Baja (Máxima calidad)": 1,
        "Media (Recomendado - eBook)": 3,
        "Alta (Pantalla - 72dpi)": 4,
    }
    try:
        compressed = engine.compress_pdf(f, level_by_label.get(l, 3))
    except Exception as exc:
        raise gr.Error(str(exc))
    return compressed
def _rotation_angle(label):
    """Translate a rotation radio label into degrees (0, 90, 180 or 270).

    The label is matched by substring, checking "90", then "180", then "270".
    A missing label or one without any of those numbers means no rotation.
    """
    if not label:
        return 0
    for angle in (90, 180, 270):
        if str(angle) in label:
            return angle
    return 0


def update_rot_preview(f, a_lbl):
    """Return ``engine.get_rotated_preview`` for ``f`` at the selected angle.

    Returns None when no file is given.
    """
    if not f:
        return None
    return engine.get_rotated_preview(f, _rotation_angle(a_lbl))


def process_rotate(f, a_lbl):
    """Rotate ``f`` by the angle encoded in the radio label ``a_lbl``.

    Returns:
        None when no file is given; the input file's own path when the angle
        is 0 (nothing to do); otherwise the result of ``engine.rotate_pdf``.

    Raises:
        gr.Error: Carrying the message of any exception from the engine.
    """
    if not f:
        return None
    angle = _rotation_angle(a_lbl)
    if angle == 0:
        # The upload may be a plain path string or an object exposing the
        # path as ``.name``; accept both.
        return getattr(f, "name", f)
    try:
        return engine.rotate_pdf(f, angle)
    except Exception as e:
        raise gr.Error(str(e))
def process_protect(f, p):
    """Password-protect ``f`` with ``p`` via ``engine.protect_pdf``.

    Returns None unless both a file and a non-empty password are provided;
    otherwise the engine's result. Any exception from the engine is
    re-raised as ``gr.Error`` with its message.
    """
    if not (f and p):
        return None
    try:
        protected = engine.protect_pdf(f, p)
    except Exception as exc:
        raise gr.Error(str(exc))
    return protected
def process_meta(f, t, a, s):
    """Update the metadata of ``f`` via ``engine.update_metadata(f, t, a, s)``.

    In the UI, ``t``, ``a`` and ``s`` come from the title, author and subject
    text boxes. Returns None when no file is given; otherwise the engine's
    result. Any exception from the engine is re-raised as ``gr.Error``.
    """
    if not f:
        return None
    try:
        updated = engine.update_metadata(f, t, a, s)
    except Exception as exc:
        raise gr.Error(str(exc))
    return updated
def process_text(f):
    """Extract the text of ``f`` via ``engine.extract_text``.

    Returns None when no file is given; otherwise the engine's result.
    Any exception from the engine is re-raised as ``gr.Error`` with its message.
    """
    if not f:
        return None
    try:
        extracted = engine.extract_text(f)
    except Exception as exc:
        raise gr.Error(str(exc))
    return extracted
def process_watermark(f, t):
    """Stamp the text ``t`` on ``f`` via ``engine.add_watermark``.

    Returns None unless both a file and a non-empty text are provided;
    otherwise the engine's result. Any exception from the engine is
    re-raised as ``gr.Error`` with its message.
    """
    if not (f and t):
        return None
    try:
        stamped = engine.add_watermark(f, t)
    except Exception as exc:
        raise gr.Error(str(exc))
    return stamped
def process_repair(f):
    """Attempt to repair ``f`` via ``engine.repair_pdf``.

    Returns None when no file is given; otherwise the engine's result.
    Any exception from the engine is re-raised as ``gr.Error`` with its message.
    """
    if not f:
        return None
    try:
        repaired = engine.repair_pdf(f)
    except Exception as exc:
        raise gr.Error(str(exc))
    return repaired
# OFFICE WRAPPERS
def process_word(f):
    """Convert ``f`` to Word via ``engine.pdf_to_word``.

    Returns None when no file is given; otherwise the engine's result.
    Any exception from the engine is re-raised as ``gr.Error`` with its message.
    """
    if not f:
        return None
    try:
        document = engine.pdf_to_word(f)
    except Exception as exc:
        raise gr.Error(str(exc))
    return document
def process_pptx(f):
    """Convert ``f`` to PowerPoint via ``engine.pdf_to_pptx``.

    Returns None when no file is given; otherwise the engine's result.
    Any exception from the engine is re-raised as ``gr.Error`` with its message.
    """
    if not f:
        return None
    try:
        slides = engine.pdf_to_pptx(f)
    except Exception as exc:
        raise gr.Error(str(exc))
    return slides
def process_p2i(f):
    """Export the pages of ``f`` as images via ``engine.pdf_to_images_zip``.

    Returns None when no file is given; otherwise the engine's result.
    Any exception from the engine is re-raised as ``gr.Error`` with its message.
    """
    if not f:
        return None
    try:
        images_zip = engine.pdf_to_images_zip(f)
    except Exception as exc:
        raise gr.Error(str(exc))
    return images_zip
# --- UI LAYOUT ---
# Builds the Gradio Blocks app `demo`: one tab per tool, each wiring its inputs
# to the matching wrapper function defined above.
with gr.Blocks(title=config.APP_TITLE, theme=gr.themes.Soft()) as demo:
gr.Markdown(f"# {config.APP_TITLE}")
gr.Markdown("""
Los archivos se procesan en memoria y se **autodestruyen tras 5 minutos**.
""")
gr.HTML(get_duplicate_html())
with gr.Tabs():
# 1. MERGE
with gr.TabItem("Unir"):
with gr.Row():
with gr.Column(scale=1):
m_files = gr.File(file_count="multiple", label="Archivos", file_types=[".pdf"])
with gr.Column(scale=2):
m_tbl = gr.Dataframe(headers=["ID", "Archivo"], interactive=False)
m_ord = gr.Textbox(label="Orden de los documentos según ID", placeholder="Ej: 0, 2, 1")
m_nums = gr.Checkbox(label="Numerar páginas (1 de X)", value=False)
m_btn = gr.Button("Unir PDF", variant="primary")
m_out = gr.File(label="Resultado")
# Whenever the upload changes, refresh the ID table and prefill the order box.
m_files.change(update_file_list, m_files, [m_tbl, m_ord])
m_btn.click(process_merge, [m_files, m_ord, m_nums], m_out)
# 2. SPLIT / REORDER
with gr.TabItem("Dividir / Reordenar"):
dr_f = gr.File(label="PDF Origen", file_types=[".pdf"])
dr_inf = gr.Markdown("")
# Holds the page count returned by load_info; passed to update_split_preview.
dr_pg = gr.State(0)
with gr.Tabs():
with gr.Tab("Extraer"):
gr.Markdown("Separa páginas en un ZIP.")
with gr.Row():
with gr.Column():
s_rng = gr.Textbox(label="Rango", placeholder="Ej: 1-3, 5")
with gr.Row():
s_prv = gr.Button("Preview")
s_btn = gr.Button("Dividir (ZIP)", variant="primary")
with gr.Column():
s_gal = gr.Gallery(height=160, columns=4, object_fit="contain", label="Vista Previa")
s_out = gr.File(label="ZIP")
s_prv.click(update_split_preview, [dr_f, s_rng, dr_pg], s_gal)
s_btn.click(process_split, [dr_f, s_rng], s_out)
with gr.Tab("Reordenar"):
gr.Markdown("Crea un PDF con nuevo orden.")
with gr.Row():
with gr.Column():
r_ord = gr.Textbox(label="Nuevo Orden", placeholder="Ej: 3, 1, 2, 4-10")
r_btn = gr.Button("Reordenar", variant="primary")
with gr.Column():
r_out = gr.File(label="PDF Reordenado")
r_btn.click(process_reorder, [dr_f, r_ord], r_out)
# On upload change: show the file summary, store the page count, and apply
# load_info's visibility update to the ZIP output (s_out).
dr_f.change(load_info, dr_f, [dr_inf, dr_pg, s_out])
# 3. COMPRESS
with gr.TabItem("Comprimir"):
with gr.Row():
with gr.Column():
c_f = gr.File(label="PDF Original", file_types=[".pdf"])
# These labels must match the keys of the level table in process_compress.
c_l = gr.Radio(["Baja (Máxima calidad)", "Media (Recomendado - eBook)", "Alta (Pantalla - 72dpi)"], label="Nivel", value="Media (Recomendado - eBook)")
c_b = gr.Button("Comprimir", variant="primary")
with gr.Column():
c_out = gr.File(label="PDF Comprimido")
c_b.click(process_compress, [c_f, c_l], c_out)
# 4. CONVERT
with gr.TabItem("Convertir Formatos"):
with gr.Row():
with gr.Column():
gr.Markdown("### A Word")
w_f = gr.File(label="PDF")
w_b = gr.Button("Convertir")
w_o = gr.File(label="DOCX")
w_b.click(process_word, w_f, w_o)
with gr.Column():
gr.Markdown("### A Imágenes")
p2i_f = gr.File(label="PDF")
p2i_b = gr.Button("Extraer ZIP")
p2i_o = gr.File(label="ZIP")
p2i_b.click(process_p2i, p2i_f, p2i_o)
with gr.Column():
gr.Markdown("### A PowerPoint")
p_f = gr.File(label="PDF")
p_b = gr.Button("Convertir")
p_o = gr.File(label="PPTX")
p_b.click(process_pptx, p_f, p_o)
# 5. COMPARE
with gr.TabItem("Comparar"):
gr.Markdown("Compara el **texto** de dos versiones. Descarga un informe con cambios.")
with gr.Row():
with gr.Column():
ca = gr.File(label="Versión A (Original)", file_types=[".pdf"])
with gr.Column():
cb = gr.File(label="Versión B (Modificada)", file_types=[".pdf"])
cb_btn = gr.Button("Generar Informe", variant="primary")
co = gr.File(label="Informe PDF")
cb_btn.click(process_compare, [ca, cb], co)
# 6. EXTRAS
with gr.TabItem("Extras"):
with gr.Tab("Rotar"):
with gr.Row():
with gr.Column():
rf = gr.File(label="PDF")
# The rotation wrappers look for "90", "180" or "270" inside these labels.
ra = gr.Radio(["0° (Original)", "90° (Derecha)", "180° (Invertir)", "270° (Izquierda)"], label="Rotación", value="0° (Original)")
rb = gr.Button("Rotar", variant="primary")
with gr.Column():
rp = gr.Image(label="Preview")
ro = gr.File(label="PDF Rotado")
# Re-render the preview when either the file or the selected angle changes.
rf.change(update_rot_preview, [rf, ra], rp)
ra.change(update_rot_preview, [rf, ra], rp)
rb.click(process_rotate, [rf, ra], ro)
with gr.Tab("Marca de Agua"):
gr.Markdown("Añade una marca de agua diagonal en todas las páginas.")
with gr.Row():
with gr.Column():
wf = gr.File(label="PDF")
wt = gr.Textbox(label="Texto Marca de Agua", placeholder="Ej: CONFIDENCIAL")
wb = gr.Button("Estampar", variant="primary")
with gr.Column():
wo = gr.File(label="PDF Marcado")
wb.click(process_watermark, [wf, wt], wo)
with gr.Tab("Reparar"):
gr.Markdown("Intenta arreglar PDFs corruptos o dañados reescribiéndolos con Ghostscript.")
with gr.Row():
with gr.Column():
repf = gr.File(label="PDF Dañado")
repb = gr.Button("Reparar", variant="primary")
with gr.Column():
repo = gr.File(label="PDF Reparado")
repb.click(process_repair, repf, repo)
with gr.Tab("Proteger"):
with gr.Row():
with gr.Column():
pf = gr.File(label="PDF")
pp = gr.Textbox(type="password", label="Pass")
pb = gr.Button("Encriptar", variant="primary")
with gr.Column():
po = gr.File(label="Protegido")
pb.click(process_protect, [pf, pp], po)
with gr.Tab("Info/Texto"):
with gr.Row():
with gr.Column():
tf = gr.File(label="PDF")
tb = gr.Button("Extraer Texto")
to = gr.File()
tb.click(process_text, tf, to)
with gr.Column():
mt = gr.Textbox(label="Título")
ma = gr.Textbox(label="Autor")
ms = gr.Textbox(label="Asunto")
mb = gr.Button("Actualizar Meta")
mo = gr.File()
# The metadata update operates on the file uploaded in tf (first column).
mb.click(process_meta, [tf, mt, ma, ms], mo)
if __name__ == "__main__":
    # Enable the request queue (default concurrency limit of 2 per event), then
    # serve on all network interfaces at port 7860.
    queued_app = demo.queue(default_concurrency_limit=2)
    queued_app.launch(server_name="0.0.0.0", server_port=7860)