|
|
|
|
|
import gradio as gr |
|
|
import pandas as pd |
|
|
import config |
|
|
import os |
|
|
import time |
|
|
import threading |
|
|
import shutil |
|
|
from core import PDFEngine |
|
|
|
|
|
# Route Gradio's temporary files into the app-managed folder; this is the
# same directory that cleanup_cron() purges.
os.environ["GRADIO_TEMP_DIR"] = config.TEMP_DIR

# Single PDFEngine instance shared by every UI callback in this module.
engine = PDFEngine()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# How often the cleanup loop wakes up, and how old an entry must be to be
# deleted (both in seconds).
_CLEANUP_INTERVAL_SECONDS = 60
_MAX_AGE_SECONDS = 5 * 60


def _mask_name(filename):
    """Return *filename* with its middle hidden so logs do not expose it."""
    if len(filename) > 8:
        return f"{filename[:4]}****{os.path.splitext(filename)[1]}"
    return filename


def _purge_expired(directory, cutoff):
    """Delete direct children of *directory* last modified before *cutoff*.

    Regular files are removed with ``os.remove`` and directories with
    ``shutil.rmtree``; any other kind of entry is left alone. A directory is
    judged by its own modification time, not by the files inside it.
    Failures on one entry are reported (with a masked name) and do not stop
    the processing of the remaining entries.
    """
    try:
        entries = os.listdir(directory)
    except FileNotFoundError:
        # Nothing has been uploaded yet (or the folder was removed).
        return

    for entry in entries:
        path = os.path.join(directory, entry)
        try:
            if os.path.getmtime(path) >= cutoff:
                continue
            if os.path.isfile(path):
                os.remove(path)
            elif os.path.isdir(path):
                shutil.rmtree(path)
        except FileNotFoundError:
            # The entry vanished between listing and deletion.
            continue
        except OSError as exc:
            # Report without the full path so user file names stay private.
            reason = exc.strerror or type(exc).__name__
            print(f"[WARN] No se pudo borrar {_mask_name(entry)}: {reason}")


def cleanup_cron():
    """Run forever, purging old entries from ``config.TEMP_DIR``.

    Wakes up once a minute and deletes files and folders whose modification
    time is more than five minutes old. Any unexpected error is printed and
    the loop keeps running, so the worker thread never dies.
    """
    print("[INIT] 🛡️ Sistema de seguridad y auto-borrado ACTIVO.")

    while True:
        try:
            # Sleeping first also throttles the loop if purging keeps failing.
            time.sleep(_CLEANUP_INTERVAL_SECONDS)
            _purge_expired(config.TEMP_DIR, time.time() - _MAX_AGE_SECONDS)
        except Exception as e:
            print(f"[CRITICAL] Error en limpieza: {e}")


# Daemon thread: it must not keep the process alive on shutdown.
threading.Thread(target=cleanup_cron, daemon=True).start()
|
|
|
|
|
|
|
|
def get_duplicate_html():
    """Build the HTML banner that invites the user to duplicate this Space.

    The link points at the current Space when the ``SPACE_ID`` environment
    variable holds a non-empty value; otherwise it falls back to the generic
    Spaces URL.
    """
    space_id = os.getenv('SPACE_ID')
    if space_id:
        url = f"https://huggingface.co/spaces/{space_id}?duplicate=true"
    else:
        url = "https://huggingface.co/spaces?duplicate=true"

    return f"""
    <div style="display: flex; align-items: center; gap: 10px; margin-bottom: 20px; background-color: #f9fafb; padding: 10px; border-radius: 8px; border: 1px solid #e5e7eb;">
        <a href="{url}" target="_blank">
            <img src="https://huggingface.co/datasets/huggingface/badges/raw/main/duplicate-this-space-sm.svg" alt="Duplicate Space" style="height: 30px;">
        </a>
        <span style="font-size: 0.9em; color: #4b5563;">
            ⚡ <b>¿Va lento o quieres seguridad adicional?</b> Haz clic para duplicar este espacio y tener tu propia instancia privada.
        </span>
    </div>
    """
|
|
|
|
|
|
|
|
def update_file_list(files):
    """Build the ID/name table and the default merge order for the uploads.

    Args:
        files: Uploaded files, either path strings or objects exposing the
            path through a ``name`` attribute. May be ``None`` or empty.

    Returns:
        A ``(table, order)`` tuple: a DataFrame with columns ``ID`` and
        ``Archivo`` (base file name), and the comma-separated IDs in upload
        order. With no files the result is an empty DataFrame and ``""``.
    """
    if not files:
        return pd.DataFrame(), ""

    # os.path.basename uses the platform's separators instead of assuming "/".
    rows = [
        [idx, os.path.basename(str(getattr(item, "name", item)))]
        for idx, item in enumerate(files)
    ]
    table = pd.DataFrame(rows, columns=["ID", "Archivo"])
    default_order = ",".join(str(idx) for idx in range(len(rows)))
    return table, default_order
|
|
|
|
|
def process_merge(files, order_str, use_numbering):
    """Merge the uploaded PDFs in the order given by *order_str*.

    *order_str* is a comma-separated list of file IDs; tokens that are not
    purely digits are ignored. Returns ``None`` when no files were uploaded,
    otherwise whatever ``engine.merge_pdfs`` returns. Any failure is
    re-raised as ``gr.Error`` carrying the original message.
    """
    if not files:
        return None

    try:
        indices = []
        for token in order_str.split(","):
            token = token.strip()
            if token.isdigit():
                indices.append(int(token))
        return engine.merge_pdfs(files, indices, use_numbering)
    except Exception as err:
        raise gr.Error(str(err))
|
|
|
|
|
def load_info(f):
    """Describe the uploaded PDF for the split/reorder tab.

    Returns a ``(summary, pages, visibility_update)`` tuple: a one-line
    summary with the name and page count reported by
    ``engine.get_pdf_info``, the page count itself, and a ``gr.update`` that
    shows the bound output. Without a file it returns ``(None, 0, ...)``
    with an update that hides that output.
    """
    if f:
        info = engine.get_pdf_info(f)
        summary = f"📄 {info['name']} ({info['pages']} págs)"
        return summary, info['pages'], gr.update(visible=True)

    return None, 0, gr.update(visible=False)
|
|
|
|
|
def update_split_preview(f, r, t):
    """Build gallery items previewing the pages selected by range *r*.

    Args:
        f: The uploaded PDF.
        r: Page-range string typed by the user (e.g. ``"1-3, 5"``).
        t: Forwarded unchanged to
            ``engine.get_preview_indices_from_string``; the UI passes the
            page count stored when the file was loaded.

    Returns:
        ``None`` when there is no file, no range, or no page selected;
        otherwise a list of ``(image, caption)`` tuples for at most the
        first 8 selected pages, skipping pages whose preview is falsy.
    """
    if not f or not r:
        return None

    page_numbers = engine.get_preview_indices_from_string(r, t)
    if not page_numbers:
        return None

    gallery = []
    for page in page_numbers[:8]:
        # Render each page once and reuse the result for filter and value.
        image = engine.generate_preview(f, page)
        if image:
            gallery.append((image, f"Pág {page}"))
    return gallery
|
|
|
|
|
def process_split(f, r):
    """Split PDF *f* according to range string *r*.

    Returns ``None`` without a file, otherwise the result of
    ``engine.split_pdf_custom``. Failures are re-raised as ``gr.Error``
    carrying the original message.
    """
    if not f:
        return None

    try:
        return engine.split_pdf_custom(f, r)
    except Exception as err:
        raise gr.Error(str(err))
|
|
|
|
|
def process_reorder(f, o):
    """Create a reordered copy of PDF *f* following order string *o*.

    Returns ``None`` without a file, otherwise the result of
    ``engine.reorder_pages``. Failures are re-raised as ``gr.Error``
    carrying the original message.
    """
    if not f:
        return None

    try:
        return engine.reorder_pages(f, o)
    except Exception as err:
        raise gr.Error(str(err))
|
|
|
|
|
def process_compare(fa, fb):
    """Compare the text of two PDFs through ``engine.compare_pdfs_text``.

    Returns ``None`` unless both files are provided. Failures are re-raised
    as ``gr.Error`` carrying the original message.
    """
    if not (fa and fb):
        return None

    try:
        return engine.compare_pdfs_text(fa, fb)
    except Exception as err:
        raise gr.Error(str(err))
|
|
|
|
|
def process_compress(f, l):
    """Compress PDF *f* at the level selected by radio label *l*.

    The label is mapped to the numeric level passed to
    ``engine.compress_pdf``; an unknown label falls back to level 3.
    Returns ``None`` without a file. Failures are re-raised as ``gr.Error``
    carrying the original message.
    """
    if not f:
        return None

    level_by_label = {
        "Baja (Máxima calidad)": 1,
        "Media (Recomendado - eBook)": 3,
        "Alta (Pantalla - 72dpi)": 4,
    }
    try:
        level = level_by_label.get(l, 3)
        return engine.compress_pdf(f, level)
    except Exception as err:
        raise gr.Error(str(err))
|
|
|
|
|
def _angle_from_label(a_lbl):
    """Return the rotation angle (0, 90, 180 or 270) named by a radio label.

    The first of "90", "180", "270" found in the label wins; a missing
    label or one naming none of them means no rotation.
    """
    if not a_lbl:
        return 0
    for angle in (90, 180, 270):
        if str(angle) in a_lbl:
            return angle
    return 0


def update_rot_preview(f, a_lbl):
    """Return the rotated preview of PDF *f* for the selected rotation label.

    Returns ``None`` without a file, otherwise the result of
    ``engine.get_rotated_preview`` for the angle named in *a_lbl*.
    """
    if not f:
        return None
    return engine.get_rotated_preview(f, _angle_from_label(a_lbl))


def process_rotate(f, a_lbl):
    """Rotate PDF *f* by the angle named in the radio label *a_lbl*.

    Returns ``None`` without a file. With a 0° selection the uploaded file's
    own path is returned untouched; otherwise the result of
    ``engine.rotate_pdf``. Failures are re-raised as ``gr.Error`` carrying
    the original message.
    """
    if not f:
        return None

    angle = _angle_from_label(a_lbl)
    if angle == 0:
        # Accept both upload objects exposing ``.name`` and plain path strings.
        return getattr(f, "name", f)

    try:
        return engine.rotate_pdf(f, angle)
    except Exception as err:
        raise gr.Error(str(err)) from err
|
|
|
|
|
def process_protect(f, p):
    """Password-protect PDF *f* with password *p*.

    Returns ``None`` when the file or the password is missing, otherwise the
    result of ``engine.protect_pdf``. Failures are re-raised as ``gr.Error``
    carrying the original message.
    """
    if not (f and p):
        return None

    try:
        return engine.protect_pdf(f, p)
    except Exception as err:
        raise gr.Error(str(err))
|
|
|
|
|
def process_meta(f, t, a, s):
    """Update the metadata of PDF *f*.

    *t*, *a* and *s* are forwarded in that order to
    ``engine.update_metadata``; the UI binds them to the title, author and
    subject fields. Returns ``None`` without a file. Failures are re-raised
    as ``gr.Error`` carrying the original message.
    """
    if not f:
        return None

    try:
        return engine.update_metadata(f, t, a, s)
    except Exception as err:
        raise gr.Error(str(err))
|
|
|
|
|
def process_text(f):
    """Extract the text of PDF *f* through ``engine.extract_text``.

    Returns ``None`` without a file. Failures are re-raised as ``gr.Error``
    carrying the original message.
    """
    if not f:
        return None

    try:
        return engine.extract_text(f)
    except Exception as err:
        raise gr.Error(str(err))
|
|
|
|
|
def process_watermark(f, t):
    """Stamp watermark text *t* on PDF *f* through ``engine.add_watermark``.

    Returns ``None`` when the file or the text is missing. Failures are
    re-raised as ``gr.Error`` carrying the original message.
    """
    if not (f and t):
        return None

    try:
        return engine.add_watermark(f, t)
    except Exception as err:
        raise gr.Error(str(err))
|
|
|
|
|
def process_repair(f):
    """Attempt to repair PDF *f* through ``engine.repair_pdf``.

    Returns ``None`` without a file. Failures are re-raised as ``gr.Error``
    carrying the original message.
    """
    if not f:
        return None

    try:
        return engine.repair_pdf(f)
    except Exception as err:
        raise gr.Error(str(err))
|
|
|
|
|
|
|
|
def process_word(f):
    """Convert PDF *f* to a Word document through ``engine.pdf_to_word``.

    Returns ``None`` without a file. Failures are re-raised as ``gr.Error``
    carrying the original message.
    """
    if not f:
        return None

    try:
        return engine.pdf_to_word(f)
    except Exception as err:
        raise gr.Error(str(err))
|
|
def process_pptx(f):
    """Convert PDF *f* to a PowerPoint file through ``engine.pdf_to_pptx``.

    Returns ``None`` without a file. Failures are re-raised as ``gr.Error``
    carrying the original message.
    """
    if not f:
        return None

    try:
        return engine.pdf_to_pptx(f)
    except Exception as err:
        raise gr.Error(str(err))
|
|
def process_p2i(f):
    """Export the pages of PDF *f* through ``engine.pdf_to_images_zip``.

    Returns ``None`` without a file. Failures are re-raised as ``gr.Error``
    carrying the original message.
    """
    if not f:
        return None

    try:
        return engine.pdf_to_images_zip(f)
    except Exception as err:
        raise gr.Error(str(err))
|
|
|
|
|
|
|
|
|
|
|
# UI definition: one top-level tab per tool, each wiring its inputs to the
# matching process_* callback defined above.
# NOTE(review): the container nesting below was reconstructed from a source
# whose indentation was not available -- confirm the layout visually.
with gr.Blocks(title=config.APP_TITLE, theme=gr.themes.Soft()) as demo:

    gr.Markdown(f"# {config.APP_TITLE}")
    gr.Markdown("""
    Los archivos se procesan en memoria y se **autodestruyen tras 5 minutos**.
    """)

    gr.HTML(get_duplicate_html())

    with gr.Tabs():

        # --- Merge several PDFs into one ---
        with gr.TabItem("Unir"):
            with gr.Row():
                with gr.Column(scale=1):
                    m_files = gr.File(file_count="multiple", label="Archivos", file_types=[".pdf"])
                with gr.Column(scale=2):
                    m_tbl = gr.Dataframe(headers=["ID", "Archivo"], interactive=False)
                    m_ord = gr.Textbox(label="Orden de los documentos según ID", placeholder="Ej: 0, 2, 1")
                    m_nums = gr.Checkbox(label="Numerar páginas (1 de X)", value=False)
                    m_btn = gr.Button("Unir PDF", variant="primary")
                    m_out = gr.File(label="Resultado")
            # Refresh the ID table and the default order whenever uploads change.
            m_files.change(update_file_list, m_files, [m_tbl, m_ord])
            m_btn.click(process_merge, [m_files, m_ord, m_nums], m_out)

        # --- Split into a ZIP / reorder pages of one source PDF ---
        with gr.TabItem("Dividir / Reordenar"):
            dr_f = gr.File(label="PDF Origen", file_types=[".pdf"])
            dr_inf = gr.Markdown("")
            # Page count of the loaded PDF, filled in by load_info.
            dr_pg = gr.State(0)
            with gr.Tabs():
                with gr.Tab("Extraer"):
                    gr.Markdown("Separa páginas en un ZIP.")
                    with gr.Row():
                        with gr.Column():
                            s_rng = gr.Textbox(label="Rango", placeholder="Ej: 1-3, 5")
                            with gr.Row():
                                s_prv = gr.Button("Preview")
                                s_btn = gr.Button("Dividir (ZIP)", variant="primary")
                        with gr.Column():
                            s_gal = gr.Gallery(height=160, columns=4, object_fit="contain", label="Vista Previa")
                            s_out = gr.File(label="ZIP")
                    s_prv.click(update_split_preview, [dr_f, s_rng, dr_pg], s_gal)
                    s_btn.click(process_split, [dr_f, s_rng], s_out)
                with gr.Tab("Reordenar"):
                    gr.Markdown("Crea un PDF con nuevo orden.")
                    with gr.Row():
                        with gr.Column():
                            r_ord = gr.Textbox(label="Nuevo Orden", placeholder="Ej: 3, 1, 2, 4-10")
                            r_btn = gr.Button("Reordenar", variant="primary")
                        with gr.Column():
                            r_out = gr.File(label="PDF Reordenado")
                    r_btn.click(process_reorder, [dr_f, r_ord], r_out)
            dr_f.change(load_info, dr_f, [dr_inf, dr_pg, s_out])

        # --- Compress ---
        with gr.TabItem("Comprimir"):
            with gr.Row():
                with gr.Column():
                    c_f = gr.File(label="PDF Original", file_types=[".pdf"])
                    c_l = gr.Radio(["Baja (Máxima calidad)", "Media (Recomendado - eBook)", "Alta (Pantalla - 72dpi)"], label="Nivel", value="Media (Recomendado - eBook)")
                    c_b = gr.Button("Comprimir", variant="primary")
                with gr.Column():
                    c_out = gr.File(label="PDF Comprimido")
            c_b.click(process_compress, [c_f, c_l], c_out)

        # --- Convert to other formats ---
        # The PDF inputs below are restricted to .pdf like the tabs above.
        with gr.TabItem("Convertir Formatos"):
            with gr.Row():
                with gr.Column():
                    gr.Markdown("### A Word")
                    w_f = gr.File(label="PDF", file_types=[".pdf"])
                    w_b = gr.Button("Convertir")
                    w_o = gr.File(label="DOCX")
                    w_b.click(process_word, w_f, w_o)
                with gr.Column():
                    gr.Markdown("### A Imágenes")
                    p2i_f = gr.File(label="PDF", file_types=[".pdf"])
                    p2i_b = gr.Button("Extraer ZIP")
                    p2i_o = gr.File(label="ZIP")
                    p2i_b.click(process_p2i, p2i_f, p2i_o)
                with gr.Column():
                    gr.Markdown("### A PowerPoint")
                    p_f = gr.File(label="PDF", file_types=[".pdf"])
                    p_b = gr.Button("Convertir")
                    p_o = gr.File(label="PPTX")
                    p_b.click(process_pptx, p_f, p_o)

        # --- Compare the text of two versions ---
        with gr.TabItem("Comparar"):
            gr.Markdown("Compara el **texto** de dos versiones. Descarga un informe con cambios.")
            with gr.Row():
                with gr.Column():
                    ca = gr.File(label="Versión A (Original)", file_types=[".pdf"])
                with gr.Column():
                    cb = gr.File(label="Versión B (Modificada)", file_types=[".pdf"])
            cb_btn = gr.Button("Generar Informe", variant="primary")
            co = gr.File(label="Informe PDF")
            cb_btn.click(process_compare, [ca, cb], co)

        # --- Miscellaneous tools ---
        with gr.TabItem("Extras"):
            with gr.Tab("Rotar"):
                with gr.Row():
                    with gr.Column():
                        rf = gr.File(label="PDF", file_types=[".pdf"])
                        ra = gr.Radio(["0° (Original)", "90° (Derecha)", "180° (Invertir)", "270° (Izquierda)"], label="Rotación", value="0° (Original)")
                        rb = gr.Button("Rotar", variant="primary")
                    with gr.Column():
                        rp = gr.Image(label="Preview")
                        ro = gr.File(label="PDF Rotado")
                # Refresh the preview when either the file or the angle changes.
                rf.change(update_rot_preview, [rf, ra], rp)
                ra.change(update_rot_preview, [rf, ra], rp)
                rb.click(process_rotate, [rf, ra], ro)

            with gr.Tab("Marca de Agua"):
                gr.Markdown("Añade una marca de agua diagonal en todas las páginas.")
                with gr.Row():
                    with gr.Column():
                        wf = gr.File(label="PDF", file_types=[".pdf"])
                        wt = gr.Textbox(label="Texto Marca de Agua", placeholder="Ej: CONFIDENCIAL")
                        wb = gr.Button("Estampar", variant="primary")
                    with gr.Column():
                        wo = gr.File(label="PDF Marcado")
                wb.click(process_watermark, [wf, wt], wo)

            with gr.Tab("Reparar"):
                gr.Markdown("Intenta arreglar PDFs corruptos o dañados reescribiéndolos con Ghostscript.")
                with gr.Row():
                    with gr.Column():
                        repf = gr.File(label="PDF Dañado", file_types=[".pdf"])
                        repb = gr.Button("Reparar", variant="primary")
                    with gr.Column():
                        repo = gr.File(label="PDF Reparado")
                repb.click(process_repair, repf, repo)

            with gr.Tab("Proteger"):
                with gr.Row():
                    with gr.Column():
                        pf = gr.File(label="PDF", file_types=[".pdf"])
                        pp = gr.Textbox(type="password", label="Pass")
                        pb = gr.Button("Encriptar", variant="primary")
                    with gr.Column():
                        po = gr.File(label="Protegido")
                pb.click(process_protect, [pf, pp], po)

            with gr.Tab("Info/Texto"):
                with gr.Row():
                    with gr.Column():
                        tf = gr.File(label="PDF", file_types=[".pdf"])
                        tb = gr.Button("Extraer Texto")
                        to = gr.File()
                        tb.click(process_text, tf, to)
                    with gr.Column():
                        mt = gr.Textbox(label="Título")
                        ma = gr.Textbox(label="Autor")
                        ms = gr.Textbox(label="Asunto")
                        mb = gr.Button("Actualizar Meta")
                        mo = gr.File()
                        # The metadata form reuses the file uploaded in the left column.
                        mb.click(process_meta, [tf, mt, ma, ms], mo)
|
|
|
|
|
if __name__ == "__main__":
    # Enable the request queue (default concurrency limit of 2 per event),
    # then serve on all interfaces at port 7860.
    queued_app = demo.queue(default_concurrency_limit=2)
    queued_app.launch(server_name="0.0.0.0", server_port=7860)