Spaces:

DaniFera
/

PDFree

Sleeping

File size: 15,583 Bytes

b268f6c
db26b09
f469263
db26b09
cbf42ed
 
 
7f9b733
db26b09
 
209eae7
24b9436
db26b09
 
24b0c95
b268f6c
e5e7fa1
 
cbf42ed
24b9436
7f9b733
24b9436
 
 
cbf42ed
 
24b9436
cfdd571
01441bd
 
cfdd571
d4dee55
233f3a3
cbf42ed
7f9b733
233f3a3
7f9b733
d4dee55
24b0c95
233f3a3
d4dee55
24b0c95
233f3a3
7f9b733
cbf42ed
cfdd571
7f9b733
 
 
 
24b9436
7f9b733
 
 
 
 
 
cfdd571
e5e7fa1
7f9b733
 
cbf42ed
 
e5e7fa1
7f9b733
24b0c95
7f9b733
e5e7fa1
24b0c95
7f9b733
e5e7fa1
7f9b733
 
 
 
e5e7fa1
cbf42ed
24b0c95
cfdd571
e5e7fa1
24b0c95
24b9436
d4dee55
24b0c95
24b9436
cbf42ed
01441bd
cbf42ed
 
 
e5e7fa1
 
 
 
 
 
 
 
 
 
0a5d77a
e5e7fa1
 
 
 
24b9436
7db0b1a
31df732
 
6e5e8ef
f469263
e5e7fa1
31df732
f469263
 
e5e7fa1
6e5e8ef
db26b09
6e5e8ef
 
 
 
db26b09
6e5e8ef
 
 
 
 
f469263
6e5e8ef
 
 
eb87c9f
31df732
6e5e8ef
 
 
3f3cb94
 
6e5e8ef
 
94a82f1
6e5e8ef
3f3cb94
6e5e8ef
 
ab78612
6e5e8ef
7db0b1a
 
6e5e8ef
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9b86a61
db26b09
6e5e8ef
 
 
 
6f47e9b
6e5e8ef
 
 
7db0b1a
 
6e5e8ef
 
 
eb87c9f
 
e5e7fa1
 
 
 
 
 
 
 
 
 
94a82f1
6e5e8ef
 
 
eb87c9f
6e5e8ef
 
 
7db0b1a
6e5e8ef
 
 
7db0b1a
 
3f3cb94
cfdd571
db26b09
cfdd571
db26b09
cfdd571
 
 
 
e5e7fa1
6e5e8ef
db26b09
b268f6c
3f3cb94
db26b09
f469263
6e5e8ef
f469263
6e5e8ef
b268f6c
e5e7fa1
 
6e5e8ef
 
e5e7fa1
6e5e8ef
cfdd571
3f3cb94
cfdd571
 
 
 
 
e5e7fa1
cfdd571
 
e5e7fa1
cfdd571
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6e5e8ef
 
94a82f1
6e5e8ef
 
ab78612
 
cfdd571
e5e7fa1
ab78612
cfdd571
ab78612
6e5e8ef
ab78612
6e5e8ef
2c18203
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3f3cb94
cfdd571
6e5e8ef
cfdd571
6e5e8ef
ab78612
 
 
 
cfdd571
94a82f1
6e5e8ef
 
b268f6c
e5e7fa1
cfdd571
3f3cb94
ab78612
94a82f1
 
 
ab78612
94a82f1
cfdd571
6e5e8ef
 
 
e5e7fa1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ab78612
cfdd571
ab78612
 
 
cfdd571
ab78612
 
cfdd571
6e5e8ef
e5e7fa1
cfdd571
ab78612
 
 
cfdd571
ab78612
 
 
94a82f1
 
 
cfdd571
94a82f1
 
eb87c9f
db26b09
cfdd571

## Versión Alfa 1.0
import gradio as gr
import pandas as pd
import config
import os
import time
import threading
import shutil
from core import PDFEngine

# Export the configured temp directory through the GRADIO_TEMP_DIR environment
# variable. NOTE(review): presumably this must run before the UI is built so
# Gradio stores uploads where cleanup_cron() sweeps — confirm.
os.environ["GRADIO_TEMP_DIR"] = config.TEMP_DIR

# Single module-level PDFEngine instance shared by every handler below.
engine = PDFEngine()

# --- SECURITY: GARBAGE COLLECTOR ---
# NOTE FOR ANYONE DUPLICATING THIS SPACE:
# The sweeper below purges the temp directory once a minute and is SILENT by
# default (only failed deletions are reported, with masked names).
# To trace every sweep, start it with verbose=True, e.g.
#   threading.Thread(target=cleanup_cron, kwargs={"verbose": True}, daemon=True)
def _mask_name(filename):
    """Return a log-safe form of *filename*: first 4 chars, ``****``, extension."""
    if len(filename) > 8:
        return f"{filename[:4]}****{os.path.splitext(filename)[1]}"
    return filename


def _purge_expired(temp_dir, cutoff, verbose=False):
    """Delete direct children of *temp_dir* last modified before *cutoff*.

    Regular files are unlinked and directories are removed recursively; any
    other entry type is ignored. A failure on one entry never stops the sweep.

    Args:
        temp_dir: Directory to sweep. A missing directory is not an error.
        cutoff: Epoch timestamp; entries with an older mtime are deleted.
        verbose: When true, print one status line per entry.

    Returns:
        Number of entries actually removed.
    """
    try:
        items = os.listdir(temp_dir)
    except FileNotFoundError:
        # The directory has not been created yet (or was removed meanwhile).
        if verbose:
            print("[INFO] Carpeta temporal aún no creada.")
        return 0

    if verbose:
        if items:
            print(f"[ESTADO] Analizando {len(items)} elementos...")
        else:
            print("[ESTADO] Carpeta limpia (0 elementos).")

    removed = 0
    for filename in items:
        filepath = os.path.join(temp_dir, filename)
        try:
            is_expired = os.path.getmtime(filepath) < cutoff
        except OSError:
            # Entry vanished or cannot be inspected: skip it instead of
            # aborting the sweep for the remaining entries.
            continue

        is_dir = os.path.isdir(filepath)
        if not is_dir and not os.path.isfile(filepath):
            continue
        kind = "Carpeta" if is_dir else "Archivo"

        if not is_expired:
            if verbose:
                icon = "📂" if is_dir else "✅"
                print(f"{icon} [VIGENTE] {kind} {_mask_name(filename)}")
            continue

        try:
            if is_dir:
                shutil.rmtree(filepath)
            else:
                os.remove(filepath)
        except OSError as e:
            # Best effort: the entry is retried on the next round. Only the
            # exception class is printed because its message embeds the path.
            print(f"[WARN] No se pudo borrar {kind} {_mask_name(filename)}: {type(e).__name__}")
            continue

        removed += 1
        if verbose:
            if is_dir:
                print(f"🗑️ [LIMPIEZA] Carpeta {_mask_name(filename)} eliminada")
            else:
                print(f"❌ [BORRADO] Archivo {_mask_name(filename)}")
    return removed


def cleanup_cron(interval_seconds=60, max_age_minutes=5, temp_dir=None,
                 verbose=False, max_rounds=None):
    """Periodically delete expired files and folders from the temp directory.

    Each round sleeps first, then removes every direct child of the temp
    directory whose modification time is older than the age limit.

    Args:
        interval_seconds: Pause before each sweep (default 60).
        max_age_minutes: Age after which an entry is deleted (default 5).
        temp_dir: Directory to sweep; ``None`` means ``config.TEMP_DIR``,
            looked up on every round.
        verbose: Print a per-entry trace of every sweep.
        max_rounds: Stop after this many sweeps; ``None`` (default) loops
            forever, which is what the background thread relies on.
    """
    print("[INIT] 🛡️  Sistema de seguridad y auto-borrado ACTIVO.")

    rounds = 0
    while max_rounds is None or rounds < max_rounds:
        rounds += 1
        try:
            time.sleep(interval_seconds)
            cutoff = time.time() - (max_age_minutes * 60)
            if verbose:
                print(f"--- [SEGURIDAD] Ronda de limpieza: {time.strftime('%H:%M:%S')} ---")
            target = config.TEMP_DIR if temp_dir is None else temp_dir
            _purge_expired(target, cutoff, verbose)
        except Exception as e:
            # Top-level boundary of the background loop: report and keep running.
            print(f"[CRITICAL] Error en limpieza: {e}")

# Start the cleanup loop at import time. daemon=True means this thread does
# not keep the interpreter alive when the main program exits.
threading.Thread(target=cleanup_cron, daemon=True).start()

# --- GENERADOR DE ENLACE ---
def get_duplicate_html():
    """Return the HTML banner inviting the user to duplicate this Space.

    The link targets the current Space when the ``SPACE_ID`` environment
    variable is set to a non-empty value; otherwise it falls back to the
    generic Spaces URL.
    """
    space_id = os.environ.get('SPACE_ID', None)
    if space_id:
        url = f"https://huggingface.co/spaces/{space_id}?duplicate=true"
    else:
        url = "https://huggingface.co/spaces?duplicate=true"
    banner = f"""
    <div style="display: flex; align-items: center; gap: 10px; margin-bottom: 20px; background-color: #f9fafb; padding: 10px; border-radius: 8px; border: 1px solid #e5e7eb;">
        <a href="{url}" target="_blank">
            <img src="https://huggingface.co/datasets/huggingface/badges/raw/main/duplicate-this-space-sm.svg" alt="Duplicate Space" style="height: 30px;">
        </a>
        <span style="font-size: 0.9em; color: #4b5563;">
            ⚡ <b>¿Va lento o quieres seguridad adicional?</b> Haz clic para duplicar este espacio y tener tu propia instancia privada.
        </span>
    </div>
    """
    return banner

# --- WRAPPERS ---
def update_file_list(files):
    """Build the merge tab's file table and its default order string.

    Args:
        files: Uploaded file paths (strings), or a falsy value when nothing
            is uploaded.

    Returns:
        A tuple ``(table, order)``. ``table`` is a DataFrame with columns
        "ID" (position in *files*) and "Archivo" (base name of the path);
        ``order`` lists the IDs as "0,1,2,...". With no files, returns an
        empty DataFrame and an empty string.
    """
    if not files:
        return pd.DataFrame(), ""
    # os.path.basename honours the platform's path separator, unlike a
    # hard-coded split on "/".
    rows = [[idx, os.path.basename(path)] for idx, path in enumerate(files)]
    order = ",".join(str(idx) for idx in range(len(files)))
    return pd.DataFrame(rows, columns=["ID", "Archivo"]), order

def process_merge(files, order_str, use_numbering):
    """Merge the uploaded PDFs in the order given by ``order_str``.

    ``order_str`` is split on commas; tokens that are not purely digits
    (after stripping whitespace) are dropped. The resulting index list is
    passed to ``engine.merge_pdfs`` together with ``files`` and
    ``use_numbering``.

    Returns ``None`` when ``files`` is empty, otherwise the engine's result.
    Any exception is re-raised as ``gr.Error`` carrying the same message.
    """
    if not files:
        return None
    try:
        indices = []
        for token in order_str.split(","):
            token = token.strip()
            if token.isdigit():
                indices.append(int(token))
        merged = engine.merge_pdfs(files, indices, use_numbering)
    except Exception as err:
        raise gr.Error(str(err))
    return merged

def load_info(f):
    """Describe the uploaded PDF for the split/reorder tab.

    Args:
        f: The uploaded PDF, or a falsy value when the upload was cleared.

    Returns:
        A 3-tuple ``(summary, pages, visibility_update)``: a one-line summary
        built from the ``name`` and ``pages`` entries returned by
        ``engine.get_pdf_info``, the page count, and a ``gr.update`` toggling
        visibility. With no file: ``(None, 0, gr.update(visible=False))``.

    Raises:
        gr.Error: If the engine cannot read the file, matching how the other
            handlers in this module report engine failures.
    """
    if not f:
        return None, 0, gr.update(visible=False)
    try:
        info = engine.get_pdf_info(f)
        summary = f"📄 {info['name']} ({info['pages']} págs)"
        pages = info['pages']
    except Exception as e:
        raise gr.Error(str(e))
    return summary, pages, gr.update(visible=True)

def update_split_preview(f, r, t):
    """Build gallery items previewing the pages selected by range ``r``.

    Args:
        f: The source PDF, or a falsy value when none is loaded.
        r: Page-range string typed by the user.
        t: Value forwarded to ``engine.get_preview_indices_from_string``
            together with ``r`` (the UI passes the stored page count).

    Returns:
        ``None`` when there is no file, no range, or no page to show;
        otherwise a list of ``(preview, "Pág N")`` tuples for at most the
        first 8 selected pages, skipping pages whose preview is falsy.
    """
    if not f or not r:
        return None
    kp = engine.get_preview_indices_from_string(r, t)
    if not kp:
        return None
    previews = []
    for page in kp[:8]:
        # Render each page once; the result serves as both the filter and
        # the gallery value.
        image = engine.generate_preview(f, page)
        if image:
            previews.append((image, f"Pág {page}"))
    return previews

def process_split(f, r):
    """Split PDF ``f`` according to range string ``r`` via the engine.

    Returns ``None`` when no file is given, otherwise the result of
    ``engine.split_pdf_custom``. Any exception is re-raised as ``gr.Error``
    with the same message.
    """
    if f:
        try:
            return engine.split_pdf_custom(f, r)
        except Exception as err:
            raise gr.Error(str(err))
    return None

def process_reorder(f, o):
    """Reorder the pages of PDF ``f`` following the order string ``o``.

    Returns ``None`` when no file is given, otherwise the result of
    ``engine.reorder_pages``. Any exception is re-raised as ``gr.Error``
    with the same message.
    """
    if not f:
        return None
    try:
        reordered = engine.reorder_pages(f, o)
    except Exception as exc:
        raise gr.Error(str(exc))
    else:
        return reordered

def process_compare(fa, fb):
    """Compare the text of two PDFs through the engine.

    Returns ``None`` unless both files are provided, otherwise the result of
    ``engine.compare_pdfs_text``. Any exception is re-raised as ``gr.Error``
    with the same message.
    """
    if not (fa and fb):
        return None
    try:
        report = engine.compare_pdfs_text(fa, fb)
    except Exception as exc:
        raise gr.Error(str(exc))
    return report

def process_compress(f, l):
    """Compress PDF ``f`` at the level selected by radio label ``l``.

    The label is mapped to a numeric level (1, 3 or 4); an unknown label
    falls back to 3. Returns ``None`` when no file is given, otherwise the
    result of ``engine.compress_pdf``. Any exception is re-raised as
    ``gr.Error`` with the same message.
    """
    if not f:
        return None
    level_by_label = {
        "Baja (Máxima calidad)": 1,
        "Media (Recomendado - eBook)": 3,
        "Alta (Pantalla - 72dpi)": 4,
    }
    try:
        level = level_by_label.get(l, 3)
        return engine.compress_pdf(f, level)
    except Exception as exc:
        raise gr.Error(str(exc))

def update_rot_preview(f, a_lbl):
    """Return a preview of PDF ``f`` rotated by the angle named in ``a_lbl``.

    The angle is the first of 90, 180, 270 whose digits appear in the label;
    0 when none match. Returns ``None`` when no file is given, otherwise the
    result of ``engine.get_rotated_preview``.
    """
    if not f:
        return None
    candidates = (("90", 90), ("180", 180), ("270", 270))
    angle = next((deg for token, deg in candidates if token in a_lbl), 0)
    return engine.get_rotated_preview(f, angle)

def process_rotate(f, a_lbl):
    """Rotate PDF ``f`` by the angle named in the radio label ``a_lbl``.

    Args:
        f: The uploaded PDF, either a path string or an object exposing the
            path as ``.name``; falsy when nothing is uploaded.
        a_lbl: Radio label containing "90", "180" or "270". Any other label
            (including ``None``) means no rotation.

    Returns:
        ``None`` when no file is given; the original file path when the
        angle is 0; otherwise the result of ``engine.rotate_pdf``.

    Raises:
        gr.Error: If the engine fails, carrying the original message.
    """
    if not f:
        return None
    label = a_lbl or ""
    a = 0
    if "90" in label:
        a = 90
    elif "180" in label:
        a = 180
    elif "270" in label:
        a = 270
    if a == 0:
        # Uploads arrive as plain path strings elsewhere in this module, so
        # ``f.name`` alone raised AttributeError; accept both forms.
        return getattr(f, "name", f)
    try:
        return engine.rotate_pdf(f, a)
    except Exception as e:
        raise gr.Error(str(e))

def process_protect(f, p):
    """Protect PDF ``f`` with password ``p`` through the engine.

    Returns ``None`` unless both a file and a password are given, otherwise
    the result of ``engine.protect_pdf``. Any exception is re-raised as
    ``gr.Error`` with the same message.
    """
    if not (f and p):
        return None
    try:
        protected = engine.protect_pdf(f, p)
    except Exception as exc:
        raise gr.Error(str(exc))
    return protected

def process_meta(f, t, a, s):
    """Update the metadata of PDF ``f`` through the engine.

    ``t``, ``a`` and ``s`` are forwarded unchanged to
    ``engine.update_metadata`` (the UI supplies title, author and subject).
    Returns ``None`` when no file is given. Any exception is re-raised as
    ``gr.Error`` with the same message.
    """
    if f:
        try:
            return engine.update_metadata(f, t, a, s)
        except Exception as exc:
            raise gr.Error(str(exc))
    return None

def process_text(f):
    """Extract the text of PDF ``f`` through the engine.

    Returns ``None`` when no file is given, otherwise the result of
    ``engine.extract_text``. Any exception is re-raised as ``gr.Error``
    with the same message.
    """
    if not f:
        return None
    try:
        extracted = engine.extract_text(f)
    except Exception as exc:
        raise gr.Error(str(exc))
    else:
        return extracted

def process_watermark(f, t):
    """Stamp watermark text ``t`` onto PDF ``f`` through the engine.

    Returns ``None`` unless both a file and a text are given, otherwise the
    result of ``engine.add_watermark``. Any exception is re-raised as
    ``gr.Error`` with the same message.
    """
    if not (f and t):
        return None
    try:
        stamped = engine.add_watermark(f, t)
    except Exception as exc:
        raise gr.Error(str(exc))
    return stamped

def process_repair(f):
    """Attempt to repair PDF ``f`` through the engine.

    Returns ``None`` when no file is given, otherwise the result of
    ``engine.repair_pdf``. Any exception is re-raised as ``gr.Error``
    with the same message.
    """
    if f:
        try:
            return engine.repair_pdf(f)
        except Exception as exc:
            raise gr.Error(str(exc))
    return None

# WRAPPERS OFFICE
def process_word(f):
    """Convert PDF ``f`` to Word through the engine.

    Returns ``None`` when no file is given, otherwise the result of
    ``engine.pdf_to_word``. Any exception is re-raised as ``gr.Error``
    with the same message.
    """
    if not f:
        return None
    try:
        converted = engine.pdf_to_word(f)
    except Exception as exc:
        raise gr.Error(str(exc))
    return converted
def process_pptx(f):
    """Convert PDF ``f`` to PowerPoint through the engine.

    Returns ``None`` when no file is given, otherwise the result of
    ``engine.pdf_to_pptx``. Any exception is re-raised as ``gr.Error``
    with the same message.
    """
    if f:
        try:
            return engine.pdf_to_pptx(f)
        except Exception as exc:
            raise gr.Error(str(exc))
    return None
def process_p2i(f):
    """Export the pages of PDF ``f`` as a ZIP of images through the engine.

    Returns ``None`` when no file is given, otherwise the result of
    ``engine.pdf_to_images_zip``. Any exception is re-raised as ``gr.Error``
    with the same message.
    """
    if not f:
        return None
    try:
        archive = engine.pdf_to_images_zip(f)
    except Exception as exc:
        raise gr.Error(str(exc))
    else:
        return archive


# --- UI LAYOUT ---
# Build the whole UI. Each tab declares its components and then wires them to
# one of the handler functions defined above.
with gr.Blocks(title=config.APP_TITLE, theme=gr.themes.Soft()) as demo:
    
    gr.Markdown(f"# {config.APP_TITLE}")
    gr.Markdown("""
    Los archivos se procesan en memoria y se **autodestruyen tras 5 minutos**.
    """)
    
    # Banner with the "duplicate this Space" link.
    gr.HTML(get_duplicate_html())

    with gr.Tabs():
        # 1. MERGE
        with gr.TabItem("Unir"):
            with gr.Row():
                with gr.Column(scale=1):
                    m_files = gr.File(file_count="multiple", label="Archivos", file_types=[".pdf"])
                with gr.Column(scale=2):
                    m_tbl = gr.Dataframe(headers=["ID", "Archivo"], interactive=False)
                    m_ord = gr.Textbox(label="Orden de los documentos según ID", placeholder="Ej: 0, 2, 1")
                    m_nums = gr.Checkbox(label="Numerar páginas (1 de X)", value=False)
                    m_btn = gr.Button("Unir PDF", variant="primary")
            m_out = gr.File(label="Resultado")
            # Refresh the ID table and the default order whenever the upload changes.
            m_files.change(update_file_list, m_files, [m_tbl, m_ord])
            m_btn.click(process_merge, [m_files, m_ord, m_nums], m_out)

        # 2. SPLIT / REORDER
        with gr.TabItem("Dividir / Reordenar"):
            # One source PDF shared by both sub-tabs below.
            dr_f = gr.File(label="PDF Origen", file_types=[".pdf"])
            dr_inf = gr.Markdown("")
            # Page count of the loaded PDF, filled in by load_info.
            dr_pg = gr.State(0)
            with gr.Tabs():
                with gr.Tab("Extraer"):
                    gr.Markdown("Separa páginas en un ZIP.")
                    with gr.Row():
                        with gr.Column():
                            s_rng = gr.Textbox(label="Rango", placeholder="Ej: 1-3, 5")
                            with gr.Row():
                                s_prv = gr.Button("Preview")
                                s_btn = gr.Button("Dividir (ZIP)", variant="primary")
                        with gr.Column():
                            s_gal = gr.Gallery(height=160, columns=4, object_fit="contain", label="Vista Previa")
                            s_out = gr.File(label="ZIP")
                    s_prv.click(update_split_preview, [dr_f, s_rng, dr_pg], s_gal)
                    s_btn.click(process_split, [dr_f, s_rng], s_out)
                with gr.Tab("Reordenar"):
                    gr.Markdown("Crea un PDF con nuevo orden.")
                    with gr.Row():
                        with gr.Column():
                            r_ord = gr.Textbox(label="Nuevo Orden", placeholder="Ej: 3, 1, 2, 4-10")
                            r_btn = gr.Button("Reordenar", variant="primary")
                        with gr.Column():
                            r_out = gr.File(label="PDF Reordenado")
                    r_btn.click(process_reorder, [dr_f, r_ord], r_out)
            # Wired after the sub-tabs because it targets s_out, which is created inside them.
            dr_f.change(load_info, dr_f, [dr_inf, dr_pg, s_out])

        # 3. COMPRESS
        with gr.TabItem("Comprimir"):
            with gr.Row():
                with gr.Column():
                    c_f = gr.File(label="PDF Original", file_types=[".pdf"])
                    # These labels must match the keys of the level table in process_compress.
                    c_l = gr.Radio(["Baja (Máxima calidad)", "Media (Recomendado - eBook)", "Alta (Pantalla - 72dpi)"], label="Nivel", value="Media (Recomendado - eBook)")
                    c_b = gr.Button("Comprimir", variant="primary")
                with gr.Column():
                    c_out = gr.File(label="PDF Comprimido")
            c_b.click(process_compress, [c_f, c_l], c_out)

        # 4. CONVERT
        with gr.TabItem("Convertir Formatos"):
            with gr.Row():
                # PDF -> Word
                with gr.Column():
                    gr.Markdown("### A Word")
                    w_f = gr.File(label="PDF")
                    w_b = gr.Button("Convertir")
                    w_o = gr.File(label="DOCX")
                    w_b.click(process_word, w_f, w_o)
                # PDF -> images (ZIP)
                with gr.Column():
                    gr.Markdown("### A Imágenes")
                    p2i_f = gr.File(label="PDF")
                    p2i_b = gr.Button("Extraer ZIP")
                    p2i_o = gr.File(label="ZIP")
                    p2i_b.click(process_p2i, p2i_f, p2i_o)
                # PDF -> PowerPoint
                with gr.Column():
                    gr.Markdown("### A PowerPoint")
                    p_f = gr.File(label="PDF")
                    p_b = gr.Button("Convertir")
                    p_o = gr.File(label="PPTX")
                    p_b.click(process_pptx, p_f, p_o)

        # 5. COMPARE
        with gr.TabItem("Comparar"):
            gr.Markdown("Compara el **texto** de dos versiones. Descarga un informe con cambios.")
            with gr.Row():
                with gr.Column():
                    ca = gr.File(label="Versión A (Original)", file_types=[".pdf"])
                with gr.Column():
                    cb = gr.File(label="Versión B (Modificada)", file_types=[".pdf"])
            cb_btn = gr.Button("Generar Informe", variant="primary")
            co = gr.File(label="Informe PDF")
            cb_btn.click(process_compare, [ca, cb], co)

        # 6. EXTRAS
        with gr.TabItem("Extras"):
            with gr.Tab("Rotar"):
                with gr.Row():
                    with gr.Column():
                        rf = gr.File(label="PDF")
                        # The handlers pick the angle by looking for "90", "180" or "270" in these labels.
                        ra = gr.Radio(["0° (Original)", "90° (Derecha)", "180° (Invertir)", "270° (Izquierda)"], label="Rotación", value="0° (Original)")
                        rb = gr.Button("Rotar", variant="primary")
                    with gr.Column():
                        rp = gr.Image(label="Preview")
                        ro = gr.File(label="PDF Rotado")
                # Refresh the preview when either the file or the angle changes.
                rf.change(update_rot_preview, [rf, ra], rp)
                ra.change(update_rot_preview, [rf, ra], rp)
                rb.click(process_rotate, [rf, ra], ro)
            
            with gr.Tab("Marca de Agua"):
                gr.Markdown("Añade una marca de agua diagonal en todas las páginas.")
                with gr.Row():
                    with gr.Column():
                        wf = gr.File(label="PDF")
                        wt = gr.Textbox(label="Texto Marca de Agua", placeholder="Ej: CONFIDENCIAL")
                        wb = gr.Button("Estampar", variant="primary")
                    with gr.Column():
                        wo = gr.File(label="PDF Marcado")
                wb.click(process_watermark, [wf, wt], wo)

            with gr.Tab("Reparar"):
                gr.Markdown("Intenta arreglar PDFs corruptos o dañados reescribiéndolos con Ghostscript.")
                with gr.Row():
                    with gr.Column():
                        repf = gr.File(label="PDF Dañado")
                        repb = gr.Button("Reparar", variant="primary")
                    with gr.Column():
                        repo = gr.File(label="PDF Reparado")
                repb.click(process_repair, repf, repo)

            with gr.Tab("Proteger"):
                with gr.Row():
                    with gr.Column():
                        pf = gr.File(label="PDF")
                        pp = gr.Textbox(type="password", label="Pass")
                        pb = gr.Button("Encriptar", variant="primary")
                    with gr.Column():
                        po = gr.File(label="Protegido")
                pb.click(process_protect, [pf, pp], po)
            
            with gr.Tab("Info/Texto"):
                with gr.Row():
                    # Left column: text extraction.
                    with gr.Column():
                        tf = gr.File(label="PDF")
                        tb = gr.Button("Extraer Texto")
                        to = gr.File()
                        tb.click(process_text, tf, to)
                    # Right column: metadata update, applied to the PDF uploaded in the left column (tf).
                    with gr.Column():
                        mt = gr.Textbox(label="Título")
                        ma = gr.Textbox(label="Autor")
                        ms = gr.Textbox(label="Asunto")
                        mb = gr.Button("Actualizar Meta")
                        mo = gr.File()
                        mb.click(process_meta, [tf, mt, ma, ms], mo)

if __name__ == "__main__":
    # Enable the request queue with a default concurrency limit of 2, then
    # serve on all interfaces at port 7860.
    queued_app = demo.queue(default_concurrency_limit=2)
    queued_app.launch(server_name="0.0.0.0", server_port=7860)