Spaces:
Running
Running
Update app.py
Browse fileschanged show time
app.py
CHANGED
|
@@ -30,11 +30,11 @@ def procesar_pdf(pdf_file=None, url_pdf=None, formatos=[], verbose="auto"):
|
|
| 30 |
try:
|
| 31 |
subprocess.run(["marker_single", "--help"], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
| 32 |
except FileNotFoundError:
|
| 33 |
-
yield "Error: el comando 'marker_single' no está disponible.\nVerifica que marker-pdf esté en requirements.txt",
|
| 34 |
return
|
| 35 |
|
| 36 |
if not formatos:
|
| 37 |
-
yield "Debes seleccionar al menos un formato de salida.",
|
| 38 |
return
|
| 39 |
|
| 40 |
try:
|
|
@@ -44,7 +44,7 @@ def procesar_pdf(pdf_file=None, url_pdf=None, formatos=[], verbose="auto"):
|
|
| 44 |
ruta_pdf = obtener_ruta_segura(output_base, nombre_pdf)
|
| 45 |
response = requests.get(url_pdf)
|
| 46 |
if response.status_code != 200:
|
| 47 |
-
yield "No se pudo descargar el PDF desde la URL proporcionada.",
|
| 48 |
return
|
| 49 |
with open(ruta_pdf, "wb") as f:
|
| 50 |
f.write(response.content)
|
|
@@ -53,12 +53,12 @@ def procesar_pdf(pdf_file=None, url_pdf=None, formatos=[], verbose="auto"):
|
|
| 53 |
ruta_pdf = obtener_ruta_segura(output_base, nombre_pdf)
|
| 54 |
shutil.copyfile(pdf_file.name, ruta_pdf)
|
| 55 |
else:
|
| 56 |
-
yield "No se proporcionó ni archivo ni URL.",
|
| 57 |
return
|
| 58 |
|
| 59 |
with open(ruta_pdf, "rb") as f:
|
| 60 |
if f.read(4) != b"%PDF":
|
| 61 |
-
yield "El archivo proporcionado no es un PDF válido.",
|
| 62 |
return
|
| 63 |
|
| 64 |
if os.path.getsize(ruta_pdf) > 3 * 1024 * 1024:
|
|
@@ -73,7 +73,7 @@ def procesar_pdf(pdf_file=None, url_pdf=None, formatos=[], verbose="auto"):
|
|
| 73 |
for fmt in formatos:
|
| 74 |
tiempo = round(time.time() - start_time, 1)
|
| 75 |
estado += f"Procesando formato '{fmt}'... (tiempo: {tiempo} s)\n"
|
| 76 |
-
yield estado,
|
| 77 |
|
| 78 |
if fmt == "md":
|
| 79 |
cmd = ["marker_single", ruta_pdf, "--output_format", "markdown", "--disable_ocr", "--output_dir", carpeta_salida]
|
|
@@ -89,18 +89,18 @@ def procesar_pdf(pdf_file=None, url_pdf=None, formatos=[], verbose="auto"):
|
|
| 89 |
break
|
| 90 |
elif intento == 3:
|
| 91 |
estado += f"Error ejecutando marker_single (intento {intento}):\n{result.stderr}\n"
|
| 92 |
-
yield estado,
|
| 93 |
return
|
| 94 |
time.sleep(2)
|
| 95 |
except Exception as e:
|
| 96 |
estado += f"Error inesperado durante el intento {intento}: {str(e)}\n"
|
| 97 |
-
yield estado,
|
| 98 |
return
|
| 99 |
|
| 100 |
md_path = os.path.join(carpeta_salida, f"{nombre_sin_ext}.md")
|
| 101 |
if not os.path.exists(md_path):
|
| 102 |
estado += "No se generó el archivo Markdown. Puede que el PDF no contenga texto reconocible.\n"
|
| 103 |
-
yield estado,
|
| 104 |
return
|
| 105 |
|
| 106 |
with open(md_path, "r", encoding="utf-8") as f:
|
|
@@ -125,13 +125,13 @@ def procesar_pdf(pdf_file=None, url_pdf=None, formatos=[], verbose="auto"):
|
|
| 125 |
shutil.make_archive(base_name=zip_path.replace(".zip", ""), format="zip", root_dir=carpeta_salida)
|
| 126 |
|
| 127 |
estado += f"Procesamiento finalizado correctamente. Tiempo total: {tiempo_total} s"
|
| 128 |
-
yield estado + resumen,
|
| 129 |
|
| 130 |
except Exception as e:
|
| 131 |
estado += f"Error general inesperado: {str(e)}"
|
| 132 |
-
yield estado,
|
| 133 |
|
| 134 |
-
# Interfaz
|
| 135 |
demo = gr.Interface(
|
| 136 |
fn=procesar_pdf,
|
| 137 |
inputs=[
|
|
@@ -145,7 +145,6 @@ demo = gr.Interface(
|
|
| 145 |
],
|
| 146 |
outputs=[
|
| 147 |
gr.Textbox(label="Estado del procesamiento", lines=12),
|
| 148 |
-
gr.Textbox(label="Tiempo transcurrido", interactive=False),
|
| 149 |
gr.File(label="Descargar Markdown (.md)", visible=False),
|
| 150 |
gr.File(label="Descargar ZIP completo", visible=False)
|
| 151 |
],
|
|
|
|
| 30 |
try:
|
| 31 |
subprocess.run(["marker_single", "--help"], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
| 32 |
except FileNotFoundError:
|
| 33 |
+
yield "Error: el comando 'marker_single' no está disponible.\nVerifica que marker-pdf esté en requirements.txt", None, None
|
| 34 |
return
|
| 35 |
|
| 36 |
if not formatos:
|
| 37 |
+
yield "Debes seleccionar al menos un formato de salida.", None, None
|
| 38 |
return
|
| 39 |
|
| 40 |
try:
|
|
|
|
| 44 |
ruta_pdf = obtener_ruta_segura(output_base, nombre_pdf)
|
| 45 |
response = requests.get(url_pdf)
|
| 46 |
if response.status_code != 200:
|
| 47 |
+
yield "No se pudo descargar el PDF desde la URL proporcionada.", None, None
|
| 48 |
return
|
| 49 |
with open(ruta_pdf, "wb") as f:
|
| 50 |
f.write(response.content)
|
|
|
|
| 53 |
ruta_pdf = obtener_ruta_segura(output_base, nombre_pdf)
|
| 54 |
shutil.copyfile(pdf_file.name, ruta_pdf)
|
| 55 |
else:
|
| 56 |
+
yield "No se proporcionó ni archivo ni URL.", None, None
|
| 57 |
return
|
| 58 |
|
| 59 |
with open(ruta_pdf, "rb") as f:
|
| 60 |
if f.read(4) != b"%PDF":
|
| 61 |
+
yield "El archivo proporcionado no es un PDF válido.", None, None
|
| 62 |
return
|
| 63 |
|
| 64 |
if os.path.getsize(ruta_pdf) > 3 * 1024 * 1024:
|
|
|
|
| 73 |
for fmt in formatos:
|
| 74 |
tiempo = round(time.time() - start_time, 1)
|
| 75 |
estado += f"Procesando formato '{fmt}'... (tiempo: {tiempo} s)\n"
|
| 76 |
+
yield estado, None, None
|
| 77 |
|
| 78 |
if fmt == "md":
|
| 79 |
cmd = ["marker_single", ruta_pdf, "--output_format", "markdown", "--disable_ocr", "--output_dir", carpeta_salida]
|
|
|
|
| 89 |
break
|
| 90 |
elif intento == 3:
|
| 91 |
estado += f"Error ejecutando marker_single (intento {intento}):\n{result.stderr}\n"
|
| 92 |
+
yield estado, None, None
|
| 93 |
return
|
| 94 |
time.sleep(2)
|
| 95 |
except Exception as e:
|
| 96 |
estado += f"Error inesperado durante el intento {intento}: {str(e)}\n"
|
| 97 |
+
yield estado, None, None
|
| 98 |
return
|
| 99 |
|
| 100 |
md_path = os.path.join(carpeta_salida, f"{nombre_sin_ext}.md")
|
| 101 |
if not os.path.exists(md_path):
|
| 102 |
estado += "No se generó el archivo Markdown. Puede que el PDF no contenga texto reconocible.\n"
|
| 103 |
+
yield estado, None, None
|
| 104 |
return
|
| 105 |
|
| 106 |
with open(md_path, "r", encoding="utf-8") as f:
|
|
|
|
| 125 |
shutil.make_archive(base_name=zip_path.replace(".zip", ""), format="zip", root_dir=carpeta_salida)
|
| 126 |
|
| 127 |
estado += f"Procesamiento finalizado correctamente. Tiempo total: {tiempo_total} s"
|
| 128 |
+
yield estado + resumen, gr.update(value=md_path, visible=True), gr.update(value=zip_path, visible=True)
|
| 129 |
|
| 130 |
except Exception as e:
|
| 131 |
estado += f"Error general inesperado: {str(e)}"
|
| 132 |
+
yield estado, None, None
|
| 133 |
|
| 134 |
+
# Interfaz Gradio: sólo un recuadro de estado + 2 archivos
|
| 135 |
demo = gr.Interface(
|
| 136 |
fn=procesar_pdf,
|
| 137 |
inputs=[
|
|
|
|
| 145 |
],
|
| 146 |
outputs=[
|
| 147 |
gr.Textbox(label="Estado del procesamiento", lines=12),
|
|
|
|
| 148 |
gr.File(label="Descargar Markdown (.md)", visible=False),
|
| 149 |
gr.File(label="Descargar ZIP completo", visible=False)
|
| 150 |
],
|