Spaces:
Runtime error
Runtime error
Upload 3 files
Browse files- Dockerfile +16 -0
- README.md +1 -0
- app.py +40 -0
Dockerfile
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
# you will also find guides on how best to write your Dockerfile

FROM condaforge/mambaforge:23.1.0-1

RUN mamba install -y sbt=1.7.1 git gradio

# Docker Spaces run the container as UID 1000, so create that user and give
# it a writable home and working directory (the app writes files at runtime).
RUN useradd -m -u 1000 user

WORKDIR /work
RUN chown user /work

COPY --chown=user data/pdffigures2.jar /work
COPY --chown=user app.py /work

USER user
ENV HOME=/home/user

# Gradio binds to 127.0.0.1 by default; listen on all interfaces so the
# Space proxy can reach the app on the port declared in README.md.
ENV GRADIO_SERVER_NAME=0.0.0.0
EXPOSE 7860

# Exec form: python runs as PID 1 and receives stop signals directly.
ENTRYPOINT ["python", "app.py"]

# sbt "runMain org.allenai.pdffigures2.FigureExtractorBatchCli 2304.11968v1.Track_Anything_Segment_Anything_Meets_Videos.pdf -m figures -t 48 -q"
|
README.md
CHANGED
|
@@ -6,6 +6,7 @@ colorTo: indigo
|
|
| 6 |
sdk: docker
|
| 7 |
pinned: false
|
| 8 |
license: apache-2.0
|
|
|
|
| 9 |
---
|
| 10 |
|
| 11 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
| 6 |
sdk: docker
|
| 7 |
pinned: false
|
| 8 |
license: apache-2.0
|
| 9 |
+
app_port: 7860
|
| 10 |
---
|
| 11 |
|
| 12 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import urllib.request
|
| 3 |
+
import subprocess
|
| 4 |
+
import os
|
| 5 |
+
import glob
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def extract_figure(url):
    """Download the PDF at *url* and return the path to its first figure.

    The PDF is fetched into a private temporary directory, pdffigures2 is run
    on it, and the rendered image of the first figure is moved to a
    standalone temporary file whose path is returned.

    Args:
        url: HTTP(S) address of the PDF to process.

    Returns:
        Filesystem path of a PNG file containing the first extracted figure.
        The file is left on disk so the caller can read it.

    Raises:
        ValueError: If *url* is not an ``http`` or ``https`` URL.
        RuntimeError: If pdffigures2 produced no figure image.
        OSError: Download failures from ``urllib`` propagate unchanged
            (``urllib.error.URLError`` is an ``OSError`` subclass).
    """
    import shutil
    import tempfile
    from urllib.parse import urlparse

    # The URL comes straight from the web form: only allow plain web
    # downloads so schemes such as file:// cannot reach local files.
    scheme = urlparse(url or "").scheme.lower()
    if scheme not in ("http", "https"):
        raise ValueError(f"Unsupported URL (expected http or https): {url!r}")

    # Work in a per-call directory so simultaneous requests do not overwrite
    # each other's files and a stale figure from an earlier run is never
    # returned for a PDF that has no figures.
    work_dir = tempfile.mkdtemp(prefix="pdffigures_")
    try:
        pdf_path = os.path.join(work_dir, "input.pdf")
        figure_prefix = os.path.join(work_dir, "figures_")

        # download PDF file from URL (with a timeout so a stalled server
        # cannot block the request forever)
        with urllib.request.urlopen(url, timeout=60) as response, \
                open(pdf_path, "wb") as pdf_file:
            shutil.copyfileobj(response, pdf_file)

        # extract figures from PDF using pdffigures2
        result = subprocess.run(
            ["java", "-jar", "pdffigures2.jar", pdf_path, "-m", figure_prefix]
        )

        # Prefer the conventional name of the first figure; otherwise fall
        # back to the first figure image the tool wrote, if any.
        extracted = figure_prefix + "input-Figure1-1.png"
        if not os.path.exists(extracted):
            candidates = sorted(
                glob.glob(glob.escape(figure_prefix) + "*-Figure*.png")
            )
            if not candidates:
                raise RuntimeError(
                    "No figure could be extracted from the PDF "
                    f"(pdffigures2 exit code {result.returncode})"
                )
            extracted = candidates[0]

        # Move the image out of the working directory so it survives the
        # cleanup below and can still be read by the caller.
        handle, figure_path = tempfile.mkstemp(prefix="figure_", suffix=".png")
        os.close(handle)
        shutil.move(extracted, figure_path)
        return figure_path
    finally:
        # delete the downloaded PDF and any other intermediate output
        shutil.rmtree(work_dir, ignore_errors=True)
|
| 31 |
+
|
| 32 |
+
# Define the input and output components. The top-level component classes
# are used because the legacy ``gr.inputs`` / ``gr.outputs`` namespaces are
# deprecated.
inputs = gr.Textbox(label="Enter URL of PDF file:")
outputs = gr.Image(label="First figure in PDF:", type="filepath")

# create interface
interface = gr.Interface(
    fn=extract_figure,
    inputs=inputs,
    outputs=outputs,
    title="Extract First Figure from PDF",
    description="Enter the URL of a PDF file and the first figure in the file will be extracted and displayed.",
)

# Launch the interface. Inside a container the server must listen on all
# interfaces (the default is loopback only), on the port the Space declares
# as ``app_port`` in README.md.
interface.launch(server_name="0.0.0.0", server_port=7860)
|