Spaces:
Sleeping
Sleeping
qolina committed on
Commit ·
6ae3c63
1
Parent(s): d4e2f53
add pdf crawl
Browse files
app.py
CHANGED
|
@@ -5,6 +5,35 @@ from reference_string_parsing import *
|
|
| 5 |
from controlled_summarization import *
|
| 6 |
from dataset_extraction import *
|
| 7 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
with gr.Blocks(css="#htext span {white-space: pre-line}") as demo:
|
| 9 |
gr.Markdown("# Gradio Demo for SciAssist")
|
| 10 |
with gr.Tabs():
|
|
@@ -16,7 +45,8 @@ with gr.Blocks(css="#htext span {white-space: pre-line}") as demo:
|
|
| 16 |
gr.Markdown(ctrlsum_file_md)
|
| 17 |
with gr.Row():
|
| 18 |
with gr.Column():
|
| 19 |
-
|
|
|
|
| 20 |
ctrlsum_str = gr.TextArea(label="Input String", max_lines=5)
|
| 21 |
with gr.Column():
|
| 22 |
gr.Markdown("* Length 0 will exert no control over length.")
|
|
@@ -33,6 +63,9 @@ with gr.Blocks(css="#htext span {white-space: pre-line}") as demo:
|
|
| 33 |
ctrlsum_file_examples = gr.Examples(examples=[["examples/H01-1042_body.txt", 50, "automatic evaluation technique"],["examples/H01-1042.pdf", 0, "automatic evaluation technique"]],
|
| 34 |
inputs=[ctrlsum_file, ctrlsum_file_length, ctrlsum_file_keywords])
|
| 35 |
|
|
|
|
|
|
|
|
|
|
| 36 |
ctrlsum_file_btn.click(
|
| 37 |
fn=ctrlsum_for_file,
|
| 38 |
inputs=[ctrlsum_file, ctrlsum_file_length, ctrlsum_file_keywords, ctrlsum_str],
|
|
@@ -143,4 +176,4 @@ with gr.Blocks(css="#htext span {white-space: pre-line}") as demo:
|
|
| 143 |
)
|
| 144 |
|
| 145 |
|
| 146 |
-
demo.launch(share=False)
|
|
|
|
| 5 |
from controlled_summarization import *
|
| 6 |
from dataset_extraction import *
|
| 7 |
|
| 8 |
+
import requests
|
| 9 |
+
def download_pdf(url, dest_folder):
    """Download a PDF from a URL and save it into a destination folder.

    Parameters:
        url (str): URL of the PDF.
        dest_folder (str): Destination folder for the downloaded PDF; it is
            created if it does not exist yet.

    Returns:
        str: Path of the file that was written.

    Raises:
        ValueError: If no usable file name can be derived from the URL path
            (for example, the URL ends with a slash).
        requests.HTTPError: If the server answers with an error status.
    """
    # Imported locally so the function does not rely on `os` leaking in
    # through one of the module's star imports.
    import os
    from urllib.parse import unquote, urlsplit

    # Derive the name from the URL *path* only: query strings and fragments
    # must not end up in the file name, and basename() drops any directory
    # components that percent-decoding may have revealed.
    filename = os.path.basename(unquote(urlsplit(url).path))
    if filename in ("", ".", ".."):
        raise ValueError(f"Cannot derive a file name from URL: {url!r}")

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(dest_folder, exist_ok=True)
    target = os.path.join(dest_folder, filename)

    # The timeout keeps a stalled server from hanging the app; the context
    # manager releases the streamed connection even if writing fails.
    with requests.get(url, stream=True, timeout=30) as response:
        # Fail loudly instead of saving an HTML error page as a ".pdf".
        response.raise_for_status()
        with open(target, 'wb') as file:
            for chunk in response.iter_content(chunk_size=1024):
                if chunk:  # skip keep-alive chunks
                    file.write(chunk)
    return target
|
| 30 |
+
|
| 31 |
+
# Example Usage
|
| 32 |
+
#url = "https://arxiv.org/pdf/2305.14996.pdf"
|
| 33 |
+
#dest_folder = "./examples/"
|
| 34 |
+
#download_pdf(url, dest_folder)
|
| 35 |
+
|
| 36 |
+
|
| 37 |
with gr.Blocks(css="#htext span {white-space: pre-line}") as demo:
|
| 38 |
gr.Markdown("# Gradio Demo for SciAssist")
|
| 39 |
with gr.Tabs():
|
|
|
|
| 45 |
gr.Markdown(ctrlsum_file_md)
|
| 46 |
with gr.Row():
|
| 47 |
with gr.Column():
|
| 48 |
+
ctrlsum_url = gr.TextArea(label="PDF URL", max_lines=1)
|
| 49 |
+
ctrlsum_file = gr.File(label="Input File", max_lines=2)
|
| 50 |
ctrlsum_str = gr.TextArea(label="Input String", max_lines=5)
|
| 51 |
with gr.Column():
|
| 52 |
gr.Markdown("* Length 0 will exert no control over length.")
|
|
|
|
| 63 |
ctrlsum_file_examples = gr.Examples(examples=[["examples/H01-1042_body.txt", 50, "automatic evaluation technique"],["examples/H01-1042.pdf", 0, "automatic evaluation technique"]],
|
| 64 |
inputs=[ctrlsum_file, ctrlsum_file_length, ctrlsum_file_keywords])
|
| 65 |
|
| 66 |
+
if ctrlsum_url is not None and len(ctrlsum_url) > 4:
|
| 67 |
+
ctrlsum_file = download_pdf(ctrlsum_url, './examples/')
|
| 68 |
+
|
| 69 |
ctrlsum_file_btn.click(
|
| 70 |
fn=ctrlsum_for_file,
|
| 71 |
inputs=[ctrlsum_file, ctrlsum_file_length, ctrlsum_file_keywords, ctrlsum_str],
|
|
|
|
| 176 |
)
|
| 177 |
|
| 178 |
|
| 179 |
+
demo.launch(share=False)
|