Spaces:
Sleeping
Sleeping
Update app.py
#1
by
C2MV
- opened
app.py
CHANGED
|
@@ -436,17 +436,17 @@ class PaperDownloader:
|
|
| 436 |
with open(filepath, 'wb') as f:
|
| 437 |
f.write(pdf_content)
|
| 438 |
logger.info(f"Successfully downloaded: {filename}")
|
| 439 |
-
return filepath, f'<
|
| 440 |
else:
|
| 441 |
logger.warning(f"Could not download: {doi}")
|
| 442 |
-
return None, f"Could not download {doi}", f'<
|
| 443 |
|
| 444 |
except Exception as e:
|
| 445 |
logger.error(f"Error processing {doi}: {e}")
|
| 446 |
return None, f"Error processing {doi}: {e}", f"Error processing {doi}: {e}"
|
| 447 |
|
| 448 |
-
def download_multiple_dois(self, dois_text):
|
| 449 |
-
"""Downloads multiple papers from a list of DOIs"""
|
| 450 |
if not dois_text:
|
| 451 |
return None, "Error: No DOIs provided", "Error: No DOIs provided"
|
| 452 |
|
|
@@ -457,7 +457,9 @@ class PaperDownloader:
|
|
| 457 |
downloaded_files = []
|
| 458 |
failed_dois = []
|
| 459 |
downloaded_links = []
|
| 460 |
-
|
|
|
|
|
|
|
| 461 |
filepath, success_message, fail_message = self.download_single_doi(doi)
|
| 462 |
if filepath:
|
| 463 |
# Unique filename for zip
|
|
@@ -465,10 +467,14 @@ class PaperDownloader:
|
|
| 465 |
filepath_unique = os.path.join(self.output_dir, filename)
|
| 466 |
os.rename(filepath, filepath_unique)
|
| 467 |
downloaded_files.append(filepath_unique)
|
| 468 |
-
downloaded_links.append(f'<
|
| 469 |
-
|
| 470 |
else:
|
| 471 |
-
failed_dois.append(f'<
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 472 |
|
| 473 |
if downloaded_files:
|
| 474 |
zip_filename = 'papers.zip'
|
|
@@ -477,9 +483,17 @@ class PaperDownloader:
|
|
| 477 |
zipf.write(file_path, arcname=os.path.basename(file_path))
|
| 478 |
logger.info(f"ZIP file created: {zip_filename}")
|
| 479 |
|
| 480 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 481 |
|
| 482 |
-
|
|
|
|
|
|
|
|
|
|
| 483 |
"""Process BibTeX file and download papers with multiple strategies"""
|
| 484 |
# Read BibTeX file content from the uploaded object
|
| 485 |
try:
|
|
@@ -504,9 +518,10 @@ class PaperDownloader:
|
|
| 504 |
downloaded_files = []
|
| 505 |
failed_dois = []
|
| 506 |
downloaded_links = []
|
|
|
|
| 507 |
|
| 508 |
# Download PDFs
|
| 509 |
-
for doi in
|
| 510 |
try:
|
| 511 |
# Try to download with multiple methods with retries
|
| 512 |
pdf_content = self.download_with_retry(doi)
|
|
@@ -522,14 +537,17 @@ class PaperDownloader:
|
|
| 522 |
f.write(pdf_content)
|
| 523 |
|
| 524 |
downloaded_files.append(filepath)
|
| 525 |
-
downloaded_links.append(f'<
|
| 526 |
logger.info(f"Successfully downloaded: {filename}")
|
| 527 |
else:
|
| 528 |
-
failed_dois.append(f'<
|
| 529 |
-
|
| 530 |
except Exception as e:
|
| 531 |
-
failed_dois.append(f'<
|
| 532 |
logger.error(f"Error processing {doi}: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 533 |
|
| 534 |
# Create ZIP of downloaded papers
|
| 535 |
if downloaded_files:
|
|
@@ -538,10 +556,16 @@ class PaperDownloader:
|
|
| 538 |
for file_path in downloaded_files:
|
| 539 |
zipf.write(file_path, arcname=os.path.basename(file_path))
|
| 540 |
logger.info(f"ZIP file created: {zip_filename}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 541 |
|
| 542 |
-
return zip_filename, "\n".join(downloaded_links), "\n".join(failed_dois), None
|
| 543 |
|
| 544 |
-
|
|
|
|
|
|
|
| 545 |
"""Process BibTeX file and download papers with multiple strategies"""
|
| 546 |
# Read BibTeX file content from the uploaded object
|
| 547 |
try:
|
|
@@ -566,9 +590,10 @@ class PaperDownloader:
|
|
| 566 |
downloaded_files = []
|
| 567 |
failed_dois = []
|
| 568 |
downloaded_links = []
|
|
|
|
| 569 |
|
| 570 |
# Download PDFs
|
| 571 |
-
for doi in
|
| 572 |
try:
|
| 573 |
# Try to download with multiple methods with retries
|
| 574 |
pdf_content = await self.download_with_retry_async(doi)
|
|
@@ -584,14 +609,17 @@ class PaperDownloader:
|
|
| 584 |
f.write(pdf_content)
|
| 585 |
|
| 586 |
downloaded_files.append(filepath)
|
| 587 |
-
downloaded_links.append(f'<
|
| 588 |
logger.info(f"Successfully downloaded: {filename}")
|
| 589 |
else:
|
| 590 |
-
failed_dois.append(f'<
|
| 591 |
-
|
| 592 |
except Exception as e:
|
| 593 |
-
failed_dois.append(f'<
|
| 594 |
logger.error(f"Error processing {doi}: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 595 |
|
| 596 |
# Create ZIP of downloaded papers
|
| 597 |
if downloaded_files:
|
|
@@ -600,26 +628,34 @@ class PaperDownloader:
|
|
| 600 |
for file_path in downloaded_files:
|
| 601 |
zipf.write(file_path, arcname=os.path.basename(file_path))
|
| 602 |
logger.info(f"ZIP file created: {zip_filename}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 603 |
|
| 604 |
-
|
|
|
|
| 605 |
|
| 606 |
def create_gradio_interface():
|
| 607 |
"""Create Gradio interface for Paper Downloader"""
|
| 608 |
downloader = PaperDownloader()
|
| 609 |
|
| 610 |
-
async def download_papers(bib_file, doi_input, dois_input):
|
|
|
|
|
|
|
| 611 |
if bib_file:
|
| 612 |
# Check file type
|
| 613 |
if not bib_file.name.lower().endswith('.bib'):
|
| 614 |
return None, "Error: Please upload a .bib file", "Error: Please upload a .bib file", None
|
| 615 |
|
| 616 |
-
zip_path, downloaded_dois, failed_dois, _ = await downloader.process_bibtex_async(bib_file)
|
| 617 |
return zip_path, downloaded_dois, failed_dois, None
|
| 618 |
elif doi_input:
|
| 619 |
filepath, message, failed_doi = downloader.download_single_doi(doi_input)
|
| 620 |
return None, message, failed_doi, filepath
|
| 621 |
elif dois_input:
|
| 622 |
-
zip_path, downloaded_dois, failed_dois = downloader.download_multiple_dois(dois_input)
|
| 623 |
return zip_path, downloaded_dois, failed_dois, None
|
| 624 |
else:
|
| 625 |
return None, "Please provide a .bib file, a single DOI, or a list of DOIs", "Please provide a .bib file, a single DOI, or a list of DOIs", None
|
|
@@ -650,7 +686,7 @@ def create_gradio_interface():
|
|
| 650 |
<div id="failed-dois"></div>
|
| 651 |
</div>
|
| 652 |
"""),
|
| 653 |
-
|
| 654 |
],
|
| 655 |
title="🔬 Academic Paper Batch Downloader",
|
| 656 |
description="Upload a BibTeX file or enter DOIs to download PDFs. We'll attempt to fetch PDFs from multiple sources like Sci-Hub, Libgen, Google Scholar and Crossref. You can use any of the three inputs at any moment.",
|
|
@@ -700,18 +736,17 @@ def create_gradio_interface():
|
|
| 700 |
|
| 701 |
interface.head = """
|
| 702 |
<script>
|
| 703 |
-
|
| 704 |
-
const
|
| 705 |
-
|
| 706 |
-
|
|
|
|
|
|
|
| 707 |
.then(() => {
|
| 708 |
-
|
| 709 |
-
setTimeout(() => {
|
| 710 |
-
button.innerText = 'Copy';
|
| 711 |
-
}, 2000);
|
| 712 |
})
|
| 713 |
.catch(err => {
|
| 714 |
-
|
| 715 |
});
|
| 716 |
}
|
| 717 |
</script>
|
|
|
|
| 436 |
with open(filepath, 'wb') as f:
|
| 437 |
f.write(pdf_content)
|
| 438 |
logger.info(f"Successfully downloaded: {filename}")
|
| 439 |
+
return filepath, f'<a href="https://doi.org/{doi}">{doi}</a>', ""
|
| 440 |
else:
|
| 441 |
logger.warning(f"Could not download: {doi}")
|
| 442 |
+
return None, f"Could not download {doi}", f'<a href="https://doi.org/{doi}">{doi}</a>'
|
| 443 |
|
| 444 |
except Exception as e:
|
| 445 |
logger.error(f"Error processing {doi}: {e}")
|
| 446 |
return None, f"Error processing {doi}: {e}", f"Error processing {doi}: {e}"
|
| 447 |
|
| 448 |
+
def download_multiple_dois(self, dois_text, progress_callback=None):
|
| 449 |
+
"""Downloads multiple papers from a list of DOIs with progress updates and single copy button"""
|
| 450 |
if not dois_text:
|
| 451 |
return None, "Error: No DOIs provided", "Error: No DOIs provided"
|
| 452 |
|
|
|
|
| 457 |
downloaded_files = []
|
| 458 |
failed_dois = []
|
| 459 |
downloaded_links = []
|
| 460 |
+
total_dois = len(dois)
|
| 461 |
+
|
| 462 |
+
for i, doi in enumerate(dois):
|
| 463 |
filepath, success_message, fail_message = self.download_single_doi(doi)
|
| 464 |
if filepath:
|
| 465 |
# Unique filename for zip
|
|
|
|
| 467 |
filepath_unique = os.path.join(self.output_dir, filename)
|
| 468 |
os.rename(filepath, filepath_unique)
|
| 469 |
downloaded_files.append(filepath_unique)
|
| 470 |
+
downloaded_links.append(f'<a href="https://doi.org/{doi}">{doi}</a>')
|
|
|
|
| 471 |
else:
|
| 472 |
+
failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a>')
|
| 473 |
+
|
| 474 |
+
if progress_callback:
|
| 475 |
+
progress = int(((i + 1) / total_dois) * 100)
|
| 476 |
+
progress_callback(progress)
|
| 477 |
+
|
| 478 |
|
| 479 |
if downloaded_files:
|
| 480 |
zip_filename = 'papers.zip'
|
|
|
|
| 483 |
zipf.write(file_path, arcname=os.path.basename(file_path))
|
| 484 |
logger.info(f"ZIP file created: {zip_filename}")
|
| 485 |
|
| 486 |
+
|
| 487 |
+
|
| 488 |
+
|
| 489 |
+
# Combine all links into a single string
|
| 490 |
+
all_links_html = "<br>".join(downloaded_links)
|
| 491 |
+
copy_button_html = f'<button onclick="copyAllLinks(\'{all_links_html}\')">Copy All Links</button>' if all_links_html else ""
|
| 492 |
|
| 493 |
+
|
| 494 |
+
return zip_filename if downloaded_files else None, f"{all_links_html} {copy_button_html}", "\n".join(failed_dois)
|
| 495 |
+
|
| 496 |
+
def process_bibtex(self, bib_file, progress_callback=None):
|
| 497 |
"""Process BibTeX file and download papers with multiple strategies"""
|
| 498 |
# Read BibTeX file content from the uploaded object
|
| 499 |
try:
|
|
|
|
| 518 |
downloaded_files = []
|
| 519 |
failed_dois = []
|
| 520 |
downloaded_links = []
|
| 521 |
+
total_dois = len(dois)
|
| 522 |
|
| 523 |
# Download PDFs
|
| 524 |
+
for i, doi in enumerate(dois):
|
| 525 |
try:
|
| 526 |
# Try to download with multiple methods with retries
|
| 527 |
pdf_content = self.download_with_retry(doi)
|
|
|
|
| 537 |
f.write(pdf_content)
|
| 538 |
|
| 539 |
downloaded_files.append(filepath)
|
| 540 |
+
downloaded_links.append(f'<a href="https://doi.org/{doi}">{doi}</a>')
|
| 541 |
logger.info(f"Successfully downloaded: {filename}")
|
| 542 |
else:
|
| 543 |
+
failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a>')
|
|
|
|
| 544 |
except Exception as e:
|
| 545 |
+
failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a>')
|
| 546 |
logger.error(f"Error processing {doi}: {e}")
|
| 547 |
+
|
| 548 |
+
if progress_callback:
|
| 549 |
+
progress = int(((i + 1) / total_dois) * 100)
|
| 550 |
+
progress_callback(progress)
|
| 551 |
|
| 552 |
# Create ZIP of downloaded papers
|
| 553 |
if downloaded_files:
|
|
|
|
| 556 |
for file_path in downloaded_files:
|
| 557 |
zipf.write(file_path, arcname=os.path.basename(file_path))
|
| 558 |
logger.info(f"ZIP file created: {zip_filename}")
|
| 559 |
+
|
| 560 |
+
|
| 561 |
+
# Combine all links into a single string
|
| 562 |
+
all_links_html = "<br>".join(downloaded_links)
|
| 563 |
+
copy_button_html = f'<button onclick="copyAllLinks(\'{all_links_html}\')">Copy All Links</button>' if all_links_html else ""
|
| 564 |
|
|
|
|
| 565 |
|
| 566 |
+
return zip_filename, f"{all_links_html} {copy_button_html}", "\n".join(failed_dois), None
|
| 567 |
+
|
| 568 |
+
async def process_bibtex_async(self, bib_file, progress_callback=None):
|
| 569 |
"""Process BibTeX file and download papers with multiple strategies"""
|
| 570 |
# Read BibTeX file content from the uploaded object
|
| 571 |
try:
|
|
|
|
| 590 |
downloaded_files = []
|
| 591 |
failed_dois = []
|
| 592 |
downloaded_links = []
|
| 593 |
+
total_dois = len(dois)
|
| 594 |
|
| 595 |
# Download PDFs
|
| 596 |
+
for i, doi in enumerate(dois):
|
| 597 |
try:
|
| 598 |
# Try to download with multiple methods with retries
|
| 599 |
pdf_content = await self.download_with_retry_async(doi)
|
|
|
|
| 609 |
f.write(pdf_content)
|
| 610 |
|
| 611 |
downloaded_files.append(filepath)
|
| 612 |
+
downloaded_links.append(f'<a href="https://doi.org/{doi}">{doi}</a>')
|
| 613 |
logger.info(f"Successfully downloaded: {filename}")
|
| 614 |
else:
|
| 615 |
+
failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a>')
|
|
|
|
| 616 |
except Exception as e:
|
| 617 |
+
failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a>')
|
| 618 |
logger.error(f"Error processing {doi}: {e}")
|
| 619 |
+
|
| 620 |
+
if progress_callback:
|
| 621 |
+
progress = int(((i + 1) / total_dois) * 100)
|
| 622 |
+
progress_callback(progress)
|
| 623 |
|
| 624 |
# Create ZIP of downloaded papers
|
| 625 |
if downloaded_files:
|
|
|
|
| 628 |
for file_path in downloaded_files:
|
| 629 |
zipf.write(file_path, arcname=os.path.basename(file_path))
|
| 630 |
logger.info(f"ZIP file created: {zip_filename}")
|
| 631 |
+
|
| 632 |
+
|
| 633 |
+
# Combine all links into a single string
|
| 634 |
+
all_links_html = "<br>".join(downloaded_links)
|
| 635 |
+
copy_button_html = f'<button onclick="copyAllLinks(\'{all_links_html}\')">Copy All Links</button>' if all_links_html else ""
|
| 636 |
|
| 637 |
+
|
| 638 |
+
return zip_filename, f"{all_links_html} {copy_button_html}", "\n".join(failed_dois), None
|
| 639 |
|
| 640 |
def create_gradio_interface():
|
| 641 |
"""Create Gradio interface for Paper Downloader"""
|
| 642 |
downloader = PaperDownloader()
|
| 643 |
|
| 644 |
+
async def download_papers(bib_file, doi_input, dois_input, progress=gr.Progress()):
|
| 645 |
+
progress_callback = lambda p: progress(p, desc="Downloading Papers")
|
| 646 |
+
|
| 647 |
if bib_file:
|
| 648 |
# Check file type
|
| 649 |
if not bib_file.name.lower().endswith('.bib'):
|
| 650 |
return None, "Error: Please upload a .bib file", "Error: Please upload a .bib file", None
|
| 651 |
|
| 652 |
+
zip_path, downloaded_dois, failed_dois, _ = await downloader.process_bibtex_async(bib_file, progress_callback)
|
| 653 |
return zip_path, downloaded_dois, failed_dois, None
|
| 654 |
elif doi_input:
|
| 655 |
filepath, message, failed_doi = downloader.download_single_doi(doi_input)
|
| 656 |
return None, message, failed_doi, filepath
|
| 657 |
elif dois_input:
|
| 658 |
+
zip_path, downloaded_dois, failed_dois = downloader.download_multiple_dois(dois_input, progress_callback)
|
| 659 |
return zip_path, downloaded_dois, failed_dois, None
|
| 660 |
else:
|
| 661 |
return None, "Please provide a .bib file, a single DOI, or a list of DOIs", "Please provide a .bib file, a single DOI, or a list of DOIs", None
|
|
|
|
| 686 |
<div id="failed-dois"></div>
|
| 687 |
</div>
|
| 688 |
"""),
|
| 689 |
+
gr.File(label="Downloaded Single PDF")
|
| 690 |
],
|
| 691 |
title="🔬 Academic Paper Batch Downloader",
|
| 692 |
description="Upload a BibTeX file or enter DOIs to download PDFs. We'll attempt to fetch PDFs from multiple sources like Sci-Hub, Libgen, Google Scholar and Crossref. You can use any of the three inputs at any moment.",
|
|
|
|
| 736 |
|
| 737 |
interface.head = """
|
| 738 |
<script>
|
| 739 |
+
function copyAllLinks(linksHTML) {
|
| 740 |
+
const tempElement = document.createElement('div');
|
| 741 |
+
tempElement.innerHTML = linksHTML;
|
| 742 |
+
const links = Array.from(tempElement.querySelectorAll('a')).map(a => a.href).join('\\n');
|
| 743 |
+
|
| 744 |
+
navigator.clipboard.writeText(links)
|
| 745 |
.then(() => {
|
| 746 |
+
alert('All links copied to clipboard!');
|
|
|
|
|
|
|
|
|
|
| 747 |
})
|
| 748 |
.catch(err => {
|
| 749 |
+
console.error('Failed to copy links: ', err);
|
| 750 |
});
|
| 751 |
}
|
| 752 |
</script>
|