from smolagents import CodeAgent, HfApiModel, tool
from shutil import make_archive
import os
import zipfile

import requests
import yaml

from tools.final_answer import FinalAnswerTool
from Gradio_UI import GradioUI

@tool
def search_articles(search_term: str, start: int, end: int) -> str:
    """
    Searches for academic articles on arxiv.org using a given search term and
    returns the results within a given range as Markdown-formatted text.
    Args:
        search_term: The keyword or phrase to search for in arXiv articles.
        start: The starting index of the search results (used for pagination).
        end: The maximum number of articles to retrieve from the API.
    Returns:
        str: The articles found, one Markdown heading per article with its
        link and publication date.
    Example:
        >>> search_articles("deep learning", 0, 10)
    """
    try:
        # Construct the ArXiv API query
        search_url = (
            f"http://export.arxiv.org/api/query?search_query=all:{search_term}"
            f"&start={start}&max_results={end}&sortBy=submittedDate&sortOrder=descending"
        )
        response = requests.get(search_url)
        if response.status_code != 200:
            return "Error: Failed to fetch articles from ArXiv."
        # Extract results by naively splitting the Atom XML response
        articles = []
        entries = response.text.split("<entry>")[1:]
        for entry in entries:
            title_start = entry.find("<title>") + 7
            title_end = entry.find("</title>")
            title = entry[title_start:title_end].strip()
            link_start = entry.find("<id>") + 4
            link_end = entry.find("</id>")
            link = entry[link_start:link_end].strip()
            published_start = entry.find("<published>") + 11
            published_end = entry.find("</published>")
            published_date = entry[published_start:published_end][:10]  # Extract YYYY-MM-DD
            articles.append(f"### [{title}]({link})\n**Published Date:** {published_date}\n")
        if not articles:
            return f"No articles found for '{search_term}'."
        return "Articles:\n" + "\n".join(articles)
    except Exception as e:
        return f"Error: {str(e)}"

@tool
def download_articles(article_links: list, save_folder: str = "downloads") -> str:
    """
    A tool that downloads articles from arxiv.org given a list of links.
    Args:
        article_links: List of article links from arXiv.
        save_folder: Folder to save downloaded articles. Default is 'downloads'.
    Returns:
        str: Success or error message.
    """
    if not article_links:
        return "Error: No article links provided."
    # Ensure the save folder exists
    os.makedirs(save_folder, exist_ok=True)
    downloaded_files = []
    for link in article_links:
        try:
            # Extract the article ID from the link
            article_id = link.split("/")[-1]
            # Construct the PDF download URL
            pdf_url = f"https://arxiv.org/pdf/{article_id}.pdf"
            # Download the PDF in chunks
            response = requests.get(pdf_url, stream=True)
            if response.status_code != 200:
                return f"Error: Failed to download {pdf_url}"
            # Save the file
            file_path = os.path.join(save_folder, f"{article_id}.pdf")
            with open(file_path, "wb") as file:
                for chunk in response.iter_content(1024):
                    file.write(chunk)
            downloaded_files.append(file_path)
        except Exception as e:
            return f"Error downloading {link}: {str(e)}"
    return "Downloaded articles:\n" + "\n".join(downloaded_files)

@tool
def create_zip_package(articles_folder: str, output_zip: str) -> str:
    """
    A tool that packages all downloaded articles (PDFs) from a folder into a zip file.
    Args:
        articles_folder: Folder containing downloaded articles.
        output_zip: The name of the output zip file.
    Returns:
        str: Path to the created zip file.
    """
    if not os.path.exists(articles_folder):
        return f"Error: Folder '{articles_folder}' does not exist."
    file_paths = [
        os.path.join(articles_folder, f)
        for f in os.listdir(articles_folder)
        if f.endswith(".pdf")
    ]
    if not file_paths:
        return "Error: No PDF files found for zipping."
    try:
        with zipfile.ZipFile(output_zip, 'w', zipfile.ZIP_DEFLATED) as zipf:
            for file in file_paths:
                zipf.write(file, os.path.basename(file))
        return f"Zip package created successfully: {output_zip}"
    except Exception as e:
        return f"Error: {str(e)}"

final_answer = FinalAnswerTool()

# If the agent does not answer, the model is overloaded; use another model or the
# following Hugging Face endpoint, which also serves Qwen2.5 Coder:
# model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud'
model = HfApiModel(
    max_tokens=2096,
    temperature=0.5,
    model_id='Qwen/Qwen2.5-Coder-32B-Instruct',  # this model may be overloaded
    custom_role_conversions=None,
)
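
# A minimal sketch of falling back to the endpoint mentioned above when the
# primary model is overloaded; the URL comes from the comment in this file,
# and the other parameters mirror the primary model's settings.
def _fallback_model() -> HfApiModel:
    return HfApiModel(
        max_tokens=2096,
        temperature=0.5,
        model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud',
        custom_role_conversions=None,
    )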

with open("prompts.yaml", 'r') as stream:
    prompt_templates = yaml.safe_load(stream)

agent = CodeAgent(
    model=model,
    tools=[search_articles, download_articles, create_zip_package, final_answer],
    max_steps=6,
    verbosity_level=1,
    grammar=None,
    planning_interval=None,
    name=None,
    description=None,
    prompt_templates=prompt_templates,
)
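
# Illustrative and never called at import time: the agent can also be driven
# programmatically via agent.run; the task string below is a hypothetical example.
def _demo_run():
    return agent.run(
        "Find the 3 newest arXiv articles about diffusion models, "
        "download them, and package them into articles.zip"
    )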

GradioUI(agent).launch()