File size: 5,497 Bytes
f77c312
8ab983b
9b5b26a
 
 
c19d193
8ab983b
6aae614
8fe992b
9b5b26a
 
 
c5a6d75
8ab983b
3f42e28
 
8ab983b
9b5b26a
9eb2a9d
 
 
8ab983b
 
c5a6d75
cf5568c
 
 
9b5b26a
8ab983b
 
 
5aad9d4
 
8ab983b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5aad9d4
8ab983b
c5a6d75
8ab983b
 
 
 
9b5b26a
2979069
8ab983b
 
 
 
2979069
9eb2a9d
 
8ab983b
 
 
2979069
8ab983b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9eb2a9d
 
8ab983b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8c01ffb
6aae614
ae7a494
 
 
 
e121372
bf6d34c
 
29ec968
fe328e0
13d500a
8c01ffb
a46511e
 
9b5b26a
8c01ffb
8fe992b
8ab983b
8c01ffb
 
 
 
 
 
0b70c01
8fe992b
 
8c01ffb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
import datetime
import os
import zipfile
from shutil import make_archive
from urllib.parse import quote

import pytz
import requests
import yaml
from smolagents import CodeAgent, HfApiModel, load_tool, tool

from tools.final_answer import FinalAnswerTool
from Gradio_UI import GradioUI

@tool
def search_articles(search_term: str, start: int, end: int) -> str:
    """
    Searches for academic articles on arxiv.org using a given search term and
    returns the results within the given range as a Markdown-formatted string
    (linked titles plus published dates).

    Args:
        search_term: The keyword or phrase to search for in ArXiv articles.
        start: The starting index of the search results (used for pagination).
        end: The maximum number of articles to retrieve from the API.

    Returns:
        str: A Markdown list of the articles found, or an error/empty message.

    Example:
        >>> search_articles("deep learning", 0, 10)
    """
    try:
        # URL-encode the term so multi-word phrases and special characters
        # form a valid query string instead of a malformed URL.
        search_url = (
            f"http://export.arxiv.org/api/query?search_query=all:{quote(search_term)}"
            f"&start={start}&max_results={end}&sortBy=submittedDate&sortOrder=descending"
        )

        # Timeout keeps the agent from hanging indefinitely on a slow API.
        response = requests.get(search_url, timeout=30)
        if response.status_code != 200:
            return "Error: Failed to fetch articles from ArXiv."

        # Naive Atom-XML parsing: split the feed on <entry> tags and slice
        # out the title / id / published fields of each entry.
        articles = []
        entries = response.text.split("<entry>")[1:]
        for entry in entries:
            title_start = entry.find("<title>") + len("<title>")
            title_end = entry.find("</title>")
            title = entry[title_start:title_end].strip()

            link_start = entry.find("<id>") + len("<id>")
            link_end = entry.find("</id>")
            link = entry[link_start:link_end].strip()

            published_start = entry.find("<published>") + len("<published>")
            published_end = entry.find("</published>")
            published_date = entry[published_start:published_end][:10]  # YYYY-MM-DD prefix

            articles.append(f"### [{title}]({link})\n**Published Date:** {published_date}\n")

        if not articles:
            return f"No articles found for '{search_term}'."

        # Join the Markdown entries instead of stringifying the Python list,
        # so the tool output is readable Markdown rather than a list repr.
        return "Articles:\n" + "\n".join(articles)

    except Exception as e:
        return f"Error: {str(e)}"


@tool
def download_articles(article_links: list, save_folder: str = "downloads") -> str:
    """
    A tool that downloads article PDFs from arxiv.org given a list of links.

    Args:
        article_links: List of article links from arXiv (abstract/id URLs).
        save_folder: Folder to save downloaded articles. Default is 'downloads'.

    Returns:
        str: Success message listing the saved files, or an error message.
    """
    if not article_links:
        return "Error: No article links provided."

    # Ensure the save folder exists
    os.makedirs(save_folder, exist_ok=True)

    downloaded_files = []

    for link in article_links:
        try:
            # The arXiv ID is the last path component of the abstract/id URL.
            article_id = link.split("/")[-1]

            # Construct the PDF download URL
            pdf_url = f"https://arxiv.org/pdf/{article_id}.pdf"

            # Stream with a timeout so large PDFs are written in chunks and a
            # stalled connection cannot hang the agent indefinitely.
            response = requests.get(pdf_url, stream=True, timeout=60)
            if response.status_code != 200:
                return f"Error: Failed to download {pdf_url}"

            # Save the file in 1 KiB chunks.
            file_path = os.path.join(save_folder, f"{article_id}.pdf")
            with open(file_path, "wb") as file:
                for chunk in response.iter_content(1024):
                    file.write(chunk)

            downloaded_files.append(file_path)

        except Exception as e:
            return f"Error downloading {link}: {str(e)}"

    return "Downloaded articles:\n" + "\n".join(downloaded_files)
    
@tool
def create_zip_package(articles_folder: str, output_zip: str) -> str:
    """
    A tool that packages all downloaded articles (PDFs) from a folder into a zip file.

    Args:
        articles_folder: Folder containing downloaded articles.
        output_zip: The name of the output zip file.

    Returns:
        str: Success message with the zip path, or an error message.
    """
    if not os.path.exists(articles_folder):
        return f"Error: Folder '{articles_folder}' does not exist."

    file_paths = [
        os.path.join(articles_folder, f)
        for f in os.listdir(articles_folder)
        if f.endswith(".pdf")
    ]

    if not file_paths:
        return "Error: No PDF files found for zipping."

    try:
        # Store each PDF under its basename so the archive has a flat layout.
        # NOTE: `zipfile` was referenced here but never imported (NameError at
        # call time); the module is now imported at the top of the file.
        with zipfile.ZipFile(output_zip, "w", zipfile.ZIP_DEFLATED) as zipf:
            for file in file_paths:
                zipf.write(file, os.path.basename(file))

        return f"Zip package created successfully: {output_zip}"

    except Exception as e:
        return f"Error: {str(e)}"

# Tool the agent uses to emit its final response.
final_answer = FinalAnswerTool()

# If the agent does not answer, the model is overloaded, please use another model or the following Hugging Face Endpoint that also contains qwen2.5 coder:
# model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud' 

model = HfApiModel(
    model_id='Qwen/Qwen2.5-Coder-32B-Instruct',  # it is possible that this model may be overloaded
    max_tokens=2096,
    temperature=0.5,
    custom_role_conversions=None,
)

# Load the agent's prompt templates from disk.
with open("prompts.yaml", "r") as stream:
    prompt_templates = yaml.safe_load(stream)

# Tools exposed to the agent, in the order it sees them.
agent_tools = [search_articles, download_articles, create_zip_package, final_answer]

agent = CodeAgent(
    model=model,
    tools=agent_tools,
    max_steps=6,
    verbosity_level=1,
    grammar=None,
    planning_interval=None,
    name=None,
    description=None,
    prompt_templates=prompt_templates,
)

# Serve the agent through a Gradio web interface.
GradioUI(agent).launch()