# smolagents demo Space: search arXiv, download article PDFs, and package them as a zip.
# Standard library
import datetime
import os
import zipfile
from shutil import make_archive

# Third-party
import pytz
import requests
import yaml
from smolagents import CodeAgent, DuckDuckGoSearchTool, HfApiModel, load_tool, tool

# Local
from tools.final_answer import FinalAnswerTool
from Gradio_UI import GradioUI
def search_articles(text2search: str, start: int, end: int, output_md: str = "arxiv_articles.md") -> str:
    """
    Searches for academic articles on arxiv.org using a given search term, retrieves
    results within a given range, and saves the output as a Markdown file.

    Args:
        text2search (str): The keyword or phrase to search for in ArXiv articles.
        start (int): The starting index of the search results (used for pagination).
        end (int): The number of articles to retrieve from the API.
        output_md (str, optional): The name of the output Markdown file. Defaults to "arxiv_articles.md".

    Returns:
        str: The absolute path of the generated Markdown file, or an error message if the request fails.

    Example:
        >>> search_articles("deep learning", 0, 10)
        "Markdown file created: /absolute/path/to/arxiv_articles.md"
    """
    try:
        # Let requests build the query string: the search term is URL-encoded,
        # and `end` is actually honored (it was previously hard-coded to 5,
        # contradicting the documented contract).
        params = {
            "search_query": f"all:{text2search}",
            "start": start,
            "max_results": end,
            "sortBy": "submittedDate",
            "sortOrder": "descending",
        }
        # Timeout prevents a slow/unresponsive API from hanging the agent.
        response = requests.get("http://export.arxiv.org/api/query", params=params, timeout=30)
        if response.status_code != 200:
            return "Error: Failed to fetch articles from ArXiv."

        # Naive Atom-XML extraction by string splitting; everything before the
        # first <entry> is feed metadata, so each remaining chunk starts with
        # one article's own <title>/<id>/<published> fields.
        articles = []
        entries = response.text.split("<entry>")[1:]
        for entry in entries:
            title_start = entry.find("<title>") + 7
            title_end = entry.find("</title>")
            title = entry[title_start:title_end].strip()

            link_start = entry.find("<id>") + 4
            link_end = entry.find("</id>")
            link = entry[link_start:link_end].strip()

            published_start = entry.find("<published>") + 11
            published_end = entry.find("</published>")
            published_date = entry[published_start:published_end][:10]  # Extract YYYY-MM-DD

            articles.append(f"### [{title}]({link})\n**Published Date:** {published_date}\n")

        if not articles:
            return f"No articles found for '{text2search}'."

        # Save results to a Markdown file
        with open(output_md, "w", encoding="utf-8") as md_file:
            md_file.write(f"# ArXiv Articles on '{text2search}'\n\n")
            md_file.writelines("\n".join(articles))

        return f"Markdown file created: {os.path.abspath(output_md)}"
    except Exception as e:
        return f"Error: {str(e)}"
def download_articles(article_links: list, save_folder: str = "downloads") -> str:
    """
    A tool that downloads article PDFs from arxiv.org given a list of links.

    Args:
        article_links (list): List of article links from arXiv (abs or pdf URLs).
        save_folder (str): Folder to save downloaded articles. Default is 'downloads'.

    Returns:
        str: Newline-joined list of downloaded file paths, or an error message
        on the first link that fails.
    """
    if not article_links:
        return "Error: No article links provided."

    # Ensure the save folder exists
    os.makedirs(save_folder, exist_ok=True)

    downloaded_files = []
    for link in article_links:
        try:
            # The last URL segment is the arXiv identifier (e.g. 2401.01234v1).
            # rstrip guards against a trailing slash producing an empty id.
            article_id = link.rstrip("/").split("/")[-1]
            pdf_url = f"https://arxiv.org/pdf/{article_id}.pdf"

            # Stream the body in chunks; the timeout keeps an unresponsive
            # server from hanging the agent indefinitely (the original had none).
            response = requests.get(pdf_url, stream=True, timeout=60)
            if response.status_code != 200:
                return f"Error: Failed to download {pdf_url}"

            file_path = os.path.join(save_folder, f"{article_id}.pdf")
            with open(file_path, "wb") as file:
                for chunk in response.iter_content(1024):
                    file.write(chunk)
            downloaded_files.append(file_path)
        except Exception as e:
            return f"Error downloading {link}: {str(e)}"

    return "Downloaded articles:\n" + "\n".join(downloaded_files)
def create_zip_package(articles_folder: str, output_zip: str) -> str:
    """
    A tool that packages all downloaded articles (PDFs) from a folder into a zip file.

    Args:
        articles_folder (str): Folder containing downloaded article PDFs.
        output_zip (str): The name of the output zip file.

    Returns:
        str: Success message containing the zip path, or an error message.
    """
    # isdir (not exists) so a plain-file path is reported as an error instead
    # of letting os.listdir raise an uncaught NotADirectoryError.
    if not os.path.isdir(articles_folder):
        return f"Error: Folder '{articles_folder}' does not exist."

    file_paths = [
        os.path.join(articles_folder, f)
        for f in os.listdir(articles_folder)
        if f.endswith(".pdf")
    ]
    if not file_paths:
        return "Error: No PDF files found for zipping."

    try:
        # NOTE: requires `import zipfile` at file level (it was missing).
        with zipfile.ZipFile(output_zip, 'w', zipfile.ZIP_DEFLATED) as zipf:
            for file_path in file_paths:
                # Store only the basename so the archive has a flat layout.
                zipf.write(file_path, os.path.basename(file_path))
        return f"Zip package created successfully: {output_zip}"
    except Exception as e:
        return f"Error: {str(e)}"
def get_current_time_in_timezone(timezone: str) -> str:
    """A tool that fetches the current local time in a specified timezone.
    Args:
        timezone: A string representing a valid timezone (e.g., 'America/New_York').
    """
    try:
        # Resolve the IANA zone name and format "now" in that zone; any
        # failure (unknown zone, etc.) falls through to the error message.
        zone = pytz.timezone(timezone)
        stamp = datetime.datetime.now(zone).strftime("%Y-%m-%d %H:%M:%S")
    except Exception as e:
        return f"Error fetching time for timezone '{timezone}': {str(e)}"
    return f"The current local time in {timezone} is: {stamp}"
final_answer = FinalAnswerTool()

# If the agent does not answer, the model is overloaded, please use another model
# or the following Hugging Face Endpoint that also contains qwen2.5 coder:
# model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud'
model = HfApiModel(
    max_tokens=2096,
    temperature=0.5,
    model_id='Qwen/Qwen2.5-Coder-32B-Instruct',  # it is possible that this model may be overloaded
    custom_role_conversions=None,
)

# Web-search tool; it was previously constructed but never handed to the agent.
search_tool = DuckDuckGoSearchTool()

# Load the Space's prompt templates; passing the previous empty string "" to
# prompt_templates is not a valid templates mapping. Fall back to smolagents'
# built-in defaults (None) if the file is absent.
try:
    with open("prompts.yaml", "r", encoding="utf-8") as stream:
        prompt_templates = yaml.safe_load(stream)
except FileNotFoundError:
    prompt_templates = None

# NOTE(review): search_articles/download_articles/create_zip_package appear as
# plain functions here; smolagents expects @tool-decorated Tool objects —
# confirm the decorators were not lost from the original file.
agent = CodeAgent(
    model=model,
    tools=[search_articles, download_articles, create_zip_package, search_tool, final_answer],
    max_steps=6,
    verbosity_level=1,
    grammar=None,
    planning_interval=None,
    name=None,
    description=None,
    prompt_templates=prompt_templates,
)

GradioUI(agent).launch()