Spaces:
Runtime error
Runtime error
| from datetime import datetime | |
| import requests | |
| import xmltodict | |
| from research_assistant.app_logging import app_logger | |
| from research_assistant.constants import ARXIV_API_ACCESS_POINT | |
| from research_assistant.entity import ArticleSearchConfig | |
class ArxivApiWrap:
    """Small client for the arXiv query API, driven by an ``ArticleSearchConfig``.

    The configuration object supplies the search terms, the optional date
    range, the number of results, and the sort settings that
    :meth:`get_arxiv_api_response` turns into request parameters.
    """

    # Seconds to wait on the network before giving up. Without a timeout,
    # ``requests`` may block forever on an unresponsive server.
    REQUEST_TIMEOUT = 30

    # Directory (relative to the current working directory) for downloaded PDFs.
    DOWNLOAD_DIR = "data"

    def __init__(self, config: "ArticleSearchConfig"):
        self.config = config

    @staticmethod
    def _as_list(value):
        """Normalize an xmltodict child value to a list.

        xmltodict yields a list for repeated elements, a single mapping for an
        element that occurs once, and no key at all (``None`` via ``.get``)
        when the element is absent.
        """
        if value is None:
            return []
        if isinstance(value, list):
            return value
        return [value]

    @staticmethod
    def _pdf_file_stem(pdf_url):
        """Return the last path segment of ``pdf_url`` without a ``.pdf`` suffix."""
        name = pdf_url.rstrip("/").split("/")[-1]
        if name.endswith(".pdf"):
            name = name[: -len(".pdf")]
        return name

    def convert_link_to_pdflink(self, link):
        """Turn an abstract-page link (``.../abs/<id>``) into ``.../pdf/<id>.pdf``."""
        return link.replace("/abs/", "/pdf/") + ".pdf"

    def convert_date(self, date):
        """Reformat a ``YYYY-MM-DD`` string as ``YYYYMMDD``.

        Raises:
            ValueError: If ``date`` does not match ``%Y-%m-%d``.
        """
        return datetime.strptime(date, "%Y-%m-%d").strftime("%Y%m%d")

    def get_arxiv_api_response(self):
        """Query the arXiv API using the search settings held in ``self.config``.

        Configuration values read:
            search_terms: Keywords; each is quoted and AND-ed under ``all:``.
            date_range.start_date / date_range.end_date: When ``start_date``
                is truthy, a ``submittedDate:[start TO end]`` filter is added
                (both values go through :meth:`convert_date`, so ``end_date``
                must then be a ``YYYY-MM-DD`` string as well).
            num_results: Sent as ``max_results``.
            sort_by: Sent as ``sortBy``.
            sort_order: Sent as ``sortOrder``.

        Returns:
            requests.Response: The HTTP response returned by the arXiv API.

        Raises:
            requests.exceptions.RequestException: On connection failure or
                when no response arrives within ``REQUEST_TIMEOUT`` seconds.
        """
        keyword_query = " AND all:".join(
            [f"'{kw}'" for kw in self.config.search_terms]
        )
        date_range = self.config.date_range
        if date_range.start_date:
            start = self.convert_date(date_range.start_date)
            end = self.convert_date(date_range.end_date)
            query = f" all:{keyword_query} AND submittedDate:[{start} TO {end}]"
        else:
            query = f" all:{keyword_query}"

        params = {
            "search_query": query,
            "start": 0,  # Offset of the first result to return
            "max_results": self.config.num_results,
            "sortBy": self.config.sort_by,
            "sortOrder": self.config.sort_order,
        }
        return requests.get(
            ARXIV_API_ACCESS_POINT,
            params=params,
            timeout=self.REQUEST_TIMEOUT,
        )

    def get_article_search_result(self):
        """Run the configured search, log every article, and return their links.

        The XML response is parsed with ``xmltodict``; for each entry the
        title, authors, abstract, page link, PDF link, and paper id are logged.

        Returns:
            list of str: The ``id`` link of each article found. Empty when the
            request did not return HTTP 200 or the feed contains no entries.
        """
        response = self.get_arxiv_api_response()
        if response.status_code != 200:
            app_logger.info(f"Failed to retrieve papers: {response.status_code}")
            return []

        # arXiv answers with an Atom XML feed.
        feed = xmltodict.parse(response.content)["feed"]

        article_links = []
        # "entry" is missing for zero hits and a single mapping for one hit, so
        # normalize before iterating; the same applies to "author" below.
        for entry in self._as_list(feed.get("entry")):
            title = entry["title"]
            summary = entry["summary"]
            link = entry["id"]
            authors = [
                author["name"] for author in self._as_list(entry.get("author"))
            ]
            app_logger.info(
                f"Title: {title}\n Authors: {authors} \n,Abstract: {summary}\n Page Link: {link}\n PDF Link: {self.convert_link_to_pdflink(link)}\n Paper Id: {link.split('/')[-1]}\n {'-'*80}"
            )
            article_links.append(link)
        return article_links

    def download_pdf(self, pdf_url):
        """Download ``pdf_url`` into ``DOWNLOAD_DIR`` as ``<last-url-segment>.pdf``.

        The target directory is created when missing. If the server does not
        answer with HTTP 200, the failure is logged and nothing is written.

        Raises:
            requests.exceptions.RequestException: On connection failure or
                when no response arrives within ``REQUEST_TIMEOUT`` seconds.
        """
        import os  # Local import: only this method needs filesystem helpers.

        response = requests.get(pdf_url, timeout=self.REQUEST_TIMEOUT)
        if response.status_code != 200:
            app_logger.info(f"Failed to download {pdf_url}: {response.status_code}")
            return

        # Name the file after the final URL segment (the paper id), not the
        # first one, which is just the URL scheme ("http:").
        title = self._pdf_file_stem(pdf_url)
        os.makedirs(self.DOWNLOAD_DIR, exist_ok=True)
        with open(os.path.join(self.DOWNLOAD_DIR, f"{title}.pdf"), "wb") as f:
            f.write(response.content)
        print(f"Downloaded: {title}.pdf")