Spaces:
Sleeping
Sleeping
| import arxiv | |
| from crossref.restful import Works | |
| import pytz | |
| from datetime import date | |
| from datetime import datetime | |
class Search_Papers:
    """Run a literature search against arXiv or Crossref.

    The search is configured once in the constructor and then executed
    with :meth:`search_arxiv` or :meth:`search_general`.

    Two search modes are distinguished by ``search_by``:

    * ``"NumberResults"`` -- ``search_by_query`` is the number of results
      to request.
    * anything else -- ``search_by_query`` is treated as a cutoff
      ``datetime``; only papers after that moment are wanted.

    ``sort_by`` accepts ``"PublishDate"`` or ``"LastUpdatedDate"``; any
    other value means sorting by relevance.  ``sort_order`` accepts
    ``"Ascending"``; any other value means descending.
    """

    # Crossref sort fields keyed by the sort names accepted by this class;
    # names missing from this table fall back to "relevance".
    _CROSSREF_SORT_FIELDS = {
        "PublishDate": "created",
        "LastUpdatedDate": "updated",
    }

    def __init__(self, query, search_by, search_by_query, sort_by, sort_order):
        """Store the search configuration and stamp the creation time.

        Args:
            query: Search text handed to the backend.
            search_by: Search mode, see the class docstring.
            search_by_query: Result count or cutoff datetime, depending
                on ``search_by``.
            sort_by: Name of the sort criterion, see the class docstring.
            sort_order: ``"Ascending"`` or anything else for descending.
        """
        self.query = query
        self.search_mode = search_by
        self.search_mode_query = search_by_query
        self.sort_by = sort_by
        self.sort_order = sort_order
        # Timezone-aware UTC timestamp, truncated to whole seconds.
        self.time_search = datetime.now(pytz.utc).replace(microsecond=0)

    def search_arxiv_NResults(self, query, max_results, sort_by, sort_order):
        """Return the result iterator of an arXiv search capped at ``max_results``.

        ``sort_by`` and ``sort_order`` are passed to ``arxiv.Search``
        unchanged (``search_arxiv`` supplies ``arxiv.SortCriterion`` and
        ``arxiv.SortOrder`` members).
        """
        search = arxiv.Search(
            query=query,
            max_results=max_results,
            sort_by=sort_by,
            sort_order=sort_order,
        )
        return search.results()

    def search_arxiv_Timeframe(self, query, timeframe, sort_by, sort_order):
        """Collect arXiv results published after ``timeframe``.

        Results are consumed in the order the search yields them, and
        collection stops at the first entry whose ``published`` value is
        not newer than ``timeframe`` -- or when the results run out.

        The earlier implementation re-issued the search with a growing
        ``max_results`` and looped forever once the query was exhausted
        without ever meeting an older entry.  Iterating the result
        generator once avoids both the repeated downloads and the endless
        loop.

        NOTE(review): because collection stops at the first older entry,
        the returned list is only complete when the results are ordered
        newest-first by date -- confirm callers rely on that ordering.

        Args:
            query: Search text for arXiv.
            timeframe: Cutoff; must be comparable with each result's
                ``published`` attribute (presumably a timezone-aware
                datetime -- confirm against the caller).
            sort_by: Sort criterion passed to ``arxiv.Search``.
            sort_order: Sort order passed to ``arxiv.Search``.

        Returns:
            A list of the leading results newer than ``timeframe``.
        """
        # max_results is left at the library default so the generator keeps
        # paging until we break out or the query is exhausted.
        search = arxiv.Search(
            query=query,
            sort_by=sort_by,
            sort_order=sort_order,
        )
        collection = []
        for paper in search.results():
            if paper.published > timeframe:
                collection.append(paper)
            else:
                break
        return collection

    def search_arxiv(self):
        """Run the configured search against arXiv.

        Returns:
            The result iterator of :meth:`search_arxiv_NResults` in
            ``"NumberResults"`` mode, otherwise the list built by
            :meth:`search_arxiv_Timeframe`.
        """
        sort_criteria = {
            "PublishDate": arxiv.SortCriterion.SubmittedDate,
            "LastUpdatedDate": arxiv.SortCriterion.LastUpdatedDate,
        }
        sort_by = sort_criteria.get(self.sort_by, arxiv.SortCriterion.Relevance)
        if self.sort_order == "Ascending":
            sort_order = arxiv.SortOrder.Ascending
        else:
            sort_order = arxiv.SortOrder.Descending

        if self.search_mode == "NumberResults":
            return self.search_arxiv_NResults(
                self.query, self.search_mode_query, sort_by, sort_order
            )
        return self.search_arxiv_Timeframe(
            self.query, self.search_mode_query, sort_by, sort_order
        )

    def search_general_NResults(self, query, max_results, sort_by, sort_order):
        """Build a Crossref works query sampled down to ``max_results``.

        NOTE(review): the chain ends in ``sample(max_results)``; confirm
        that sampling combined with ``sort``/``order`` gives the intended
        selection.
        """
        works = Works()
        return (
            works.query(bibliographic=query)
            .sort(sort_by)
            .order(sort_order)
            .sample(max_results)
        )

    def search_general_Timeframe(self, query, timeframe, sort_by, sort_order):
        """Build a Crossref works query for records created since ``timeframe``.

        Args:
            query: Bibliographic search text, e.g.
                'Magnetic Field Conditions Upstream of Ganymede'.
            timeframe: Lower creation-date bound as a ``YYYY-MM-DD`` string.
            sort_by: Crossref sort field.
            sort_order: Crossref order (``"asc"`` or ``"desc"``).

        The upper bound is today's date as reported by ``date.today()``.
        """
        works = Works()
        today = date.today().isoformat()
        return (
            works.query(bibliographic=query)
            .filter(from_created_date=timeframe, until_created_date=today)
            .sort(sort_by)
            .order(sort_order)
        )

    def search_general(self):
        """Run the configured search against Crossref.

        Every mode other than ``"NumberResults"`` is handled as a
        timeframe search, matching :meth:`search_arxiv`.  The cutoff is
        formatted inside that branch, so it is always defined when used
        (previously any mode other than ``'Timeframe'`` or
        ``"NumberResults"`` failed on an unassigned local).

        Returns:
            Whatever :meth:`search_general_NResults` or
            :meth:`search_general_Timeframe` returns.
        """
        sort_by = self._CROSSREF_SORT_FIELDS.get(self.sort_by, "relevance")
        sort_order = "asc" if self.sort_order == "Ascending" else "desc"

        if self.search_mode == "NumberResults":
            return self.search_general_NResults(
                self.query, self.search_mode_query, sort_by, sort_order
            )

        # Crossref date filters take plain YYYY-MM-DD strings.
        timeframe = self.search_mode_query.strftime("%Y-%m-%d")
        return self.search_general_Timeframe(
            self.query, timeframe, sort_by, sort_order
        )