from datetime import date
from datetime import datetime

import arxiv
import pytz
from crossref.restful import Works


class Search_Papers():
    """Run a paper search against arXiv or Crossref.

    The search is configured once in the constructor and executed with
    ``search_arxiv()`` or ``search_general()``.

    Attributes:
        query: Search string passed to the backend.
        search_mode: ``"NumberResults"`` selects a fixed-size search; any
            other value is handled as a timeframe search.
        search_mode_query: Argument of the search mode: the number of
            results for ``"NumberResults"``, otherwise the cut-off date
            (``search_general`` calls ``strftime`` on it, and
            ``search_arxiv`` compares it with each result's ``published``
            value -- presumably a timezone-aware ``datetime``; confirm
            against callers).
        sort_by: ``"PublishDate"``, ``"LastUpdatedDate"``; anything else
            means relevance.
        sort_order: ``"Ascending"``; anything else means descending.
        time_search: UTC time at which the object was created, truncated
            to whole seconds.
    """

    def __init__(self, query, search_by, search_by_query, sort_by, sort_order):
        """Store the search configuration and time-stamp the search."""
        self.query = query
        self.search_mode = search_by
        self.search_mode_query = search_by_query
        self.sort_by = sort_by
        self.sort_order = sort_order
        self.time_search = datetime.now(pytz.utc).replace(microsecond=0)

    def search_arxiv_NResults(self, query, max_results, sort_by, sort_order):
        """Query arXiv for at most ``max_results`` papers.

        Args:
            query: arXiv search string.
            max_results: Maximum number of results to request.
            sort_by: An ``arxiv.SortCriterion`` value.
            sort_order: An ``arxiv.SortOrder`` value.

        Returns:
            Whatever ``arxiv.Search(...).results()`` returns (an iterable
            of results).
        """
        search_results = arxiv.Search(
            query=query,
            max_results=max_results,
            sort_by=sort_by,
            sort_order=sort_order,
        )
        return search_results.results()

    def search_arxiv_Timeframe(self, query, timeframe, sort_by, sort_order):
        """Collect arXiv results published after ``timeframe``.

        Results are requested in growing batches (10, 20, 30, ...). The
        scan stops at the first result whose ``published`` value is not
        later than ``timeframe``, or when arXiv has no further results.

        NOTE(review): stopping at the first older result only yields the
        complete set when results arrive newest-first (sorted by date,
        descending) -- confirm that callers use it that way.

        Args:
            query: arXiv search string.
            timeframe: Cut-off; only results with ``published > timeframe``
                are kept.
            sort_by: An ``arxiv.SortCriterion`` value.
            sort_order: An ``arxiv.SortOrder`` value.

        Returns:
            A list of the matching arXiv results, in the order received.
        """
        collection = []
        page_size = 10
        n_pages = 0  # Number of batches already examined.

        while True:
            # Each round asks for one more batch than the previous one; the
            # results that were already examined are fetched again and
            # skipped below.
            search = arxiv.Search(
                query=query,
                max_results=(n_pages + 1) * page_size,
                sort_by=sort_by,
                sort_order=sort_order,
            )
            results = list(search.results())
            new_results = results[n_pages * page_size:]

            if not new_results:
                # The result set is exhausted. Without this check the loop
                # would keep re-querying forever when every available
                # result is newer than the cut-off.
                break

            cutoff_reached = False
            for paper in new_results:
                if paper.published > timeframe:
                    collection.append(paper)
                else:
                    cutoff_reached = True
                    break

            if cutoff_reached:
                break
            n_pages += 1

        return collection

    def search_arxiv(self):
        """Run the configured search against arXiv.

        Translates ``self.sort_by`` / ``self.sort_order`` into the arxiv
        enums and dispatches on ``self.search_mode``.

        Returns:
            The result of ``search_arxiv_NResults`` when the mode is
            ``"NumberResults"``, otherwise the list produced by
            ``search_arxiv_Timeframe``.
        """
        if self.sort_by == "PublishDate":
            sort_by = arxiv.SortCriterion.SubmittedDate
        elif self.sort_by == "LastUpdatedDate":
            sort_by = arxiv.SortCriterion.LastUpdatedDate
        else:
            sort_by = arxiv.SortCriterion.Relevance

        if self.sort_order == "Ascending":
            sort_order = arxiv.SortOrder.Ascending
        else:
            sort_order = arxiv.SortOrder.Descending

        if self.search_mode == "NumberResults":
            return self.search_arxiv_NResults(
                self.query, self.search_mode_query, sort_by, sort_order
            )
        return self.search_arxiv_Timeframe(
            self.query, self.search_mode_query, sort_by, sort_order
        )

    def search_general_NResults(self, query, max_results, sort_by, sort_order):
        """Query Crossref for ``max_results`` works matching ``query``.

        NOTE(review): the result count is limited through ``sample()``;
        confirm that sampling (rather than taking the first N sorted
        results) is the intended behaviour.

        Args:
            query: Bibliographic query string.
            max_results: Size passed to ``sample()``.
            sort_by: Crossref sort field (e.g. ``'created'``).
            sort_order: ``'asc'`` or ``'desc'``.

        Returns:
            The Crossref request object built by the call chain.
        """
        works = Works()
        search_results = (
            works.query(bibliographic=query)
            .sort(sort_by)
            .order(sort_order)
            .sample(max_results)
        )
        return search_results

    def search_general_Timeframe(self, query, timeframe, sort_by, sort_order):
        """Query Crossref for works created between ``timeframe`` and today.

        Args:
            query: Bibliographic query string.
            timeframe: Start date as a ``YYYY-MM-DD`` string.
            sort_by: Crossref sort field (e.g. ``'created'``).
            sort_order: ``'asc'`` or ``'desc'``.

        Returns:
            The Crossref request object built by the call chain.
        """
        works = Works()
        today = date.today().strftime("%Y-%m-%d")
        search_results = (
            works.query(bibliographic=query)
            .filter(from_created_date=timeframe, until_created_date=today)
            .sort(sort_by)
            .order(sort_order)
        )
        return search_results

    # e.g. query = 'Magnetic Field Conditions Upstream of Ganymede'
    def search_general(self):
        """Run the configured search against Crossref.

        Translates ``self.sort_by`` / ``self.sort_order`` into Crossref
        parameters and dispatches on ``self.search_mode``. As in
        ``search_arxiv``, every mode other than ``"NumberResults"`` is
        handled as a timeframe search.

        Returns:
            The Crossref request object from ``search_general_NResults``
            or ``search_general_Timeframe``.
        """
        if self.sort_by == "PublishDate":
            sort_by = 'created'
        elif self.sort_by == "LastUpdatedDate":
            sort_by = 'updated'
        else:
            sort_by = 'relevance'

        if self.sort_order == "Ascending":
            sort_order = "asc"
        else:
            sort_order = "desc"

        if self.search_mode == "NumberResults":
            return self.search_general_NResults(
                self.query, self.search_mode_query, sort_by, sort_order
            )

        # Format the cut-off date here so it is always bound on this path,
        # whatever the exact spelling of the (non-"NumberResults") mode.
        timeframe = self.search_mode_query.strftime("%Y-%m-%d")
        return self.search_general_Timeframe(
            self.query, timeframe, sort_by, sort_order
        )