# panel_example7 / src / search.py
# EasySci's picture
# Upload 6 files
# 564d1d7
import arxiv
from crossref.restful import Works
import pytz
from datetime import date
from datetime import datetime
class Search_Papers():
    """Run a paper search against arXiv or Crossref.

    The constructor only stores the search configuration. ``search_arxiv``
    and ``search_general`` translate that configuration into the vocabulary
    of the respective client library and execute the query.
    """

    # How many additional arXiv results are requested per round of a
    # time-frame search.
    _ARXIV_PAGE_SIZE = 10

    def __init__(self, query, search_by, search_by_query, sort_by, sort_order):
        """Store the search configuration and the time of creation.

        Args:
            query: Query string handed to the search backend.
            search_by: ``"NumberResults"`` for a count-limited search; any
                other value is treated as a time-frame search.
            search_by_query: The result limit for a count-limited search, or
                the cutoff date/time for a time-frame search.
            sort_by: ``"PublishDate"`` or ``"LastUpdatedDate"``; any other
                value selects sorting by relevance.
            sort_order: ``"Ascending"``; any other value selects descending.
        """
        self.query = query
        self.search_mode = search_by
        self.search_mode_query = search_by_query
        self.sort_by = sort_by
        self.sort_order = sort_order
        # Timezone-aware UTC timestamp, truncated to whole seconds.
        self.time_search = datetime.now(pytz.utc).replace(microsecond=0)

    def search_arxiv_NResults(self, query, max_results, sort_by, sort_order):
        """Return the arXiv results for ``query``, limited to ``max_results``.

        ``sort_by`` and ``sort_order`` are passed to ``arxiv.Search``
        unchanged, so they must already be ``arxiv`` enum members.
        """
        search = arxiv.Search(
            query=query,
            max_results=max_results,
            sort_by=sort_by,
            sort_order=sort_order,
        )
        return search.results()

    @staticmethod
    def _collect_published_after(fetch_first, timeframe, page_size=10):
        """Collect leading results whose ``published`` is after ``timeframe``.

        Args:
            fetch_first: Callable taking a limit and returning a list with
                the first ``limit`` results of the search.
            timeframe: Cutoff; a result is kept only while
                ``result.published > timeframe``.
            page_size: How many additional results to request per round.

        Returns:
            The results seen before the first one that is not newer than
            ``timeframe``, or all results if the search runs out first.
        """
        collection = []
        n_iter = 0
        while True:
            limit = (n_iter + 1) * page_size
            results = fetch_first(limit)
            # Only look at the results that were not examined in earlier rounds.
            for result in results[n_iter * page_size:]:
                if result.published > timeframe:
                    collection.append(result)
                else:
                    return collection
            # Fewer results than requested means the search is exhausted.
            # Without this check the loop would re-query forever whenever
            # every available result is newer than the cutoff.
            if len(results) < limit:
                return collection
            n_iter += 1

    def search_arxiv_Timeframe(self, query, timeframe, sort_by, sort_order):
        """Return the list of arXiv results published after ``timeframe``.

        Results are requested in growing batches until one is found that is
        not newer than ``timeframe`` or the search has no more results.

        NOTE(review): scanning stops at the first result that is not newer
        than ``timeframe``, so the list is only complete when results arrive
        newest-first by publication date; other sort settings can cut the
        scan short. Confirm this is intended.
        NOTE(review): ``timeframe`` must be comparable with
        ``result.published`` -- presumably a timezone-aware datetime; verify
        against the caller.
        """
        def fetch_first(limit):
            # Each round asks for the first ``limit`` results again, so
            # results from earlier rounds are re-requested.
            search = arxiv.Search(
                query=query,
                max_results=limit,
                sort_by=sort_by,
                sort_order=sort_order,
            )
            return list(search.results())

        return self._collect_published_after(
            fetch_first, timeframe, self._ARXIV_PAGE_SIZE
        )

    def search_arxiv(self):
        """Run the configured search on arXiv and return its results.

        Maps the stored ``sort_by`` / ``sort_order`` strings to the ``arxiv``
        enums, then dispatches on ``search_mode``: ``"NumberResults"`` runs a
        count-limited search, anything else a time-frame search.
        """
        if self.sort_by == "PublishDate":
            sort_by = arxiv.SortCriterion.SubmittedDate
        elif self.sort_by == "LastUpdatedDate":
            sort_by = arxiv.SortCriterion.LastUpdatedDate
        else:
            sort_by = arxiv.SortCriterion.Relevance

        if self.sort_order == "Ascending":
            sort_order = arxiv.SortOrder.Ascending
        else:
            sort_order = arxiv.SortOrder.Descending

        if self.search_mode == "NumberResults":
            return self.search_arxiv_NResults(
                self.query, self.search_mode_query, sort_by, sort_order
            )
        return self.search_arxiv_Timeframe(
            self.query, self.search_mode_query, sort_by, sort_order
        )

    def search_general_NResults(self, query, max_results, sort_by, sort_order):
        """Build a Crossref bibliographic query limited via ``sample``.

        NOTE(review): per the Crossref REST API documentation ``sample``
        requests randomly selected records -- confirm that a random sample,
        rather than the first ``max_results`` sorted records, is intended.
        """
        works = Works()
        return (
            works.query(bibliographic=query)
            .sort(sort_by)
            .order(sort_order)
            .sample(max_results)
        )

    def search_general_Timeframe(self, query, timeframe, sort_by, sort_order):
        """Build a Crossref bibliographic query for a creation-date window.

        The window runs from ``timeframe`` (a ``YYYY-MM-DD`` string as
        produced by ``search_general``) until today's local date.
        """
        works = Works()
        today = date.today().strftime("%Y-%m-%d")
        return (
            works.query(bibliographic=query)
            .filter(from_created_date=timeframe, until_created_date=today)
            .sort(sort_by)
            .order(sort_order)
        )

    # Example query: 'Magnetic Field Conditions Upstream of Ganymede'
    def search_general(self):
        """Run the configured search on Crossref and return its results.

        Maps the stored ``sort_by`` / ``sort_order`` strings to the values
        passed to ``Works.sort`` / ``Works.order``, then dispatches on
        ``search_mode``: ``"NumberResults"`` runs a count-limited search,
        anything else a time-frame search (matching ``search_arxiv``).
        """
        if self.sort_by == "PublishDate":
            sort_by = 'created'
        elif self.sort_by == "LastUpdatedDate":
            sort_by = 'updated'
        else:
            sort_by = 'relevance'

        sort_order = "asc" if self.sort_order == "Ascending" else "desc"

        if self.search_mode == "NumberResults":
            return self.search_general_NResults(
                self.query, self.search_mode_query, sort_by, sort_order
            )
        # Format the cutoff here, in the branch that uses it, so an
        # unexpected search mode can no longer hit an unbound ``timeframe``.
        timeframe = self.search_mode_query.strftime("%Y-%m-%d")
        return self.search_general_Timeframe(
            self.query, timeframe, sort_by, sort_order
        )