File size: 4,306 Bytes
564d1d7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
import arxiv
from crossref.restful import Works
import pytz
from datetime import date
from datetime import datetime


class Search_Papers():
    """Run a literature search against arXiv or Crossref.

    The search is configured once at construction time and then executed
    with :meth:`search_arxiv` or :meth:`search_general`.

    Two search modes are distinguished by ``search_by``:

    * ``"NumberResults"`` -- ``search_by_query`` is the number of results
      to request.
    * anything else -- ``search_by_query`` is a cutoff datetime and only
      papers after it are wanted.
    """

    def __init__(self, query, search_by, search_by_query, sort_by, sort_order):
        """Store the search configuration.

        Args:
            query: Free-text query string passed to the backend.
            search_by: Search mode; ``"NumberResults"`` selects a fixed
                number of results, any other value selects a timeframe
                search.
            search_by_query: Result count (``"NumberResults"`` mode) or a
                datetime-like cutoff (timeframe mode).
            sort_by: ``"PublishDate"``, ``"LastUpdatedDate"``, or anything
                else for relevance.
            sort_order: ``"Ascending"``, or anything else for descending.
        """
        self.query = query
        self.search_mode = search_by
        self.search_mode_query = search_by_query
        self.sort_by = sort_by
        self.sort_order = sort_order

        # UTC timestamp of when this search object was created, truncated
        # to whole seconds.
        self.time_search = datetime.now(pytz.utc).replace(microsecond=0)

    def search_arxiv_NResults(self, query, max_results, sort_by, sort_order):
        """Return the arXiv results for ``query``, capped at ``max_results``.

        Args:
            query: arXiv query string.
            max_results: Upper bound on the number of results.
            sort_by: An ``arxiv.SortCriterion`` member.
            sort_order: An ``arxiv.SortOrder`` member.

        Returns:
            Whatever ``arxiv.Search(...).results()`` returns.
        """
        search = arxiv.Search(
            query=query,
            max_results=max_results,
            sort_by=sort_by,
            sort_order=sort_order,
        )
        return search.results()

    def search_arxiv_Timeframe(self, query, timeframe, sort_by, sort_order):
        """Collect arXiv results published after ``timeframe``.

        Results are consumed in the order the backend yields them and
        collection stops at the first result whose ``published`` value is
        not strictly greater than ``timeframe``.  The walk also stops when
        the backend has no more results, so the call always terminates
        (the previous paging loop would spin forever in that situation).

        Args:
            query: arXiv query string.
            timeframe: Cutoff compared against each result's ``published``
                attribute.  NOTE(review): presumably a timezone-aware
                datetime, to be comparable with arXiv's timestamps --
                confirm against the caller.
            sort_by: An ``arxiv.SortCriterion`` member.
            sort_order: An ``arxiv.SortOrder`` member.

        Returns:
            list: The leading run of results newer than ``timeframe``.
        """
        # No max_results is given, so the library default applies and the
        # results can be consumed in a single pass; stopping early avoids
        # re-downloading earlier pages as the old grow-and-refetch loop did.
        search = arxiv.Search(
            query=query,
            sort_by=sort_by,
            sort_order=sort_order,
        )

        collection = []
        for paper in search.results():
            if not paper.published > timeframe:
                break
            collection.append(paper)
        return collection

    def search_arxiv(self):
        """Run the configured search against arXiv.

        Translates the string options stored on the instance into the
        ``arxiv`` library's enums and dispatches on the search mode.

        Returns:
            The results of :meth:`search_arxiv_NResults` in
            ``"NumberResults"`` mode, otherwise the list returned by
            :meth:`search_arxiv_Timeframe`.
        """
        if self.sort_by == "PublishDate":
            sort_by = arxiv.SortCriterion.SubmittedDate
        elif self.sort_by == "LastUpdatedDate":
            sort_by = arxiv.SortCriterion.LastUpdatedDate
        else:
            sort_by = arxiv.SortCriterion.Relevance

        if self.sort_order == "Ascending":
            sort_order = arxiv.SortOrder.Ascending
        else:
            sort_order = arxiv.SortOrder.Descending

        if self.search_mode == "NumberResults":
            return self.search_arxiv_NResults(
                self.query, self.search_mode_query, sort_by, sort_order
            )
        return self.search_arxiv_Timeframe(
            self.query, self.search_mode_query, sort_by, sort_order
        )

    def search_general_NResults(self, query, max_results, sort_by, sort_order):
        """Query Crossref for ``max_results`` works matching ``query``.

        Args:
            query: Bibliographic query string.
            max_results: Value passed to ``sample()``.
            sort_by: Crossref sort field name.
            sort_order: Crossref order string.

        Returns:
            The Crossref query object built from the chained calls.
        """
        works = Works()
        # NOTE(review): `sample` asks Crossref for a sample of works;
        # confirm that `sort`/`order` are honoured in combination with it.
        search_results = (
            works.query(bibliographic=query)
            .sort(sort_by)
            .order(sort_order)
            .sample(max_results)
        )
        return search_results

    def search_general_Timeframe(self, query, timeframe, sort_by, sort_order):
        """Query Crossref for works created between ``timeframe`` and today.

        Args:
            query: Bibliographic query string.
            timeframe: Lower bound of the creation date, already formatted
                as ``YYYY-MM-DD``.
            sort_by: Crossref sort field name.
            sort_order: Crossref order string.

        Returns:
            The Crossref query object built from the chained calls.
        """
        works = Works()
        # Upper bound is today's local date.
        today = date.today().strftime("%Y-%m-%d")

        search_results = (
            works.query(bibliographic=query)
            .filter(from_created_date=timeframe, until_created_date=today)
            .sort(sort_by)
            .order(sort_order)
        )
        return search_results

    def search_general(self):
        """Run the configured search against Crossref.

        Translates the string options stored on the instance into the
        strings passed to the Crossref helpers and dispatches on the search
        mode.  Any mode other than ``"NumberResults"`` is treated as a
        timeframe search, matching :meth:`search_arxiv`.

        Returns:
            The Crossref query object produced by the selected helper.
        """
        if self.sort_by == "PublishDate":
            sort_by = "created"
        elif self.sort_by == "LastUpdatedDate":
            sort_by = "updated"
        else:
            sort_by = "relevance"

        sort_order = "asc" if self.sort_order == "Ascending" else "desc"

        if self.search_mode == "NumberResults":
            return self.search_general_NResults(
                self.query, self.search_mode_query, sort_by, sort_order
            )

        # Format the cutoff here, in the branch that uses it, so an
        # unexpected mode string can no longer leave `timeframe` unbound.
        timeframe = self.search_mode_query.strftime("%Y-%m-%d")
        return self.search_general_Timeframe(
            self.query, timeframe, sort_by, sort_order
        )