Marcelo committed on
Commit
27ec9ac
·
1 Parent(s): 398a3b1

initial app commit

Browse files
Dockerfile CHANGED
@@ -11,4 +11,4 @@ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
11
 
12
  COPY . .
13
 
14
- CMD ["panel", "serve", "/code/app.py","--address", "0.0.0.0", "--port", "7860", "--allow-websocket-origin","easysci-panel-main.hf.space"]
 
11
 
12
  COPY . .
13
 
14
+ CMD ["panel", "serve", "/code/app2.py","--address", "0.0.0.0", "--port","7860","--allow-websocket-origin","easysci-panel-example7.hf.space"]
MyFiles.png ADDED
README.md CHANGED
@@ -1,8 +1,8 @@
1
  ---
2
- title: Panel Main
3
- emoji: 😻
4
- colorFrom: green
5
- colorTo: pink
6
  sdk: docker
7
  pinned: false
8
  ---
 
1
  ---
2
+ title: Panel Example7
3
+ emoji: 🌍
4
+ colorFrom: yellow
5
+ colorTo: blue
6
  sdk: docker
7
  pinned: false
8
  ---
TLDR.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import panel as pn
2
+ import pytz
3
+ from datetime import date
4
+ from datetime import datetime
5
+ from dateutil.relativedelta import relativedelta, MO
6
+ import param
7
+ import json
8
+
9
+ from functools import partial
10
+ from src.search import Search_Papers
11
+
12
+
13
def load_categories(path='categories_arxiv.txt'):
    """Load the arXiv category mapping from a JSON file.

    Args:
        path: Location of the JSON file. Defaults to ``categories_arxiv.txt``,
            resolved against the current working directory.

    Returns:
        The decoded JSON content. For the bundled file this is a dict mapping
        arXiv tags (e.g. ``"cs.AI"``) to human-readable category names.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Explicit encoding so the result does not depend on the platform default.
    with open(path, encoding='utf-8') as f:
        return json.load(f)
24
+
25
+
26
def run_code(category, loaded_dict, event = None):
    """Search arXiv for the papers of one category published since Monday.

    Args:
        category: Human-readable category name, i.e. one of the values of
            ``loaded_dict``.
        loaded_dict: Mapping of arXiv tag -> human-readable category name.
        event: Not used by this function.

    Returns:
        Whatever ``Search_Papers.search_arxiv()`` returns for the query.

    Raises:
        ValueError: If ``category`` is not a value of ``loaded_dict``.
    """
    # Midnight of today's date (as reported by date.today()), labelled UTC,
    # then moved back to the most recent Monday (today itself on a Monday).
    today_start = datetime.combine(date.today(), datetime.min.time()).replace(tzinfo=pytz.utc)
    timeframe_week = today_start + relativedelta(weekday=MO(-1))

    search_mode = "Timeframe"  # Accepted values: 'NumberResults' and 'Timeframe'
    sort_by = "PublishDate"    # Accepted values: 'PublishDate', 'LastUpdatedDate' and 'Relevance'
    sort_order = "Descending"  # Accepted values: 'Ascending' and 'Descending'

    # Reverse lookup display name -> arXiv tag. Several tags can share one
    # display name; the first one in dict order wins, as before.
    for tag, name in loaded_dict.items():
        if name == category:
            query = tag
            break
    else:
        raise ValueError(f"Unknown category: {category!r}")

    search = Search_Papers(query, search_mode, timeframe_week, sort_by, sort_order)
    return search.search_arxiv()
44
+
45
+
46
def update_mainTLDR(buttons_to_add, paper_list):
    """Build the main-panel content for the currently selected categories.

    Args:
        buttons_to_add: Names of the selected categories, in display order.
        paper_list: Sequence parallel to ``buttons_to_add``; entry ``i`` is an
            iterable of paper objects exposing ``title``, ``summary`` and
            ``published`` (an object with ``strftime``).

    Returns:
        A list of Panel objects: one ``pn.Column`` per category, or a single
        Markdown prompt when no category is selected.

    Raises:
        IndexError: If ``paper_list`` is shorter than ``buttons_to_add``.
    """
    if not buttons_to_add:
        return [pn.pane.Markdown("# Please select some tags!")]

    main_tldr = []
    for position, category_name in enumerate(buttons_to_add):
        # Indexed on purpose: a missing entry must fail loudly, not be skipped.
        papers = paper_list[position]
        paper_columns = [
            pn.Column(
                pn.pane.LaTeX(f"{paper.title}", styles={'font-size': '18pt'}),
                pn.pane.LaTeX(paper.summary, styles={'font-size': '14pt'}),
                pn.pane.Markdown("### " + paper.published.strftime("%d/%m/%Y %H:%M:%S")),
                sizing_mode='stretch_width',
            )
            for paper in papers
        ]
        main_tldr.append(
            pn.Column(
                pn.pane.Markdown(f"# {category_name}"),
                *paper_columns,
                sizing_mode='stretch_width',
            )
        )
    return main_tldr
app.py CHANGED
@@ -1,42 +1,154 @@
1
  import panel as pn
2
- import hvplot.pandas
 
 
 
 
 
3
  import param
4
 
5
- # Load Data
6
- from bokeh.sampledata.autompg import autompg_clean as df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
- # create a self-contained dashboard class
9
- class InteractiveDashboard(param.Parameterized):
10
- cylinders = param.Integer(label='Cylinders', default=4, bounds=(4, 8))
11
- mfr = param.ListSelector(
12
- label='MFR',
13
- default=['ford', 'chevrolet', 'honda', 'toyota', 'audi'],
14
- objects=['ford', 'chevrolet', 'honda', 'toyota', 'audi'], precedence=0.5)
15
- yaxis = param.Selector(label='Y axis', objects=['hp', 'weight'])
16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
- @param.depends('cylinders', 'mfr', 'yaxis')
19
- def plot(self):
20
- return (
21
- df[
22
- (df.cyl == self.cylinders) &
23
- (df.mfr.isin(self.mfr))
24
- ]
25
- .groupby(['origin', 'mpg'])[self.yaxis].mean()
26
- .to_frame()
27
- .reset_index()
28
- .sort_values(by='mpg')
29
- .reset_index(drop=True)
30
- .hvplot(x='mpg', y=self.yaxis, by='origin', color=["#ff6f69", "#ffcc5c", "#88d8b0"], line_width=6, height=400)
31
- )
32
- dashboard = InteractiveDashboard()
33
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  # Layout using Template
35
  template = pn.template.FastListTemplate(
36
- title='Interactive DataFrame Dashboards with param .depends',
37
- sidebar=[pn.Param(dashboard.param, widgets={'mfr': pn.widgets.ToggleGroup,'yaxis': pn.widgets.RadioButtonGroup})],
38
- main=[dashboard.plot],
 
39
  accent_base_color="#88d8b0",
40
- header_background="#88d8b0",
 
 
 
 
41
  )
 
 
42
  template.servable()
 
1
  import panel as pn
2
+ pn.extension('katex')
3
+
4
+ import pytz
5
+ from datetime import date
6
+ from datetime import datetime
7
+ from dateutil.relativedelta import relativedelta, MO
8
  import param
9
 
10
+ import TLDR
11
+ from src.search import Search_Papers
12
+
13
+ class MainBody(param.Parameterized):
14
+ main_body = param.List(default=[pn.pane.LaTeX("Please select some tags!", styles={'font-size': '20pt'})])
15
+
16
+ @param.depends("main_body")
17
+ def update(self):
18
+ return pn.Column(*self.main_body)
19
+
20
+ main_body_instance = MainBody()
21
+
22
+ paper_list = [] # Add this line to initialize paper_list as an empty list
23
+
24
+ main_body = [pn.pane.LaTeX("Please select some tags!", styles={'font-size': '20pt'})]
25
+
26
+ # Create buttons for the toggleable sidebar
27
+ # button1 = pn.widgets.Button(name="🏠 Home", button_type='default', button_style='outline', width=140)
28
+ # button2 = pn.widgets.Button(name='📁 My papers', button_type='default', button_style='outline', width=140)
29
+ # button3 = pn.widgets.Toggle(name='⚙️ Settings', button_type='default', button_style='outline', width=140, value=False)
30
+
31
+ # Create buttons for the toggleable sidebar
32
+ button1 = pn.widgets.Button(icon='home', name="Daily papers", icon_size='1.5em', button_type = 'primary', button_style = 'outline', width = 140)
33
+ button2 = pn.widgets.Button(icon='calendar-filled', name="Search papers", icon_size='1.5em', button_type = 'primary', button_style = 'outline', width = 140)
34
+ button3 = pn.widgets.Button(icon='file-analytics', name='My papers', icon_size='1.5em', button_type = 'primary', button_style = 'outline', width = 140)
35
+ button4 = pn.widgets.Toggle(name='Accessibility',icon='settings', icon_size='1.5em', button_type='default', button_style='outline', width=140, value=False)
36
+
37
+ # Custom RadioButtonGroup widget
38
+ select = pn.widgets.RadioButtonGroup(value="Scientic", options=["General", "Scientic"], name='String', align='center', button_type='default') # , align='center'
39
+ select2 = pn.widgets.RadioButtonGroup(value="Normal", options=["Bionic", "Normal"], name='String', align='center', button_type='default')
40
+
41
+ # Define a callback to display the selected value
42
+ def display_selected_value(event):
43
+ print(f"Selected value: {event.new}", flush=True)
44
+
45
+ # Attach the callback to the value_change event of the RadioButtonGroup
46
+ select.param.watch(display_selected_value, 'value')
47
+ select2.param.watch(display_selected_value, 'value')
48
+
49
+
50
+ # Create a column layout for the buttons inside the toggleable sidebar
51
+ buttons = pn.Column(
52
+ button1, button2, button3,
53
+ pn.Column(
54
+ select,
55
+ select2,
56
+ visible=True,
57
+ sizing_mode='stretch_width',
58
+ ),
59
+ css_classes=['hidden']
60
+ )
61
+
62
+ # Define a callback to show/hide the select buttons when "Settings" button is toggled
63
+ def toggle_settings(event):
64
+ buttons[-1].visible = event.new
65
 
66
+ button3.param.watch(toggle_settings, 'value')
 
 
 
 
 
 
 
67
 
68
+ # List to store the entered options
69
+ loaded_dict = TLDR.load_categories()
70
+
71
+ arxiv_tags = list(loaded_dict.keys())
72
+ entered_options = list(loaded_dict.values())
73
+
74
+ # Create buttons for the header
75
+ header_buttons = pn.Row(sizing_mode='stretch_width', css_classes=['header-buttons'])
76
+
77
+ # Search-icon button that adds the selected topics to the header
78
+ add_to_header_button = pn.widgets.Button(name="", icon='search', icon_size='1.5em', button_style = 'outline',button_type = 'light')
79
+
80
+ # List to store the names of buttons to be added to the header
81
+ buttons_to_add = []
82
+
83
+ paper_list = []
84
+
85
+ # Callback for adding selected options to the list
86
def add_to_header(event):
    """Add the topics chosen in ``filter_list`` to the header and fetch their papers.

    Args:
        event: Click event from ``add_to_header_button``; not inspected.
    """
    selected_options = filter_list.value
    if not selected_options:
        return

    for option in selected_options:
        if option in buttons_to_add:
            continue  # already shown in the header
        # Fetch first: if the search raises, neither list is touched, so
        # buttons_to_add and paper_list stay index-aligned.
        papers = TLDR.run_code(option, loaded_dict)
        buttons_to_add.append(option)
        paper_list.append(papers)

    filter_list.value = []  # clear the selection once it has been added
    update_header()
 
 
 
 
 
99
 
100
+
101
+ add_to_header_button.on_click(add_to_header)
102
+
103
+ # Function to update the header layout with the newly created buttons
104
def update_header():
    """Rebuild the header buttons, filter options and main body from the selection.

    Reads the module-level ``buttons_to_add`` and ``paper_list`` and writes to
    ``header_buttons``, ``filter_list.options`` and
    ``main_body_instance.main_body``.
    """
    header_buttons.clear()

    for button_name in buttons_to_add:
        header_button = pn.widgets.Button(name=button_name, button_type='primary', button_style='outline')
        # Clicking a header button removes that topic again.
        header_button.on_click(remove_from_header)
        header_buttons.append(header_button)

    # Offer only the topics that are not already in the header.
    filter_list.options = [option for option in entered_options if option not in buttons_to_add]
    main_body_instance.main_body = TLDR.update_mainTLDR(buttons_to_add, paper_list)
116
+
117
+ # Callback to remove the clicked header button
118
def remove_from_header(event):
    """Remove the clicked topic from the header together with its papers.

    Args:
        event: Click event; ``event.obj`` is the header button that was clicked.
    """
    button = event.obj
    if button.name not in buttons_to_add:
        return

    # The two lists are parallel, so drop the same position from both.
    position = buttons_to_add.index(button.name)
    del paper_list[position]
    del buttons_to_add[position]

    # update_header() recomputes filter_list.options from scratch, so the
    # option does not need to be appended back by hand.
    update_header()
126
+
127
+ # MultiChoice widget to display the filter options with delete buttons
128
+ filter_list = pn.widgets.MultiChoice(
129
+ name='',
130
+ value=[],
131
+ options=entered_options,
132
+ margin=(20, 10),
133
+ sizing_mode='fixed',
134
+ solid=False,
135
+ styles={'background': '#f0f0f0'},
136
+ placeholder="Search Topics"
137
+ )
138
+
139
  # Layout using Template
140
  template = pn.template.FastListTemplate(
141
+ title="EasySciRead",
142
+ header=[pn.Row(header_buttons, width=750, sizing_mode='stretch_width'), pn.Row(filter_list, width=250), pn.Row(add_to_header_button, width=55)],
143
+ main= main_body_instance.update,
144
+ sidebar=[buttons],
145
  accent_base_color="#88d8b0",
146
+ header_background="#FFFFFF",
147
+ header_color="#000000",
148
+ text_align='center',
149
+ sidebar_width=160,
150
+ sizing_mode = 'stretch_both'
151
  )
152
+
153
+ # Run the app
154
  template.servable()
app2.py ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import panel as pn
2
+ pn.extension('katex')
3
+
4
+ import pytz
5
+ from datetime import date
6
+ from datetime import datetime
7
+ from dateutil.relativedelta import relativedelta, MO
8
+ import param
9
+
10
+ import TLDR
11
+ from src.search import Search_Papers
12
+
13
+ # Create the MainBody class
14
+ class MainBody(param.Parameterized):
15
+ main_body = param.List(default=[pn.pane.LaTeX("Please select some tags!", styles={'font-size': '20pt'})])
16
+
17
+ @param.depends("main_body")
18
+ def update(self):
19
+ return pn.Column(*self.main_body)
20
+
21
+ # Initialize the MainBody instance
22
+ main_body_instance = MainBody()
23
+
24
+ # Store the initial content of the main panel in the MainBody instance
25
+ main_body_instance.main_body = [pn.pane.LaTeX("Please select some tags!", styles={'font-size': '20pt'})]
26
+ initial_main_body = main_body_instance.main_body.copy()
27
+
28
+ # Function to reset the main panel content to the initial content
29
def reset_main_panel(event):
    """Put the initial placeholder content back into the main panel.

    Args:
        event: Click event from the widget this is attached to; not inspected.
    """
    current_body = main_body_instance.main_body
    if current_body != initial_main_body:
        # Replace the contents in place, then notify dependents by hand
        # because an in-place change does not fire the parameter.
        current_body[:] = initial_main_body
        main_body_instance.param.trigger('main_body')
35
+
36
+ paper_list = [] # Add this line to initialize paper_list as an empty list
37
+
38
+ main_body = [pn.pane.LaTeX("Please select some tags!", styles={'font-size': '20pt'})]
39
+
40
+ # Create buttons for the toggleable sidebar
41
+ # button1 = pn.widgets.Button(name="🏠 Home", button_type='default', button_style='outline', width=140)
42
+ # button2 = pn.widgets.Button(name='📁 My papers', button_type='default', button_style='outline', width=140)
43
+ # button3 = pn.widgets.Toggle(name='⚙️ Settings', button_type='default', button_style='outline', width=140, value=False)
44
+
45
+ # Create buttons for the toggleable sidebar
46
+ button1 = pn.widgets.Button(icon='home', name="Daily papers", icon_size='1.5em', button_type = 'primary', button_style = 'outline', width = 140)
47
+ button2 = pn.widgets.Button(icon='calendar-filled', name="Search papers", icon_size='1.5em', button_type = 'primary', button_style = 'outline', width = 140)
48
+ button3 = pn.widgets.Button(icon='file-analytics', name='My papers', icon_size='1.5em', button_type = 'primary', button_style = 'outline', width = 140)
49
+ button4 = pn.widgets.Toggle(name='Accessibility',icon='settings', icon_size='1.5em', button_type='default', button_style='outline', width=140, value=False)
50
+
51
+ # Custom RadioButtonGroup widget
52
+ select = pn.widgets.RadioButtonGroup(value="Scientic", options=["General", "Scientic"], name='String', align='center', button_type='default') # , align='center'
53
+ select2 = pn.widgets.RadioButtonGroup(value="Normal", options=["Bionic", "Normal"], name='String', align='center', button_type='default')
54
+
55
+ # Define a callback to display the selected value
56
+ def display_selected_value(event):
57
+ print(f"Selected value: {event.new}", flush=True)
58
+
59
+ # Attach the callback to the value_change event of the RadioButtonGroup
60
+ select.param.watch(display_selected_value, 'value')
61
+ select2.param.watch(display_selected_value, 'value')
62
+
63
+ # Create a column layout for the buttons inside the toggleable sidebar
64
+ buttons = pn.Column(
65
+ button1, button2, button3,
66
+ pn.Column(
67
+ select,
68
+ select2,
69
+ visible=True,
70
+ sizing_mode='stretch_width',
71
+ ),
72
+ css_classes=['hidden']
73
+ )
74
+
75
+ # Define a callback to show/hide the select buttons when "Settings" button is toggled
76
+ def toggle_settings(event):
77
+ buttons[-1].visible = event.new
78
+
79
+ button3.param.watch(toggle_settings, 'value')
80
+
81
+ # Attach the reset_main_panel function to the on_click event of button1
82
+ button1.on_click(reset_main_panel)
83
+
84
+ # Function to update the main panel content when button3 is pressed
85
def update_main_panel_button3(event):
    """Replace the main panel content with a fixed message for button3.

    Args:
        event: Click event from button3; not inspected.
    """
    body = main_body_instance.main_body
    # In-place replacement, followed by a manual trigger so the view refreshes.
    body[:] = [pn.pane.Markdown("You pressed button3!")]
    main_body_instance.param.trigger('main_body')
89
+
90
+ # Attach update_main_panel_button3 to the on_click event of button3
91
+ button3.on_click(update_main_panel_button3)
92
+
93
+ # Create the main panel with some initial content
94
+ main_panel = pn.Column(pn.pane.Markdown("# Main Panel"))
95
+
96
+ # Define a callback to show/hide a floating date picker when a toggle changes
97
# FloatPanel opened by toggle_date_picker, or None when none is open.
_date_picker_panel = None


def toggle_date_picker(event):
    """Open a floating date picker when toggled on and close it when toggled off.

    The panel is kept in a module-level variable so that the "off" event can
    reach the panel created by the earlier "on" event.

    Args:
        event: Value-change event; ``event.new`` is truthy to show the picker.
    """
    global _date_picker_panel
    if event.new:
        # NOTE(review): `date_picker` is not defined in the visible part of
        # this file — confirm it exists before wiring this callback.
        # NOTE(review): verify 'bottom_left' is an accepted FloatPanel position.
        _date_picker_panel = pn.layout.FloatPanel(
            date_picker,
            name="Free Floating FloatPanel",
            contained=False,
            position='bottom_left',
        )
        _date_picker_panel.show()
    elif _date_picker_panel is not None:
        # FloatPanel is closed through its `status` parameter.
        _date_picker_panel.status = 'closed'
        _date_picker_panel = None
108
+
109
+ # List to store the entered options
110
+ loaded_dict = TLDR.load_categories()
111
+
112
+ arxiv_tags = list(loaded_dict.keys())
113
+ entered_options = list(loaded_dict.values())
114
+
115
+ # Create buttons for the header
116
+ header_buttons = pn.Row(sizing_mode='stretch_width', css_classes=['header-buttons'])
117
+
118
+ # Search-icon button that adds the selected topics to the header
119
+ add_to_header_button = pn.widgets.Button(name="", icon='search', icon_size='1.5em', button_style = 'outline',button_type = 'light')
120
+
121
+ # List to store the names of buttons to be added to the header
122
+ buttons_to_add = []
123
+
124
+ paper_list = []
125
+
126
+ # Callback for adding selected options to the list
127
def add_to_header(event):
    """Add the topics chosen in ``filter_list`` to the header and fetch their papers.

    Args:
        event: Click event from ``add_to_header_button``; not inspected.
    """
    for option in filter_list.value:
        if option in buttons_to_add:
            continue  # already shown in the header
        # Fetch first: if the search raises, neither list is touched, so
        # buttons_to_add and paper_list stay index-aligned.
        papers = TLDR.run_code(option, loaded_dict)
        buttons_to_add.append(option)
        paper_list.append(papers)

    if filter_list.value:
        filter_list.value = []  # clear the selection once it has been added

    # update_header() rebuilds the header buttons, prunes the filter options
    # and assigns the refreshed main body, so no manual trigger is needed.
    update_header()
143
+
144
+ add_to_header_button.on_click(add_to_header)
145
+
146
+ # Function to update the header layout with the newly created buttons
147
def update_header():
    """Rebuild the header buttons, filter options and main body from the selection.

    Reads the module-level ``buttons_to_add`` and ``paper_list`` and writes to
    ``header_buttons``, ``filter_list.options`` and
    ``main_body_instance.main_body``.
    """
    header_buttons.clear()

    for button_name in buttons_to_add:
        header_button = pn.widgets.Button(name=button_name, button_type='primary', button_style='outline')
        # Clicking a header button removes that topic again.
        header_button.on_click(remove_from_header)
        header_buttons.append(header_button)

    # Offer only the topics that are not already in the header.
    filter_list.options = [option for option in entered_options if option not in buttons_to_add]
    main_body_instance.main_body = TLDR.update_mainTLDR(buttons_to_add, paper_list)
159
+
160
+ # Callback to remove the clicked header button
161
def remove_from_header(event):
    """Remove the clicked topic from the header together with its papers.

    Args:
        event: Click event; ``event.obj`` is the header button that was clicked.
    """
    button = event.obj
    if button.name in buttons_to_add:
        # The two lists are parallel, so drop the same position from both.
        position = buttons_to_add.index(button.name)
        del paper_list[position]
        del buttons_to_add[position]

    # update_header() recomputes filter_list.options and assigns the new main
    # body itself, so neither has to be repeated here.
    update_header()
172
+
173
+ # MultiChoice widget to display the filter options with delete buttons
174
+ filter_list = pn.widgets.MultiChoice(
175
+ name='',
176
+ value=[],
177
+ options=entered_options,
178
+ margin=(20, 10),
179
+ sizing_mode='fixed',
180
+ solid=False,
181
+ styles={'background': '#f0f0f0'},
182
+ placeholder="Search Topics"
183
+ )
184
+
185
+ # Layout using Template
186
+ template = pn.template.FastListTemplate(
187
+ title="EasySciRead",
188
+ header=[
189
+ pn.Row(
190
+ header_buttons, width=750, sizing_mode='stretch_width'
191
+ ),
192
+ pn.Row(filter_list, width=250),
193
+ pn.Row(add_to_header_button, width=55)
194
+ ],
195
+ main=main_body_instance.update,
196
+ sidebar=[buttons],
197
+ accent_base_color="#88d8b0",
198
+ header_background="#FFFFFF",
199
+ header_color="#000000",
200
+ text_align='center',
201
+ sidebar_width=160,
202
+ sizing_mode='stretch_both'
203
+ )
204
+
205
+ # Update the header to populate the header buttons initially
206
+ update_header()
207
+
208
+ # Run the app
209
+ template.servable()
categories_arxiv.txt ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "astro-ph.CO": "Cosmology and Nongalactic Astrophysics",
3
+ "astro-ph.EP": "Earth and Planetary Astrophysics",
4
+ "astro-ph.GA": "Astrophysics of Galaxies",
5
+ "astro-ph.HE": "High Energy Astrophysical Phenomena",
6
+ "astro-ph.IM": "Instrumentation and Methods for Astrophysics",
7
+ "astro-ph.SR": "Solar and Stellar Astrophysics",
8
+ "cond-mat.dis-nn": "Disordered Systems and Neural Networks",
9
+ "cond-mat.mes-hall": "Mesoscale and Nanoscale Physics",
10
+ "cond-mat.mtrl-sci": "Materials Science",
11
+ "cond-mat.other": "Other Condensed Matter",
12
+ "cond-mat.quant-gas": "Quantum Gases",
13
+ "cond-mat.soft": "Soft Condensed Matter",
14
+ "cond-mat.stat-mech": "Statistical Mechanics",
15
+ "cond-mat.str-el": "Strongly Correlated Electrons",
16
+ "cond-mat.supr-con": "Superconductivity",
17
+ "cs.AI": "Artificial Intelligence",
18
+ "cs.AR": "Hardware Architecture",
19
+ "cs.CC": "Computational Complexity",
20
+ "cs.CE": "Computational Engineering, Finance, and Science",
21
+ "cs.CG": "Computational Geometry",
22
+ "cs.CL": "Computation and Language",
23
+ "cs.CR": "Cryptography and Security",
24
+ "cs.CV": "Computer Vision and Pattern Recognition",
25
+ "cs.CY": "Computers and Society",
26
+ "cs.DB": "Databases",
27
+ "cs.DC": "Distributed, Parallel, and Cluster Computing",
28
+ "cs.DL": "Digital Libraries",
29
+ "cs.DM": "Discrete Mathematics",
30
+ "cs.DS": "Data Structures and Algorithms",
31
+ "cs.ET": "Emerging Technologies",
32
+ "cs.FL": "Formal Languages and Automata Theory",
33
+ "cs.GL": "General Literature",
34
+ "cs.GR": "Graphics",
35
+ "cs.GT": "Computer Science and Game Theory",
36
+ "cs.HC": "Human-Computer Interaction",
37
+ "cs.IR": "Information Retrieval",
38
+ "cs.IT": "Information Theory",
39
+ "cs.LG": "Machine Learning",
40
+ "cs.LO": "Logic in Computer Science",
41
+ "cs.MA": "Multiagent Systems",
42
+ "cs.MM": "Multimedia",
43
+ "cs.MS": "Mathematical Software",
44
+ "cs.NA": "Numerical Analysis",
45
+ "cs.NE": "Neural and Evolutionary Computing",
46
+ "cs.NI": "Networking and Internet Architecture",
47
+ "cs.OH": "Other Computer Science",
48
+ "cs.OS": "Operating Systems",
49
+ "cs.PF": "Performance",
50
+ "cs.PL": "Programming Languages",
51
+ "cs.RO": "Robotics",
52
+ "cs.SC": "Symbolic Computation",
53
+ "cs.SD": "Sound",
54
+ "cs.SE": "Software Engineering",
55
+ "cs.SI": "Social and Information Networks",
56
+ "cs.SY": "Systems and Control",
57
+ "econ.EM": "Econometrics",
58
+ "eess.AS": "Audio and Speech Processing",
59
+ "eess.IV": "Image and Video Processing",
60
+ "eess.SP": "Signal Processing",
61
+ "gr-qc": "General Relativity and Quantum Cosmology",
62
+ "hep-ex": "High Energy Physics - Experiment",
63
+ "hep-lat": "High Energy Physics - Lattice",
64
+ "hep-ph": "High Energy Physics - Phenomenology",
65
+ "hep-th": "High Energy Physics - Theory",
66
+ "math.AC": "Commutative Algebra",
67
+ "math.AG": "Algebraic Geometry",
68
+ "math.AP": "Analysis of PDEs",
69
+ "math.AT": "Algebraic Topology",
70
+ "math.CA": "Classical Analysis and ODEs",
71
+ "math.CO": "Combinatorics",
72
+ "math.CT": "Category Theory",
73
+ "math.CV": "Complex Variables",
74
+ "math.DG": "Differential Geometry",
75
+ "math.DS": "Dynamical Systems",
76
+ "math.FA": "Functional Analysis",
77
+ "math.GM": "General Mathematics",
78
+ "math.GN": "General Topology",
79
+ "math.GR": "Group Theory",
80
+ "math.GT": "Geometric Topology",
81
+ "math.HO": "History and Overview",
82
+ "math.IT": "Information Theory",
83
+ "math.KT": "K-Theory and Homology",
84
+ "math.LO": "Logic",
85
+ "math.MG": "Metric Geometry",
86
+ "math.MP": "Mathematical Physics",
87
+ "math.NA": "Numerical Analysis",
88
+ "math.NT": "Number Theory",
89
+ "math.OA": "Operator Algebras",
90
+ "math.OC": "Optimization and Control",
91
+ "math.PR": "Probability",
92
+ "math.QA": "Quantum Algebra",
93
+ "math.RA": "Rings and Algebras",
94
+ "math.RT": "Representation Theory",
95
+ "math.SG": "Symplectic Geometry",
96
+ "math.SP": "Spectral Theory",
97
+ "math.ST": "Statistics Theory",
98
+ "math-ph": "Mathematical Physics",
99
+ "nlin.AO": "Adaptation and Self-Organizing Systems",
100
+ "nlin.CD": "Chaotic Dynamics",
101
+ "nlin.CG": "Cellular Automata and Lattice Gases",
102
+ "nlin.PS": "Pattern Formation and Solitons",
103
+ "nlin.SI": "Exactly Solvable and Integrable Systems",
104
+ "nucl-ex": "Nuclear Experiment",
105
+ "nucl-th": "Nuclear Theory",
106
+ "physics.acc-ph": "Accelerator Physics",
107
+ "physics.ao-ph": "Atmospheric and Oceanic Physics",
108
+ "physics.app-ph": "Applied Physics",
109
+ "physics.atom-ph": "Atomic Physics",
110
+ "physics.atm-clus": "Atomic and Molecular Clusters",
111
+ "physics.bio-ph": "Biological Physics",
112
+ "physics.chem-ph": "Chemical Physics",
113
+ "physics.class-ph": "Classical Physics",
114
+ "physics.comp-ph": "Computational Physics",
115
+ "physics.data-an": "Data Analysis, Statistics and Probability",
116
+ "physics.ed-ph": "Physics Education",
117
+ "physics.flu-dyn": "Fluid Dynamics",
118
+ "physics.gen-ph": "General Physics",
119
+ "physics.geo-ph": "Geophysics",
120
+ "physics.hist-ph": "History and Philosophy of Physics",
121
+ "physics.ins-det": "Instrumentation and Detectors",
122
+ "physics.med-ph": "Medical Physics",
123
+ "physics.optics": "Optics",
124
+ "physics.plasm-ph": "Plasma Physics",
125
+ "physics.pop-ph": "Popular Physics",
126
+ "physics.soc-ph": "Physics and Society",
127
+ "physics.space-ph": "Space Physics",
128
+ "q-bio.BM": "Biomolecules",
129
+ "q-bio.CB": "Cell Behavior",
130
+ "q-bio.GN": "Genomics",
131
+ "q-bio.MN": "Molecular Networks",
132
+ "q-bio.NC": "Neurons and Cognition",
133
+ "q-bio.OT": "Other Quantitative Biology",
134
+ "q-bio.PE": "Populations and Evolution",
135
+ "q-bio.QM": "Quantitative Methods",
136
+ "q-bio.SC": "Subcellular Processes",
137
+ "q-bio.TO": "Tissues and Organs",
138
+ "q-fin.CP": "Computational Finance",
139
+ "q-fin.EC": "Economics",
140
+ "q-fin.GN": "General Finance",
141
+ "q-fin.MF": "Mathematical Finance",
142
+ "q-fin.PM": "Portfolio Management",
143
+ "q-fin.PR": "Pricing of Securities",
144
+ "q-fin.RM": "Risk Management",
145
+ "q-fin.ST": "Statistical Finance",
146
+ "q-fin.TR": "Trading and Market Microstructure",
147
+ "quant-ph": "Quantum Physics",
148
+ "stat.AP": "Applications",
149
+ "stat.CO": "Computation",
150
+ "stat.ME": "Methodology",
151
+ "stat.ML": "Machine Learning",
152
+ "stat.OT": "Other Statistics",
153
+ "stat.TH": "Statistics Theory"
154
+ }
categories_general.txt ADDED
@@ -0,0 +1,334 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Multidisciplinary
2
+ General Agricultural and Biological Sciences
3
+ Agricultural and Biological Sciences (miscellaneous)
4
+ Agronomy and Crop Science
5
+ Animal Science and Zoology
6
+ Aquatic Science
7
+ Ecology, Evolution, Behavior and Systematics
8
+ Food Science
9
+ Forestry
10
+ Horticulture
11
+ Insect Science
12
+ Plant Science
13
+ Soil Science
14
+ General Arts and Humanities
15
+ Arts and Humanities (miscellaneous)
16
+ History
17
+ Language and Linguistics
18
+ Archeology (arts and humanities)
19
+ Classics
20
+ Conservation
21
+ History and Philosophy of Science
22
+ Literature and Literary Theory
23
+ Museology
24
+ Music
25
+ Philosophy
26
+ Religious Studies
27
+ Visual Arts and Performing Arts
28
+ General Biochemistry, Genetics and Molecular Biology
29
+ Biochemistry, Genetics and Molecular Biology (miscellaneous)
30
+ Aging
31
+ Biochemistry
32
+ Biophysics
33
+ Biotechnology
34
+ Cancer Research
35
+ Cell Biology
36
+ Clinical Biochemistry
37
+ Developmental Biology
38
+ Endocrinology
39
+ Genetics
40
+ Molecular Biology
41
+ Molecular Medicine
42
+ Physiology
43
+ Structural Biology
44
+ General Business, Management and Accounting
45
+ Business, Management and Accounting (miscellaneous)
46
+ Accounting
47
+ Business and International Management
48
+ Management Information Systems
49
+ Management of Technology and Innovation
50
+ Marketing
51
+ Organizational Behavior and Human Resource Management
52
+ Strategy and Management
53
+ Tourism, Leisure and Hospitality Management
54
+ Industrial Relations
55
+ General Chemical Engineering
56
+ Chemical Engineering (miscellaneous)
57
+ Bioengineering
58
+ Catalysis
59
+ Chemical Health and Safety
60
+ Colloid and Surface Chemistry
61
+ Filtration and Separation
62
+ Fluid Flow and Transfer Processes
63
+ Process Chemistry and Technology
64
+ General Chemistry
65
+ Chemistry (miscellaneous)
66
+ Analytical Chemistry
67
+ Electrochemistry
68
+ Inorganic Chemistry
69
+ Organic Chemistry
70
+ Physical and Theoretical Chemistry
71
+ Spectroscopy
72
+ General Computer Science
73
+ Computer Science (miscellaneous)
74
+ Artificial Intelligence
75
+ Computational Theory and Mathematics
76
+ Computer Graphics and Computer-Aided Design
77
+ Computer Networks and Communications
78
+ Computer Science Applications
79
+ Computer Vision and Pattern Recognition
80
+ Hardware and Architecture
81
+ Human-Computer Interaction
82
+ Information Systems
83
+ Signal Processing
84
+ Software
85
+ General Decision Sciences
86
+ Decision Sciences (miscellaneous)
87
+ Information Systems and Management
88
+ Management Science and Operations Research
89
+ Statistics, Probability and Uncertainty
90
+ General Earth and Planetary Sciences
91
+ Earth and Planetary Sciences (miscellaneous)
92
+ Atmospheric Science
93
+ Computers in Earth Sciences
94
+ Earth-Surface Processes
95
+ Economic Geology
96
+ Geochemistry and Petrology
97
+ Geology
98
+ Geophysics
99
+ Geotechnical Engineering and Engineering Geology
100
+ Oceanography
101
+ Paleontology
102
+ Space and Planetary Science
103
+ Stratigraphy
104
+ General Economics, Econometrics and Finance
105
+ Economics, Econometrics and Finance (miscellaneous)
106
+ Economics and Econometrics
107
+ Finance
108
+ General Energy
109
+ Energy (miscellaneous)
110
+ Energy Engineering and Power Technology
111
+ Fuel Technology
112
+ Nuclear Energy and Engineering
113
+ Renewable Energy, Sustainability and the Environment
114
+ General Engineering
115
+ Engineering (miscellaneous)
116
+ Aerospace Engineering
117
+ Automotive Engineering
118
+ Biomedical Engineering
119
+ Civil and Structural Engineering
120
+ Computational Mechanics
121
+ Control and Systems Engineering
122
+ Electrical and Electronic Engineering
123
+ Industrial and Manufacturing Engineering
124
+ Mechanical Engineering
125
+ Mechanics of Materials
126
+ Ocean Engineering
127
+ Safety, Risk, Reliability and Quality
128
+ Media Technology
129
+ Building and Construction
130
+ Architecture
131
+ General Environmental Science
132
+ Environmental Science (miscellaneous)
133
+ Ecological Modeling
134
+ Ecology
135
+ Environmental Chemistry
136
+ Environmental Engineering
137
+ Global and Planetary Change
138
+ Health, Toxicology and Mutagenesis
139
+ Management, Monitoring, Policy and Law
140
+ Nature and Landscape Conservation
141
+ Pollution
142
+ Waste Management and Disposal
143
+ Water Science and Technology
144
+ General Immunology and Microbiology
145
+ Immunology and Microbiology (miscellaneous)
146
+ Applied Microbiology and Biotechnology
147
+ Immunology
148
+ Microbiology
149
+ Parasitology
150
+ Virology
151
+ General Materials Science
152
+ Materials Science (miscellaneous)
153
+ Biomaterials
154
+ Ceramics and Composites
155
+ Electronic, Optical and Magnetic Materials
156
+ Materials Chemistry
157
+ Metals and Alloys
158
+ Polymers and Plastics
159
+ Surfaces, Coatings and Films
160
+ General Mathematics
161
+ Mathematics (miscellaneous)
162
+ Algebra and Number Theory
163
+ Analysis
164
+ Applied Mathematics
165
+ Computational Mathematics
166
+ Control and Optimization
167
+ Discrete Mathematics and Combinatorics
168
+ Geometry and Topology
169
+ Logic
170
+ Mathematical Physics
171
+ Modeling and Simulation
172
+ Numerical Analysis
173
+ Statistics and Probability
174
+ Theoretical Computer Science
175
+ General Medicine
176
+ Medicine (miscellaneous)
177
+ Anatomy
178
+ Anesthesiology and Pain Medicine
179
+ Biochemistry (medical)
180
+ Cardiology and Cardiovascular Medicine
181
+ Critical Care and Intensive Care Medicine
182
+ Complementary and Alternative Medicine
183
+ Dermatology
184
+ Drug Guides
185
+ Embryology
186
+ Emergency Medicine
187
+ Endocrinology, Diabetes and Metabolism
188
+ Epidemiology
189
+ Family Practice
190
+ Gastroenterology
191
+ Genetics (clinical)
192
+ Geriatrics and Gerontology
193
+ Health Informatics
194
+ Health Policy
195
+ Hematology
196
+ Hepatology
197
+ Histology
198
+ Immunology and Allergy
199
+ Internal Medicine
200
+ Infectious Diseases
201
+ Microbiology (medical)
202
+ Nephrology
203
+ Neurology (clinical)
204
+ Obstetrics and Gynecology
205
+ Oncology
206
+ Ophthalmology
207
+ Orthopedics and Sports Medicine
208
+ Otorhinolaryngology
209
+ Pathology and Forensic Medicine
210
+ Pediatrics, Perinatology and Child Health
211
+ Pharmacology (medical)
212
+ Physiology (medical)
213
+ Psychiatry and Mental Health
214
+ Public Health, Environmental and Occupational Health
215
+ Pulmonary and Respiratory Medicine
216
+ Radiology, Nuclear Medicine and Imaging
217
+ Rehabilitation
218
+ Reproductive Medicine
219
+ Reviews and References (medical)
220
+ Rheumatology
221
+ Surgery
222
+ Transplantation
223
+ Urology
224
+ General Neuroscience
225
+ Neuroscience (miscellaneous)
226
+ Behavioral Neuroscience
227
+ Biological Psychiatry
228
+ Cellular and Molecular Neuroscience
229
+ Cognitive Neuroscience
230
+ Developmental Neuroscience
231
+ Endocrine and Autonomic Systems
232
+ Neurology
233
+ Sensory Systems
234
+ General Nursing
235
+ Nursing (miscellaneous)
236
+ Advanced and Specialized Nursing
237
+ Assessment and Diagnosis
238
+ Care Planning
239
+ Community and Home Care
240
+ Critical Care Nursing
241
+ Emergency Nursing
242
+ Fundamentals and Skills
243
+ Gerontology
244
+ Issues, Ethics and Legal Aspects
245
+ Leadership and Management
246
+ LPN and LVN
247
+ Maternity and Midwifery
248
+ Medical and Surgical Nursing
249
+ Nurse Assisting
250
+ Nutrition and Dietetics
251
+ Oncology (nursing)
252
+ Pathophysiology
253
+ Pediatrics
254
+ Pharmacology (nursing)
255
+ Psychiatric Mental Health
256
+ Research and Theory
257
+ Review and Exam Preparation
258
+ General Pharmacology, Toxicology and Pharmaceutics
259
+ Pharmacology, Toxicology and Pharmaceutics (miscellaneous)
260
+ Drug Discovery
261
+ Pharmaceutical Science
262
+ Pharmacology
263
+ Toxicology
264
+ General Physics and Astronomy
265
+ Physics and Astronomy (miscellaneous)
266
+ Acoustics and Ultrasonics
267
+ Astronomy and Astrophysics
268
+ Condensed Matter Physics
269
+ Instrumentation
270
+ Nuclear and High Energy Physics
271
+ Atomic and Molecular Physics, and Optics
272
+ Radiation
273
+ Statistical and Nonlinear Physics
274
+ Surfaces and Interfaces
275
+ General Psychology
276
+ Psychology (miscellaneous)
277
+ Applied Psychology
278
+ Clinical Psychology
279
+ Developmental and Educational Psychology
280
+ Experimental and Cognitive Psychology
281
+ Neuropsychology and Physiological Psychology
282
+ Social Psychology
283
+ General Social Sciences
284
+ Social Sciences (miscellaneous)
285
+ Archeology
286
+ Development
287
+ Education
288
+ Geography, Planning and Development
289
+ Health (social science)
290
+ Human Factors and Ergonomics
291
+ Law
292
+ Library and Information Sciences
293
+ Linguistics and Language
294
+ Safety Research
295
+ Sociology and Political Science
296
+ Transportation
297
+ Anthropology
298
+ Communication
299
+ Cultural Studies
300
+ Demography
301
+ Gender Studies
302
+ Life-span and Life-course Studies
303
+ Political Science and International Relations
304
+ Public Administration
305
+ Urban Studies
306
+ General Veterinary
307
+ Veterinary (miscellaneous)
308
+ Equine
309
+ Food Animals
310
+ Small Animals
311
+ General Dentistry
312
+ Dentistry (miscellaneous)
313
+ Dental Assisting
314
+ Dental Hygiene
315
+ Oral Surgery
316
+ Orthodontics
317
+ Periodontics
318
+ General Health Professions
319
+ Health Professions (miscellaneous)
320
+ Chiropractics
321
+ Complementary and Manual Therapy
322
+ Emergency Medical Services
323
+ Health Information Management
324
+ Medical Assisting and Transcription
325
+ Medical Laboratory Technology
326
+ Medical Terminology
327
+ Occupational Therapy
328
+ Optometry
329
+ Pharmacy
330
+ Physical Therapy, Sports Therapy and Rehabilitation
331
+ Podiatry
332
+ Radiological and Ultrasound Technology
333
+ Respiratory Care
334
+ Speech and Hearing
home-icon.png ADDED
requirements.txt CHANGED
@@ -1,2 +1,4 @@
1
- hvplot
2
  panel
 
 
 
 
 
1
  panel
2
+ hvplot
3
+ arxiv
4
+ crossrefapi
src/__pycache__/chains.cpython-311.pyc ADDED
Binary file (6.27 kB). View file
 
src/__pycache__/constants.cpython-311.pyc ADDED
Binary file (9.16 kB). View file
 
src/__pycache__/search.cpython-311.pyc ADDED
Binary file (5.51 kB). View file
 
src/chains.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.document_loaders import PyPDFLoader, PyPDFDirectoryLoader
2
+ from langchain.indexes import VectorstoreIndexCreator
3
+ from langchain.llms import AzureOpenAI, OpenAI
4
+ from langchain.text_splitter import CharacterTextSplitter
5
+ from langchain.embeddings import OpenAIEmbeddings
6
+ from langchain.vectorstores import Chroma
7
+ from langchain.chains import RetrievalQA, ConversationalRetrievalChain, RetrievalQAWithSourcesChain
8
+ from langchain.chains.question_answering import load_qa_chain
9
+ from langchain.memory import ConversationBufferMemory
10
+
11
+ from langchain.chat_models import AzureChatOpenAI
12
+
13
+
14
+ import os
15
+ import openai
16
+ os.environ['CWD'] = os.getcwd()
17
+
18
+ # for testing
19
+ import src.constants as constants
20
+ # import constants
21
+ os.environ['OPENAI_API_KEY'] = constants.AZURE_OPENAI_KEY_FR
22
+ os.environ['OPENAI_API_BASE'] = constants.AZURE_OPENAI_ENDPOINT_FR
23
+ os.environ['OPENAI_API_VERSION'] = "2023-05-15"
24
+ os.environ['OPENAI_API_TYPE'] = "azure"
25
+ # openai.api_type = "azure"
26
+ # openai.api_base = constants.AZURE_OPENAI_ENDPOINT_FR
27
+ # openai.api_version = "2023-05-15"
28
+ openai.api_key = constants.OPEN_AI_KEY
29
+
30
def get_document_key(doc):
    """Build a unique string key for a loaded PDF page.

    The key is the document's ``source`` metadata followed by ``_page_`` and
    its ``page`` metadata, e.g. ``paper.pdf_page_3``.
    """
    meta = doc.metadata
    # str.join keeps the original requirement that ``source`` is already a str.
    return '_page_'.join((meta['source'], str(meta['page'])))
32
+
33
+
34
+ import os
35
+ from typing import Optional
36
+
37
class PDFEmbeddings():
    """Embed a directory of PDFs into a persistent Chroma store and query it.

    The constructor wires together a text splitter, an Azure-hosted OpenAI
    embedding model, a Chroma vector store persisted under
    ``constants.persistent_dir`` and a similarity retriever over that store.
    The query methods build a LangChain chain on top of the retriever.
    """

    def __init__(self, path: Optional[str] = None):
        """Set up the splitter, embeddings, vector store, retriever and memory.

        Args:
            path: Directory containing the PDF files. Defaults to an
                ``archive`` folder under the directory stored in the ``CWD``
                environment variable, or under the current working directory
                when ``CWD`` is not set.
        """
        # Fall back to os.getcwd() so a missing ``CWD`` variable does not raise KeyError.
        base_dir = os.environ.get('CWD', os.getcwd())
        self.path = path or os.path.join(base_dir, 'archive')
        self.text_splitter = CharacterTextSplitter(separator="\n", chunk_size=1000, chunk_overlap=200)
        self.embeddings = OpenAIEmbeddings(
            deployment=constants.AZURE_ENGINE_NAME_US,
            chunk_size=1,
            openai_api_key=constants.AZURE_OPENAI_KEY_US,
            openai_api_base=constants.AZURE_OPENAI_ENDPOINT_US,
            openai_api_version="2023-05-15",
            openai_api_type="azure",
        )
        self.vectorstore = Chroma(persist_directory=constants.persistent_dir, embedding_function=self.embeddings)
        # Fixed: the keyword was misspelled ``search_kwags``, so the intended
        # ``k=5`` was never passed under the name the retriever reads.
        self.retriever = self.vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 5})
        # NOTE(review): ConversationalRetrievalChain examples use
        # memory_key='chat_history'; confirm 'pdf_memory' is accepted by
        # conversational_search before relying on it.
        self.memory = ConversationBufferMemory(memory_key='pdf_memory', return_messages=True)

    def process_documents(self):
        """Load every PDF under ``self.path``, split it and add the chunks to the vector store."""
        loader = PyPDFDirectoryLoader(self.path)
        documents = loader.load()
        chunks = self.text_splitter.split_documents(documents)
        self.vectorstore.add_documents(chunks)

    def search(self, query: str, chain_type: str = "stuff"):
        """Answer ``query`` with a RetrievalQA chain and return the raw chain output.

        The chain is built with ``return_source_documents=True``; the
        ``__main__`` block of this module reads the ``'result'`` and
        ``'source_documents'`` entries of the returned mapping.
        """
        llm = AzureChatOpenAI(deployment_name=constants.AZURE_ENGINE_NAME_FR, temperature=0)
        chain = RetrievalQA.from_chain_type(
            llm=llm,
            retriever=self.retriever,
            chain_type=chain_type,
            return_source_documents=True,
        )
        return chain({"query": query})

    def conversational_search(self, query: str, chain_type: str = "stuff"):
        """Answer ``query`` with a conversational retrieval chain using ``self.memory``.

        Returns:
            The ``'answer'`` entry of the chain output.
        """
        llm = AzureChatOpenAI(deployment_name=constants.AZURE_ENGINE_NAME_FR)
        chain = ConversationalRetrievalChain.from_llm(
            llm=llm,
            retriever=self.retriever,
            memory=self.memory,
            chain_type=chain_type,
        )
        result = chain({"question": query})
        return result['answer']

    def load_and_run_chain(self, query: str, chain_type: str = "stuff"):
        """Retrieve documents for ``query`` and run a plain QA chain over them.

        Returns:
            Whatever ``chain.run`` returns for the retrieved documents.
        """
        chain = load_qa_chain(llm=AzureChatOpenAI(deployment_name=constants.AZURE_ENGINE_NAME_FR), chain_type=chain_type)
        # Fixed: the retriever object itself used to be passed as
        # ``input_documents``; the chain needs the retrieved documents.
        documents = self.retriever.get_relevant_documents(query)
        return chain.run(input_documents=documents, question=query)
72
+
73
if __name__ == '__main__':
    # Manual smoke test: ask the indexed PDFs for follow-up search queries
    # and list the source files of the documents returned with the answer.
    embedder = PDFEmbeddings()
    # embedder.process_documents()  # This takes a while, so we only do it once
    prompt = "Give me a list of short relevant queries to look for papers related to the topics of the papers in the source documents."
    answer = embedder.search(prompt)
    print("\n\n", answer['result'], "\n")
    print("Source documents:")
    for source_doc in answer['source_documents']:
        print(source_doc.metadata['source'])
src/constants.py ADDED
@@ -0,0 +1,167 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

# Credentials must not be committed to the repository, so the API keys are
# read from environment variables of the same name (for example, secrets
# configured on the hosting platform). The keys that used to be hard-coded
# here remain visible in version-control history and should be revoked and
# rotated.
#
# The fallback is an empty string rather than None because src/chains.py
# assigns AZURE_OPENAI_KEY_FR into os.environ, which only accepts str values.
AZURE_OPENAI_KEY_FR = os.environ.get("AZURE_OPENAI_KEY_FR", "")
AZURE_OPENAI_ENDPOINT_FR = "https://openai-resource-team-11-france.openai.azure.com/"
AZURE_ENGINE_NAME_FR = "gpt35-team-11"

AZURE_OPENAI_KEY_US = os.environ.get("AZURE_OPENAI_KEY_US", "")
AZURE_OPENAI_ENDPOINT_US = "https://openai-resource-team-11-east-us.openai.azure.com/"
AZURE_ENGINE_NAME_US = "ada002-team-11"

OPEN_AI_KEY = os.environ.get("OPEN_AI_KEY", "")

# Directory handed to Chroma as its persist_directory.
persistent_dir = "db"
12
+
13
+ # Taxonomy dictionary arxiv
14
+ arxiv_taxonomy = {
15
+ "astro-ph.CO": "Cosmology and Nongalactic Astrophysics",
16
+ "astro-ph.EP": "Earth and Planetary Astrophysics",
17
+ "astro-ph.GA": "Astrophysics of Galaxies",
18
+ "astro-ph.HE": "High Energy Astrophysical Phenomena",
19
+ "astro-ph.IM": "Instrumentation and Methods for Astrophysics",
20
+ "astro-ph.SR": "Solar and Stellar Astrophysics",
21
+ "cond-mat.dis-nn": "Disordered Systems and Neural Networks",
22
+ "cond-mat.mes-hall": "Mesoscale and Nanoscale Physics",
23
+ "cond-mat.mtrl-sci": "Materials Science",
24
+ "cond-mat.other": "Other Condensed Matter",
25
+ "cond-mat.quant-gas": "Quantum Gases",
26
+ "cond-mat.soft": "Soft Condensed Matter",
27
+ "cond-mat.stat-mech": "Statistical Mechanics",
28
+ "cond-mat.str-el": "Strongly Correlated Electrons",
29
+ "cond-mat.supr-con": "Superconductivity",
30
+ "cs.AI": "Artificial Intelligence",
31
+ "cs.AR": "Hardware Architecture",
32
+ "cs.CC": "Computational Complexity",
33
+ "cs.CE": "Computational Engineering, Finance, and Science",
34
+ "cs.CG": "Computational Geometry",
35
+ "cs.CL": "Computation and Language",
36
+ "cs.CR": "Cryptography and Security",
37
+ "cs.CV": "Computer Vision and Pattern Recognition",
38
+ "cs.CY": "Computers and Society",
39
+ "cs.DB": "Databases",
40
+ "cs.DC": "Distributed, Parallel, and Cluster Computing",
41
+ "cs.DL": "Digital Libraries",
42
+ "cs.DM": "Discrete Mathematics",
43
+ "cs.DS": "Data Structures and Algorithms",
44
+ "cs.ET": "Emerging Technologies",
45
+ "cs.FL": "Formal Languages and Automata Theory",
46
+ "cs.GL": "General Literature",
47
+ "cs.GR": "Graphics",
48
+ "cs.GT": "Computer Science and Game Theory",
49
+ "cs.HC": "Human-Computer Interaction",
50
+ "cs.IR": "Information Retrieval",
51
+ "cs.IT": "Information Theory",
52
+ "cs.LG": "Machine Learning",
53
+ "cs.LO": "Logic in Computer Science",
54
+ "cs.MA": "Multiagent Systems",
55
+ "cs.MM": "Multimedia",
56
+ "cs.MS": "Mathematical Software",
57
+ "cs.NA": "Numerical Analysis",
58
+ "cs.NE": "Neural and Evolutionary Computing",
59
+ "cs.NI": "Networking and Internet Architecture",
60
+ "cs.OH": "Other Computer Science",
61
+ "cs.OS": "Operating Systems",
62
+ "cs.PF": "Performance",
63
+ "cs.PL": "Programming Languages",
64
+ "cs.RO": "Robotics",
65
+ "cs.SC": "Symbolic Computation",
66
+ "cs.SD": "Sound",
67
+ "cs.SE": "Software Engineering",
68
+ "cs.SI": "Social and Information Networks",
69
+ "cs.SY": "Systems and Control",
70
+ "econ.EM": "Econometrics",
71
+ "eess.AS": "Audio and Speech Processing",
72
+ "eess.IV": "Image and Video Processing",
73
+ "eess.SP": "Signal Processing",
74
+ "gr-qc": "General Relativity and Quantum Cosmology",
75
+ "hep-ex": "High Energy Physics - Experiment",
76
+ "hep-lat": "High Energy Physics - Lattice",
77
+ "hep-ph": "High Energy Physics - Phenomenology",
78
+ "hep-th": "High Energy Physics - Theory",
79
+ "math.AC": "Commutative Algebra",
80
+ "math.AG": "Algebraic Geometry",
81
+ "math.AP": "Analysis of PDEs",
82
+ "math.AT": "Algebraic Topology",
83
+ "math.CA": "Classical Analysis and ODEs",
84
+ "math.CO": "Combinatorics",
85
+ "math.CT": "Category Theory",
86
+ "math.CV": "Complex Variables",
87
+ "math.DG": "Differential Geometry",
88
+ "math.DS": "Dynamical Systems",
89
+ "math.FA": "Functional Analysis",
90
+ "math.GM": "General Mathematics",
91
+ "math.GN": "General Topology",
92
+ "math.GR": "Group Theory",
93
+ "math.GT": "Geometric Topology",
94
+ "math.HO": "History and Overview",
95
+ "math.IT": "Information Theory",
96
+ "math.KT": "K-Theory and Homology",
97
+ "math.LO": "Logic",
98
+ "math.MG": "Metric Geometry",
99
+ "math.MP": "Mathematical Physics",
100
+ "math.NA": "Numerical Analysis",
101
+ "math.NT": "Number Theory",
102
+ "math.OA": "Operator Algebras",
103
+ "math.OC": "Optimization and Control",
104
+ "math.PR": "Probability",
105
+ "math.QA": "Quantum Algebra",
106
+ "math.RA": "Rings and Algebras",
107
+ "math.RT": "Representation Theory",
108
+ "math.SG": "Symplectic Geometry",
109
+ "math.SP": "Spectral Theory",
110
+ "math.ST": "Statistics Theory",
111
+ "math-ph": "Mathematical Physics",
112
+ "nlin.AO": "Adaptation and Self-Organizing Systems",
113
+ "nlin.CD": "Chaotic Dynamics",
114
+ "nlin.CG": "Cellular Automata and Lattice Gases",
115
+ "nlin.PS": "Pattern Formation and Solitons",
116
+ "nlin.SI": "Exactly Solvable and Integrable Systems",
117
+ "nucl-ex": "Nuclear Experiment",
118
+ "nucl-th": "Nuclear Theory",
119
+ "physics.acc-ph": "Accelerator Physics",
120
+ "physics.ao-ph": "Atmospheric and Oceanic Physics",
121
+ "physics.app-ph": "Applied Physics",
122
+ "physics.atom-ph": "Atomic Physics",
123
+ "physics.atm-clus": "Atomic and Molecular Clusters",
124
+ "physics.bio-ph": "Biological Physics",
125
+ "physics.chem-ph": "Chemical Physics",
126
+ "physics.class-ph": "Classical Physics",
127
+ "physics.comp-ph": "Computational Physics",
128
+ "physics.data-an": "Data Analysis, Statistics and Probability",
129
+ "physics.ed-ph": "Physics Education",
130
+ "physics.flu-dyn": "Fluid Dynamics",
131
+ "physics.gen-ph": "General Physics",
132
+ "physics.geo-ph": "Geophysics",
133
+ "physics.hist-ph": "History and Philosophy of Physics",
134
+ "physics.ins-det": "Instrumentation and Detectors",
135
+ "physics.med-ph": "Medical Physics",
136
+ "physics.optics": "Optics",
137
+ "physics.plasm-ph": "Plasma Physics",
138
+ "physics.pop-ph": "Popular Physics",
139
+ "physics.soc-ph": "Physics and Society",
140
+ "physics.space-ph": "Space Physics",
141
+ "q-bio.BM": "Biomolecules",
142
+ "q-bio.CB": "Cell Behavior",
143
+ "q-bio.GN": "Genomics",
144
+ "q-bio.MN": "Molecular Networks",
145
+ "q-bio.NC": "Neurons and Cognition",
146
+ "q-bio.OT": "Other Quantitative Biology",
147
+ "q-bio.PE": "Populations and Evolution",
148
+ "q-bio.QM": "Quantitative Methods",
149
+ "q-bio.SC": "Subcellular Processes",
150
+ "q-bio.TO": "Tissues and Organs",
151
+ "q-fin.CP": "Computational Finance",
152
+ "q-fin.EC": "Economics",
153
+ "q-fin.GN": "General Finance",
154
+ "q-fin.MF": "Mathematical Finance",
155
+ "q-fin.PM": "Portfolio Management",
156
+ "q-fin.PR": "Pricing of Securities",
157
+ "q-fin.RM": "Risk Management",
158
+ "q-fin.ST": "Statistical Finance",
159
+ "q-fin.TR": "Trading and Market Microstructure",
160
+ "quant-ph": "Quantum Physics",
161
+ "stat.AP": "Applications",
162
+ "stat.CO": "Computation",
163
+ "stat.ME": "Methodology",
164
+ "stat.ML": "Machine Learning",
165
+ "stat.OT": "Other Statistics",
166
+ "stat.TH": "Statistics Theory"
167
+ }
src/constants_general.txt ADDED
@@ -0,0 +1,334 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Multidisciplinary
2
+ General Agricultural and Biological Sciences
3
+ Agricultural and Biological Sciences (miscellaneous)
4
+ Agronomy and Crop Science
5
+ Animal Science and Zoology
6
+ Aquatic Science
7
+ Ecology, Evolution, Behavior and Systematics
8
+ Food Science
9
+ Forestry
10
+ Horticulture
11
+ Insect Science
12
+ Plant Science
13
+ Soil Science
14
+ General Arts and Humanities
15
+ Arts and Humanities (miscellaneous)
16
+ History
17
+ Language and Linguistics
18
+ Archeology (arts and humanities)
19
+ Classics
20
+ Conservation
21
+ History and Philosophy of Science
22
+ Literature and Literary Theory
23
+ Museology
24
+ Music
25
+ Philosophy
26
+ Religious Studies
27
+ Visual Arts and Performing Arts
28
+ General Biochemistry, Genetics and Molecular Biology
29
+ Biochemistry, Genetics and Molecular Biology (miscellaneous)
30
+ Aging
31
+ Biochemistry
32
+ Biophysics
33
+ Biotechnology
34
+ Cancer Research
35
+ Cell Biology
36
+ Clinical Biochemistry
37
+ Developmental Biology
38
+ Endocrinology
39
+ Genetics
40
+ Molecular Biology
41
+ Molecular Medicine
42
+ Physiology
43
+ Structural Biology
44
+ General Business, Management and Accounting
45
+ Business, Management and Accounting (miscellaneous)
46
+ Accounting
47
+ Business and International Management
48
+ Management Information Systems
49
+ Management of Technology and Innovation
50
+ Marketing
51
+ Organizational Behavior and Human Resource Management
52
+ Strategy and Management
53
+ Tourism, Leisure and Hospitality Management
54
+ Industrial Relations
55
+ General Chemical Engineering
56
+ Chemical Engineering (miscellaneous)
57
+ Bioengineering
58
+ Catalysis
59
+ Chemical Health and Safety
60
+ Colloid and Surface Chemistry
61
+ Filtration and Separation
62
+ Fluid Flow and Transfer Processes
63
+ Process Chemistry and Technology
64
+ General Chemistry
65
+ Chemistry (miscellaneous)
66
+ Analytical Chemistry
67
+ Electrochemistry
68
+ Inorganic Chemistry
69
+ Organic Chemistry
70
+ Physical and Theoretical Chemistry
71
+ Spectroscopy
72
+ General Computer Science
73
+ Computer Science (miscellaneous)
74
+ Artificial Intelligence
75
+ Computational Theory and Mathematics
76
+ Computer Graphics and Computer-Aided Design
77
+ Computer Networks and Communications
78
+ Computer Science Applications
79
+ Computer Vision and Pattern Recognition
80
+ Hardware and Architecture
81
+ Human-Computer Interaction
82
+ Information Systems
83
+ Signal Processing
84
+ Software
85
+ General Decision Sciences
86
+ Decision Sciences (miscellaneous)
87
+ Information Systems and Management
88
+ Management Science and Operations Research
89
+ Statistics, Probability and Uncertainty
90
+ General Earth and Planetary Sciences
91
+ Earth and Planetary Sciences (miscellaneous)
92
+ Atmospheric Science
93
+ Computers in Earth Sciences
94
+ Earth-Surface Processes
95
+ Economic Geology
96
+ Geochemistry and Petrology
97
+ Geology
98
+ Geophysics
99
+ Geotechnical Engineering and Engineering Geology
100
+ Oceanography
101
+ Paleontology
102
+ Space and Planetary Science
103
+ Stratigraphy
104
+ General Economics, Econometrics and Finance
105
+ Economics, Econometrics and Finance (miscellaneous)
106
+ Economics and Econometrics
107
+ Finance
108
+ General Energy
109
+ Energy (miscellaneous)
110
+ Energy Engineering and Power Technology
111
+ Fuel Technology
112
+ Nuclear Energy and Engineering
113
+ Renewable Energy, Sustainability and the Environment
114
+ General Engineering
115
+ Engineering (miscellaneous)
116
+ Aerospace Engineering
117
+ Automotive Engineering
118
+ Biomedical Engineering
119
+ Civil and Structural Engineering
120
+ Computational Mechanics
121
+ Control and Systems Engineering
122
+ Electrical and Electronic Engineering
123
+ Industrial and Manufacturing Engineering
124
+ Mechanical Engineering
125
+ Mechanics of Materials
126
+ Ocean Engineering
127
+ Safety, Risk, Reliability and Quality
128
+ Media Technology
129
+ Building and Construction
130
+ Architecture
131
+ General Environmental Science
132
+ Environmental Science (miscellaneous)
133
+ Ecological Modeling
134
+ Ecology
135
+ Environmental Chemistry
136
+ Environmental Engineering
137
+ Global and Planetary Change
138
+ Health, Toxicology and Mutagenesis
139
+ Management, Monitoring, Policy and Law
140
+ Nature and Landscape Conservation
141
+ Pollution
142
+ Waste Management and Disposal
143
+ Water Science and Technology
144
+ General Immunology and Microbiology
145
+ Immunology and Microbiology (miscellaneous)
146
+ Applied Microbiology and Biotechnology
147
+ Immunology
148
+ Microbiology
149
+ Parasitology
150
+ Virology
151
+ General Materials Science
152
+ Materials Science (miscellaneous)
153
+ Biomaterials
154
+ Ceramics and Composites
155
+ Electronic, Optical and Magnetic Materials
156
+ Materials Chemistry
157
+ Metals and Alloys
158
+ Polymers and Plastics
159
+ Surfaces, Coatings and Films
160
+ General Mathematics
161
+ Mathematics (miscellaneous)
162
+ Algebra and Number Theory
163
+ Analysis
164
+ Applied Mathematics
165
+ Computational Mathematics
166
+ Control and Optimization
167
+ Discrete Mathematics and Combinatorics
168
+ Geometry and Topology
169
+ Logic
170
+ Mathematical Physics
171
+ Modeling and Simulation
172
+ Numerical Analysis
173
+ Statistics and Probability
174
+ Theoretical Computer Science
175
+ General Medicine
176
+ Medicine (miscellaneous)
177
+ Anatomy
178
+ Anesthesiology and Pain Medicine
179
+ Biochemistry (medical)
180
+ Cardiology and Cardiovascular Medicine
181
+ Critical Care and Intensive Care Medicine
182
+ Complementary and Alternative Medicine
183
+ Dermatology
184
+ Drug Guides
185
+ Embryology
186
+ Emergency Medicine
187
+ Endocrinology, Diabetes and Metabolism
188
+ Epidemiology
189
+ Family Practice
190
+ Gastroenterology
191
+ Genetics (clinical)
192
+ Geriatrics and Gerontology
193
+ Health Informatics
194
+ Health Policy
195
+ Hematology
196
+ Hepatology
197
+ Histology
198
+ Immunology and Allergy
199
+ Internal Medicine
200
+ Infectious Diseases
201
+ Microbiology (medical)
202
+ Nephrology
203
+ Neurology (clinical)
204
+ Obstetrics and Gynecology
205
+ Oncology
206
+ Ophthalmology
207
+ Orthopedics and Sports Medicine
208
+ Otorhinolaryngology
209
+ Pathology and Forensic Medicine
210
+ Pediatrics, Perinatology and Child Health
211
+ Pharmacology (medical)
212
+ Physiology (medical)
213
+ Psychiatry and Mental Health
214
+ Public Health, Environmental and Occupational Health
215
+ Pulmonary and Respiratory Medicine
216
+ Radiology, Nuclear Medicine and Imaging
217
+ Rehabilitation
218
+ Reproductive Medicine
219
+ Reviews and References (medical)
220
+ Rheumatology
221
+ Surgery
222
+ Transplantation
223
+ Urology
224
+ General Neuroscience
225
+ Neuroscience (miscellaneous)
226
+ Behavioral Neuroscience
227
+ Biological Psychiatry
228
+ Cellular and Molecular Neuroscience
229
+ Cognitive Neuroscience
230
+ Developmental Neuroscience
231
+ Endocrine and Autonomic Systems
232
+ Neurology
233
+ Sensory Systems
234
+ General Nursing
235
+ Nursing (miscellaneous)
236
+ Advanced and Specialized Nursing
237
+ Assessment and Diagnosis
238
+ Care Planning
239
+ Community and Home Care
240
+ Critical Care Nursing
241
+ Emergency Nursing
242
+ Fundamentals and Skills
243
+ Gerontology
244
+ Issues, Ethics and Legal Aspects
245
+ Leadership and Management
246
+ LPN and LVN
247
+ Maternity and Midwifery
248
+ Medical and Surgical Nursing
249
+ Nurse Assisting
250
+ Nutrition and Dietetics
251
+ Oncology (nursing)
252
+ Pathophysiology
253
+ Pediatrics
254
+ Pharmacology (nursing)
255
+ Psychiatric Mental Health
256
+ Research and Theory
257
+ Review and Exam Preparation
258
+ General Pharmacology, Toxicology and Pharmaceutics
259
+ Pharmacology, Toxicology and Pharmaceutics (miscellaneous)
260
+ Drug Discovery
261
+ Pharmaceutical Science
262
+ Pharmacology
263
+ Toxicology
264
+ General Physics and Astronomy
265
+ Physics and Astronomy (miscellaneous)
266
+ Acoustics and Ultrasonics
267
+ Astronomy and Astrophysics
268
+ Condensed Matter Physics
269
+ Instrumentation
270
+ Nuclear and High Energy Physics
271
+ Atomic and Molecular Physics, and Optics
272
+ Radiation
273
+ Statistical and Nonlinear Physics
274
+ Surfaces and Interfaces
275
+ General Psychology
276
+ Psychology (miscellaneous)
277
+ Applied Psychology
278
+ Clinical Psychology
279
+ Developmental and Educational Psychology
280
+ Experimental and Cognitive Psychology
281
+ Neuropsychology and Physiological Psychology
282
+ Social Psychology
283
+ General Social Sciences
284
+ Social Sciences (miscellaneous)
285
+ Archeology
286
+ Development
287
+ Education
288
+ Geography, Planning and Development
289
+ Health (social science)
290
+ Human Factors and Ergonomics
291
+ Law
292
+ Library and Information Sciences
293
+ Linguistics and Language
294
+ Safety Research
295
+ Sociology and Political Science
296
+ Transportation
297
+ Anthropology
298
+ Communication
299
+ Cultural Studies
300
+ Demography
301
+ Gender Studies
302
+ Life-span and Life-course Studies
303
+ Political Science and International Relations
304
+ Public Administration
305
+ Urban Studies
306
+ General Veterinary
307
+ Veterinary (miscellaneous)
308
+ Equine
309
+ Food Animals
310
+ Small Animals
311
+ General Dentistry
312
+ Dentistry (miscellaneous)
313
+ Dental Assisting
314
+ Dental Hygiene
315
+ Oral Surgery
316
+ Orthodontics
317
+ Periodontics
318
+ General Health Professions
319
+ Health Professions (miscellaneous)
320
+ Chiropractics
321
+ Complementary and Manual Therapy
322
+ Emergency Medical Services
323
+ Health Information Management
324
+ Medical Assisting and Transcription
325
+ Medical Laboratory Technology
326
+ Medical Terminology
327
+ Occupational Therapy
328
+ Optometry
329
+ Pharmacy
330
+ Physical Therapy, Sports Therapy and Rehabilitation
331
+ Podiatry
332
+ Radiological and Ultrasound Technology
333
+ Respiratory Care
334
+ Speech and Hearing
src/search.py ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import arxiv
2
+ from crossref.restful import Works
3
+ import pytz
4
+ from datetime import date
5
+ from datetime import datetime
6
+
7
+
8
class Search_Papers():
    """Run a paper search against arXiv or Crossref with shared settings.

    The instance stores the query and the search/sort options once;
    ``search_arxiv`` and ``search_general`` translate those options into
    the vocabulary of the respective backend and run the search.
    """

    def __init__(self, query, search_by, search_by_query, sort_by, sort_order):
        """Store the search configuration.

        Args:
            query: Query string handed to the backend.
            search_by: Search mode. ``"NumberResults"`` selects a
                count-limited search; any other value is treated as a
                timeframe search.
            search_by_query: Argument of the search mode: the maximum
                number of results for ``"NumberResults"``, otherwise the
                cut-off moment (must support ``strftime`` for Crossref and
                comparison with an entry's ``published`` value for arXiv;
                presumably a timezone-aware ``datetime`` - confirm with
                callers).
            sort_by: ``"PublishDate"``, ``"LastUpdatedDate"``; anything
                else means relevance.
            sort_order: ``"Ascending"``; anything else means descending.
        """
        self.query = query
        self.search_mode = search_by
        self.search_mode_query = search_by_query

        self.sort_by = sort_by
        self.sort_order = sort_order

        # Moment the search object was created (UTC, second resolution).
        self.time_search = datetime.now(pytz.utc).replace(microsecond=0)

    def search_arxiv_NResults(self, query, max_results, sort_by, sort_order):
        """Return the results of an arXiv search capped at ``max_results``.

        ``sort_by`` and ``sort_order`` are passed straight to
        ``arxiv.Search`` and must already be ``arxiv`` enum members.
        """
        search = arxiv.Search(
            query=query,
            max_results=max_results,
            sort_by=sort_by,
            sort_order=sort_order,
        )
        return search.results()

    def search_arxiv_Timeframe(self, query, timeframe, sort_by, sort_order):
        """Collect arXiv entries published strictly after ``timeframe``.

        The search is repeated with a result cap that grows by one page
        each round; only the entries not examined in earlier rounds are
        inspected. Collection stops at the first entry whose ``published``
        value is not later than ``timeframe``, or when arXiv returns fewer
        entries than requested (nothing further to fetch).

        NOTE(review): stopping at the first older entry only yields the
        full timeframe when results arrive newest-first - confirm callers
        use a date sort in descending order.

        Returns:
            list: The collected result objects, in the order returned.
        """
        page_size = 10
        collection = []
        examined = 0  # number of leading results already inspected

        while True:
            requested = examined + page_size
            search = arxiv.Search(
                query=query,
                max_results=requested,
                sort_by=sort_by,
                sort_order=sort_order,
            )
            results = list(search.results())

            for paper in results[examined:]:
                if paper.published > timeframe:
                    collection.append(paper)
                else:
                    return collection

            # A short page means the result set is exhausted. Without this
            # check the loop would re-query forever whenever every
            # available entry is newer than ``timeframe``.
            if len(results) < requested:
                return collection

            examined = requested

    def search_arxiv(self):
        """Run the configured search against arXiv.

        Maps the stored sort options onto ``arxiv.SortCriterion`` and
        ``arxiv.SortOrder`` and dispatches on the search mode.

        Returns:
            The value of ``search_arxiv_NResults`` in ``"NumberResults"``
            mode, otherwise the list built by ``search_arxiv_Timeframe``.
        """
        sort_criteria = {
            "PublishDate": arxiv.SortCriterion.SubmittedDate,
            "LastUpdatedDate": arxiv.SortCriterion.LastUpdatedDate,
        }
        sort_by = sort_criteria.get(self.sort_by, arxiv.SortCriterion.Relevance)

        if self.sort_order == "Ascending":
            sort_order = arxiv.SortOrder.Ascending
        else:
            sort_order = arxiv.SortOrder.Descending

        if self.search_mode == "NumberResults":
            return self.search_arxiv_NResults(
                self.query, self.search_mode_query, sort_by, sort_order
            )
        return self.search_arxiv_Timeframe(
            self.query, self.search_mode_query, sort_by, sort_order
        )

    def search_general_NResults(self, query, max_results, sort_by, sort_order):
        """Return a Crossref bibliographic query sampled to ``max_results``.

        ``sort_by`` and ``sort_order`` are Crossref parameter strings
        (see ``search_general`` for the values this class produces).
        """
        works = Works()
        return (
            works.query(bibliographic=query)
            .sort(sort_by)
            .order(sort_order)
            .sample(max_results)
        )

    def search_general_Timeframe(self, query, timeframe, sort_by, sort_order):
        """Return a Crossref bibliographic query limited to a date window.

        Args:
            query: Bibliographic query string.
            timeframe: Start of the window as a ``YYYY-MM-DD`` string; the
                window ends today (local date of the running process).
            sort_by: Crossref sort field.
            sort_order: Crossref sort direction.
        """
        works = Works()
        today = date.today().strftime("%Y-%m-%d")

        return (
            works.query(bibliographic=query)
            .filter(from_created_date=timeframe, until_created_date=today)
            .sort(sort_by)
            .order(sort_order)
        )

    # Example query string: 'Magnetic Field Conditions Upstream of Ganymede'

    def search_general(self):
        """Run the configured search against Crossref.

        Maps the stored sort options onto Crossref's ``created`` /
        ``updated`` / ``relevance`` fields and ``asc`` / ``desc``
        directions, then dispatches on the search mode. As in
        ``search_arxiv``, every mode other than ``"NumberResults"`` is
        handled as a timeframe search.
        """
        sort_fields = {
            "PublishDate": "created",
            "LastUpdatedDate": "updated",
        }
        sort_by = sort_fields.get(self.sort_by, "relevance")
        sort_order = "asc" if self.sort_order == "Ascending" else "desc"

        if self.search_mode == "NumberResults":
            return self.search_general_NResults(
                self.query, self.search_mode_query, sort_by, sort_order
            )

        # Format the cut-off here, in the branch that uses it, so a mode
        # other than the literal 'Timeframe' no longer hits an unbound
        # local variable.
        timeframe = self.search_mode_query.strftime("%Y-%m-%d")
        return self.search_general_Timeframe(
            self.query, timeframe, sort_by, sort_order
        )
145
+
146
+