fulviodeo committed on
Commit
f6dbff2
·
1 Parent(s): 8dd9580

Reorganisation of PubMed search section

Browse files
notebooks/notebook.ipynb CHANGED
@@ -11,7 +11,7 @@
11
  }
12
  },
13
  "outputs": [],
14
- "source": "import subprocess\nfrom datetime import date\nfrom dateutil.relativedelta import relativedelta\nimport os\nimport ipywidgets as widgets\nfrom IPython.display import clear_output\nimport warnings\nwarnings.filterwarnings(\"ignore\")\n\nfrom src.handlers.model_wrapper import available_models\nfrom src.utils.ui_elements import (\n inject_styles, BOX_WIDTH,\n make_button, make_separator, make_spacer, make_section_header,\n set_executing, set_done, set_error\n)\n\ninject_styles()\n\n# --- UI Elements ---\n\nmodel_dropdown = widgets.Dropdown(\n options=[m.name for m in available_models],\n value=available_models[0].name,\n description='',\n disabled=False,\n layout=widgets.Layout(width=BOX_WIDTH)\n)\n\nselected_model = available_models[0]\n\nquery_input = widgets.Textarea(\n placeholder='Enter PubMed query...',\n description='',\n layout=widgets.Layout(width='500px', height='100px')\n)\n\nuse_default_query = widgets.Checkbox(\n value=False,\n description='Use the same query as in the 2016 paper by the NCD-RisC',\n style={'description_width': 'initial'},\n layout=widgets.Layout(width='auto')\n)\n\ndef toggle_default_query(change):\n query_input.disabled = change.new\n\nuse_default_query.observe(toggle_default_query, names='value')\n\nstart_date_value = date.today() - relativedelta(months=1)\nend_date_value = date.today()\n\nstart_date = widgets.DatePicker(\n description='Start date:',\n value=start_date_value,\n style={'description_width': 'initial'}\n)\n\nend_date = widgets.DatePicker(\n description='End date:',\n value=end_date_value,\n style={'description_width': 'initial'}\n)\n\nrecall_target = widgets.Dropdown(\n options=[('95%', 95), ('90%', 90), ('80%', 80), ('70%', 70), ('60%', 60)],\n value=95,\n description='',\n layout=widgets.Layout(width='120px')\n)\n\n# Buttons\nload_btn = make_button('Load')\nsearch_btn = make_button('Search')\npredict_btn = make_button('Screen articles')\n\n# Output areas\nload_model_output = widgets.Output()\nsearch_output = 
widgets.Output()\npredict_output = widgets.Output()\n\n\n# --- Event Handlers ---\n\ndef run_load_model(b):\n set_executing(load_btn, 'Loading...')\n with load_model_output:\n clear_output(wait=True)\n try:\n with open(os.path.abspath(os.path.join(os.getcwd(), 'src/choose_model.py'))) as f:\n exec(f.read(), globals())\n set_done(load_btn, '\\u2713 Loaded')\n except Exception:\n set_error(load_btn, 'Load')\n import traceback\n traceback.print_exc()\n\n\ndef run_search(b):\n set_executing(search_btn, 'Searching...')\n with search_output:\n clear_output(wait=True)\n try:\n with open(os.path.abspath(os.path.join(os.getcwd(), 'src/search_pubmed.py'))) as f:\n exec(f.read(), globals())\n set_done(search_btn, '\\u2713 Search completed')\n path_to_articles = os.path.join(globals().get('directory'), 'downloaded_articles.csv')\n with search_output:\n display(widgets.HTML(\n f\"<p style='font-size:12px; color:gray; margin-top:6px; margin-left:12px; margin-bottom:0px;'>\"\n f\"Article abstracts downloaded to: {path_to_articles}</p>\"\n ))\n except Exception:\n set_error(search_btn, 'Search')\n import traceback\n traceback.print_exc()\n\n\ndef open_ris_file(b):\n path = os.path.join(globals().get('directory'), 'articles_to_review.ris')\n try:\n if os.name == 'posix':\n subprocess.Popen(['open', path])\n elif os.name == 'nt':\n subprocess.Popen(['start', '', path], shell=True)\n except Exception:\n import traceback\n traceback.print_exc()\n\n\ndef run_screen_articles(b):\n set_executing(predict_btn, 'Screening...')\n with predict_output:\n clear_output(wait=True)\n try:\n with open(os.path.abspath(os.path.join(os.getcwd(), 'src/screen_articles.py'))) as f:\n exec(f.read(), globals())\n set_done(predict_btn, '\\u2713 Screening completed')\n except Exception:\n set_error(predict_btn, 'Screen articles')\n import traceback\n traceback.print_exc()\n\n path_to_ris = os.path.join(globals().get('directory'), 'articles_to_review.ris')\n open_file_btn = make_button('📄 Open in EndNote', 
width='250px')\n open_file_btn.on_click(open_ris_file)\n path_display = widgets.HTML(\n f\"<p style='font-size:12px; color:gray; margin-top:6px; margin-left:12px;'>\"\n f\"Path to the EndNote file: {path_to_ris}</p>\"\n )\n final_message_1 = widgets.HTML(\n \"<p style='font-size:14px; color:black; margin-top:10px; margin-left:12px;'>\"\n \"Open the .ris file in EndNote by clicking on the button above or navigating to the file</p>\"\n )\n final_message_2 = widgets.HTML(\n \"<p style='font-size:14px; color:black; margin-top:10px; margin-left:12px;'>\"\n \"Select RefMan - RIS as the input file format</p>\"\n )\n with predict_output:\n display(widgets.VBox([open_file_btn, path_display, final_message_1, final_message_2]))\n\n\nload_btn.on_click(run_load_model)\nsearch_btn.on_click(run_search)\npredict_btn.on_click(run_screen_articles)\n\n# --- Layout ---\n\ntitle_style = 'font-size: 22px; font-weight: bold; margin-bottom: 40px;'\n\nheader = widgets.HTML(\n f\"<h2 style='margin-left:12px; {title_style}'>Automated screening of the literature</h2>\"\n)\n\n# 1 - Choose a model\nchoose_model = widgets.VBox([\n make_section_header('Choose a model'),\n model_dropdown,\n make_spacer('20px'),\n load_btn,\n load_model_output,\n], layout=widgets.Layout(margin='0 0 0 12px'))\n\n# 2 - Search PubMed\nsearch_pubmed = widgets.VBox([\n make_section_header('Search PubMed'),\n widgets.HTML(\"<b style='font-size:14px; margin-top:10px;'>Query</b>\"),\n query_input,\n start_date,\n end_date,\n use_default_query,\n make_spacer('20px'),\n search_btn,\n search_output,\n], layout=widgets.Layout(margin='0 0 0 12px'))\n\n# 3 - Screen articles\nscreen_articles = widgets.VBox([\n make_section_header('Screen articles'),\n widgets.HTML(\"<span style='font-size:13px; color:gray; margin-top:2px; display:block;'>Based on the recall achieved in previous testing; the higher the recall, the more inclusive the model</span>\"),\n make_spacer('8px'),\n recall_target,\n make_spacer('20px'),\n predict_btn,\n 
predict_output\n], layout=widgets.Layout(margin='0 0 0 12px'))\n\n\ndisplay(header, choose_model, make_separator(), search_pubmed, make_separator(), screen_articles)"
15
  },
16
  {
17
  "cell_type": "code",
 
11
  }
12
  },
13
  "outputs": [],
14
+ "source": "import subprocess\nfrom datetime import date\nfrom dateutil.relativedelta import relativedelta\nimport os\nimport ipywidgets as widgets\nfrom IPython.display import clear_output\nimport warnings\nwarnings.filterwarnings(\"ignore\")\n\nfrom src.handlers.model_wrapper import available_models\nfrom src.utils.ui_elements import (\n inject_styles, BOX_WIDTH, QUERY_WIDTH,\n make_button, make_separator, make_spacer, make_section_header,\n set_executing, set_done, set_error\n)\n\ninject_styles()\n\n# --- UI Elements ---\n\nmodel_dropdown = widgets.Dropdown(\n options=[m.name for m in available_models],\n value=available_models[0].name,\n description='',\n disabled=False,\n layout=widgets.Layout(width=BOX_WIDTH)\n)\n\nselected_model = available_models[0]\n\nquery_input = widgets.Textarea(\n description='Query:',\n placeholder='Enter PubMed query...',\n style={'description_width': 'initial'},\n layout=widgets.Layout(width=QUERY_WIDTH, height='160px')\n)\n\nuse_default_query = widgets.Checkbox(\n value=False,\n description='Use the same query as in the 2016 paper by the NCD-RisC',\n style={'description_width': 'initial'},\n layout=widgets.Layout(width='auto')\n)\n\ndef toggle_default_query(change):\n query_input.disabled = change.new\n\nuse_default_query.observe(toggle_default_query, names='value')\n\nstart_date_value = date.today() - relativedelta(months=1)\nend_date_value = date.today()\n\nstart_date = widgets.DatePicker(\n description='Start date:',\n value=start_date_value,\n style={'description_width': 'initial'}\n)\n\nend_date = widgets.DatePicker(\n description='End date:',\n value=end_date_value,\n style={'description_width': 'initial'}\n)\n\nrecall_target = widgets.Dropdown(\n options=[('95%', 95), ('90%', 90), ('80%', 80), ('70%', 70), ('60%', 60)],\n value=95,\n description='',\n layout=widgets.Layout(width='120px')\n)\n\n# Buttons\nload_btn = make_button('Load')\nsearch_btn = make_button('Search')\npredict_btn = make_button('Screen articles')\n\n# 
Output areas\nload_model_output = widgets.Output()\nsearch_output = widgets.Output()\npredict_output = widgets.Output()\n\n\n# --- Event Handlers ---\n\ndef run_load_model(b):\n set_executing(load_btn, 'Loading...')\n with load_model_output:\n clear_output(wait=True)\n try:\n with open(os.path.abspath(os.path.join(os.getcwd(), 'src/choose_model.py'))) as f:\n exec(f.read(), globals())\n set_done(load_btn, '\\u2713 Model loaded')\n except Exception:\n set_error(load_btn, 'Load')\n import traceback\n traceback.print_exc()\n\n\ndef run_search(b):\n set_executing(search_btn, 'Searching...')\n with search_output:\n clear_output(wait=True)\n try:\n with open(os.path.abspath(os.path.join(os.getcwd(), 'src/search_pubmed.py'))) as f:\n exec(f.read(), globals())\n set_done(search_btn, '\\u2713 Search completed')\n path_to_articles = os.path.join(globals().get('directory'), 'downloaded_articles.csv')\n with search_output:\n display(widgets.HTML(\n f\"<p style='font-size:12px; color:gray; margin-top:6px; margin-left:12px; margin-bottom:0px;'>\"\n f\"Article abstracts downloaded to: {path_to_articles}</p>\"\n ))\n except Exception:\n set_error(search_btn, 'Search')\n import traceback\n traceback.print_exc()\n\n\ndef open_ris_file(b):\n path = os.path.join(globals().get('directory'), 'articles_to_review.ris')\n try:\n if os.name == 'posix':\n subprocess.Popen(['open', path])\n elif os.name == 'nt':\n subprocess.Popen(['start', '', path], shell=True)\n except Exception:\n import traceback\n traceback.print_exc()\n\n\ndef run_screen_articles(b):\n set_executing(predict_btn, 'Screening...')\n with predict_output:\n clear_output(wait=True)\n try:\n with open(os.path.abspath(os.path.join(os.getcwd(), 'src/screen_articles.py'))) as f:\n exec(f.read(), globals())\n set_done(predict_btn, '\\u2713 Screening completed')\n except Exception:\n set_error(predict_btn, 'Screen articles')\n import traceback\n traceback.print_exc()\n\n path_to_ris = os.path.join(globals().get('directory'), 
'articles_to_review.ris')\n open_file_btn = make_button('\\U0001f4c4 Open in EndNote', width='250px')\n open_file_btn.on_click(open_ris_file)\n path_display = widgets.HTML(\n f\"<p style='font-size:12px; color:gray; margin-top:6px; margin-left:12px;'>\"\n f\"Path to the EndNote file: {path_to_ris}</p>\"\n )\n final_message_1 = widgets.HTML(\n \"<p style='font-size:14px; color:black; margin-top:10px; margin-left:12px;'>\"\n \"Open the .ris file in EndNote by clicking on the button above or navigating to the file</p>\"\n )\n final_message_2 = widgets.HTML(\n \"<p style='font-size:14px; color:black; margin-top:10px; margin-left:12px;'>\"\n \"Select RefMan - RIS as the input file format</p>\"\n )\n with predict_output:\n display(widgets.VBox([open_file_btn, path_display, final_message_1, final_message_2]))\n\n\nload_btn.on_click(run_load_model)\nsearch_btn.on_click(run_search)\npredict_btn.on_click(run_screen_articles)\n\n# --- Layout ---\n\ntitle_style = 'font-size: 22px; font-weight: bold; margin-bottom: 40px;'\n\nheader = widgets.HTML(\n f\"<h2 style='margin-left:12px; {title_style}'>Automated screening of the literature</h2>\"\n)\n\n# 1 - Choose a model\nchoose_model = widgets.VBox([\n make_section_header('Choose a model'),\n model_dropdown,\n make_spacer('20px'),\n load_btn,\n load_model_output,\n], layout=widgets.Layout(margin='0 0 0 12px'))\n\n# 2 - Search PubMed\nsearch_pubmed = widgets.VBox([\n make_section_header('Search PubMed'),\n make_spacer('8px'),\n query_input,\n make_spacer('4px'),\n widgets.VBox([start_date, end_date], layout=widgets.Layout(align_items='flex-end')),\n use_default_query,\n make_spacer('20px'),\n search_btn,\n search_output,\n], layout=widgets.Layout(margin='0 0 0 12px'))\n\n# 3 - Screen articles\nscreen_articles = widgets.VBox([\n make_section_header('Screen articles'),\n widgets.HTML(\"<span style='font-size:13px; color:gray; margin-top:2px; display:block;'>Based on the recall achieved in previous testing; the higher the recall, the 
more inclusive the model</span>\"),\n make_spacer('8px'),\n recall_target,\n make_spacer('20px'),\n predict_btn,\n predict_output\n], layout=widgets.Layout(margin='0 0 0 12px'))\n\n\ndisplay(header, choose_model, make_separator(), search_pubmed, make_separator(), screen_articles)"
15
  },
16
  {
17
  "cell_type": "code",
notebooks/src/utils/logging.py CHANGED
@@ -34,8 +34,7 @@ class Logger:
34
 
35
  def __configure_logging(self, _level=LogLevel.INFO):
36
  logging.basicConfig(level=_level, format="%(asctime)s [%(levelname)s] %(message)s",
37
- handlers=[logging.FileHandler(os.path.join(self.log_dir, self.log_filename)),
38
- logging.StreamHandler()])
39
 
40
  def info(self, msg):
41
  # logging.info(msg)
 
34
 
35
  def __configure_logging(self, _level=LogLevel.INFO):
36
  logging.basicConfig(level=_level, format="%(asctime)s [%(levelname)s] %(message)s",
37
+ handlers=[logging.FileHandler(os.path.join(self.log_dir, self.log_filename))])
 
38
 
39
  def info(self, msg):
40
  # logging.info(msg)
notebooks/src/utils/pytorch_models.py CHANGED
@@ -1,5 +1,3 @@
1
- import sys
2
-
3
  import torch
4
  from torch.utils.data import DataLoader, TensorDataset
5
 
@@ -35,10 +33,6 @@ class PopulationHealthScreener:
35
  self.model.eval()
36
 
37
  for i, batch in enumerate(test_data):
38
- msg = f" Processing article {self.batch_size * (i + 1)}/{self.batch_size * len(test_data)} \u23f3"
39
- sys.stdout.write('\r' + msg)
40
- sys.stdout.flush()
41
-
42
  input_ids = batch[0].to(self.device)
43
  attention_mask = batch[1].to(self.device)
44
 
 
 
 
1
  import torch
2
  from torch.utils.data import DataLoader, TensorDataset
3
 
 
33
  self.model.eval()
34
 
35
  for i, batch in enumerate(test_data):
 
 
 
 
36
  input_ids = batch[0].to(self.device)
37
  attention_mask = batch[1].to(self.device)
38
 
notebooks/src/utils/ui_elements.py CHANGED
@@ -2,9 +2,10 @@ import ipywidgets as widgets
2
  from IPython.display import HTML, display
3
 
4
  BOX_WIDTH = '300px'
5
- BUTTON_WIDTH = '150px'
 
6
 
7
- _BUTTON_STYLE_CSS = """
8
  <style>
9
  .widget-button {
10
  justify-content: flex-start !important;
@@ -13,12 +14,20 @@ _BUTTON_STYLE_CSS = """
13
  font-size: 15px !important;
14
  padding-left: 12px !important;
15
  }
 
 
 
 
 
 
 
 
16
  </style>
17
  """
18
 
19
 
20
  def inject_styles():
21
- display(HTML(_BUTTON_STYLE_CSS))
22
 
23
 
24
  def make_button(description, width=BUTTON_WIDTH):
@@ -49,7 +58,7 @@ def set_executing(btn, message='Running...'):
49
  def set_done(btn, message):
50
  btn.description = message
51
  btn.disabled = True
52
- btn.style.button_color = '#c3e6cb'
53
 
54
 
55
  def set_error(btn, original_description):
 
2
  from IPython.display import HTML, display
3
 
4
  BOX_WIDTH = '300px'
5
+ BUTTON_WIDTH = '225px'
6
+ QUERY_WIDTH = '375px'
7
 
8
+ _STYLES_CSS = """
9
  <style>
10
  .widget-button {
11
  justify-content: flex-start !important;
 
14
  font-size: 15px !important;
15
  padding-left: 12px !important;
16
  }
17
+ .done-button .widget-button {
18
+ color: #28a745 !important;
19
+ background-color: white !important;
20
+ border: 1px solid #28a745 !important;
21
+ }
22
+ textarea.widget-textarea {
23
+ resize: none !important;
24
+ }
25
  </style>
26
  """
27
 
28
 
29
  def inject_styles():
30
+ display(HTML(_STYLES_CSS))
31
 
32
 
33
  def make_button(description, width=BUTTON_WIDTH):
 
58
  def set_done(btn, message):
59
  btn.description = message
60
  btn.disabled = True
61
+ btn.add_class('done-button')
62
 
63
 
64
  def set_error(btn, original_description):