Fix "AssertionError: First token is not <cls> token value" in extract_and_plot_cell_embeddings.ipynb and in_silico_perturbation.ipynb by passing emb_mode="cell" to EmbExtractor

#541
examples/extract_and_plot_cell_embeddings.ipynb CHANGED
@@ -22,6 +22,7 @@
22
  "# (otherwise the EmbExtractor will use the current default model dictionary)\n",
23
  "embex = EmbExtractor(model_type=\"CellClassifier\",\n",
24
  " num_classes=3,\n",
 
25
  " filter_data={\"cell_type\":[\"Cardiomyocyte1\",\"Cardiomyocyte2\",\"Cardiomyocyte3\"]},\n",
26
  " max_ncells=1000,\n",
27
  " emb_layer=0,\n",
 
22
  "# (otherwise the EmbExtractor will use the current default model dictionary)\n",
23
  "embex = EmbExtractor(model_type=\"CellClassifier\",\n",
24
  " num_classes=3,\n",
25
+ " emb_mode=\"cell\",\n",
26
  " filter_data={\"cell_type\":[\"Cardiomyocyte1\",\"Cardiomyocyte2\",\"Cardiomyocyte3\"]},\n",
27
  " max_ncells=1000,\n",
28
  " emb_layer=0,\n",
examples/in_silico_perturbation.ipynb CHANGED
@@ -44,6 +44,7 @@
44
  "# 30M token dictionary: https://huggingface.co/ctheodoris/Geneformer/blob/main/geneformer/gene_dictionaries_30m/token_dictionary_gc30M.pkl\n",
45
  "embex = EmbExtractor(model_type=\"CellClassifier\", # if using previously fine-tuned cell classifier model\n",
46
  " num_classes=3,\n",
 
47
  " filter_data=filter_data_dict,\n",
48
  " max_ncells=1000,\n",
49
  " emb_layer=0,\n",
 
44
  "# 30M token dictionary: https://huggingface.co/ctheodoris/Geneformer/blob/main/geneformer/gene_dictionaries_30m/token_dictionary_gc30M.pkl\n",
45
  "embex = EmbExtractor(model_type=\"CellClassifier\", # if using previously fine-tuned cell classifier model\n",
46
  " num_classes=3,\n",
47
+ " emb_mode=\"cell\",\n",
48
  " filter_data=filter_data_dict,\n",
49
  " max_ncells=1000,\n",
50
  " emb_layer=0,\n",