Alejandro Velez
committed on
Commit
·
8a3a2f5
1
Parent(s):
db1d3e7
more cleanup on docs
Browse files
- .readthedocs.yaml +0 -19
- docs/Makefile +0 -20
- docs/make.bat +0 -35
- docs/requirements.txt +0 -3
- docs/source/_static/css/custom.css +0 -40
- docs/source/_static/gf_logo.png +0 -0
- docs/source/about.rst +0 -49
- docs/source/api.rst +0 -51
- docs/source/conf.py +0 -80
- docs/source/geneformer.classifier.rst +0 -10
- docs/source/geneformer.emb_extractor.rst +0 -26
- docs/source/geneformer.in_silico_perturber.rst +0 -8
- docs/source/geneformer.in_silico_perturber_stats.rst +0 -25
- docs/source/geneformer.mtl_classifier.rst +0 -11
- docs/source/geneformer.tokenizer.rst +0 -15
- docs/source/getstarted.rst +0 -36
- docs/source/index.rst +0 -16
.readthedocs.yaml
DELETED
|
@@ -1,19 +0,0 @@
|
|
| 1 |
-
# Read the Docs configuration file
|
| 2 |
-
|
| 3 |
-
# Required
|
| 4 |
-
version: 2
|
| 5 |
-
|
| 6 |
-
# Set the OS, Python version and other tools you might need
|
| 7 |
-
build:
|
| 8 |
-
os: ubuntu-22.04
|
| 9 |
-
tools:
|
| 10 |
-
python: "3.10"
|
| 11 |
-
|
| 12 |
-
# Build documentation in the "docs/" directory with Sphinx
|
| 13 |
-
sphinx:
|
| 14 |
-
configuration: docs/source/conf.py
|
| 15 |
-
|
| 16 |
-
# Python requirements required to build your documentation
|
| 17 |
-
python:
|
| 18 |
-
install:
|
| 19 |
-
- requirements: docs/requirements.txt
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
docs/Makefile
DELETED
|
@@ -1,20 +0,0 @@
|
|
| 1 |
-
# Minimal makefile for Sphinx documentation
|
| 2 |
-
#
|
| 3 |
-
|
| 4 |
-
# You can set these variables from the command line, and also
|
| 5 |
-
# from the environment for the first two.
|
| 6 |
-
SPHINXOPTS ?=
|
| 7 |
-
SPHINXBUILD ?= sphinx-build
|
| 8 |
-
SOURCEDIR = source
|
| 9 |
-
BUILDDIR = build
|
| 10 |
-
|
| 11 |
-
# Put it first so that "make" without argument is like "make help".
|
| 12 |
-
help:
|
| 13 |
-
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
| 14 |
-
|
| 15 |
-
.PHONY: help Makefile
|
| 16 |
-
|
| 17 |
-
# Catch-all target: route all unknown targets to Sphinx using the new
|
| 18 |
-
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
|
| 19 |
-
%: Makefile
|
| 20 |
-
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
docs/make.bat
DELETED
|
@@ -1,35 +0,0 @@
|
|
| 1 |
-
@ECHO OFF
|
| 2 |
-
|
| 3 |
-
pushd %~dp0
|
| 4 |
-
|
| 5 |
-
REM Command file for Sphinx documentation
|
| 6 |
-
|
| 7 |
-
if "%SPHINXBUILD%" == "" (
|
| 8 |
-
set SPHINXBUILD=sphinx-build
|
| 9 |
-
)
|
| 10 |
-
set SOURCEDIR=source
|
| 11 |
-
set BUILDDIR=build
|
| 12 |
-
|
| 13 |
-
%SPHINXBUILD% >NUL 2>NUL
|
| 14 |
-
if errorlevel 9009 (
|
| 15 |
-
echo.
|
| 16 |
-
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
|
| 17 |
-
echo.installed, then set the SPHINXBUILD environment variable to point
|
| 18 |
-
echo.to the full path of the 'sphinx-build' executable. Alternatively you
|
| 19 |
-
echo.may add the Sphinx directory to PATH.
|
| 20 |
-
echo.
|
| 21 |
-
echo.If you don't have Sphinx installed, grab it from
|
| 22 |
-
echo.https://www.sphinx-doc.org/
|
| 23 |
-
exit /b 1
|
| 24 |
-
)
|
| 25 |
-
|
| 26 |
-
if "%1" == "" goto help
|
| 27 |
-
|
| 28 |
-
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
|
| 29 |
-
goto end
|
| 30 |
-
|
| 31 |
-
:help
|
| 32 |
-
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
|
| 33 |
-
|
| 34 |
-
:end
|
| 35 |
-
popd
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
docs/requirements.txt
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
.
|
| 2 |
-
sphinx_rtd_theme==2.0.0
|
| 3 |
-
nbsphinx==0.9.3
|
|
|
|
|
|
|
|
|
|
|
|
docs/source/_static/css/custom.css
DELETED
|
@@ -1,40 +0,0 @@
|
|
| 1 |
-
/* top left logo */
|
| 2 |
-
.wy-side-nav-search, .wy-nav-top {
|
| 3 |
-
background: linear-gradient(15deg, #13547a 0%, #80d0c7 100%);
|
| 4 |
-
}
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
/* unvisited link */
|
| 8 |
-
.wy-nav-content a:link {
|
| 9 |
-
color: #067abd;
|
| 10 |
-
}
|
| 11 |
-
|
| 12 |
-
/* visited link */
|
| 13 |
-
.wy-nav-content a:visited {
|
| 14 |
-
color: #4b827c;
|
| 15 |
-
}
|
| 16 |
-
|
| 17 |
-
/* mouse over link */
|
| 18 |
-
.wy-nav-content a:hover {
|
| 19 |
-
color: #80d0c7;
|
| 20 |
-
}
|
| 21 |
-
|
| 22 |
-
/* selected link */
|
| 23 |
-
.wy-nav-content a:active {
|
| 24 |
-
color: #4b827c;
|
| 25 |
-
}
|
| 26 |
-
|
| 27 |
-
/* class object */
|
| 28 |
-
.sig.sig-object {
|
| 29 |
-
padding: 5px 5px 5px 5px;
|
| 30 |
-
background-color: #ececec;
|
| 31 |
-
border-style: solid;
|
| 32 |
-
border-color: black;
|
| 33 |
-
border-width: 1px 0;
|
| 34 |
-
}
|
| 35 |
-
|
| 36 |
-
/* parameter object */
|
| 37 |
-
dt {
|
| 38 |
-
padding: 5px 5px 5px 5px;
|
| 39 |
-
background-color: #ececec;
|
| 40 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
docs/source/_static/gf_logo.png
DELETED
|
Binary file (48.2 kB)
|
|
|
docs/source/about.rst
DELETED
|
@@ -1,49 +0,0 @@
|
|
| 1 |
-
About
|
| 2 |
-
=====
|
| 3 |
-
|
| 4 |
-
Model Description
|
| 5 |
-
-----------------
|
| 6 |
-
|
| 7 |
-
**Geneformer** is a context-aware, attention-based deep learning model pretrained on a large-scale corpus of single-cell transcriptomes to enable context-specific predictions in settings with limited data in network biology. During pretraining, Geneformer gained a fundamental understanding of network dynamics, encoding network hierarchy in the attention weights of the model in a completely self-supervised manner. With both zero-shot learning and fine-tuning with limited task-specific data, Geneformer consistently boosted predictive accuracy in a diverse panel of downstream tasks relevant to chromatin and network dynamics. In silico perturbation with zero-shot learning identified a novel transcription factor in cardiomyocytes that we experimentally validated to be critical to their ability to generate contractile force. In silico treatment with limited patient data revealed candidate therapeutic targets for cardiomyopathy that we experimentally validated to significantly improve the ability of cardiomyocytes to generate contractile force in an iPSC model of the disease. Overall, Geneformer represents a foundational deep learning model pretrained on a large-scale corpus of human single cell transcriptomes to gain a fundamental understanding of gene network dynamics that can now be democratized to a vast array of downstream tasks to accelerate discovery of key network regulators and candidate therapeutic targets.
|
| 8 |
-
|
| 9 |
-
In `our manuscript <https://rdcu.be/ddrx0>`_, we report results for the original 6 layer Geneformer model pretrained on Genecorpus-30M. We additionally provide within the repository a 12 layer Geneformer model, scaled up with retained width:depth aspect ratio, also pretrained on Genecorpus-30M.
|
| 10 |
-
|
| 11 |
-
Both the `6 <https://huggingface.co/ctheodoris/Geneformer/blob/main/gf-6L-30M-i2048/model.safetensors>`_ and `12 <https://huggingface.co/ctheodoris/Geneformer/blob/main/gf-12L-30M-i2048/pytorch_model.bin>`_ layer Geneformer models were pretrained in June 2021.
|
| 12 |
-
|
| 13 |
-
Also see `our 2024 manuscript <https://www.biorxiv.org/content/10.1101/2024.08.16.608180v1.full.pdf>`_, for details of the `expanded model <https://huggingface.co/ctheodoris/Geneformer/blob/main/model.safetensors>`_ trained on ~95 million transcriptomes in April 2024 and our continual learning, multitask learning, and quantization strategies.
|
| 14 |
-
|
| 15 |
-
Application
|
| 16 |
-
-----------
|
| 17 |
-
|
| 18 |
-
The pretrained Geneformer model can be used directly for zero-shot learning, for example for in silico perturbation analysis, or by fine-tuning towards the relevant downstream task, such as gene or cell state classification.
|
| 19 |
-
|
| 20 |
-
Example applications demonstrated in `our manuscript <https://rdcu.be/ddrx0>`_ include:
|
| 21 |
-
|
| 22 |
-
| *Fine-tuning*:
|
| 23 |
-
| - transcription factor dosage sensitivity
|
| 24 |
-
| - chromatin dynamics (bivalently marked promoters)
|
| 25 |
-
| - transcription factor regulatory range
|
| 26 |
-
| - gene network centrality
|
| 27 |
-
| - transcription factor targets
|
| 28 |
-
| - cell type annotation
|
| 29 |
-
| - batch integration
|
| 30 |
-
| - cell state classification across differentiation
|
| 31 |
-
| - disease classification
|
| 32 |
-
| - in silico perturbation to determine disease-driving genes
|
| 33 |
-
| - in silico treatment to determine candidate therapeutic targets
|
| 34 |
-
|
| 35 |
-
| *Zero-shot learning*:
|
| 36 |
-
| - batch integration
|
| 37 |
-
| - gene context specificity
|
| 38 |
-
| - in silico reprogramming
|
| 39 |
-
| - in silico differentiation
|
| 40 |
-
| - in silico perturbation to determine impact on cell state
|
| 41 |
-
| - in silico perturbation to determine transcription factor targets
|
| 42 |
-
| - in silico perturbation to determine transcription factor cooperativity
|
| 43 |
-
|
| 44 |
-
Citations
|
| 45 |
-
---------
|
| 46 |
-
|
| 47 |
-
| C V Theodoris #, L Xiao, A Chopra, M D Chaffin, Z R Al Sayed, M C Hill, H Mantineo, E Brydon, Z Zeng, X S Liu, P T Ellinor #. `Transfer learning enables predictions in network biology. <https://rdcu.be/ddrx0>`_ *Nature*, 31 May 2023. (# co-corresponding authors)
|
| 48 |
-
|
| 49 |
-
| H Chen \*, M S Venkatesh \*, J Gomez Ortega, S V Mahesh, T Nandi, R Madduri, K Pelka †, C V Theodoris † #. `Quantized multi-task learning for context-specific representations of gene network dynamics. <https://www.biorxiv.org/content/10.1101/2024.08.16.608180v1.full.pdf>`_ *bioRxiv*, 19 Aug 2024. (\* co-first authors, † co-senior authors, # corresponding author)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
docs/source/api.rst
DELETED
|
@@ -1,51 +0,0 @@
|
|
| 1 |
-
API
|
| 2 |
-
===
|
| 3 |
-
|
| 4 |
-
Tokenizer
|
| 5 |
-
---------
|
| 6 |
-
|
| 7 |
-
.. toctree::
|
| 8 |
-
:maxdepth: 1
|
| 9 |
-
|
| 10 |
-
geneformer.tokenizer
|
| 11 |
-
|
| 12 |
-
Classifier
|
| 13 |
-
----------
|
| 14 |
-
|
| 15 |
-
.. toctree::
|
| 16 |
-
:maxdepth: 1
|
| 17 |
-
|
| 18 |
-
geneformer.classifier
|
| 19 |
-
|
| 20 |
-
Multitask Classifier
|
| 21 |
-
--------------------
|
| 22 |
-
|
| 23 |
-
.. toctree::
|
| 24 |
-
:maxdepth: 1
|
| 25 |
-
|
| 26 |
-
geneformer.mtl_classifier
|
| 27 |
-
|
| 28 |
-
Embedding Extractor
|
| 29 |
-
-------------------
|
| 30 |
-
|
| 31 |
-
.. toctree::
|
| 32 |
-
:maxdepth: 1
|
| 33 |
-
|
| 34 |
-
geneformer.emb_extractor
|
| 35 |
-
|
| 36 |
-
In Silico Perturber
|
| 37 |
-
-------------------
|
| 38 |
-
|
| 39 |
-
.. toctree::
|
| 40 |
-
:maxdepth: 1
|
| 41 |
-
|
| 42 |
-
geneformer.in_silico_perturber
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
In Silico Perturber Stats
|
| 46 |
-
-------------------------
|
| 47 |
-
|
| 48 |
-
.. toctree::
|
| 49 |
-
:maxdepth: 1
|
| 50 |
-
|
| 51 |
-
geneformer.in_silico_perturber_stats
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
docs/source/conf.py
DELETED
|
@@ -1,80 +0,0 @@
|
|
| 1 |
-
# Configuration file for the Sphinx documentation builder.
|
| 2 |
-
#
|
| 3 |
-
# For the full list of built-in configuration values, see the documentation:
|
| 4 |
-
# https://www.sphinx-doc.org/en/master/usage/configuration.html
|
| 5 |
-
|
| 6 |
-
import pathlib
|
| 7 |
-
import re
|
| 8 |
-
import sys
|
| 9 |
-
|
| 10 |
-
from sphinx.ext import autodoc
|
| 11 |
-
|
| 12 |
-
sys.path.insert(0, pathlib.Path(__file__).parents[2].resolve().as_posix())
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
# -- Project information -----------------------------------------------------
|
| 16 |
-
# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
|
| 17 |
-
|
| 18 |
-
project = "geneformer"
|
| 19 |
-
copyright = "2024, Christina Theodoris"
|
| 20 |
-
author = "Christina Theodoris"
|
| 21 |
-
release = "0.1.0"
|
| 22 |
-
repository_url = "https://huggingface.co/ctheodoris/Geneformer"
|
| 23 |
-
|
| 24 |
-
# -- General configuration ---------------------------------------------------
|
| 25 |
-
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
|
| 26 |
-
|
| 27 |
-
extensions = [
|
| 28 |
-
"sphinx.ext.autodoc",
|
| 29 |
-
"sphinx.ext.autosummary",
|
| 30 |
-
"nbsphinx",
|
| 31 |
-
"sphinx.ext.viewcode",
|
| 32 |
-
"sphinx.ext.doctest",
|
| 33 |
-
]
|
| 34 |
-
|
| 35 |
-
templates_path = ["_templates"]
|
| 36 |
-
exclude_patterns = [
|
| 37 |
-
"**.ipynb_checkpoints",
|
| 38 |
-
]
|
| 39 |
-
autoclass_content = "both"
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
class MockedClassDocumenter(autodoc.ClassDocumenter):
|
| 43 |
-
def add_line(self, line: str, source: str, *lineno: int) -> None:
|
| 44 |
-
if line == " Bases: :py:class:`object`":
|
| 45 |
-
return
|
| 46 |
-
super().add_line(line, source, *lineno)
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
autodoc.ClassDocumenter = MockedClassDocumenter
|
| 50 |
-
add_module_names = False
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
def process_signature(app, what, name, obj, options, signature, return_annotation):
|
| 54 |
-
# loop through each line in the docstring and replace path with
|
| 55 |
-
# the generic path text
|
| 56 |
-
signature = re.sub(r"PosixPath\(.*?\)", "FILEPATH", signature)
|
| 57 |
-
return (signature, None)
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
def setup(app):
|
| 61 |
-
app.connect("autodoc-process-signature", process_signature)
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
# -- Options for HTML output -------------------------------------------------
|
| 65 |
-
# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
|
| 66 |
-
|
| 67 |
-
html_theme = "sphinx_rtd_theme"
|
| 68 |
-
html_show_sphinx = False
|
| 69 |
-
html_static_path = ["_static"]
|
| 70 |
-
html_logo = "_static/gf_logo.png"
|
| 71 |
-
html_theme_options = {
|
| 72 |
-
"collapse_navigation": False,
|
| 73 |
-
"sticky_navigation": True,
|
| 74 |
-
"navigation_depth": 3,
|
| 75 |
-
"logo_only": True,
|
| 76 |
-
}
|
| 77 |
-
html_css_files = [
|
| 78 |
-
"css/custom.css",
|
| 79 |
-
]
|
| 80 |
-
html_show_sourcelink = False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
docs/source/geneformer.classifier.rst
DELETED
|
@@ -1,10 +0,0 @@
|
|
| 1 |
-
geneformer.classifier
|
| 2 |
-
=====================
|
| 3 |
-
|
| 4 |
-
.. automodule:: geneformer.classifier
|
| 5 |
-
:members:
|
| 6 |
-
:undoc-members:
|
| 7 |
-
:show-inheritance:
|
| 8 |
-
:exclude-members:
|
| 9 |
-
valid_option_dict,
|
| 10 |
-
validate_options
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
docs/source/geneformer.emb_extractor.rst
DELETED
|
@@ -1,26 +0,0 @@
|
|
| 1 |
-
geneformer.emb\_extractor
|
| 2 |
-
=========================
|
| 3 |
-
|
| 4 |
-
.. automodule:: geneformer.emb_extractor
|
| 5 |
-
:members:
|
| 6 |
-
:undoc-members:
|
| 7 |
-
:show-inheritance:
|
| 8 |
-
:exclude-members:
|
| 9 |
-
accumulate_tdigests,
|
| 10 |
-
gen_heatmap_class_colors,
|
| 11 |
-
gen_heatmap_class_dict,
|
| 12 |
-
get_embs,
|
| 13 |
-
label_cell_embs,
|
| 14 |
-
label_gene_embs,
|
| 15 |
-
make_colorbar,
|
| 16 |
-
plot_heatmap,
|
| 17 |
-
plot_umap,
|
| 18 |
-
summarize_gene_embs,
|
| 19 |
-
tdigest_mean,
|
| 20 |
-
tdigest_median,
|
| 21 |
-
test_emb,
|
| 22 |
-
update_tdigest_dict,
|
| 23 |
-
update_tdigest_dict_mean,
|
| 24 |
-
update_tdigest_dict_median,
|
| 25 |
-
valid_option_dict,
|
| 26 |
-
validate_options
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
docs/source/geneformer.in_silico_perturber.rst
DELETED
|
@@ -1,8 +0,0 @@
|
|
| 1 |
-
geneformer.in\_silico\_perturber
|
| 2 |
-
=======================================
|
| 3 |
-
|
| 4 |
-
.. automodule:: geneformer.in_silico_perturber
|
| 5 |
-
:members:
|
| 6 |
-
:undoc-members:
|
| 7 |
-
:show-inheritance:
|
| 8 |
-
:exclude-members: valid_option_dict, validate_options, apply_additional_filters, isp_perturb_all, isp_perturb_set, update_perturbation_dictionary
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
docs/source/geneformer.in_silico_perturber_stats.rst
DELETED
|
@@ -1,25 +0,0 @@
|
|
| 1 |
-
geneformer.in\_silico\_perturber\_stats
|
| 2 |
-
==============================================
|
| 3 |
-
|
| 4 |
-
.. automodule:: geneformer.in_silico_perturber_stats
|
| 5 |
-
:members:
|
| 6 |
-
:undoc-members:
|
| 7 |
-
:show-inheritance:
|
| 8 |
-
:exclude-members:
|
| 9 |
-
find,
|
| 10 |
-
get_fdr,
|
| 11 |
-
get_gene_list,
|
| 12 |
-
get_impact_component,
|
| 13 |
-
invert_dict,
|
| 14 |
-
isp_aggregate_gene_shifts,
|
| 15 |
-
isp_aggregate_grouped_perturb,
|
| 16 |
-
isp_stats_mixture_model,
|
| 17 |
-
isp_stats_to_goal_state,
|
| 18 |
-
isp_stats_vs_null,
|
| 19 |
-
n_detections,
|
| 20 |
-
read_dict,
|
| 21 |
-
read_dictionaries,
|
| 22 |
-
token_to_gene_name,
|
| 23 |
-
token_tuple_to_ensembl_ids,
|
| 24 |
-
valid_option_dict,
|
| 25 |
-
validate_options
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
docs/source/geneformer.mtl_classifier.rst
DELETED
|
@@ -1,11 +0,0 @@
|
|
| 1 |
-
geneformer.mtl\_classifier
|
| 2 |
-
==========================
|
| 3 |
-
|
| 4 |
-
.. automodule:: geneformer.mtl_classifier
|
| 5 |
-
:members:
|
| 6 |
-
:undoc-members:
|
| 7 |
-
:show-inheritance:
|
| 8 |
-
:exclude-members:
|
| 9 |
-
valid_option_dict,
|
| 10 |
-
validate_options,
|
| 11 |
-
validate_additional_options
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
docs/source/geneformer.tokenizer.rst
DELETED
|
@@ -1,15 +0,0 @@
|
|
| 1 |
-
geneformer.tokenizer
|
| 2 |
-
====================
|
| 3 |
-
|
| 4 |
-
.. automodule:: geneformer.tokenizer
|
| 5 |
-
:members:
|
| 6 |
-
:undoc-members:
|
| 7 |
-
:show-inheritance:
|
| 8 |
-
:exclude-members:
|
| 9 |
-
create_dataset,
|
| 10 |
-
tokenize_anndata,
|
| 11 |
-
tokenize_files,
|
| 12 |
-
tokenize_loom,
|
| 13 |
-
rank_genes,
|
| 14 |
-
tokenize_cell,
|
| 15 |
-
sum_ensembl_ids
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
docs/source/getstarted.rst
DELETED
|
@@ -1,36 +0,0 @@
|
|
| 1 |
-
Getting Started
|
| 2 |
-
===============
|
| 3 |
-
|
| 4 |
-
Installation
|
| 5 |
-
------------
|
| 6 |
-
|
| 7 |
-
Geneformer installation instructions.
|
| 8 |
-
|
| 9 |
-
Make sure you have git-lfs installed (https://git-lfs.com).
|
| 10 |
-
|
| 11 |
-
.. code-block:: bash
|
| 12 |
-
|
| 13 |
-
git lfs install
|
| 14 |
-
git clone https://huggingface.co/ctheodoris/Geneformer
|
| 15 |
-
cd Geneformer
|
| 16 |
-
pip install .
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
Tutorials
|
| 20 |
-
---------
|
| 21 |
-
|
| 22 |
-
| See `examples <https://huggingface.co/ctheodoris/Geneformer/tree/main/examples>`_ for:
|
| 23 |
-
| - tokenizing transcriptomes
|
| 24 |
-
| - pretraining
|
| 25 |
-
| - hyperparameter tuning
|
| 26 |
-
| - fine-tuning
|
| 27 |
-
| - extracting and plotting cell embeddings
|
| 28 |
-
| - in silico perturbation
|
| 29 |
-
|
| 30 |
-
Please note that the fine-tuning examples are meant to be generally applicable and the input datasets and labels will vary depending on the downstream task. Example input files for a few of the downstream tasks demonstrated in the manuscript are located within the `example_input_files directory <https://huggingface.co/datasets/ctheodoris/Genecorpus-30M/tree/main/example_input_files>`_ in the dataset repository, but these only represent a few example fine-tuning applications.
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
Tips
|
| 34 |
-
----
|
| 35 |
-
|
| 36 |
-
Please note that GPU resources are required for efficient usage of Geneformer. Additionally, we strongly recommend tuning hyperparameters for each downstream fine-tuning application as this can significantly boost predictive potential in the downstream task (e.g. max learning rate, learning schedule, number of layers to freeze, etc.).
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
docs/source/index.rst
DELETED
|
@@ -1,16 +0,0 @@
|
|
| 1 |
-
Geneformer
|
| 2 |
-
==========
|
| 3 |
-
|
| 4 |
-
Geneformer is a foundation transformer model pretrained on a large-scale corpus of single cell transcriptomes to enable context-aware predictions in network biology.
|
| 5 |
-
|
| 6 |
-
See `our manuscript <https://rdcu.be/ddrx0>`_ for details.
|
| 7 |
-
|
| 8 |
-
Table of Contents
|
| 9 |
-
-----------------
|
| 10 |
-
|
| 11 |
-
.. toctree::
|
| 12 |
-
:maxdepth: 2
|
| 13 |
-
|
| 14 |
-
about
|
| 15 |
-
getstarted
|
| 16 |
-
api
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|