Upload 260 files
Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
- .gitattributes +22 -0
- Dockerfile +18 -0
- app.py +45 -0
- lifelines/.DS_Store +0 -0
- lifelines/mcp_output/README_MCP.md +124 -0
- lifelines/mcp_output/analysis.json +1585 -0
- lifelines/mcp_output/diff_report.md +142 -0
- lifelines/mcp_output/mcp_plugin/__init__.py +0 -0
- lifelines/mcp_output/mcp_plugin/adapter.py +333 -0
- lifelines/mcp_output/mcp_plugin/main.py +13 -0
- lifelines/mcp_output/mcp_plugin/mcp_service.py +398 -0
- lifelines/mcp_output/requirements.txt +11 -0
- lifelines/mcp_output/start_mcp.py +30 -0
- lifelines/mcp_output/workflow_summary.json +224 -0
- lifelines/source/.DS_Store +0 -0
- lifelines/source/.coveragerc +4 -0
- lifelines/source/.pre-commit-config.yaml +16 -0
- lifelines/source/.prospector.yaml +46 -0
- lifelines/source/.readthedocs.yaml +35 -0
- lifelines/source/CHANGELOG.md +1310 -0
- lifelines/source/CITATION.cff +14 -0
- lifelines/source/LICENSE +21 -0
- lifelines/source/MANIFEST.in +12 -0
- lifelines/source/Makefile +38 -0
- lifelines/source/README.md +32 -0
- lifelines/source/__init__.py +4 -0
- lifelines/source/conftest.py +21 -0
- lifelines/source/docs/Changelog.rst +2822 -0
- lifelines/source/docs/Citing lifelines.rst +33 -0
- lifelines/source/docs/Contributing.rst +93 -0
- lifelines/source/docs/Examples.rst +1097 -0
- lifelines/source/docs/Makefile +177 -0
- lifelines/source/docs/Quickstart.rst +366 -0
- lifelines/source/docs/References.rst +11 -0
- lifelines/source/docs/Survival Analysis intro.rst +232 -0
- lifelines/source/docs/Survival Regression.rst +1298 -0
- lifelines/source/docs/Survival analysis with lifelines.rst +850 -0
- lifelines/source/docs/Time varying survival regression.rst +262 -0
- lifelines/source/docs/__init__.py +1 -0
- lifelines/source/docs/_static/custom.css +3 -0
- lifelines/source/docs/_templates/layout.html +6 -0
- lifelines/source/docs/conf.py +297 -0
- lifelines/source/docs/conftest.py +30 -0
- lifelines/source/docs/docs_requirements.txt +1 -0
- lifelines/source/docs/fitters/regression/AalenAdditiveFitter.rst +7 -0
- lifelines/source/docs/fitters/regression/CRCSplineFitter.rst +6 -0
- lifelines/source/docs/fitters/regression/CoxPHFitter.rst +71 -0
- lifelines/source/docs/fitters/regression/CoxTimeVaryingFitter.rst +6 -0
- lifelines/source/docs/fitters/regression/GeneralizedGammaRegressionFitter.rst +6 -0
- lifelines/source/docs/fitters/regression/LogLogisticAFTFitter.rst +7 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,25 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
lifelines/source/docs/images/coxph_plot_covarite_groups.png filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
lifelines/source/docs/images/lcd_parametric.png filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
lifelines/source/docs/images/lifelines_intro_all_regimes.png filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
lifelines/source/docs/images/lifelines_intro_kmf_curve.png filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
lifelines/source/docs/images/lifelines_intro_kmf_fitter.png filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
lifelines/source/docs/images/lifelines_intro_multi_kmf_fitter_2.png filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
lifelines/source/docs/images/lifelines_intro_naf_fitter_multi.png filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
lifelines/source/docs/images/lifelines_intro_naf_smooth_multi_2.png filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
lifelines/source/docs/images/lifelines_intro_naf_smooth_multi.png filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
lifelines/source/docs/images/lls_democracy.png filter=lfs diff=lfs merge=lfs -text
|
| 46 |
+
lifelines/source/docs/images/lls_regime_type.png filter=lfs diff=lfs merge=lfs -text
|
| 47 |
+
lifelines/source/docs/images/plot_covariate_example3.png filter=lfs diff=lfs merge=lfs -text
|
| 48 |
+
lifelines/source/docs/images/show_censors_plot.png filter=lfs diff=lfs merge=lfs -text
|
| 49 |
+
lifelines/source/docs/images/survival_analysis_intro_censoring.png filter=lfs diff=lfs merge=lfs -text
|
| 50 |
+
lifelines/source/docs/images/survival_calibration_probablilty.png filter=lfs diff=lfs merge=lfs -text
|
| 51 |
+
lifelines/source/docs/images/survival_weibull.png filter=lfs diff=lfs merge=lfs -text
|
| 52 |
+
lifelines/source/docs/images/waltons_cumulative_hazard.png filter=lfs diff=lfs merge=lfs -text
|
| 53 |
+
lifelines/source/docs/images/waltons_survival_function.png filter=lfs diff=lfs merge=lfs -text
|
| 54 |
+
lifelines/source/docs/images/weibull_aft_two_models_side_by_side.png filter=lfs diff=lfs merge=lfs -text
|
| 55 |
+
lifelines/source/docs/images/weibull_aft_two_models.png filter=lfs diff=lfs merge=lfs -text
|
| 56 |
+
lifelines/source/docs/images/weibull_extrapolation.png filter=lfs diff=lfs merge=lfs -text
|
| 57 |
+
lifelines/source/docs/images/weibull_parameters.png filter=lfs diff=lfs merge=lfs -text
|
Dockerfile
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.10
|
| 2 |
+
|
| 3 |
+
RUN useradd -m -u 1000 user && python -m pip install --upgrade pip
|
| 4 |
+
USER user
|
| 5 |
+
ENV PATH="/home/user/.local/bin:$PATH"
|
| 6 |
+
|
| 7 |
+
WORKDIR /app
|
| 8 |
+
|
| 9 |
+
COPY --chown=user ./requirements.txt requirements.txt
|
| 10 |
+
RUN pip install --no-cache-dir --upgrade -r requirements.txt
|
| 11 |
+
|
| 12 |
+
COPY --chown=user . /app
|
| 13 |
+
ENV MCP_TRANSPORT=http
|
| 14 |
+
ENV MCP_PORT=7860
|
| 15 |
+
|
| 16 |
+
EXPOSE 7860
|
| 17 |
+
|
| 18 |
+
CMD ["python", "lifelines/mcp_output/start_mcp.py"]
|
app.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import FastAPI
|
| 2 |
+
import os
|
| 3 |
+
import sys
|
| 4 |
+
|
| 5 |
+
# Put the generated MCP plugin directory (next to this file) at the front of
# the import path so that its modules can be imported by bare name.
mcp_plugin_path = os.path.join(
    os.path.dirname(__file__),
    "lifelines",
    "mcp_output",
    "mcp_plugin",
)
sys.path.insert(0, mcp_plugin_path)

# ASGI application object; the route handlers below register themselves on it.
app = FastAPI(
    version="1.0.0",
    title="Lifelines MCP Service",
    description="Auto-generated MCP service for lifelines",
)
|
| 13 |
+
|
| 14 |
+
@app.get("/")
def root():
    """Return a static description of the service.

    The ``transport`` field is read from the ``MCP_TRANSPORT`` environment
    variable on every call and falls back to ``"http"`` when it is unset.
    """
    transport = os.getenv("MCP_TRANSPORT", "http")
    info = {
        "service": "Lifelines MCP Service",
        "version": "1.0.0",
        "status": "running",
        "transport": transport,
    }
    return info
|
| 22 |
+
|
| 23 |
+
@app.get("/health")
def health_check():
    """Return a constant payload indicating that the service is up."""
    return dict(status="healthy", service="lifelines MCP")
|
| 26 |
+
|
| 27 |
+
@app.get("/tools")
def list_tools():
    """List the tools exposed by the MCP application.

    Builds a fresh MCP app through ``mcp_service.create_app()`` on each call
    and reports every entry of its ``tools`` mapping as a
    ``{"name", "description"}`` pair, using the callable's docstring as the
    description (or a placeholder when it has none).

    Returns:
        ``{"tools": [...]}`` on success, or ``{"error": "..."}`` if importing
        or inspecting the MCP app raises any exception.

    NOTE(review): assumes the object returned by ``create_app()`` has a
    dict-like ``tools`` attribute mapping names to callables -- confirm
    against ``mcp_service``.
    """
    try:
        # Imported here, inside the try, so an import failure is reported in
        # the response body like any other error.
        from mcp_service import create_app

        registry = create_app().tools
        catalog = [
            {
                "name": name,
                "description": handler.__doc__ or "No description available",
            }
            for name, handler in registry.items()
        ]
    except Exception as exc:
        return {"error": f"Failed to load tools: {exc!s}"}
    return {"tools": catalog}
|
| 41 |
+
|
| 42 |
+
if __name__ == "__main__":
    # Script entry point: serve the FastAPI app with uvicorn on all
    # interfaces. The port comes from the PORT environment variable and
    # defaults to 7860 when it is unset.
    import uvicorn

    listen_port = int(os.getenv("PORT", 7860))
    uvicorn.run(app, host="0.0.0.0", port=listen_port)
|
lifelines/.DS_Store
ADDED
|
Binary file (6.15 kB). View file
|
|
|
lifelines/mcp_output/README_MCP.md
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# lifelines MCP (Model Context Protocol) Service README
|
| 2 |
+
|
| 3 |
+
## 1) Project Introduction
|
| 4 |
+
|
| 5 |
+
This MCP (Model Context Protocol) service wraps the `lifelines` Python library to provide survival analysis capabilities to LLM applications and developer tools.
|
| 6 |
+
|
| 7 |
+
Core capabilities:
|
| 8 |
+
- Fit survival models (Kaplan–Meier, Cox PH, AFT, Weibull, etc.)
|
| 9 |
+
- Run statistical tests (log-rank, proportional hazards checks, RMST comparisons)
|
| 10 |
+
- Load built-in example datasets
|
| 11 |
+
- Generate calibration and plotting-ready outputs
|
| 12 |
+
- Compute utility metrics (e.g., concordance index)
|
| 13 |
+
|
| 14 |
+
This service is best suited for data science assistants, clinical analytics workflows, and automated model comparison pipelines.
|
| 15 |
+
|
| 16 |
+
---
|
| 17 |
+
|
| 18 |
+
## 2) Installation Method
|
| 19 |
+
|
| 20 |
+
### Requirements
|
| 21 |
+
- Python 3.9+ recommended
|
| 22 |
+
- System packages for scientific Python stack (if needed)
|
| 23 |
+
- Main dependencies:
|
| 24 |
+
- `numpy`
|
| 25 |
+
- `scipy`
|
| 26 |
+
- `pandas`
|
| 27 |
+
- `matplotlib`
|
| 28 |
+
- `autograd`
|
| 29 |
+
- `autograd-gamma`
|
| 30 |
+
- `formulaic`
|
| 31 |
+
|
| 32 |
+
### Install
|
| 33 |
+
pip install lifelines numpy scipy pandas matplotlib autograd autograd-gamma formulaic
|
| 34 |
+
|
| 35 |
+
### Optional (development/docs/testing)
|
| 36 |
+
pip install pytest sphinx jupyter nbconvert
|
| 37 |
+
|
| 38 |
+
---
|
| 39 |
+
|
| 40 |
+
## 3) Quick Start
|
| 41 |
+
|
| 42 |
+
### Basic workflow
|
| 43 |
+
1. Load or receive a tabular dataset with duration/event columns.
|
| 44 |
+
2. Call a fitter endpoint (for example, Cox or Kaplan–Meier).
|
| 45 |
+
3. Inspect returned coefficients/survival curves/test statistics.
|
| 46 |
+
4. Optionally run diagnostics (PH assumption tests, calibration).
|
| 47 |
+
|
| 48 |
+
### Example service usage flow
|
| 49 |
+
- `dataset.load` → `model.fit_coxph` → `statistics.logrank_test` → `metrics.concordance_index`
|
| 50 |
+
|
| 51 |
+
### Minimal Python-side equivalent
|
| 52 |
+
from lifelines import CoxPHFitter
|
| 53 |
+
from lifelines.datasets import load_rossi
|
| 54 |
+
|
| 55 |
+
df = load_rossi()
|
| 56 |
+
cph = CoxPHFitter()
|
| 57 |
+
cph.fit(df, duration_col="week", event_col="arrest")
|
| 58 |
+
print(cph.summary)
|
| 59 |
+
|
| 60 |
+
---
|
| 61 |
+
|
| 62 |
+
## 4) Available Tools and Endpoints List
|
| 63 |
+
|
| 64 |
+
Recommended MCP (Model Context Protocol) service endpoint groups:
|
| 65 |
+
|
| 66 |
+
### `dataset.*`
|
| 67 |
+
- `dataset.list` — list bundled lifelines datasets
|
| 68 |
+
- `dataset.load` — load a named dataset (e.g., `rossi`, `lung`, `gbsg2`)
|
| 69 |
+
|
| 70 |
+
### `model.fit_*`
|
| 71 |
+
- `model.fit_kaplan_meier` — non-parametric survival estimation
|
| 72 |
+
- `model.fit_coxph` — Cox proportional hazards regression
|
| 73 |
+
- `model.fit_cox_time_varying` — Cox model with time-varying covariates
|
| 74 |
+
- `model.fit_weibull` / `model.fit_exponential` / `model.fit_lognormal` / `model.fit_loglogistic`
|
| 75 |
+
- `model.fit_aft_*` — AFT regression family (Weibull/LogNormal/LogLogistic)
|
| 76 |
+
- `model.fit_aalen_additive` — additive hazards model
|
| 77 |
+
|
| 78 |
+
### `statistics.*`
|
| 79 |
+
- `statistics.logrank_test` — two-group survival comparison
|
| 80 |
+
- `statistics.pairwise_logrank_test` — pairwise group comparisons
|
| 81 |
+
- `statistics.multivariate_logrank_test` — multi-group comparison
|
| 82 |
+
- `statistics.proportional_hazard_test` — PH assumption diagnostics
|
| 83 |
+
- `statistics.rmst_difference_test` — restricted mean survival time difference
|
| 84 |
+
|
| 85 |
+
### `calibration.*`
|
| 86 |
+
- `calibration.survival_probability` — calibration at fixed time horizon
|
| 87 |
+
|
| 88 |
+
### `metrics.*`
|
| 89 |
+
- `metrics.concordance_index` — ranking/discrimination quality
|
| 90 |
+
|
| 91 |
+
### `utils.*`
|
| 92 |
+
- `utils.k_fold_cross_validation` — model validation
|
| 93 |
+
- `utils.to_long_format` / `utils.add_covariate_to_timeline` — time-varying data prep
|
| 94 |
+
- `utils.find_best_parametric_model` — parametric model selection helper
|
| 95 |
+
|
| 96 |
+
### `plot.*` (optional)
|
| 97 |
+
- `plot.survival_curve`
|
| 98 |
+
- `plot.loglogs`
|
| 99 |
+
- `plot.qq`
|
| 100 |
+
- `plot.rmst`
|
| 101 |
+
- `plot.at_risk_counts`
|
| 102 |
+
|
| 103 |
+
---
|
| 104 |
+
|
| 105 |
+
## 5) Common Issues and Notes
|
| 106 |
+
|
| 107 |
+
- **Column mapping errors**: Ensure `duration_col` and `event_col` are explicitly provided.
|
| 108 |
+
- **Convergence warnings**: Common in Cox/AFT models with collinearity or separability; standardize features, reduce covariates, or regularize.
|
| 109 |
+
- **Time-varying format**: Use long/episodic format (`start`, `stop`, `event`) for time-varying models.
|
| 110 |
+
- **Censoring assumptions**: Confirm right/left/interval censoring assumptions match chosen model.
|
| 111 |
+
- **Performance**: Large datasets and heavy diagnostics can be slow; prefer batched requests and limit plotting in production.
|
| 112 |
+
- **Headless environments**: For plotting endpoints on servers, configure non-interactive matplotlib backend.
|
| 113 |
+
- **Dependency consistency**: Pin versions in production for `numpy/scipy/pandas/lifelines`.
|
| 114 |
+
|
| 115 |
+
---
|
| 116 |
+
|
| 117 |
+
## 6) Reference Links or Documentation
|
| 118 |
+
|
| 119 |
+
- Repository: https://github.com/CamDavidsonPilon/lifelines
|
| 120 |
+
- Official docs: https://lifelines.readthedocs.io/
|
| 121 |
+
- Examples: `examples/` directory in the repository
|
| 122 |
+
- Changelog: `CHANGELOG.md` in the repository
|
| 123 |
+
|
| 124 |
+
Optional next step: generate a ready-to-use `service.json` tool schema for these MCP (Model Context Protocol) endpoints.
|
lifelines/mcp_output/analysis.json
ADDED
|
@@ -0,0 +1,1585 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"summary": {
|
| 3 |
+
"repository_url": "https://github.com/CamDavidsonPilon/lifelines",
|
| 4 |
+
"summary": "Imported via zip fallback, file count: 86",
|
| 5 |
+
"file_tree": {
|
| 6 |
+
".github/CODE_OF_CONDUCT.md": {
|
| 7 |
+
"size": 2977
|
| 8 |
+
},
|
| 9 |
+
".github/CONTRIBUTING.md": {
|
| 10 |
+
"size": 2744
|
| 11 |
+
},
|
| 12 |
+
".github/FUNDING.yml": {
|
| 13 |
+
"size": 25
|
| 14 |
+
},
|
| 15 |
+
".github/workflows/ci.yaml": {
|
| 16 |
+
"size": 838
|
| 17 |
+
},
|
| 18 |
+
".github/workflows/pythonpublish.yml": {
|
| 19 |
+
"size": 862
|
| 20 |
+
},
|
| 21 |
+
".pre-commit-config.yaml": {
|
| 22 |
+
"size": 412
|
| 23 |
+
},
|
| 24 |
+
".prospector.yaml": {
|
| 25 |
+
"size": 719
|
| 26 |
+
},
|
| 27 |
+
".readthedocs.yaml": {
|
| 28 |
+
"size": 1035
|
| 29 |
+
},
|
| 30 |
+
"CHANGELOG.md": {
|
| 31 |
+
"size": 69853
|
| 32 |
+
},
|
| 33 |
+
"README.md": {
|
| 34 |
+
"size": 2257
|
| 35 |
+
},
|
| 36 |
+
"conftest.py": {
|
| 37 |
+
"size": 536
|
| 38 |
+
},
|
| 39 |
+
"docs/conf.py": {
|
| 40 |
+
"size": 9430
|
| 41 |
+
},
|
| 42 |
+
"docs/conftest.py": {
|
| 43 |
+
"size": 749
|
| 44 |
+
},
|
| 45 |
+
"docs/docs_requirements.txt": {
|
| 46 |
+
"size": 32
|
| 47 |
+
},
|
| 48 |
+
"docs/images/dist_script.py": {
|
| 49 |
+
"size": 753
|
| 50 |
+
},
|
| 51 |
+
"examples/README.md": {
|
| 52 |
+
"size": 2547
|
| 53 |
+
},
|
| 54 |
+
"examples/aalen_and_cook_simulation.py": {
|
| 55 |
+
"size": 762
|
| 56 |
+
},
|
| 57 |
+
"examples/copula_frailty_weibull_model.py": {
|
| 58 |
+
"size": 1705
|
| 59 |
+
},
|
| 60 |
+
"examples/cox_spline_custom_knots.py": {
|
| 61 |
+
"size": 406
|
| 62 |
+
},
|
| 63 |
+
"examples/crowther_royston_clements_splines.py": {
|
| 64 |
+
"size": 3662
|
| 65 |
+
},
|
| 66 |
+
"examples/cure_model.py": {
|
| 67 |
+
"size": 1126
|
| 68 |
+
},
|
| 69 |
+
"examples/haft_model.py": {
|
| 70 |
+
"size": 1970
|
| 71 |
+
},
|
| 72 |
+
"examples/left_censoring_experiments.py": {
|
| 73 |
+
"size": 1409
|
| 74 |
+
},
|
| 75 |
+
"examples/mixture_cure_model.py": {
|
| 76 |
+
"size": 1580
|
| 77 |
+
},
|
| 78 |
+
"examples/royston_parmar_splines.py": {
|
| 79 |
+
"size": 4818
|
| 80 |
+
},
|
| 81 |
+
"lifelines/__init__.py": {
|
| 82 |
+
"size": 2241
|
| 83 |
+
},
|
| 84 |
+
"lifelines/calibration.py": {
|
| 85 |
+
"size": 4107
|
| 86 |
+
},
|
| 87 |
+
"lifelines/datasets/__init__.py": {
|
| 88 |
+
"size": 19962
|
| 89 |
+
},
|
| 90 |
+
"lifelines/datasets/dfcv_dataset.py": {
|
| 91 |
+
"size": 2700
|
| 92 |
+
},
|
| 93 |
+
"lifelines/exceptions.py": {
|
| 94 |
+
"size": 577
|
| 95 |
+
},
|
| 96 |
+
"lifelines/fitters/__init__.py": {
|
| 97 |
+
"size": 151829
|
| 98 |
+
},
|
| 99 |
+
"lifelines/fitters/aalen_additive_fitter.py": {
|
| 100 |
+
"size": 21527
|
| 101 |
+
},
|
| 102 |
+
"lifelines/fitters/aalen_johansen_fitter.py": {
|
| 103 |
+
"size": 14424
|
| 104 |
+
},
|
| 105 |
+
"lifelines/fitters/breslow_fleming_harrington_fitter.py": {
|
| 106 |
+
"size": 4293
|
| 107 |
+
},
|
| 108 |
+
"lifelines/fitters/cox_time_varying_fitter.py": {
|
| 109 |
+
"size": 34690
|
| 110 |
+
},
|
| 111 |
+
"lifelines/fitters/coxph_fitter.py": {
|
| 112 |
+
"size": 137349
|
| 113 |
+
},
|
| 114 |
+
"lifelines/fitters/crc_spline_fitter.py": {
|
| 115 |
+
"size": 3126
|
| 116 |
+
},
|
| 117 |
+
"lifelines/fitters/exponential_fitter.py": {
|
| 118 |
+
"size": 2857
|
| 119 |
+
},
|
| 120 |
+
"lifelines/fitters/generalized_gamma_fitter.py": {
|
| 121 |
+
"size": 6482
|
| 122 |
+
},
|
| 123 |
+
"lifelines/fitters/generalized_gamma_regression_fitter.py": {
|
| 124 |
+
"size": 7955
|
| 125 |
+
},
|
| 126 |
+
"lifelines/fitters/kaplan_meier_fitter.py": {
|
| 127 |
+
"size": 24209
|
| 128 |
+
},
|
| 129 |
+
"lifelines/fitters/log_logistic_aft_fitter.py": {
|
| 130 |
+
"size": 7074
|
| 131 |
+
},
|
| 132 |
+
"lifelines/fitters/log_logistic_fitter.py": {
|
| 133 |
+
"size": 4004
|
| 134 |
+
},
|
| 135 |
+
"lifelines/fitters/log_normal_aft_fitter.py": {
|
| 136 |
+
"size": 7890
|
| 137 |
+
},
|
| 138 |
+
"lifelines/fitters/log_normal_fitter.py": {
|
| 139 |
+
"size": 3557
|
| 140 |
+
},
|
| 141 |
+
"lifelines/fitters/mixins.py": {
|
| 142 |
+
"size": 12827
|
| 143 |
+
},
|
| 144 |
+
"lifelines/fitters/mixture_cure_fitter.py": {
|
| 145 |
+
"size": 5416
|
| 146 |
+
},
|
| 147 |
+
"lifelines/fitters/nelson_aalen_fitter.py": {
|
| 148 |
+
"size": 10687
|
| 149 |
+
},
|
| 150 |
+
"lifelines/fitters/npmle.py": {
|
| 151 |
+
"size": 10157
|
| 152 |
+
},
|
| 153 |
+
"lifelines/fitters/piecewise_exponential_fitter.py": {
|
| 154 |
+
"size": 3357
|
| 155 |
+
},
|
| 156 |
+
"lifelines/fitters/piecewise_exponential_regression_fitter.py": {
|
| 157 |
+
"size": 4983
|
| 158 |
+
},
|
| 159 |
+
"lifelines/fitters/spline_fitter.py": {
|
| 160 |
+
"size": 4212
|
| 161 |
+
},
|
| 162 |
+
"lifelines/fitters/weibull_aft_fitter.py": {
|
| 163 |
+
"size": 7772
|
| 164 |
+
},
|
| 165 |
+
"lifelines/fitters/weibull_fitter.py": {
|
| 166 |
+
"size": 3771
|
| 167 |
+
},
|
| 168 |
+
"lifelines/generate_datasets.py": {
|
| 169 |
+
"size": 10188
|
| 170 |
+
},
|
| 171 |
+
"lifelines/plotting.py": {
|
| 172 |
+
"size": 35395
|
| 173 |
+
},
|
| 174 |
+
"lifelines/statistics.py": {
|
| 175 |
+
"size": 35225
|
| 176 |
+
},
|
| 177 |
+
"lifelines/tests/__init__.py": {
|
| 178 |
+
"size": 0
|
| 179 |
+
},
|
| 180 |
+
"lifelines/tests/test_estimation.py": {
|
| 181 |
+
"size": 240527
|
| 182 |
+
},
|
| 183 |
+
"lifelines/tests/test_generate_datasets.py": {
|
| 184 |
+
"size": 1033
|
| 185 |
+
},
|
| 186 |
+
"lifelines/tests/test_npmle.py": {
|
| 187 |
+
"size": 3913
|
| 188 |
+
},
|
| 189 |
+
"lifelines/tests/test_plotting.py": {
|
| 190 |
+
"size": 39463
|
| 191 |
+
},
|
| 192 |
+
"lifelines/tests/test_statistics.py": {
|
| 193 |
+
"size": 20418
|
| 194 |
+
},
|
| 195 |
+
"lifelines/tests/utils/test_btree.py": {
|
| 196 |
+
"size": 880
|
| 197 |
+
},
|
| 198 |
+
"lifelines/tests/utils/test_concordance.py": {
|
| 199 |
+
"size": 2666
|
| 200 |
+
},
|
| 201 |
+
"lifelines/tests/utils/test_utils.py": {
|
| 202 |
+
"size": 40823
|
| 203 |
+
},
|
| 204 |
+
"lifelines/utils/__init__.py": {
|
| 205 |
+
"size": 72185
|
| 206 |
+
},
|
| 207 |
+
"lifelines/utils/btree.py": {
|
| 208 |
+
"size": 4369
|
| 209 |
+
},
|
| 210 |
+
"lifelines/utils/concordance.py": {
|
| 211 |
+
"size": 12245
|
| 212 |
+
},
|
| 213 |
+
"lifelines/utils/lowess.py": {
|
| 214 |
+
"size": 2541
|
| 215 |
+
},
|
| 216 |
+
"lifelines/utils/printer.py": {
|
| 217 |
+
"size": 5861
|
| 218 |
+
},
|
| 219 |
+
"lifelines/utils/safe_exp.py": {
|
| 220 |
+
"size": 4350
|
| 221 |
+
},
|
| 222 |
+
"lifelines/version.py": {
|
| 223 |
+
"size": 88
|
| 224 |
+
},
|
| 225 |
+
"mypy.ini": {
|
| 226 |
+
"size": 567
|
| 227 |
+
},
|
| 228 |
+
"paper/paper.md": {
|
| 229 |
+
"size": 7288
|
| 230 |
+
},
|
| 231 |
+
"perf_tests/aaf_perf_test.py": {
|
| 232 |
+
"size": 571
|
| 233 |
+
},
|
| 234 |
+
"perf_tests/batch_vs_single.py": {
|
| 235 |
+
"size": 2716
|
| 236 |
+
},
|
| 237 |
+
"perf_tests/cp_perf_test.py": {
|
| 238 |
+
"size": 674
|
| 239 |
+
},
|
| 240 |
+
"perf_tests/ctv_perf_test.py": {
|
| 241 |
+
"size": 618
|
| 242 |
+
},
|
| 243 |
+
"perf_tests/lognormal_perf_test.py": {
|
| 244 |
+
"size": 572
|
| 245 |
+
},
|
| 246 |
+
"perf_tests/weibull_aft_perf.py": {
|
| 247 |
+
"size": 720
|
| 248 |
+
},
|
| 249 |
+
"perf_tests/weibull_perf_test.py": {
|
| 250 |
+
"size": 769
|
| 251 |
+
},
|
| 252 |
+
"reqs/base-requirements.txt": {
|
| 253 |
+
"size": 111
|
| 254 |
+
},
|
| 255 |
+
"reqs/dev-requirements.txt": {
|
| 256 |
+
"size": 479
|
| 257 |
+
},
|
| 258 |
+
"reqs/docs-requirements.txt": {
|
| 259 |
+
"size": 135
|
| 260 |
+
},
|
| 261 |
+
"setup.py": {
|
| 262 |
+
"size": 1593
|
| 263 |
+
}
|
| 264 |
+
},
|
| 265 |
+
"processed_by": "zip_fallback",
|
| 266 |
+
"success": true
|
| 267 |
+
},
|
| 268 |
+
"structure": {
|
| 269 |
+
"packages": [
|
| 270 |
+
"source.lifelines",
|
| 271 |
+
"source.lifelines.datasets",
|
| 272 |
+
"source.lifelines.fitters",
|
| 273 |
+
"source.lifelines.tests",
|
| 274 |
+
"source.lifelines.utils"
|
| 275 |
+
]
|
| 276 |
+
},
|
| 277 |
+
"dependencies": {
|
| 278 |
+
"has_environment_yml": false,
|
| 279 |
+
"has_requirements_txt": false,
|
| 280 |
+
"pyproject": false,
|
| 281 |
+
"setup_cfg": false,
|
| 282 |
+
"setup_py": true
|
| 283 |
+
},
|
| 284 |
+
"entry_points": {
|
| 285 |
+
"imports": [],
|
| 286 |
+
"cli": [],
|
| 287 |
+
"modules": []
|
| 288 |
+
},
|
| 289 |
+
"llm_analysis": {
|
| 290 |
+
"core_modules": [
|
| 291 |
+
{
|
| 292 |
+
"package": "conftest",
|
| 293 |
+
"module": "conftest",
|
| 294 |
+
"functions": [
|
| 295 |
+
"block",
|
| 296 |
+
"pytest_addoption",
|
| 297 |
+
"pytest_runtest_setup"
|
| 298 |
+
],
|
| 299 |
+
"classes": [],
|
| 300 |
+
"function_signatures": {
|
| 301 |
+
"pytest_runtest_setup": [
|
| 302 |
+
"item"
|
| 303 |
+
],
|
| 304 |
+
"pytest_addoption": [
|
| 305 |
+
"parser"
|
| 306 |
+
],
|
| 307 |
+
"block": [
|
| 308 |
+
"request"
|
| 309 |
+
]
|
| 310 |
+
},
|
| 311 |
+
"description": "Discovered via AST scan"
|
| 312 |
+
},
|
| 313 |
+
{
|
| 314 |
+
"package": "docs",
|
| 315 |
+
"module": "conftest",
|
| 316 |
+
"functions": [
|
| 317 |
+
"tempdir"
|
| 318 |
+
],
|
| 319 |
+
"classes": [],
|
| 320 |
+
"function_signatures": {
|
| 321 |
+
"tempdir": []
|
| 322 |
+
},
|
| 323 |
+
"description": "Discovered via AST scan"
|
| 324 |
+
},
|
| 325 |
+
{
|
| 326 |
+
"package": "docs",
|
| 327 |
+
"module": "conf",
|
| 328 |
+
"functions": [
|
| 329 |
+
"setup"
|
| 330 |
+
],
|
| 331 |
+
"classes": [],
|
| 332 |
+
"function_signatures": {
|
| 333 |
+
"setup": [
|
| 334 |
+
"app"
|
| 335 |
+
]
|
| 336 |
+
},
|
| 337 |
+
"description": "Discovered via AST scan"
|
| 338 |
+
},
|
| 339 |
+
{
|
| 340 |
+
"package": "examples",
|
| 341 |
+
"module": "crowther_royston_clements_splines",
|
| 342 |
+
"functions": [
|
| 343 |
+
"generate_data"
|
| 344 |
+
],
|
| 345 |
+
"classes": [
|
| 346 |
+
"CRCSplineFitter"
|
| 347 |
+
],
|
| 348 |
+
"function_signatures": {
|
| 349 |
+
"generate_data": [
|
| 350 |
+
"n"
|
| 351 |
+
]
|
| 352 |
+
},
|
| 353 |
+
"description": "Discovered via AST scan"
|
| 354 |
+
},
|
| 355 |
+
{
|
| 356 |
+
"package": "examples",
|
| 357 |
+
"module": "royston_parmar_splines",
|
| 358 |
+
"functions": [],
|
| 359 |
+
"classes": [
|
| 360 |
+
"PHSplineFitter",
|
| 361 |
+
"POSplineFitter",
|
| 362 |
+
"SplineFitter",
|
| 363 |
+
"WeibullFitter"
|
| 364 |
+
],
|
| 365 |
+
"function_signatures": {},
|
| 366 |
+
"description": "Discovered via AST scan"
|
| 367 |
+
},
|
| 368 |
+
{
|
| 369 |
+
"package": "examples",
|
| 370 |
+
"module": "cure_model",
|
| 371 |
+
"functions": [],
|
| 372 |
+
"classes": [
|
| 373 |
+
"CureModel"
|
| 374 |
+
],
|
| 375 |
+
"function_signatures": {},
|
| 376 |
+
"description": "Discovered via AST scan"
|
| 377 |
+
},
|
| 378 |
+
{
|
| 379 |
+
"package": "examples",
|
| 380 |
+
"module": "haft_model",
|
| 381 |
+
"functions": [],
|
| 382 |
+
"classes": [
|
| 383 |
+
"HAFT"
|
| 384 |
+
],
|
| 385 |
+
"function_signatures": {},
|
| 386 |
+
"description": "Discovered via AST scan"
|
| 387 |
+
},
|
| 388 |
+
{
|
| 389 |
+
"package": "examples",
|
| 390 |
+
"module": "copula_frailty_weibull_model",
|
| 391 |
+
"functions": [],
|
| 392 |
+
"classes": [
|
| 393 |
+
"CopulaFrailtyWeilbullModel"
|
| 394 |
+
],
|
| 395 |
+
"function_signatures": {},
|
| 396 |
+
"description": "Discovered via AST scan"
|
| 397 |
+
},
|
| 398 |
+
{
|
| 399 |
+
"package": "examples",
|
| 400 |
+
"module": "mixture_cure_model",
|
| 401 |
+
"functions": [],
|
| 402 |
+
"classes": [
|
| 403 |
+
"MixtureCureModel"
|
| 404 |
+
],
|
| 405 |
+
"function_signatures": {},
|
| 406 |
+
"description": "Discovered via AST scan"
|
| 407 |
+
},
|
| 408 |
+
{
|
| 409 |
+
"package": "lifelines",
|
| 410 |
+
"module": "generate_datasets",
|
| 411 |
+
"functions": [
|
| 412 |
+
"constant_",
|
| 413 |
+
"constant_coefficients",
|
| 414 |
+
"construct_survival_curves",
|
| 415 |
+
"cumulative_integral",
|
| 416 |
+
"exp_comp_",
|
| 417 |
+
"exponential_survival_data",
|
| 418 |
+
"generate_covariates",
|
| 419 |
+
"generate_hazard_rates",
|
| 420 |
+
"generate_observational_matrix",
|
| 421 |
+
"generate_random_lifetimes",
|
| 422 |
+
"inverseSq_",
|
| 423 |
+
"log_",
|
| 424 |
+
"periodic_",
|
| 425 |
+
"piecewise_exponential_survival_data",
|
| 426 |
+
"right_censor_lifetimes",
|
| 427 |
+
"time_varying_coefficients"
|
| 428 |
+
],
|
| 429 |
+
"classes": [
|
| 430 |
+
"coeff_func"
|
| 431 |
+
],
|
| 432 |
+
"function_signatures": {
|
| 433 |
+
"piecewise_exponential_survival_data": [
|
| 434 |
+
"n",
|
| 435 |
+
"breakpoints",
|
| 436 |
+
"lambdas"
|
| 437 |
+
],
|
| 438 |
+
"exponential_survival_data": [
|
| 439 |
+
"n",
|
| 440 |
+
"cr",
|
| 441 |
+
"scale"
|
| 442 |
+
],
|
| 443 |
+
"exp_comp_": [
|
| 444 |
+
"t",
|
| 445 |
+
"alpha",
|
| 446 |
+
"beta"
|
| 447 |
+
],
|
| 448 |
+
"log_": [
|
| 449 |
+
"t",
|
| 450 |
+
"alpha",
|
| 451 |
+
"beta"
|
| 452 |
+
],
|
| 453 |
+
"inverseSq_": [
|
| 454 |
+
"t",
|
| 455 |
+
"alpha",
|
| 456 |
+
"beta"
|
| 457 |
+
],
|
| 458 |
+
"periodic_": [
|
| 459 |
+
"t",
|
| 460 |
+
"alpha",
|
| 461 |
+
"beta"
|
| 462 |
+
],
|
| 463 |
+
"constant_": [
|
| 464 |
+
"t",
|
| 465 |
+
"alpha",
|
| 466 |
+
"beta"
|
| 467 |
+
],
|
| 468 |
+
"right_censor_lifetimes": [
|
| 469 |
+
"lifetimes",
|
| 470 |
+
"max_",
|
| 471 |
+
"min_"
|
| 472 |
+
],
|
| 473 |
+
"generate_covariates": [
|
| 474 |
+
"n",
|
| 475 |
+
"d",
|
| 476 |
+
"n_binary",
|
| 477 |
+
"p"
|
| 478 |
+
],
|
| 479 |
+
"constant_coefficients": [
|
| 480 |
+
"d",
|
| 481 |
+
"timelines",
|
| 482 |
+
"constant",
|
| 483 |
+
"independent"
|
| 484 |
+
],
|
| 485 |
+
"time_varying_coefficients": [
|
| 486 |
+
"d",
|
| 487 |
+
"timelines",
|
| 488 |
+
"constant",
|
| 489 |
+
"independent",
|
| 490 |
+
"randgen"
|
| 491 |
+
],
|
| 492 |
+
"generate_hazard_rates": [
|
| 493 |
+
"n",
|
| 494 |
+
"d",
|
| 495 |
+
"timelines",
|
| 496 |
+
"constant",
|
| 497 |
+
"independent",
|
| 498 |
+
"n_binary",
|
| 499 |
+
"model"
|
| 500 |
+
],
|
| 501 |
+
"generate_random_lifetimes": [
|
| 502 |
+
"hazard_rates",
|
| 503 |
+
"timelines",
|
| 504 |
+
"size",
|
| 505 |
+
"censor"
|
| 506 |
+
],
|
| 507 |
+
"generate_observational_matrix": [
|
| 508 |
+
"n",
|
| 509 |
+
"d",
|
| 510 |
+
"timelines",
|
| 511 |
+
"constant",
|
| 512 |
+
"independent",
|
| 513 |
+
"n_binary",
|
| 514 |
+
"model"
|
| 515 |
+
],
|
| 516 |
+
"cumulative_integral": [
|
| 517 |
+
"fx",
|
| 518 |
+
"x"
|
| 519 |
+
],
|
| 520 |
+
"construct_survival_curves": [
|
| 521 |
+
"hazard_rates",
|
| 522 |
+
"timelines"
|
| 523 |
+
]
|
| 524 |
+
},
|
| 525 |
+
"description": "Discovered via AST scan"
|
| 526 |
+
},
|
| 527 |
+
{
|
| 528 |
+
"package": "lifelines",
|
| 529 |
+
"module": "plotting",
|
| 530 |
+
"functions": [
|
| 531 |
+
"add_at_risk_counts",
|
| 532 |
+
"cdf_plot",
|
| 533 |
+
"create_dataframe_slicer",
|
| 534 |
+
"create_scipy_stats_model_from_lifelines_model",
|
| 535 |
+
"get_distribution_name_of_lifelines_model",
|
| 536 |
+
"is_latex_enabled",
|
| 537 |
+
"loglogs_plot",
|
| 538 |
+
"move_spines",
|
| 539 |
+
"plot_interval_censored_lifetimes",
|
| 540 |
+
"plot_lifetimes",
|
| 541 |
+
"qq_plot",
|
| 542 |
+
"remove_spines",
|
| 543 |
+
"remove_ticks",
|
| 544 |
+
"rmst_plot",
|
| 545 |
+
"set_kwargs_color",
|
| 546 |
+
"set_kwargs_drawstyle",
|
| 547 |
+
"set_kwargs_label"
|
| 548 |
+
],
|
| 549 |
+
"classes": [
|
| 550 |
+
"PlotEstimateConfig"
|
| 551 |
+
],
|
| 552 |
+
"function_signatures": {
|
| 553 |
+
"get_distribution_name_of_lifelines_model": [
|
| 554 |
+
"model"
|
| 555 |
+
],
|
| 556 |
+
"create_scipy_stats_model_from_lifelines_model": [
|
| 557 |
+
"model"
|
| 558 |
+
],
|
| 559 |
+
"cdf_plot": [
|
| 560 |
+
"model",
|
| 561 |
+
"timeline",
|
| 562 |
+
"ax"
|
| 563 |
+
],
|
| 564 |
+
"rmst_plot": [
|
| 565 |
+
"model",
|
| 566 |
+
"model2",
|
| 567 |
+
"t",
|
| 568 |
+
"ax",
|
| 569 |
+
"text_position"
|
| 570 |
+
],
|
| 571 |
+
"qq_plot": [
|
| 572 |
+
"model",
|
| 573 |
+
"ax",
|
| 574 |
+
"scatter_color"
|
| 575 |
+
],
|
| 576 |
+
"is_latex_enabled": [],
|
| 577 |
+
"remove_spines": [
|
| 578 |
+
"ax",
|
| 579 |
+
"sides"
|
| 580 |
+
],
|
| 581 |
+
"move_spines": [
|
| 582 |
+
"ax",
|
| 583 |
+
"sides",
|
| 584 |
+
"dists"
|
| 585 |
+
],
|
| 586 |
+
"remove_ticks": [
|
| 587 |
+
"ax",
|
| 588 |
+
"x",
|
| 589 |
+
"y"
|
| 590 |
+
],
|
| 591 |
+
"add_at_risk_counts": [],
|
| 592 |
+
"plot_interval_censored_lifetimes": [
|
| 593 |
+
"lower_bound",
|
| 594 |
+
"upper_bound",
|
| 595 |
+
"entry",
|
| 596 |
+
"left_truncated",
|
| 597 |
+
"sort_by_lower_bound",
|
| 598 |
+
"event_observed_color",
|
| 599 |
+
"event_right_censored_color",
|
| 600 |
+
"ax"
|
| 601 |
+
],
|
| 602 |
+
"plot_lifetimes": [
|
| 603 |
+
"durations",
|
| 604 |
+
"event_observed",
|
| 605 |
+
"entry",
|
| 606 |
+
"left_truncated",
|
| 607 |
+
"sort_by_duration",
|
| 608 |
+
"event_observed_color",
|
| 609 |
+
"event_censored_color",
|
| 610 |
+
"ax"
|
| 611 |
+
],
|
| 612 |
+
"set_kwargs_color": [
|
| 613 |
+
"kwargs"
|
| 614 |
+
],
|
| 615 |
+
"set_kwargs_drawstyle": [
|
| 616 |
+
"kwargs",
|
| 617 |
+
"default"
|
| 618 |
+
],
|
| 619 |
+
"set_kwargs_label": [
|
| 620 |
+
"kwargs",
|
| 621 |
+
"cls"
|
| 622 |
+
],
|
| 623 |
+
"create_dataframe_slicer": [
|
| 624 |
+
"iloc",
|
| 625 |
+
"loc",
|
| 626 |
+
"timeline"
|
| 627 |
+
],
|
| 628 |
+
"loglogs_plot": [
|
| 629 |
+
"cls",
|
| 630 |
+
"loc",
|
| 631 |
+
"iloc",
|
| 632 |
+
"show_censors",
|
| 633 |
+
"censor_styles",
|
| 634 |
+
"ax"
|
| 635 |
+
]
|
| 636 |
+
},
|
| 637 |
+
"description": "Discovered via AST scan"
|
| 638 |
+
},
|
| 639 |
+
{
|
| 640 |
+
"package": "lifelines",
|
| 641 |
+
"module": "exceptions",
|
| 642 |
+
"functions": [],
|
| 643 |
+
"classes": [
|
| 644 |
+
"ApproximationWarning",
|
| 645 |
+
"ConvergenceError",
|
| 646 |
+
"ConvergenceWarning",
|
| 647 |
+
"ProportionalHazardAssumptionError",
|
| 648 |
+
"StatError",
|
| 649 |
+
"StatisticalWarning"
|
| 650 |
+
],
|
| 651 |
+
"function_signatures": {},
|
| 652 |
+
"description": "Discovered via AST scan"
|
| 653 |
+
},
|
| 654 |
+
{
|
| 655 |
+
"package": "lifelines",
|
| 656 |
+
"module": "statistics",
|
| 657 |
+
"functions": [
|
| 658 |
+
"difference_of_restricted_mean_survival_time_test",
|
| 659 |
+
"logrank_test",
|
| 660 |
+
"multivariate_logrank_test",
|
| 661 |
+
"pairwise_logrank_test",
|
| 662 |
+
"power_under_cph",
|
| 663 |
+
"proportional_hazard_test",
|
| 664 |
+
"sample_size_necessary_under_cph",
|
| 665 |
+
"survival_difference_at_fixed_point_in_time_test"
|
| 666 |
+
],
|
| 667 |
+
"classes": [
|
| 668 |
+
"StatisticalResult",
|
| 669 |
+
"TimeTransformers"
|
| 670 |
+
],
|
| 671 |
+
"function_signatures": {
|
| 672 |
+
"sample_size_necessary_under_cph": [
|
| 673 |
+
"power",
|
| 674 |
+
"ratio_of_participants",
|
| 675 |
+
"p_exp",
|
| 676 |
+
"p_con",
|
| 677 |
+
"postulated_hazard_ratio",
|
| 678 |
+
"alpha"
|
| 679 |
+
],
|
| 680 |
+
"power_under_cph": [
|
| 681 |
+
"n_exp",
|
| 682 |
+
"n_con",
|
| 683 |
+
"p_exp",
|
| 684 |
+
"p_con",
|
| 685 |
+
"postulated_hazard_ratio",
|
| 686 |
+
"alpha"
|
| 687 |
+
],
|
| 688 |
+
"survival_difference_at_fixed_point_in_time_test": [
|
| 689 |
+
"point_in_time",
|
| 690 |
+
"fitterA",
|
| 691 |
+
"fitterB"
|
| 692 |
+
],
|
| 693 |
+
"logrank_test": [
|
| 694 |
+
"durations_A",
|
| 695 |
+
"durations_B",
|
| 696 |
+
"event_observed_A",
|
| 697 |
+
"event_observed_B",
|
| 698 |
+
"t_0",
|
| 699 |
+
"weights_A",
|
| 700 |
+
"weights_B",
|
| 701 |
+
"weightings"
|
| 702 |
+
],
|
| 703 |
+
"pairwise_logrank_test": [
|
| 704 |
+
"event_durations",
|
| 705 |
+
"groups",
|
| 706 |
+
"event_observed",
|
| 707 |
+
"t_0",
|
| 708 |
+
"weightings"
|
| 709 |
+
],
|
| 710 |
+
"difference_of_restricted_mean_survival_time_test": [
|
| 711 |
+
"model1",
|
| 712 |
+
"model2",
|
| 713 |
+
"t"
|
| 714 |
+
],
|
| 715 |
+
"multivariate_logrank_test": [
|
| 716 |
+
"event_durations",
|
| 717 |
+
"groups",
|
| 718 |
+
"event_observed",
|
| 719 |
+
"weights",
|
| 720 |
+
"t_0",
|
| 721 |
+
"weightings"
|
| 722 |
+
],
|
| 723 |
+
"proportional_hazard_test": [
|
| 724 |
+
"fitted_cox_model",
|
| 725 |
+
"training_df",
|
| 726 |
+
"time_transform",
|
| 727 |
+
"precomputed_residuals"
|
| 728 |
+
]
|
| 729 |
+
},
|
| 730 |
+
"description": "Discovered via AST scan"
|
| 731 |
+
},
|
| 732 |
+
{
|
| 733 |
+
"package": "lifelines",
|
| 734 |
+
"module": "calibration",
|
| 735 |
+
"functions": [
|
| 736 |
+
"survival_probability_calibration"
|
| 737 |
+
],
|
| 738 |
+
"classes": [],
|
| 739 |
+
"function_signatures": {
|
| 740 |
+
"survival_probability_calibration": [
|
| 741 |
+
"model",
|
| 742 |
+
"df",
|
| 743 |
+
"t0",
|
| 744 |
+
"ax"
|
| 745 |
+
]
|
| 746 |
+
},
|
| 747 |
+
"description": "Discovered via AST scan"
|
| 748 |
+
},
|
| 749 |
+
{
|
| 750 |
+
"package": "lifelines",
|
| 751 |
+
"module": "datasets",
|
| 752 |
+
"functions": [
|
| 753 |
+
"load_c_botulinum_lag_phase",
|
| 754 |
+
"load_canadian_senators",
|
| 755 |
+
"load_dd",
|
| 756 |
+
"load_dfcv",
|
| 757 |
+
"load_diabetes",
|
| 758 |
+
"load_g3",
|
| 759 |
+
"load_gbsg2",
|
| 760 |
+
"load_holly_molly_polly",
|
| 761 |
+
"load_kidney_transplant",
|
| 762 |
+
"load_larynx",
|
| 763 |
+
"load_lcd",
|
| 764 |
+
"load_leukemia",
|
| 765 |
+
"load_lung",
|
| 766 |
+
"load_lupus",
|
| 767 |
+
"load_lymph_node",
|
| 768 |
+
"load_lymphoma",
|
| 769 |
+
"load_mice",
|
| 770 |
+
"load_multicenter_aids_cohort_study",
|
| 771 |
+
"load_nh4",
|
| 772 |
+
"load_panel_test",
|
| 773 |
+
"load_psychiatric_patients",
|
| 774 |
+
"load_recur",
|
| 775 |
+
"load_regression_dataset",
|
| 776 |
+
"load_rossi",
|
| 777 |
+
"load_stanford_heart_transplants",
|
| 778 |
+
"load_static_test",
|
| 779 |
+
"load_waltons"
|
| 780 |
+
],
|
| 781 |
+
"classes": [],
|
| 782 |
+
"function_signatures": {
|
| 783 |
+
"load_recur": [],
|
| 784 |
+
"load_multicenter_aids_cohort_study": [],
|
| 785 |
+
"load_holly_molly_polly": [],
|
| 786 |
+
"load_leukemia": [],
|
| 787 |
+
"load_canadian_senators": [],
|
| 788 |
+
"load_dd": [],
|
| 789 |
+
"load_kidney_transplant": [],
|
| 790 |
+
"load_larynx": [],
|
| 791 |
+
"load_lung": [],
|
| 792 |
+
"load_panel_test": [],
|
| 793 |
+
"load_psychiatric_patients": [],
|
| 794 |
+
"load_static_test": [],
|
| 795 |
+
"load_lcd": [],
|
| 796 |
+
"load_nh4": [],
|
| 797 |
+
"load_waltons": [],
|
| 798 |
+
"load_rossi": [],
|
| 799 |
+
"load_regression_dataset": [],
|
| 800 |
+
"load_g3": [],
|
| 801 |
+
"load_stanford_heart_transplants": [],
|
| 802 |
+
"load_gbsg2": [],
|
| 803 |
+
"load_dfcv": [],
|
| 804 |
+
"load_lymphoma": [],
|
| 805 |
+
"load_diabetes": [],
|
| 806 |
+
"load_lupus": [],
|
| 807 |
+
"load_lymph_node": [],
|
| 808 |
+
"load_c_botulinum_lag_phase": [],
|
| 809 |
+
"load_mice": []
|
| 810 |
+
},
|
| 811 |
+
"description": "Discovered via AST scan"
|
| 812 |
+
},
|
| 813 |
+
{
|
| 814 |
+
"package": "lifelines.utils",
|
| 815 |
+
"module": "lowess",
|
| 816 |
+
"functions": [
|
| 817 |
+
"lowess"
|
| 818 |
+
],
|
| 819 |
+
"classes": [],
|
| 820 |
+
"function_signatures": {
|
| 821 |
+
"lowess": [
|
| 822 |
+
"x",
|
| 823 |
+
"y",
|
| 824 |
+
"f",
|
| 825 |
+
"iterations"
|
| 826 |
+
]
|
| 827 |
+
},
|
| 828 |
+
"description": "Discovered via AST scan"
|
| 829 |
+
},
|
| 830 |
+
{
|
| 831 |
+
"package": "lifelines",
|
| 832 |
+
"module": "utils",
|
| 833 |
+
"functions": [
|
| 834 |
+
"add_covariate_to_timeline",
|
| 835 |
+
"check_complete_separation",
|
| 836 |
+
"check_complete_separation_close_to_perfect_correlation",
|
| 837 |
+
"check_complete_separation_low_variance",
|
| 838 |
+
"check_dimensions",
|
| 839 |
+
"check_entry_times",
|
| 840 |
+
"check_for_immediate_deaths",
|
| 841 |
+
"check_for_instantaneous_events_at_death_time",
|
| 842 |
+
"check_for_instantaneous_events_at_time_zero",
|
| 843 |
+
"check_for_nonnegative_intervals",
|
| 844 |
+
"check_for_numeric_dtypes_or_raise",
|
| 845 |
+
"check_for_overlapping_intervals",
|
| 846 |
+
"check_low_var",
|
| 847 |
+
"check_nans_or_infs",
|
| 848 |
+
"check_positivity",
|
| 849 |
+
"check_scaling",
|
| 850 |
+
"coalesce",
|
| 851 |
+
"covariates_from_event_matrix",
|
| 852 |
+
"datetimes_to_durations",
|
| 853 |
+
"epanechnikov_kernel",
|
| 854 |
+
"find_best_parametric_model",
|
| 855 |
+
"format_exp_floats",
|
| 856 |
+
"format_floats",
|
| 857 |
+
"format_p_value",
|
| 858 |
+
"group_survival_table_from_events",
|
| 859 |
+
"interpolate_at_times",
|
| 860 |
+
"interpolate_at_times_and_return_pandas",
|
| 861 |
+
"inv_normal_cdf",
|
| 862 |
+
"k_fold_cross_validation",
|
| 863 |
+
"leading_space",
|
| 864 |
+
"make_simpliest_hashable",
|
| 865 |
+
"map_leading_space",
|
| 866 |
+
"median_survival_times",
|
| 867 |
+
"normalize",
|
| 868 |
+
"pass_for_numeric_dtypes_or_raise_array",
|
| 869 |
+
"pearson_correlation",
|
| 870 |
+
"qth_survival_time",
|
| 871 |
+
"qth_survival_times",
|
| 872 |
+
"quiet_log2",
|
| 873 |
+
"restricted_mean_survival_time",
|
| 874 |
+
"ridge_regression",
|
| 875 |
+
"safe_zip",
|
| 876 |
+
"survival_events_from_table",
|
| 877 |
+
"survival_table_from_events",
|
| 878 |
+
"to_episodic_format",
|
| 879 |
+
"to_long_format",
|
| 880 |
+
"unnormalize"
|
| 881 |
+
],
|
| 882 |
+
"classes": [
|
| 883 |
+
"CensoringType",
|
| 884 |
+
"CovariateParameterMappings",
|
| 885 |
+
"DataframeSlicer",
|
| 886 |
+
"LinearAccumulator",
|
| 887 |
+
"QuadraticAccumulator",
|
| 888 |
+
"StepSizer"
|
| 889 |
+
],
|
| 890 |
+
"function_signatures": {
|
| 891 |
+
"qth_survival_times": [
|
| 892 |
+
"q",
|
| 893 |
+
"survival_functions"
|
| 894 |
+
],
|
| 895 |
+
"qth_survival_time": [
|
| 896 |
+
"q",
|
| 897 |
+
"model_or_survival_function"
|
| 898 |
+
],
|
| 899 |
+
"median_survival_times": [
|
| 900 |
+
"model_or_survival_function"
|
| 901 |
+
],
|
| 902 |
+
"restricted_mean_survival_time": [
|
| 903 |
+
"model_or_survival_function",
|
| 904 |
+
"t",
|
| 905 |
+
"return_variance"
|
| 906 |
+
],
|
| 907 |
+
"group_survival_table_from_events": [
|
| 908 |
+
"groups",
|
| 909 |
+
"durations",
|
| 910 |
+
"event_observed",
|
| 911 |
+
"birth_times",
|
| 912 |
+
"weights",
|
| 913 |
+
"limit"
|
| 914 |
+
],
|
| 915 |
+
"survival_table_from_events": [
|
| 916 |
+
"death_times",
|
| 917 |
+
"event_observed",
|
| 918 |
+
"birth_times",
|
| 919 |
+
"columns",
|
| 920 |
+
"weights",
|
| 921 |
+
"collapse",
|
| 922 |
+
"intervals"
|
| 923 |
+
],
|
| 924 |
+
"survival_events_from_table": [
|
| 925 |
+
"survival_table",
|
| 926 |
+
"observed_deaths_col",
|
| 927 |
+
"censored_col"
|
| 928 |
+
],
|
| 929 |
+
"datetimes_to_durations": [
|
| 930 |
+
"start_times",
|
| 931 |
+
"end_times",
|
| 932 |
+
"fill_date",
|
| 933 |
+
"freq",
|
| 934 |
+
"dayfirst",
|
| 935 |
+
"na_values",
|
| 936 |
+
"format"
|
| 937 |
+
],
|
| 938 |
+
"coalesce": [],
|
| 939 |
+
"inv_normal_cdf": [
|
| 940 |
+
"p"
|
| 941 |
+
],
|
| 942 |
+
"k_fold_cross_validation": [
|
| 943 |
+
"fitters",
|
| 944 |
+
"df",
|
| 945 |
+
"duration_col",
|
| 946 |
+
"event_col",
|
| 947 |
+
"k",
|
| 948 |
+
"scoring_method",
|
| 949 |
+
"fitter_kwargs",
|
| 950 |
+
"seed"
|
| 951 |
+
],
|
| 952 |
+
"normalize": [
|
| 953 |
+
"X",
|
| 954 |
+
"mean",
|
| 955 |
+
"std"
|
| 956 |
+
],
|
| 957 |
+
"unnormalize": [
|
| 958 |
+
"X",
|
| 959 |
+
"mean",
|
| 960 |
+
"std"
|
| 961 |
+
],
|
| 962 |
+
"epanechnikov_kernel": [
|
| 963 |
+
"t",
|
| 964 |
+
"T",
|
| 965 |
+
"bandwidth"
|
| 966 |
+
],
|
| 967 |
+
"ridge_regression": [
|
| 968 |
+
"X",
|
| 969 |
+
"Y",
|
| 970 |
+
"c1",
|
| 971 |
+
"c2",
|
| 972 |
+
"offset",
|
| 973 |
+
"ix"
|
| 974 |
+
],
|
| 975 |
+
"pass_for_numeric_dtypes_or_raise_array": [
|
| 976 |
+
"x"
|
| 977 |
+
],
|
| 978 |
+
"check_scaling": [
|
| 979 |
+
"df"
|
| 980 |
+
],
|
| 981 |
+
"check_dimensions": [
|
| 982 |
+
"df"
|
| 983 |
+
],
|
| 984 |
+
"check_for_numeric_dtypes_or_raise": [
|
| 985 |
+
"df"
|
| 986 |
+
],
|
| 987 |
+
"check_for_nonnegative_intervals": [
|
| 988 |
+
"start",
|
| 989 |
+
"stop"
|
| 990 |
+
],
|
| 991 |
+
"check_for_immediate_deaths": [
|
| 992 |
+
"events",
|
| 993 |
+
"start",
|
| 994 |
+
"stop"
|
| 995 |
+
],
|
| 996 |
+
"check_for_instantaneous_events_at_time_zero": [
|
| 997 |
+
"start",
|
| 998 |
+
"stop"
|
| 999 |
+
],
|
| 1000 |
+
"check_for_instantaneous_events_at_death_time": [
|
| 1001 |
+
"events",
|
| 1002 |
+
"start",
|
| 1003 |
+
"stop"
|
| 1004 |
+
],
|
| 1005 |
+
"check_for_overlapping_intervals": [
|
| 1006 |
+
"df"
|
| 1007 |
+
],
|
| 1008 |
+
"check_positivity": [
|
| 1009 |
+
"array"
|
| 1010 |
+
],
|
| 1011 |
+
"check_low_var": [
|
| 1012 |
+
"df",
|
| 1013 |
+
"prescript",
|
| 1014 |
+
"postscript"
|
| 1015 |
+
],
|
| 1016 |
+
"check_complete_separation_low_variance": [
|
| 1017 |
+
"df",
|
| 1018 |
+
"events",
|
| 1019 |
+
"event_col"
|
| 1020 |
+
],
|
| 1021 |
+
"pearson_correlation": [
|
| 1022 |
+
"x",
|
| 1023 |
+
"y"
|
| 1024 |
+
],
|
| 1025 |
+
"check_entry_times": [
|
| 1026 |
+
"T",
|
| 1027 |
+
"entries"
|
| 1028 |
+
],
|
| 1029 |
+
"check_complete_separation_close_to_perfect_correlation": [
|
| 1030 |
+
"df",
|
| 1031 |
+
"durations"
|
| 1032 |
+
],
|
| 1033 |
+
"check_complete_separation": [
|
| 1034 |
+
"df",
|
| 1035 |
+
"events",
|
| 1036 |
+
"durations",
|
| 1037 |
+
"event_col"
|
| 1038 |
+
],
|
| 1039 |
+
"check_nans_or_infs": [
|
| 1040 |
+
"df_or_array"
|
| 1041 |
+
],
|
| 1042 |
+
"to_episodic_format": [
|
| 1043 |
+
"df",
|
| 1044 |
+
"duration_col",
|
| 1045 |
+
"event_col",
|
| 1046 |
+
"id_col",
|
| 1047 |
+
"time_gaps"
|
| 1048 |
+
],
|
| 1049 |
+
"to_long_format": [
|
| 1050 |
+
"df",
|
| 1051 |
+
"duration_col"
|
| 1052 |
+
],
|
| 1053 |
+
"add_covariate_to_timeline": [
|
| 1054 |
+
"long_form_df",
|
| 1055 |
+
"cv",
|
| 1056 |
+
"id_col",
|
| 1057 |
+
"duration_col",
|
| 1058 |
+
"event_col",
|
| 1059 |
+
"start_col",
|
| 1060 |
+
"stop_col",
|
| 1061 |
+
"add_enum",
|
| 1062 |
+
"overwrite",
|
| 1063 |
+
"cumulative_sum",
|
| 1064 |
+
"cumulative_sum_prefix",
|
| 1065 |
+
"delay"
|
| 1066 |
+
],
|
| 1067 |
+
"covariates_from_event_matrix": [
|
| 1068 |
+
"df",
|
| 1069 |
+
"id_col"
|
| 1070 |
+
],
|
| 1071 |
+
"format_p_value": [
|
| 1072 |
+
"decimals"
|
| 1073 |
+
],
|
| 1074 |
+
"format_exp_floats": [
|
| 1075 |
+
"decimals"
|
| 1076 |
+
],
|
| 1077 |
+
"format_floats": [
|
| 1078 |
+
"decimals"
|
| 1079 |
+
],
|
| 1080 |
+
"leading_space": [
|
| 1081 |
+
"s"
|
| 1082 |
+
],
|
| 1083 |
+
"map_leading_space": [
|
| 1084 |
+
"list"
|
| 1085 |
+
],
|
| 1086 |
+
"interpolate_at_times": [
|
| 1087 |
+
"df_or_series",
|
| 1088 |
+
"new_times"
|
| 1089 |
+
],
|
| 1090 |
+
"interpolate_at_times_and_return_pandas": [
|
| 1091 |
+
"df_or_series",
|
| 1092 |
+
"new_times"
|
| 1093 |
+
],
|
| 1094 |
+
"safe_zip": [
|
| 1095 |
+
"first",
|
| 1096 |
+
"second"
|
| 1097 |
+
],
|
| 1098 |
+
"make_simpliest_hashable": [
|
| 1099 |
+
"ele"
|
| 1100 |
+
],
|
| 1101 |
+
"find_best_parametric_model": [
|
| 1102 |
+
"event_times",
|
| 1103 |
+
"event_observed",
|
| 1104 |
+
"scoring_method",
|
| 1105 |
+
"additional_models",
|
| 1106 |
+
"censoring_type",
|
| 1107 |
+
"timeline",
|
| 1108 |
+
"alpha",
|
| 1109 |
+
"ci_labels",
|
| 1110 |
+
"entry",
|
| 1111 |
+
"weights",
|
| 1112 |
+
"show_progress"
|
| 1113 |
+
],
|
| 1114 |
+
"quiet_log2": [
|
| 1115 |
+
"p"
|
| 1116 |
+
]
|
| 1117 |
+
},
|
| 1118 |
+
"description": "Discovered via AST scan"
|
| 1119 |
+
},
|
| 1120 |
+
{
|
| 1121 |
+
"package": "lifelines.utils",
|
| 1122 |
+
"module": "concordance",
|
| 1123 |
+
"functions": [
|
| 1124 |
+
"concordance_index",
|
| 1125 |
+
"naive_concordance_index",
|
| 1126 |
+
"somers_d"
|
| 1127 |
+
],
|
| 1128 |
+
"classes": [],
|
| 1129 |
+
"function_signatures": {
|
| 1130 |
+
"somers_d": [
|
| 1131 |
+
"event_times",
|
| 1132 |
+
"x",
|
| 1133 |
+
"event_observed"
|
| 1134 |
+
],
|
| 1135 |
+
"concordance_index": [
|
| 1136 |
+
"event_times",
|
| 1137 |
+
"predicted_scores",
|
| 1138 |
+
"event_observed"
|
| 1139 |
+
],
|
| 1140 |
+
"naive_concordance_index": [
|
| 1141 |
+
"event_times",
|
| 1142 |
+
"predicted_event_times",
|
| 1143 |
+
"event_observed"
|
| 1144 |
+
]
|
| 1145 |
+
},
|
| 1146 |
+
"description": "Discovered via AST scan"
|
| 1147 |
+
},
|
| 1148 |
+
{
|
| 1149 |
+
"package": "lifelines.utils",
|
| 1150 |
+
"module": "printer",
|
| 1151 |
+
"functions": [],
|
| 1152 |
+
"classes": [
|
| 1153 |
+
"Printer"
|
| 1154 |
+
],
|
| 1155 |
+
"function_signatures": {},
|
| 1156 |
+
"description": "Discovered via AST scan"
|
| 1157 |
+
},
|
| 1158 |
+
{
|
| 1159 |
+
"package": "lifelines.utils",
|
| 1160 |
+
"module": "safe_exp",
|
| 1161 |
+
"functions": [
|
| 1162 |
+
"safe_exp",
|
| 1163 |
+
"safe_exp_vjp"
|
| 1164 |
+
],
|
| 1165 |
+
"classes": [],
|
| 1166 |
+
"function_signatures": {
|
| 1167 |
+
"safe_exp_vjp": [
|
| 1168 |
+
"ans",
|
| 1169 |
+
"x"
|
| 1170 |
+
],
|
| 1171 |
+
"safe_exp": [
|
| 1172 |
+
"x"
|
| 1173 |
+
]
|
| 1174 |
+
},
|
| 1175 |
+
"description": "Discovered via AST scan"
|
| 1176 |
+
},
|
| 1177 |
+
{
|
| 1178 |
+
"package": "lifelines.fitters",
|
| 1179 |
+
"module": "mixture_cure_fitter",
|
| 1180 |
+
"functions": [],
|
| 1181 |
+
"classes": [
|
| 1182 |
+
"MixtureCureFitter"
|
| 1183 |
+
],
|
| 1184 |
+
"function_signatures": {},
|
| 1185 |
+
"description": "Discovered via AST scan"
|
| 1186 |
+
},
|
| 1187 |
+
{
|
| 1188 |
+
"package": "lifelines.fitters",
|
| 1189 |
+
"module": "exponential_fitter",
|
| 1190 |
+
"functions": [],
|
| 1191 |
+
"classes": [
|
| 1192 |
+
"ExponentialFitter"
|
| 1193 |
+
],
|
| 1194 |
+
"function_signatures": {},
|
| 1195 |
+
"description": "Discovered via AST scan"
|
| 1196 |
+
},
|
| 1197 |
+
{
|
| 1198 |
+
"package": "lifelines.fitters",
|
| 1199 |
+
"module": "aalen_johansen_fitter",
|
| 1200 |
+
"functions": [],
|
| 1201 |
+
"classes": [
|
| 1202 |
+
"AalenJohansenFitter"
|
| 1203 |
+
],
|
| 1204 |
+
"function_signatures": {},
|
| 1205 |
+
"description": "Discovered via AST scan"
|
| 1206 |
+
},
|
| 1207 |
+
{
|
| 1208 |
+
"package": "lifelines.fitters",
|
| 1209 |
+
"module": "breslow_fleming_harrington_fitter",
|
| 1210 |
+
"functions": [],
|
| 1211 |
+
"classes": [
|
| 1212 |
+
"BreslowFlemingHarringtonFitter"
|
| 1213 |
+
],
|
| 1214 |
+
"function_signatures": {},
|
| 1215 |
+
"description": "Discovered via AST scan"
|
| 1216 |
+
},
|
| 1217 |
+
{
|
| 1218 |
+
"package": "lifelines.fitters",
|
| 1219 |
+
"module": "mixins",
|
| 1220 |
+
"functions": [],
|
| 1221 |
+
"classes": [
|
| 1222 |
+
"ProportionalHazardMixin",
|
| 1223 |
+
"SplineFitterMixin"
|
| 1224 |
+
],
|
| 1225 |
+
"function_signatures": {},
|
| 1226 |
+
"description": "Discovered via AST scan"
|
| 1227 |
+
},
|
| 1228 |
+
{
|
| 1229 |
+
"package": "lifelines.fitters",
|
| 1230 |
+
"module": "nelson_aalen_fitter",
|
| 1231 |
+
"functions": [],
|
| 1232 |
+
"classes": [
|
| 1233 |
+
"NelsonAalenFitter"
|
| 1234 |
+
],
|
| 1235 |
+
"function_signatures": {},
|
| 1236 |
+
"description": "Discovered via AST scan"
|
| 1237 |
+
},
|
| 1238 |
+
{
|
| 1239 |
+
"package": "lifelines.fitters",
|
| 1240 |
+
"module": "log_normal_aft_fitter",
|
| 1241 |
+
"functions": [],
|
| 1242 |
+
"classes": [
|
| 1243 |
+
"LogNormalAFTFitter"
|
| 1244 |
+
],
|
| 1245 |
+
"function_signatures": {},
|
| 1246 |
+
"description": "Discovered via AST scan"
|
| 1247 |
+
},
|
| 1248 |
+
{
|
| 1249 |
+
"package": "lifelines.fitters",
|
| 1250 |
+
"module": "piecewise_exponential_regression_fitter",
|
| 1251 |
+
"functions": [],
|
| 1252 |
+
"classes": [
|
| 1253 |
+
"PiecewiseExponentialRegressionFitter"
|
| 1254 |
+
],
|
| 1255 |
+
"function_signatures": {},
|
| 1256 |
+
"description": "Discovered via AST scan"
|
| 1257 |
+
},
|
| 1258 |
+
{
|
| 1259 |
+
"package": "lifelines",
|
| 1260 |
+
"module": "fitters",
|
| 1261 |
+
"functions": [],
|
| 1262 |
+
"classes": [
|
| 1263 |
+
"BaseFitter",
|
| 1264 |
+
"KnownModelParametricUnivariateFitter",
|
| 1265 |
+
"NonParametricUnivariateFitter",
|
| 1266 |
+
"ParametericAFTRegressionFitter",
|
| 1267 |
+
"ParametricRegressionFitter",
|
| 1268 |
+
"ParametricUnivariateFitter",
|
| 1269 |
+
"RegressionFitter",
|
| 1270 |
+
"SemiParametricRegressionFitter",
|
| 1271 |
+
"UnivariateFitter"
|
| 1272 |
+
],
|
| 1273 |
+
"function_signatures": {},
|
| 1274 |
+
"description": "Discovered via AST scan"
|
| 1275 |
+
},
|
| 1276 |
+
{
|
| 1277 |
+
"package": "lifelines.fitters",
|
| 1278 |
+
"module": "log_logistic_fitter",
|
| 1279 |
+
"functions": [],
|
| 1280 |
+
"classes": [
|
| 1281 |
+
"LogLogisticFitter"
|
| 1282 |
+
],
|
| 1283 |
+
"function_signatures": {},
|
| 1284 |
+
"description": "Discovered via AST scan"
|
| 1285 |
+
},
|
| 1286 |
+
{
|
| 1287 |
+
"package": "lifelines.fitters",
|
| 1288 |
+
"module": "weibull_fitter",
|
| 1289 |
+
"functions": [],
|
| 1290 |
+
"classes": [
|
| 1291 |
+
"WeibullFitter"
|
| 1292 |
+
],
|
| 1293 |
+
"function_signatures": {},
|
| 1294 |
+
"description": "Discovered via AST scan"
|
| 1295 |
+
},
|
| 1296 |
+
{
|
| 1297 |
+
"package": "lifelines.fitters",
|
| 1298 |
+
"module": "piecewise_exponential_fitter",
|
| 1299 |
+
"functions": [],
|
| 1300 |
+
"classes": [
|
| 1301 |
+
"PiecewiseExponentialFitter"
|
| 1302 |
+
],
|
| 1303 |
+
"function_signatures": {},
|
| 1304 |
+
"description": "Discovered via AST scan"
|
| 1305 |
+
},
|
| 1306 |
+
{
|
| 1307 |
+
"package": "lifelines.fitters",
|
| 1308 |
+
"module": "coxph_fitter",
|
| 1309 |
+
"functions": [],
|
| 1310 |
+
"classes": [
|
| 1311 |
+
"CoxPHFitter",
|
| 1312 |
+
"ParametricCoxModelFitter",
|
| 1313 |
+
"ParametricPiecewiseBaselinePHFitter",
|
| 1314 |
+
"ParametricSplinePHFitter",
|
| 1315 |
+
"SemiParametricPHFitter"
|
| 1316 |
+
],
|
| 1317 |
+
"function_signatures": {},
|
| 1318 |
+
"description": "Discovered via AST scan"
|
| 1319 |
+
},
|
| 1320 |
+
{
|
| 1321 |
+
"package": "lifelines.fitters",
|
| 1322 |
+
"module": "generalized_gamma_fitter",
|
| 1323 |
+
"functions": [],
|
| 1324 |
+
"classes": [
|
| 1325 |
+
"GeneralizedGammaFitter"
|
| 1326 |
+
],
|
| 1327 |
+
"function_signatures": {},
|
| 1328 |
+
"description": "Discovered via AST scan"
|
| 1329 |
+
},
|
| 1330 |
+
{
|
| 1331 |
+
"package": "lifelines.fitters",
|
| 1332 |
+
"module": "aalen_additive_fitter",
|
| 1333 |
+
"functions": [],
|
| 1334 |
+
"classes": [
|
| 1335 |
+
"AalenAdditiveFitter"
|
| 1336 |
+
],
|
| 1337 |
+
"function_signatures": {},
|
| 1338 |
+
"description": "Discovered via AST scan"
|
| 1339 |
+
},
|
| 1340 |
+
{
|
| 1341 |
+
"package": "lifelines.fitters",
|
| 1342 |
+
"module": "log_logistic_aft_fitter",
|
| 1343 |
+
"functions": [],
|
| 1344 |
+
"classes": [
|
| 1345 |
+
"LogLogisticAFTFitter"
|
| 1346 |
+
],
|
| 1347 |
+
"function_signatures": {},
|
| 1348 |
+
"description": "Discovered via AST scan"
|
| 1349 |
+
},
|
| 1350 |
+
{
|
| 1351 |
+
"package": "lifelines.fitters",
|
| 1352 |
+
"module": "crc_spline_fitter",
|
| 1353 |
+
"functions": [],
|
| 1354 |
+
"classes": [
|
| 1355 |
+
"CRCSplineFitter"
|
| 1356 |
+
],
|
| 1357 |
+
"function_signatures": {},
|
| 1358 |
+
"description": "Discovered via AST scan"
|
| 1359 |
+
},
|
| 1360 |
+
{
|
| 1361 |
+
"package": "lifelines.fitters",
|
| 1362 |
+
"module": "cox_time_varying_fitter",
|
| 1363 |
+
"functions": [],
|
| 1364 |
+
"classes": [
|
| 1365 |
+
"CoxTimeVaryingFitter"
|
| 1366 |
+
],
|
| 1367 |
+
"function_signatures": {},
|
| 1368 |
+
"description": "Discovered via AST scan"
|
| 1369 |
+
},
|
| 1370 |
+
{
|
| 1371 |
+
"package": "lifelines.fitters",
|
| 1372 |
+
"module": "npmle",
|
| 1373 |
+
"functions": [
|
| 1374 |
+
"E_step_M_step",
|
| 1375 |
+
"check_convergence",
|
| 1376 |
+
"create_observation_intervals",
|
| 1377 |
+
"create_turnbull_intervals",
|
| 1378 |
+
"create_turnbull_lookup",
|
| 1379 |
+
"cumulative_sum",
|
| 1380 |
+
"expectation_maximization_fit",
|
| 1381 |
+
"is_subset",
|
| 1382 |
+
"log_likelihood",
|
| 1383 |
+
"log_odds",
|
| 1384 |
+
"npmle",
|
| 1385 |
+
"npmle_compute_confidence_intervals",
|
| 1386 |
+
"probs",
|
| 1387 |
+
"reconstruct_survival_function",
|
| 1388 |
+
"scipy_minimize_fit",
|
| 1389 |
+
"temper"
|
| 1390 |
+
],
|
| 1391 |
+
"classes": [
|
| 1392 |
+
"min_max"
|
| 1393 |
+
],
|
| 1394 |
+
"function_signatures": {
|
| 1395 |
+
"temper": [
|
| 1396 |
+
"i",
|
| 1397 |
+
"optimize"
|
| 1398 |
+
],
|
| 1399 |
+
"E_step_M_step": [
|
| 1400 |
+
"observation_intervals",
|
| 1401 |
+
"p_old",
|
| 1402 |
+
"turnbull_interval_lookup",
|
| 1403 |
+
"weights",
|
| 1404 |
+
"i",
|
| 1405 |
+
"optimize"
|
| 1406 |
+
],
|
| 1407 |
+
"cumulative_sum": [
|
| 1408 |
+
"p"
|
| 1409 |
+
],
|
| 1410 |
+
"create_turnbull_intervals": [
|
| 1411 |
+
"left",
|
| 1412 |
+
"right"
|
| 1413 |
+
],
|
| 1414 |
+
"is_subset": [
|
| 1415 |
+
"query_interval",
|
| 1416 |
+
"super_interval"
|
| 1417 |
+
],
|
| 1418 |
+
"create_turnbull_lookup": [
|
| 1419 |
+
"turnbull_intervals",
|
| 1420 |
+
"observation_intervals"
|
| 1421 |
+
],
|
| 1422 |
+
"check_convergence": [
|
| 1423 |
+
"p_new",
|
| 1424 |
+
"p_old",
|
| 1425 |
+
"turnbull_lookup",
|
| 1426 |
+
"weights",
|
| 1427 |
+
"tol",
|
| 1428 |
+
"i",
|
| 1429 |
+
"verbose"
|
| 1430 |
+
],
|
| 1431 |
+
"create_observation_intervals": [
|
| 1432 |
+
"obs"
|
| 1433 |
+
],
|
| 1434 |
+
"log_odds": [
|
| 1435 |
+
"p"
|
| 1436 |
+
],
|
| 1437 |
+
"probs": [
|
| 1438 |
+
"log_odds"
|
| 1439 |
+
],
|
| 1440 |
+
"npmle": [
|
| 1441 |
+
"left",
|
| 1442 |
+
"right",
|
| 1443 |
+
"tol",
|
| 1444 |
+
"weights",
|
| 1445 |
+
"verbose",
|
| 1446 |
+
"max_iter",
|
| 1447 |
+
"optimize",
|
| 1448 |
+
"fit_method"
|
| 1449 |
+
],
|
| 1450 |
+
"scipy_minimize_fit": [
|
| 1451 |
+
"turnbull_interval_lookup",
|
| 1452 |
+
"turnbull_intervals",
|
| 1453 |
+
"weights",
|
| 1454 |
+
"tol",
|
| 1455 |
+
"verbose"
|
| 1456 |
+
],
|
| 1457 |
+
"expectation_maximization_fit": [
|
| 1458 |
+
"observation_intervals",
|
| 1459 |
+
"turnbull_intervals",
|
| 1460 |
+
"turnbull_lookup",
|
| 1461 |
+
"weights",
|
| 1462 |
+
"tol",
|
| 1463 |
+
"max_iter",
|
| 1464 |
+
"optimize",
|
| 1465 |
+
"verbose"
|
| 1466 |
+
],
|
| 1467 |
+
"log_likelihood": [
|
| 1468 |
+
"p",
|
| 1469 |
+
"turnbull_interval_lookup",
|
| 1470 |
+
"weights"
|
| 1471 |
+
],
|
| 1472 |
+
"reconstruct_survival_function": [
|
| 1473 |
+
"probabilities",
|
| 1474 |
+
"turnbull_intervals",
|
| 1475 |
+
"timeline",
|
| 1476 |
+
"label"
|
| 1477 |
+
],
|
| 1478 |
+
"npmle_compute_confidence_intervals": [
|
| 1479 |
+
"left",
|
| 1480 |
+
"right",
|
| 1481 |
+
"mle_",
|
| 1482 |
+
"alpha",
|
| 1483 |
+
"samples"
|
| 1484 |
+
]
|
| 1485 |
+
},
|
| 1486 |
+
"description": "Discovered via AST scan"
|
| 1487 |
+
},
|
| 1488 |
+
{
|
| 1489 |
+
"package": "lifelines.fitters",
|
| 1490 |
+
"module": "spline_fitter",
|
| 1491 |
+
"functions": [],
|
| 1492 |
+
"classes": [
|
| 1493 |
+
"SplineFitter"
|
| 1494 |
+
],
|
| 1495 |
+
"function_signatures": {},
|
| 1496 |
+
"description": "Discovered via AST scan"
|
| 1497 |
+
},
|
| 1498 |
+
{
|
| 1499 |
+
"package": "lifelines.fitters",
|
| 1500 |
+
"module": "weibull_aft_fitter",
|
| 1501 |
+
"functions": [],
|
| 1502 |
+
"classes": [
|
| 1503 |
+
"WeibullAFTFitter"
|
| 1504 |
+
],
|
| 1505 |
+
"function_signatures": {},
|
| 1506 |
+
"description": "Discovered via AST scan"
|
| 1507 |
+
},
|
| 1508 |
+
{
|
| 1509 |
+
"package": "lifelines.fitters",
|
| 1510 |
+
"module": "generalized_gamma_regression_fitter",
|
| 1511 |
+
"functions": [],
|
| 1512 |
+
"classes": [
|
| 1513 |
+
"GeneralizedGammaRegressionFitter"
|
| 1514 |
+
],
|
| 1515 |
+
"function_signatures": {},
|
| 1516 |
+
"description": "Discovered via AST scan"
|
| 1517 |
+
},
|
| 1518 |
+
{
|
| 1519 |
+
"package": "lifelines.fitters",
|
| 1520 |
+
"module": "kaplan_meier_fitter",
|
| 1521 |
+
"functions": [],
|
| 1522 |
+
"classes": [
|
| 1523 |
+
"KaplanMeierFitter"
|
| 1524 |
+
],
|
| 1525 |
+
"function_signatures": {},
|
| 1526 |
+
"description": "Discovered via AST scan"
|
| 1527 |
+
},
|
| 1528 |
+
{
|
| 1529 |
+
"package": "lifelines.fitters",
|
| 1530 |
+
"module": "log_normal_fitter",
|
| 1531 |
+
"functions": [],
|
| 1532 |
+
"classes": [
|
| 1533 |
+
"LogNormalFitter"
|
| 1534 |
+
],
|
| 1535 |
+
"function_signatures": {},
|
| 1536 |
+
"description": "Discovered via AST scan"
|
| 1537 |
+
}
|
| 1538 |
+
],
|
| 1539 |
+
"cli_commands": [],
|
| 1540 |
+
"import_strategy": {
|
| 1541 |
+
"primary": "import",
|
| 1542 |
+
"fallback": "blackbox",
|
| 1543 |
+
"confidence": 0.9
|
| 1544 |
+
},
|
| 1545 |
+
"dependencies": {
|
| 1546 |
+
"required": [
|
| 1547 |
+
"numpy",
|
| 1548 |
+
"scipy",
|
| 1549 |
+
"pandas",
|
| 1550 |
+
"matplotlib",
|
| 1551 |
+
"autograd",
|
| 1552 |
+
"autograd-gamma",
|
| 1553 |
+
"formulaic"
|
| 1554 |
+
],
|
| 1555 |
+
"optional": [
|
| 1556 |
+
"pytest",
|
| 1557 |
+
"sphinx",
|
| 1558 |
+
"jupyter",
|
| 1559 |
+
"nbconvert"
|
| 1560 |
+
]
|
| 1561 |
+
},
|
| 1562 |
+
"risk_assessment": {
|
| 1563 |
+
"import_feasibility": 0.94,
|
| 1564 |
+
"intrusiveness_risk": "low",
|
| 1565 |
+
"complexity": "medium"
|
| 1566 |
+
}
|
| 1567 |
+
},
|
| 1568 |
+
"deepwiki_analysis": {
|
| 1569 |
+
"repo_url": "https://github.com/CamDavidsonPilon/lifelines",
|
| 1570 |
+
"repo_name": "lifelines",
|
| 1571 |
+
"error": "DeepWiki analysis failed",
|
| 1572 |
+
"model": "gpt-5.3-codex",
|
| 1573 |
+
"source": "llm_direct_analysis",
|
| 1574 |
+
"success": false
|
| 1575 |
+
},
|
| 1576 |
+
"deepwiki_options": {
|
| 1577 |
+
"enabled": true,
|
| 1578 |
+
"model": "gpt-5.3-codex"
|
| 1579 |
+
},
|
| 1580 |
+
"risk": {
|
| 1581 |
+
"import_feasibility": 0.94,
|
| 1582 |
+
"intrusiveness_risk": "low",
|
| 1583 |
+
"complexity": "medium"
|
| 1584 |
+
}
|
| 1585 |
+
}
|
lifelines/mcp_output/diff_report.md
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Difference Report — **lifelines**
|
| 2 |
+
**Generated:** 2026-03-12 08:11:18
|
| 3 |
+
**Repository:** `lifelines`
|
| 4 |
+
**Project Type:** Python library
|
| 5 |
+
**Scope:** Basic functionality
|
| 6 |
+
**Intrusiveness:** None
|
| 7 |
+
**Workflow Status:** ✅ Success
|
| 8 |
+
**Test Status:** ❌ Failed
|
| 9 |
+
|
| 10 |
+
---
|
| 11 |
+
|
| 12 |
+
## 1) Project Overview
|
| 13 |
+
|
| 14 |
+
This update for the `lifelines` Python library appears to introduce **new assets only** with no edits to existing files, indicating a low-risk, additive change profile from a source-control perspective.
|
| 15 |
+
|
| 16 |
+
### Change Summary
|
| 17 |
+
- **New files:** 8
|
| 18 |
+
- **Modified files:** 0
|
| 19 |
+
- **Deleted files:** 0 (not reported)
|
| 20 |
+
- **Net impact:** Additive only
|
| 21 |
+
|
| 22 |
+
---
|
| 23 |
+
|
| 24 |
+
## 2) Difference Analysis
|
| 25 |
+
|
| 26 |
+
## 2.1 File-Level Delta
|
| 27 |
+
Given the provided metadata:
|
| 28 |
+
- The change set consists entirely of **8 newly added files**.
|
| 29 |
+
- No existing modules or logic were directly altered (`0 modified`), reducing regression surface in current code paths.
|
| 30 |
+
|
| 31 |
+
## 2.2 Functional Impact (Expected)
|
| 32 |
+
Because this is a **basic functionality** update and no existing files were modified, likely scenarios include:
|
| 33 |
+
- Introduction of new helper modules/utilities
|
| 34 |
+
- New tests, examples, docs, or configuration files
|
| 35 |
+
- Optional feature scaffolding not yet integrated into active runtime paths
|
| 36 |
+
|
| 37 |
+
Without a per-file listing, runtime impact is assumed to be **low-to-moderate** unless the new files are imported automatically by the package `__init__` or by build tooling.
|
| 38 |
+
|
| 39 |
+
---
|
| 40 |
+
|
| 41 |
+
## 3) Technical Analysis
|
| 42 |
+
|
| 43 |
+
## 3.1 Risk Assessment
|
| 44 |
+
- **Code integration risk:** Low (no modified files)
|
| 45 |
+
- **Build/pipeline risk:** Medium (tests failed despite workflow success)
|
| 46 |
+
- **Release readiness risk:** Medium to High until failing tests are resolved
|
| 47 |
+
|
| 48 |
+
## 3.2 CI Interpretation
|
| 49 |
+
A successful workflow with failed tests usually means:
|
| 50 |
+
- CI pipeline executed correctly
|
| 51 |
+
- Validation gates detected functional or environmental issues
|
| 52 |
+
|
| 53 |
+
Potential root causes:
|
| 54 |
+
1. New tests added with unmet assumptions
|
| 55 |
+
2. Environment/version mismatch (Python, dependencies, OS)
|
| 56 |
+
3. Packaging/import side effects from newly introduced files
|
| 57 |
+
4. Incomplete implementation merged with placeholder tests
|
| 58 |
+
|
| 59 |
+
## 3.3 Quality Signals
|
| 60 |
+
- ✅ Process signal: automation triggered and completed
|
| 61 |
+
- ⚠️ Product signal: test suite not healthy
|
| 62 |
+
- ⚠️ Governance signal: do not promote to production or release tags until the tests are green
|
| 63 |
+
|
| 64 |
+
---
|
| 65 |
+
|
| 66 |
+
## 4) Recommendations & Improvements
|
| 67 |
+
|
| 68 |
+
## 4.1 Immediate Actions (High Priority)
|
| 69 |
+
1. **Collect failing test logs** and classify by:
|
| 70 |
+
- deterministic failures
|
| 71 |
+
- flaky/environmental failures
|
| 72 |
+
2. **Map failures to new files** to confirm direct causality.
|
| 73 |
+
3. **Run tests locally** in CI-equivalent environment:
|
| 74 |
+
- pinned Python version
|
| 75 |
+
- locked dependency set
|
| 76 |
+
4. **Block release** until tests pass (or temporarily quarantine known flaky tests with documented rationale).
|
| 77 |
+
|
| 78 |
+
## 4.2 Short-Term Stabilization
|
| 79 |
+
- Add/verify:
|
| 80 |
+
- Type checks (`mypy`/pyright if used)
|
| 81 |
+
- Linting consistency (`ruff`, `flake8`, etc.)
|
| 82 |
+
- Import-time smoke tests for new modules
|
| 83 |
+
- Ensure new files are correctly included/excluded in:
|
| 84 |
+
- `pyproject.toml` / packaging config
|
| 85 |
+
- test discovery patterns
|
| 86 |
+
- docs build steps
|
| 87 |
+
|
| 88 |
+
## 4.3 Process Improvements
|
| 89 |
+
- Enforce branch protection requiring:
|
| 90 |
+
- passing tests
|
| 91 |
+
- required checks before merge
|
| 92 |
+
- Add CI matrix for key supported Python versions to catch compatibility regressions earlier.
|
| 93 |
+
|
| 94 |
+
---
|
| 95 |
+
|
| 96 |
+
## 5) Deployment Information
|
| 97 |
+
|
| 98 |
+
## 5.1 Current Deployment Readiness
|
| 99 |
+
- **Status:** Not release-ready
|
| 100 |
+
- **Reason:** Test suite failed
|
| 101 |
+
|
| 102 |
+
## 5.2 Recommended Deployment Decision
|
| 103 |
+
- **Do not deploy/publish** this revision to package index or production consumers.
|
| 104 |
+
- Promote only after:
|
| 105 |
+
1. failing tests are resolved,
|
| 106 |
+
2. full CI passes,
|
| 107 |
+
3. optional sanity check release (internal/pre-release tag) succeeds.
|
| 108 |
+
|
| 109 |
+
## 5.3 Rollback/Recovery
|
| 110 |
+
Since no existing files were modified, rollback is straightforward:
|
| 111 |
+
- Revert the commit(s) introducing the 8 new files if urgent stabilization is needed.
|
| 112 |
+
|
| 113 |
+
---
|
| 114 |
+
|
| 115 |
+
## 6) Future Planning
|
| 116 |
+
|
| 117 |
+
## 6.1 Near-Term (Next 1–2 iterations)
|
| 118 |
+
- Achieve 100% pass rate for mandatory test suite.
|
| 119 |
+
- Add targeted regression tests specifically covering newly added file behaviors.
|
| 120 |
+
- Improve failure observability (clearer test naming, richer CI artifacts).
|
| 121 |
+
|
| 122 |
+
## 6.2 Mid-Term
|
| 123 |
+
- Introduce change-impact templates in PRs:
|
| 124 |
+
- runtime impact
|
| 125 |
+
- packaging impact
|
| 126 |
+
- test impact
|
| 127 |
+
- Add lightweight release checklist for Python library updates:
|
| 128 |
+
- install test
|
| 129 |
+
- import test
|
| 130 |
+
- minimal API smoke test
|
| 131 |
+
|
| 132 |
+
## 6.3 Long-Term
|
| 133 |
+
- Strengthen quality gates with:
|
| 134 |
+
- mutation/property-based testing for critical paths
|
| 135 |
+
- dependency update automation with compatibility validation
|
| 136 |
+
- trend monitoring for flaky tests and mean time to fix
|
| 137 |
+
|
| 138 |
+
---
|
| 139 |
+
|
| 140 |
+
## 7) Executive Summary
|
| 141 |
+
|
| 142 |
+
This revision is an **additive-only update** (`8 new`, `0 modified`) with **low direct code intrusion** but **failed tests**, making it **unsuitable for release** in its current state. The primary priority is to triage and fix the failing test cases, validate in CI-equivalent environments, and only then proceed with deployment.
|
lifelines/mcp_output/mcp_plugin/__init__.py
ADDED
|
File without changes
|
lifelines/mcp_output/mcp_plugin/adapter.py
ADDED
|
@@ -0,0 +1,333 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import sys
|
| 3 |
+
import importlib
|
| 4 |
+
import traceback
|
| 5 |
+
from typing import Any, Dict, Optional, Tuple
|
| 6 |
+
|
| 7 |
+
# Directory named "source" located three directory levels above this file
# (dirname is applied three times to this file's absolute path).
source_path = os.path.join(
    os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
    "source",
)
# Only prepend when the entry is missing, so importing or reloading this module
# more than once does not stack duplicate entries on sys.path.
if source_path not in sys.path:
    sys.path.insert(0, source_path)
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class Adapter:
    """
    MCP Import Mode Adapter for the lifelines repository.

    On construction the adapter tries to import a fixed list of
    (module, symbol) pairs and remembers which ones succeeded. Every public
    method returns a plain dictionary with at least the keys ``status``
    ("success" or "error"), ``mode`` and ``message`` instead of raising.
    """

    # -------------------------------------------------------------------------
    # Initialization and Module Management
    # -------------------------------------------------------------------------
    def __init__(self) -> None:
        self.mode = "import"
        # Imported module objects, keyed by dotted module path.
        self._modules: Dict[str, Any] = {}
        # Resolved symbols, keyed by "<module path>.<symbol name>".
        self._symbols: Dict[str, Any] = {}
        # "<ExceptionType>: <message>" strings for symbols that failed to load.
        self._import_errors: Dict[str, str] = {}
        self._initialize_imports()

    def _ok(self, data: Optional[Dict[str, Any]] = None, message: str = "Success") -> Dict[str, Any]:
        """Build a success payload, merging ``data`` into it when non-empty."""
        payload: Dict[str, Any] = {"status": "success", "mode": self.mode, "message": message}
        if data:
            payload.update(data)
        return payload

    def _err(self, message: str, guidance: Optional[str] = None, details: Optional[str] = None) -> Dict[str, Any]:
        """Build an error payload; ``guidance``/``details`` are added only when truthy."""
        payload: Dict[str, Any] = {"status": "error", "mode": self.mode, "message": message}
        if guidance:
            payload["guidance"] = guidance
        if details:
            payload["details"] = details
        return payload

    def _initialize_imports(self) -> None:
        """
        Attempt to import all identified modules/symbols from analysis results.

        Uses full module paths (with source prefix removed due to sys.path setup).
        Successes land in ``self._modules`` / ``self._symbols``; failures are
        recorded in ``self._import_errors`` and never propagate.
        """
        targets = [
            ("conftest", "block"),
            ("docs.conftest", "tempdir"),
            ("docs.conf", "setup"),
            ("examples.crowther_royston_clements_splines", "generate_data"),
            ("examples.crowther_royston_clements_splines", "CRCSplineFitter"),
            ("examples.royston_parmar_splines", "PHSplineFitter"),
            ("examples.royston_parmar_splines", "POSplineFitter"),
            ("examples.royston_parmar_splines", "SplineFitter"),
            ("examples.cure_model", "CureModel"),
            ("examples.haft_model", "HAFT"),
            ("examples.copula_frailty_weibull_model", "CopulaFrailtyWeilbullModel"),
            ("examples.mixture_cure_model", "MixtureCureModel"),
        ]

        # Remember modules that already failed so their top-level code is not
        # executed again for every further symbol they are supposed to export.
        failed_modules: Dict[str, str] = {}

        for module_path, symbol_name in targets:
            key = f"{module_path}.{symbol_name}"

            if module_path in failed_modules:
                self._import_errors[key] = failed_modules[module_path]
                continue

            module = self._modules.get(module_path)
            if module is None:
                try:
                    module = importlib.import_module(module_path)
                except Exception as e:
                    reason = f"{type(e).__name__}: {e}"
                    failed_modules[module_path] = reason
                    self._import_errors[key] = reason
                    continue
                self._modules[module_path] = module

            try:
                self._symbols[key] = getattr(module, symbol_name)
            except Exception as e:
                self._import_errors[key] = f"{type(e).__name__}: {e}"

    def health_check(self) -> Dict[str, Any]:
        """
        Report import availability and fallback readiness.

        Returns:
            Success payload with ``available_symbols`` (sorted keys),
            ``failed_symbols`` (key -> error string) and ``fallback_ready``.
            The ``status`` is "success" whenever the check itself ran; the
            ``message`` distinguishes full, partial and zero import success.
        """
        available = sorted(self._symbols)
        failed = dict(self._import_errors)

        if not failed:
            message = "All identified symbols imported successfully."
        elif available:
            message = "Partial import success. Fallback mode is available for missing symbols."
        else:
            # Nothing imported at all: calling this "partial success" would mislead.
            message = "No symbols could be imported. Fallback mode is available for all symbols."

        return self._ok(
            {
                "available_symbols": available,
                "failed_symbols": failed,
                "fallback_ready": True,
            },
            message=message,
        )

    def _resolve_symbol(self, module_path: str, symbol_name: str) -> Tuple[Optional[Any], Optional[Dict[str, Any]]]:
        """
        Look up a previously imported symbol.

        Returns:
            ``(symbol, None)`` when the symbol is available, otherwise
            ``(None, error_payload)`` carrying the recorded import error.
        """
        key = f"{module_path}.{symbol_name}"
        symbol = self._symbols.get(key)
        if symbol is not None:
            return symbol, None
        err = self._import_errors.get(key, "Unknown import issue.")
        return None, self._err(
            message=f"Requested symbol is unavailable: {key}",
            guidance=(
                "Verify repository source is present under the expected 'source' directory, "
                "install required dependencies (numpy, scipy, pandas, matplotlib, autograd, "
                "autograd-gamma, formulaic), and retry health_check()."
            ),
            details=err,
        )

    def _instantiate(self, module_path: str, class_name: str, *args: Any, **kwargs: Any) -> Dict[str, Any]:
        """Construct ``module_path.class_name(*args, **kwargs)`` and wrap the outcome in a payload."""
        cls, err = self._resolve_symbol(module_path, class_name)
        if err:
            return err
        try:
            instance = cls(*args, **kwargs)
            return self._ok({"instance": instance, "class": f"{module_path}.{class_name}"}, message="Instance created.")
        except Exception as e:
            return self._err(
                message=f"Failed to instantiate class: {module_path}.{class_name}",
                guidance="Check constructor arguments and dependency availability.",
                details=f"{type(e).__name__}: {e}",
            )

    def _call(self, module_path: str, function_name: str, *args: Any, **kwargs: Any) -> Dict[str, Any]:
        """Call ``module_path.function_name(*args, **kwargs)`` and wrap the outcome in a payload."""
        fn, err = self._resolve_symbol(module_path, function_name)
        if err:
            return err
        try:
            result = fn(*args, **kwargs)
            return self._ok({"result": result, "function": f"{module_path}.{function_name}"}, message="Function executed.")
        except Exception as e:
            return self._err(
                message=f"Failed to execute function: {module_path}.{function_name}",
                guidance="Review function parameters and input data shapes/types.",
                details=f"{type(e).__name__}: {e}",
            )

    # -------------------------------------------------------------------------
    # Functions from discovered modules
    # -------------------------------------------------------------------------
    def call_conftest_block(self, *args: Any, **kwargs: Any) -> Dict[str, Any]:
        """
        Call conftest.block(*args, **kwargs).

        Parameters:
            *args: Positional arguments forwarded to conftest.block.
            **kwargs: Keyword arguments forwarded to conftest.block.

        Returns:
            Unified status dictionary with function result or actionable error.
        """
        return self._call("conftest", "block", *args, **kwargs)

    def call_docs_conftest_tempdir(self, *args: Any, **kwargs: Any) -> Dict[str, Any]:
        """
        Call docs.conftest.tempdir(*args, **kwargs).

        Parameters:
            *args: Positional arguments forwarded to docs.conftest.tempdir.
            **kwargs: Keyword arguments forwarded to docs.conftest.tempdir.

        Returns:
            Unified status dictionary with function result or actionable error.
        """
        return self._call("docs.conftest", "tempdir", *args, **kwargs)

    def call_docs_conf_setup(self, *args: Any, **kwargs: Any) -> Dict[str, Any]:
        """
        Call docs.conf.setup(*args, **kwargs).

        Parameters:
            *args: Positional arguments forwarded to docs.conf.setup.
            **kwargs: Keyword arguments forwarded to docs.conf.setup.

        Returns:
            Unified status dictionary with function result or actionable error.
        """
        return self._call("docs.conf", "setup", *args, **kwargs)

    def call_generate_data(self, *args: Any, **kwargs: Any) -> Dict[str, Any]:
        """
        Call examples.crowther_royston_clements_splines.generate_data(*args, **kwargs).

        Parameters:
            *args: Positional arguments forwarded to generate_data.
            **kwargs: Keyword arguments forwarded to generate_data.

        Returns:
            Unified status dictionary with generated data or actionable error.
        """
        return self._call("examples.crowther_royston_clements_splines", "generate_data", *args, **kwargs)

    # -------------------------------------------------------------------------
    # Class instance factory methods
    # -------------------------------------------------------------------------
    def create_crc_spline_fitter(self, *args: Any, **kwargs: Any) -> Dict[str, Any]:
        """
        Create an instance of examples.crowther_royston_clements_splines.CRCSplineFitter.

        Parameters:
            *args: Positional constructor arguments.
            **kwargs: Keyword constructor arguments.

        Returns:
            Unified status dictionary containing created instance or actionable error.
        """
        return self._instantiate("examples.crowther_royston_clements_splines", "CRCSplineFitter", *args, **kwargs)

    def create_ph_spline_fitter(self, *args: Any, **kwargs: Any) -> Dict[str, Any]:
        """
        Create an instance of examples.royston_parmar_splines.PHSplineFitter.

        Parameters:
            *args: Positional constructor arguments.
            **kwargs: Keyword constructor arguments.

        Returns:
            Unified status dictionary containing created instance or actionable error.
        """
        return self._instantiate("examples.royston_parmar_splines", "PHSplineFitter", *args, **kwargs)

    def create_po_spline_fitter(self, *args: Any, **kwargs: Any) -> Dict[str, Any]:
        """
        Create an instance of examples.royston_parmar_splines.POSplineFitter.

        Parameters:
            *args: Positional constructor arguments.
            **kwargs: Keyword constructor arguments.

        Returns:
            Unified status dictionary containing created instance or actionable error.
        """
        return self._instantiate("examples.royston_parmar_splines", "POSplineFitter", *args, **kwargs)

    def create_spline_fitter(self, *args: Any, **kwargs: Any) -> Dict[str, Any]:
        """
        Create an instance of examples.royston_parmar_splines.SplineFitter.

        Parameters:
            *args: Positional constructor arguments.
            **kwargs: Keyword constructor arguments.

        Returns:
            Unified status dictionary containing created instance or actionable error.
        """
        return self._instantiate("examples.royston_parmar_splines", "SplineFitter", *args, **kwargs)

    def create_cure_model(self, *args: Any, **kwargs: Any) -> Dict[str, Any]:
        """
        Create an instance of examples.cure_model.CureModel.

        Parameters:
            *args: Positional constructor arguments.
            **kwargs: Keyword constructor arguments.

        Returns:
            Unified status dictionary containing created instance or actionable error.
        """
        return self._instantiate("examples.cure_model", "CureModel", *args, **kwargs)

    def create_haft_model(self, *args: Any, **kwargs: Any) -> Dict[str, Any]:
        """
        Create an instance of examples.haft_model.HAFT.

        Parameters:
            *args: Positional constructor arguments.
            **kwargs: Keyword constructor arguments.

        Returns:
            Unified status dictionary containing created instance or actionable error.
        """
        return self._instantiate("examples.haft_model", "HAFT", *args, **kwargs)

    def create_copula_frailty_weilbull_model(self, *args: Any, **kwargs: Any) -> Dict[str, Any]:
        """
        Create an instance of examples.copula_frailty_weibull_model.CopulaFrailtyWeilbullModel.

        Parameters:
            *args: Positional constructor arguments.
            **kwargs: Keyword constructor arguments.

        Returns:
            Unified status dictionary containing created instance or actionable error.
        """
        return self._instantiate(
            "examples.copula_frailty_weibull_model",
            "CopulaFrailtyWeilbullModel",
            *args,
            **kwargs,
        )

    def create_mixture_cure_model(self, *args: Any, **kwargs: Any) -> Dict[str, Any]:
        """
        Create an instance of examples.mixture_cure_model.MixtureCureModel.

        Parameters:
            *args: Positional constructor arguments.
            **kwargs: Keyword constructor arguments.

        Returns:
            Unified status dictionary containing created instance or actionable error.
        """
        return self._instantiate("examples.mixture_cure_model", "MixtureCureModel", *args, **kwargs)

    # -------------------------------------------------------------------------
    # Utility for runtime troubleshooting
    # -------------------------------------------------------------------------
    def debug_trace(self, exc: BaseException) -> Dict[str, Any]:
        """
        Return a structured traceback payload for debugging adapter-level exceptions.

        Parameters:
            exc: Exception object to format.

        Returns:
            Unified error dictionary whose ``details`` holds the formatted traceback.
        """
        return self._err(
            message="Adapter debug trace generated.",
            guidance="Inspect details and fix module paths, missing dependencies, or invalid inputs.",
            details="".join(traceback.format_exception(type(exc), exc, exc.__traceback__)),
        )
|
lifelines/mcp_output/mcp_plugin/main.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
MCP Service Auto-Wrapper - Auto-generated
|
| 3 |
+
"""
|
| 4 |
+
from mcp_service import create_app
|
| 5 |
+
|
| 6 |
+
def main():
    """Build the MCP application with ``create_app`` and return it."""
    return create_app()


if __name__ == "__main__":
    # Script entry point: construct the application, then start it.
    app = main()
    app.run()
|
lifelines/mcp_output/mcp_plugin/mcp_service.py
ADDED
|
@@ -0,0 +1,398 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import sys
|
| 3 |
+
|
| 4 |
+
# Make the bundled ``source`` directory (two directories above the one that
# contains this file) importable before the project imports that follow.
source_path = os.path.join(
    os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
    "source",
)
if source_path not in sys.path:
    sys.path.insert(0, source_path)
|
| 7 |
+
|
| 8 |
+
from fastmcp import FastMCP
|
| 9 |
+
|
| 10 |
+
from conftest import block
|
| 11 |
+
from docs.conftest import tempdir
|
| 12 |
+
from docs.conf import setup
|
| 13 |
+
from examples.crowther_royston_clements_splines import generate_data, CRCSplineFitter
|
| 14 |
+
from examples.royston_parmar_splines import PHSplineFitter, POSplineFitter, SplineFitter
|
| 15 |
+
from examples.cure_model import CureModel
|
| 16 |
+
from examples.haft_model import HAFT
|
| 17 |
+
from examples.copula_frailty_weibull_model import CopulaFrailtyWeilbullModel
|
| 18 |
+
from examples.mixture_cure_model import MixtureCureModel
|
| 19 |
+
|
| 20 |
+
mcp = FastMCP("unknown_service")
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
@mcp.tool(name="block", description="Auto-wrapped function block")
def block(payload: dict):
    """Call ``conftest.block`` with ``payload`` expanded as keyword arguments.

    Args:
        payload: Mapping of keyword-argument names to values for the wrapped
            function.

    Returns:
        A dict with keys ``success``, ``result`` and ``error``. On failure
        ``result`` is ``None`` and ``error`` holds the exception text.
    """
    try:
        # This ``def`` rebinds the module-level name ``block``, so calling
        # ``block(...)`` here would never reach the function imported from
        # ``conftest``. Fetch the real callable under a private alias instead.
        try:
            from conftest import block as _wrapped
        except ImportError:
            _wrapped = None
        if _wrapped is None:
            return {"success": False, "result": None, "error": "Function block is not available"}
        result = _wrapped(**payload)
        return {"success": True, "result": result, "error": None}
    except Exception as e:
        # Tool boundary: report any failure to the caller instead of raising.
        return {"success": False, "result": None, "error": str(e)}
|
| 32 |
+
|
| 33 |
+
@mcp.tool(name="tempdir", description="Auto-wrapped function tempdir")
def tempdir(payload: dict):
    """Call ``docs.conftest.tempdir`` with ``payload`` expanded as keyword arguments.

    Args:
        payload: Mapping of keyword-argument names to values for the wrapped
            function.

    Returns:
        A dict with keys ``success``, ``result`` and ``error``. On failure
        ``result`` is ``None`` and ``error`` holds the exception text.
    """
    try:
        # This ``def`` rebinds the module-level name ``tempdir``, so calling
        # ``tempdir(...)`` here would never reach the function imported from
        # ``docs.conftest``. Fetch the real callable under a private alias.
        try:
            from docs.conftest import tempdir as _wrapped
        except ImportError:
            _wrapped = None
        if _wrapped is None:
            return {"success": False, "result": None, "error": "Function tempdir is not available"}
        result = _wrapped(**payload)
        return {"success": True, "result": result, "error": None}
    except Exception as e:
        # Tool boundary: report any failure to the caller instead of raising.
        return {"success": False, "result": None, "error": str(e)}
|
| 42 |
+
|
| 43 |
+
@mcp.tool(name="setup", description="Auto-wrapped function setup")
def setup(payload: dict):
    """Call ``docs.conf.setup`` with ``payload`` expanded as keyword arguments.

    Args:
        payload: Mapping of keyword-argument names to values for the wrapped
            function.

    Returns:
        A dict with keys ``success``, ``result`` and ``error``. On failure
        ``result`` is ``None`` and ``error`` holds the exception text.
    """
    try:
        # This ``def`` rebinds the module-level name ``setup``, so calling
        # ``setup(...)`` here would never reach the function imported from
        # ``docs.conf``. Fetch the real callable under a private alias.
        try:
            from docs.conf import setup as _wrapped
        except ImportError:
            _wrapped = None
        if _wrapped is None:
            return {"success": False, "result": None, "error": "Function setup is not available"}
        result = _wrapped(**payload)
        return {"success": True, "result": result, "error": None}
    except Exception as e:
        # Tool boundary: report any failure to the caller instead of raising.
        return {"success": False, "result": None, "error": str(e)}
|
| 52 |
+
|
| 53 |
+
@mcp.tool(name="generate_data", description="Auto-wrapped function generate_data")
def generate_data(payload: dict):
    """Call the example module's ``generate_data`` with ``payload`` as keyword arguments.

    The wrapped function is the one exported by
    ``examples.crowther_royston_clements_splines``.

    Args:
        payload: Mapping of keyword-argument names to values for the wrapped
            function.

    Returns:
        A dict with keys ``success``, ``result`` and ``error``. On failure
        ``result`` is ``None`` and ``error`` holds the exception text.
    """
    try:
        # This ``def`` rebinds the module-level name ``generate_data``, so
        # calling ``generate_data(...)`` here would never reach the imported
        # function. Fetch the real callable under a private alias instead.
        try:
            from examples.crowther_royston_clements_splines import generate_data as _wrapped
        except ImportError:
            _wrapped = None
        if _wrapped is None:
            return {"success": False, "result": None, "error": "Function generate_data is not available"}
        result = _wrapped(**payload)
        return {"success": True, "result": result, "error": None}
    except Exception as e:
        # Tool boundary: report any failure to the caller instead of raising.
        return {"success": False, "result": None, "error": str(e)}
|
| 62 |
+
|
| 63 |
+
# NOTE(review): confirm the installed FastMCP version accepts tool functions
# declared with *args/**kwargs.
@mcp.tool(name="crcsplinefitter", description="CRCSplineFitter class")
def crcsplinefitter(*args, **kwargs):
    """Instantiate ``CRCSplineFitter`` and report the outcome as a dict.

    String arguments are converted to numbers where possible: ``float`` when
    the text contains a ``.``, ``int`` otherwise. Text that fails conversion
    and all non-string values are forwarded unchanged.

    Returns:
        A dict with keys ``success``, ``result`` (``str(instance)`` on
        success) and ``error`` (the exception text on failure).
    """

    def _numeric_or_same(candidate):
        # Only strings are converted; a "." selects float parsing, else int.
        if not isinstance(candidate, str):
            return candidate
        parse = float if '.' in candidate else int
        try:
            return parse(candidate)
        except ValueError:
            return candidate

    try:
        if CRCSplineFitter is None:
            return {"success": False, "result": None, "error": "Class CRCSplineFitter is not available, path may need adjustment"}

        positional = [_numeric_or_same(item) for item in args]
        keyword = {name: _numeric_or_same(item) for name, item in kwargs.items()}

        instance = CRCSplineFitter(*positional, **keyword)
        return {"success": True, "result": str(instance), "error": None}
    except Exception as exc:
        return {"success": False, "result": None, "error": str(exc)}
|
| 103 |
+
|
| 104 |
+
# NOTE(review): confirm the installed FastMCP version accepts tool functions
# declared with *args/**kwargs.
@mcp.tool(name="phsplinefitter", description="PHSplineFitter class")
def phsplinefitter(*args, **kwargs):
    """Instantiate ``PHSplineFitter`` and report the outcome as a dict.

    String arguments are converted to numbers where possible: ``float`` when
    the text contains a ``.``, ``int`` otherwise. Text that fails conversion
    and all non-string values are forwarded unchanged.

    Returns:
        A dict with keys ``success``, ``result`` (``str(instance)`` on
        success) and ``error`` (the exception text on failure).
    """

    def _numeric_or_same(candidate):
        # Only strings are converted; a "." selects float parsing, else int.
        if not isinstance(candidate, str):
            return candidate
        parse = float if '.' in candidate else int
        try:
            return parse(candidate)
        except ValueError:
            return candidate

    try:
        if PHSplineFitter is None:
            return {"success": False, "result": None, "error": "Class PHSplineFitter is not available, path may need adjustment"}

        positional = [_numeric_or_same(item) for item in args]
        keyword = {name: _numeric_or_same(item) for name, item in kwargs.items()}

        instance = PHSplineFitter(*positional, **keyword)
        return {"success": True, "result": str(instance), "error": None}
    except Exception as exc:
        return {"success": False, "result": None, "error": str(exc)}
|
| 144 |
+
|
| 145 |
+
# NOTE(review): confirm the installed FastMCP version accepts tool functions
# declared with *args/**kwargs.
@mcp.tool(name="posplinefitter", description="POSplineFitter class")
def posplinefitter(*args, **kwargs):
    """Instantiate ``POSplineFitter`` and report the outcome as a dict.

    String arguments are converted to numbers where possible: ``float`` when
    the text contains a ``.``, ``int`` otherwise. Text that fails conversion
    and all non-string values are forwarded unchanged.

    Returns:
        A dict with keys ``success``, ``result`` (``str(instance)`` on
        success) and ``error`` (the exception text on failure).
    """

    def _numeric_or_same(candidate):
        # Only strings are converted; a "." selects float parsing, else int.
        if not isinstance(candidate, str):
            return candidate
        parse = float if '.' in candidate else int
        try:
            return parse(candidate)
        except ValueError:
            return candidate

    try:
        if POSplineFitter is None:
            return {"success": False, "result": None, "error": "Class POSplineFitter is not available, path may need adjustment"}

        positional = [_numeric_or_same(item) for item in args]
        keyword = {name: _numeric_or_same(item) for name, item in kwargs.items()}

        instance = POSplineFitter(*positional, **keyword)
        return {"success": True, "result": str(instance), "error": None}
    except Exception as exc:
        return {"success": False, "result": None, "error": str(exc)}
|
| 185 |
+
|
| 186 |
+
# NOTE(review): confirm the installed FastMCP version accepts tool functions
# declared with *args/**kwargs.
@mcp.tool(name="splinefitter", description="SplineFitter class")
def splinefitter(*args, **kwargs):
    """Instantiate ``SplineFitter`` and report the outcome as a dict.

    String arguments are converted to numbers where possible: ``float`` when
    the text contains a ``.``, ``int`` otherwise. Text that fails conversion
    and all non-string values are forwarded unchanged.

    Returns:
        A dict with keys ``success``, ``result`` (``str(instance)`` on
        success) and ``error`` (the exception text on failure).
    """

    def _numeric_or_same(candidate):
        # Only strings are converted; a "." selects float parsing, else int.
        if not isinstance(candidate, str):
            return candidate
        parse = float if '.' in candidate else int
        try:
            return parse(candidate)
        except ValueError:
            return candidate

    try:
        if SplineFitter is None:
            return {"success": False, "result": None, "error": "Class SplineFitter is not available, path may need adjustment"}

        positional = [_numeric_or_same(item) for item in args]
        keyword = {name: _numeric_or_same(item) for name, item in kwargs.items()}

        instance = SplineFitter(*positional, **keyword)
        return {"success": True, "result": str(instance), "error": None}
    except Exception as exc:
        return {"success": False, "result": None, "error": str(exc)}
|
| 226 |
+
|
| 227 |
+
# NOTE(review): confirm the installed FastMCP version accepts tool functions
# declared with *args/**kwargs.
@mcp.tool(name="curemodel", description="CureModel class")
def curemodel(*args, **kwargs):
    """Instantiate ``CureModel`` and report the outcome as a dict.

    String arguments are converted to numbers where possible: ``float`` when
    the text contains a ``.``, ``int`` otherwise. Text that fails conversion
    and all non-string values are forwarded unchanged.

    Returns:
        A dict with keys ``success``, ``result`` (``str(instance)`` on
        success) and ``error`` (the exception text on failure).
    """

    def _numeric_or_same(candidate):
        # Only strings are converted; a "." selects float parsing, else int.
        if not isinstance(candidate, str):
            return candidate
        parse = float if '.' in candidate else int
        try:
            return parse(candidate)
        except ValueError:
            return candidate

    try:
        if CureModel is None:
            return {"success": False, "result": None, "error": "Class CureModel is not available, path may need adjustment"}

        positional = [_numeric_or_same(item) for item in args]
        keyword = {name: _numeric_or_same(item) for name, item in kwargs.items()}

        instance = CureModel(*positional, **keyword)
        return {"success": True, "result": str(instance), "error": None}
    except Exception as exc:
        return {"success": False, "result": None, "error": str(exc)}
|
| 267 |
+
|
| 268 |
+
# NOTE(review): confirm the installed FastMCP version accepts tool functions
# declared with *args/**kwargs.
@mcp.tool(name="haft", description="HAFT class")
def haft(*args, **kwargs):
    """Instantiate ``HAFT`` and report the outcome as a dict.

    String arguments are converted to numbers where possible: ``float`` when
    the text contains a ``.``, ``int`` otherwise. Text that fails conversion
    and all non-string values are forwarded unchanged.

    Returns:
        A dict with keys ``success``, ``result`` (``str(instance)`` on
        success) and ``error`` (the exception text on failure).
    """

    def _numeric_or_same(candidate):
        # Only strings are converted; a "." selects float parsing, else int.
        if not isinstance(candidate, str):
            return candidate
        parse = float if '.' in candidate else int
        try:
            return parse(candidate)
        except ValueError:
            return candidate

    try:
        if HAFT is None:
            return {"success": False, "result": None, "error": "Class HAFT is not available, path may need adjustment"}

        positional = [_numeric_or_same(item) for item in args]
        keyword = {name: _numeric_or_same(item) for name, item in kwargs.items()}

        instance = HAFT(*positional, **keyword)
        return {"success": True, "result": str(instance), "error": None}
    except Exception as exc:
        return {"success": False, "result": None, "error": str(exc)}
|
| 308 |
+
|
| 309 |
+
# NOTE(review): confirm the installed FastMCP version accepts tool functions
# declared with *args/**kwargs.
@mcp.tool(name="copulafrailtyweilbullmodel", description="CopulaFrailtyWeilbullModel class")
def copulafrailtyweilbullmodel(*args, **kwargs):
    """Instantiate ``CopulaFrailtyWeilbullModel`` and report the outcome as a dict.

    String arguments are converted to numbers where possible: ``float`` when
    the text contains a ``.``, ``int`` otherwise. Text that fails conversion
    and all non-string values are forwarded unchanged.

    Returns:
        A dict with keys ``success``, ``result`` (``str(instance)`` on
        success) and ``error`` (the exception text on failure).
    """

    def _numeric_or_same(candidate):
        # Only strings are converted; a "." selects float parsing, else int.
        if not isinstance(candidate, str):
            return candidate
        parse = float if '.' in candidate else int
        try:
            return parse(candidate)
        except ValueError:
            return candidate

    try:
        if CopulaFrailtyWeilbullModel is None:
            return {"success": False, "result": None, "error": "Class CopulaFrailtyWeilbullModel is not available, path may need adjustment"}

        positional = [_numeric_or_same(item) for item in args]
        keyword = {name: _numeric_or_same(item) for name, item in kwargs.items()}

        instance = CopulaFrailtyWeilbullModel(*positional, **keyword)
        return {"success": True, "result": str(instance), "error": None}
    except Exception as exc:
        return {"success": False, "result": None, "error": str(exc)}
|
| 349 |
+
|
| 350 |
+
# NOTE(review): confirm the installed FastMCP version accepts tool functions
# declared with *args/**kwargs.
@mcp.tool(name="mixturecuremodel", description="MixtureCureModel class")
def mixturecuremodel(*args, **kwargs):
    """Instantiate ``MixtureCureModel`` and report the outcome as a dict.

    String arguments are converted to numbers where possible: ``float`` when
    the text contains a ``.``, ``int`` otherwise. Text that fails conversion
    and all non-string values are forwarded unchanged.

    Returns:
        A dict with keys ``success``, ``result`` (``str(instance)`` on
        success) and ``error`` (the exception text on failure).
    """

    def _numeric_or_same(candidate):
        # Only strings are converted; a "." selects float parsing, else int.
        if not isinstance(candidate, str):
            return candidate
        parse = float if '.' in candidate else int
        try:
            return parse(candidate)
        except ValueError:
            return candidate

    try:
        if MixtureCureModel is None:
            return {"success": False, "result": None, "error": "Class MixtureCureModel is not available, path may need adjustment"}

        positional = [_numeric_or_same(item) for item in args]
        keyword = {name: _numeric_or_same(item) for name, item in kwargs.items()}

        instance = MixtureCureModel(*positional, **keyword)
        return {"success": True, "result": str(instance), "error": None}
    except Exception as exc:
        return {"success": False, "result": None, "error": str(exc)}
|
| 390 |
+
|
| 391 |
+
|
| 392 |
+
|
| 393 |
+
def create_app():
    """Return the module-level ``FastMCP`` instance named ``mcp``.

    No new object is built; every call hands back the same instance.
    """
    return mcp
|
| 396 |
+
|
| 397 |
+
if __name__ == "__main__":
    # Running this module directly serves over HTTP on all interfaces at a
    # fixed port of 8000.
    mcp.run(transport="http", host="0.0.0.0", port=8000)
|
lifelines/mcp_output/requirements.txt
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastmcp
|
| 2 |
+
fastapi
|
| 3 |
+
uvicorn[standard]
|
| 4 |
+
pydantic>=2.0.0
|
| 5 |
+
numpy
|
| 6 |
+
scipy
|
| 7 |
+
pandas
|
| 8 |
+
matplotlib
|
| 9 |
+
autograd
|
| 10 |
+
autograd-gamma
|
| 11 |
+
formulaic
|
lifelines/mcp_output/start_mcp.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
"""
|
| 3 |
+
MCP Service Startup Entry
|
| 4 |
+
"""
|
| 5 |
+
import sys
|
| 6 |
+
import os
|
| 7 |
+
|
| 8 |
+
# Put the sibling ``mcp_plugin`` directory on the import path so that
# ``mcp_service`` can be imported as a top-level module below.
project_root = os.path.dirname(os.path.abspath(__file__))
mcp_plugin_dir = os.path.join(project_root, "mcp_plugin")
if mcp_plugin_dir not in sys.path:
    # Prepend so this directory wins over same-named modules later on the path.
    sys.path.insert(0, mcp_plugin_dir)
|
| 12 |
+
|
| 13 |
+
from mcp_service import create_app
|
| 14 |
+
|
| 15 |
+
def main():
    """Launch the MCP service with the transport chosen by the environment.

    When ``MCP_TRANSPORT`` equals ``"http"`` the app is served on ``0.0.0.0``
    at ``MCP_PORT`` (default ``8000``). For any other value, or when the
    variable is unset, the app is run with its default arguments.
    """
    service = create_app()
    # Parsed up front, so a non-integer MCP_PORT raises ValueError even when
    # the HTTP transport is not selected.
    listen_port = int(os.environ.get("MCP_PORT", "8000"))

    if os.environ.get("MCP_TRANSPORT", "stdio") != "http":
        service.run()
        return
    service.run(transport="http", host="0.0.0.0", port=listen_port)
|
| 28 |
+
|
| 29 |
+
if __name__ == "__main__":
|
| 30 |
+
main()
|
lifelines/mcp_output/workflow_summary.json
ADDED
|
@@ -0,0 +1,224 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"repository": {
|
| 3 |
+
"name": "lifelines",
|
| 4 |
+
"url": "https://github.com/CamDavidsonPilon/lifelines",
|
| 5 |
+
"local_path": "/Users/ghh/Documents/Code/Code2MCP-private/workspace/lifelines",
|
| 6 |
+
"description": "Python library",
|
| 7 |
+
"features": "Basic functionality",
|
| 8 |
+
"tech_stack": "Python",
|
| 9 |
+
"stars": 0,
|
| 10 |
+
"forks": 0,
|
| 11 |
+
"language": "Python",
|
| 12 |
+
"last_updated": "",
|
| 13 |
+
"complexity": "medium",
|
| 14 |
+
"intrusiveness_risk": "low"
|
| 15 |
+
},
|
| 16 |
+
"execution": {
|
| 17 |
+
"start_time": 1773273949.5239081,
|
| 18 |
+
"end_time": 1773274139.770443,
|
| 19 |
+
"duration": 190.24653482437134,
|
| 20 |
+
"status": "success",
|
| 21 |
+
"workflow_status": "success",
|
| 22 |
+
"nodes_executed": [
|
| 23 |
+
"download",
|
| 24 |
+
"analysis",
|
| 25 |
+
"env",
|
| 26 |
+
"generate",
|
| 27 |
+
"run",
|
| 28 |
+
"review",
|
| 29 |
+
"finalize"
|
| 30 |
+
],
|
| 31 |
+
"total_files_processed": 5,
|
| 32 |
+
"environment_type": "unknown",
|
| 33 |
+
"llm_calls": 0,
|
| 34 |
+
"deepwiki_calls": 0
|
| 35 |
+
},
|
| 36 |
+
"tests": {
|
| 37 |
+
"original_project": {
|
| 38 |
+
"passed": false,
|
| 39 |
+
"details": {},
|
| 40 |
+
"test_coverage": "100%",
|
| 41 |
+
"execution_time": 0,
|
| 42 |
+
"test_files": []
|
| 43 |
+
},
|
| 44 |
+
"mcp_plugin": {
|
| 45 |
+
"passed": true,
|
| 46 |
+
"details": {},
|
| 47 |
+
"service_health": "healthy",
|
| 48 |
+
"startup_time": 0,
|
| 49 |
+
"transport_mode": "stdio",
|
| 50 |
+
"fastmcp_version": "unknown",
|
| 51 |
+
"mcp_version": "unknown"
|
| 52 |
+
}
|
| 53 |
+
},
|
| 54 |
+
"analysis": {
|
| 55 |
+
"structure": {
|
| 56 |
+
"packages": [
|
| 57 |
+
"source.lifelines",
|
| 58 |
+
"source.lifelines.datasets",
|
| 59 |
+
"source.lifelines.fitters",
|
| 60 |
+
"source.lifelines.tests",
|
| 61 |
+
"source.lifelines.utils"
|
| 62 |
+
]
|
| 63 |
+
},
|
| 64 |
+
"dependencies": {
|
| 65 |
+
"has_environment_yml": false,
|
| 66 |
+
"has_requirements_txt": false,
|
| 67 |
+
"pyproject": false,
|
| 68 |
+
"setup_cfg": false,
|
| 69 |
+
"setup_py": true
|
| 70 |
+
},
|
| 71 |
+
"entry_points": {
|
| 72 |
+
"imports": [],
|
| 73 |
+
"cli": [],
|
| 74 |
+
"modules": []
|
| 75 |
+
},
|
| 76 |
+
"risk_assessment": {
|
| 77 |
+
"import_feasibility": 0.94,
|
| 78 |
+
"intrusiveness_risk": "low",
|
| 79 |
+
"complexity": "medium"
|
| 80 |
+
},
|
| 81 |
+
"deepwiki_analysis": {
|
| 82 |
+
"repo_url": "https://github.com/CamDavidsonPilon/lifelines",
|
| 83 |
+
"repo_name": "lifelines",
|
| 84 |
+
"error": "DeepWiki analysis failed",
|
| 85 |
+
"model": "gpt-5.3-codex",
|
| 86 |
+
"source": "llm_direct_analysis",
|
| 87 |
+
"success": false
|
| 88 |
+
},
|
| 89 |
+
"code_complexity": {
|
| 90 |
+
"cyclomatic_complexity": "medium",
|
| 91 |
+
"cognitive_complexity": "medium",
|
| 92 |
+
"maintainability_index": 75
|
| 93 |
+
},
|
| 94 |
+
"security_analysis": {
|
| 95 |
+
"vulnerabilities_found": 0,
|
| 96 |
+
"security_score": 85,
|
| 97 |
+
"recommendations": []
|
| 98 |
+
}
|
| 99 |
+
},
|
| 100 |
+
"plugin_generation": {
|
| 101 |
+
"files_created": [
|
| 102 |
+
"mcp_output/start_mcp.py",
|
| 103 |
+
"mcp_output/mcp_plugin/__init__.py",
|
| 104 |
+
"mcp_output/mcp_plugin/mcp_service.py",
|
| 105 |
+
"mcp_output/mcp_plugin/adapter.py",
|
| 106 |
+
"mcp_output/mcp_plugin/main.py",
|
| 107 |
+
"mcp_output/requirements.txt",
|
| 108 |
+
"mcp_output/README_MCP.md"
|
| 109 |
+
],
|
| 110 |
+
"main_entry": "start_mcp.py",
|
| 111 |
+
"requirements": [
|
| 112 |
+
"fastmcp>=0.1.0",
|
| 113 |
+
"pydantic>=2.0.0"
|
| 114 |
+
],
|
| 115 |
+
"readme_path": "/Users/ghh/Documents/Code/Code2MCP-private/workspace/lifelines/mcp_output/README_MCP.md",
|
| 116 |
+
"adapter_mode": "import",
|
| 117 |
+
"total_lines_of_code": 0,
|
| 118 |
+
"generated_files_size": 0,
|
| 119 |
+
"tool_endpoints": 0,
|
| 120 |
+
"supported_features": [
|
| 121 |
+
"Basic functionality"
|
| 122 |
+
],
|
| 123 |
+
"generated_tools": [
|
| 124 |
+
"Basic tools",
|
| 125 |
+
"Health check tools",
|
| 126 |
+
"Version info tools"
|
| 127 |
+
]
|
| 128 |
+
},
|
| 129 |
+
"code_review": {},
|
| 130 |
+
"errors": [],
|
| 131 |
+
"warnings": [],
|
| 132 |
+
"recommendations": [
|
| 133 |
+
"migrate packaging from legacy setup.py to a pyproject.toml build (PEP 517/621) with pinned optional extras for docs/tests",
|
| 134 |
+
"split oversized modules (especially lifelines/fitters/__init__.py",
|
| 135 |
+
"coxph_fitter.py",
|
| 136 |
+
"and test_estimation.py) into smaller focused files to improve maintainability",
|
| 137 |
+
"strengthen CI by adding a full test matrix (Python versions/OS)",
|
| 138 |
+
"coverage thresholds",
|
| 139 |
+
"and wheel/sdist smoke-install checks",
|
| 140 |
+
"add performance regression benchmarks in CI using existing perf_tests (with baseline tracking for core fitters like CoxPH and KaplanMeier)",
|
| 141 |
+
"introduce stricter static analysis gates (mypy on key modules",
|
| 142 |
+
"ruff/flake8",
|
| 143 |
+
"docstring lint) and fail builds on new violations",
|
| 144 |
+
"harden API quality by defining/stabilizing public API exports and excluding internal/test/example symbols from generated MCP endpoints",
|
| 145 |
+
"improve docs with task-oriented guides (time-varying covariates",
|
| 146 |
+
"left/interval censoring",
|
| 147 |
+
"model selection) plus runnable notebook tests",
|
| 148 |
+
"add property-based and numerical-stability tests for edge cases (extreme censoring",
|
| 149 |
+
"ties",
|
| 150 |
+
"near-separation",
|
| 151 |
+
"NaN/inf handling)",
|
| 152 |
+
"formalize deprecation/versioning policy and automate changelog/release notes from PR labels",
|
| 153 |
+
"add reproducibility controls across stochastic utilities/tests (global seed strategy and deterministic test fixtures)",
|
| 154 |
+
"add security and dependency hygiene checks (pip-audit/safety",
|
| 155 |
+
"Dependabot",
|
| 156 |
+
"minimal version bounds validation)",
|
| 157 |
+
"improve plugin robustness with endpoint naming normalization and collision checks (e.g.",
|
| 158 |
+
"duplicate fitter names)",
|
| 159 |
+
"and add lightweight architecture docs describing fitter hierarchy",
|
| 160 |
+
"mixins",
|
| 161 |
+
"and statistical test design boundaries"
|
| 162 |
+
],
|
| 163 |
+
"performance_metrics": {
|
| 164 |
+
"memory_usage_mb": 0,
|
| 165 |
+
"cpu_usage_percent": 0,
|
| 166 |
+
"response_time_ms": 0,
|
| 167 |
+
"throughput_requests_per_second": 0
|
| 168 |
+
},
|
| 169 |
+
"deployment_info": {
|
| 170 |
+
"supported_platforms": [
|
| 171 |
+
"Linux",
|
| 172 |
+
"Windows",
|
| 173 |
+
"macOS"
|
| 174 |
+
],
|
| 175 |
+
"python_versions": [
|
| 176 |
+
"3.8",
|
| 177 |
+
"3.9",
|
| 178 |
+
"3.10",
|
| 179 |
+
"3.11",
|
| 180 |
+
"3.12"
|
| 181 |
+
],
|
| 182 |
+
"deployment_methods": [
|
| 183 |
+
"Docker",
|
| 184 |
+
"pip",
|
| 185 |
+
"conda"
|
| 186 |
+
],
|
| 187 |
+
"monitoring_support": true,
|
| 188 |
+
"logging_configuration": "structured"
|
| 189 |
+
},
|
| 190 |
+
"execution_analysis": {
|
| 191 |
+
"success_factors": [
|
| 192 |
+
"Workflow completed end-to-end with status=success across all planned nodes (download, analysis, env, generate, run, review, finalize).",
|
| 193 |
+
"Import-based adapter strategy was feasible (import feasibility 0.94, low intrusiveness risk), enabling rapid MCP wrapping.",
|
| 194 |
+
"Generated MCP service started healthy over stdio and plugin tests passed.",
|
| 195 |
+
"Repository structure was analyzable via zip fallback despite DeepWiki failure."
|
| 196 |
+
],
|
| 197 |
+
"failure_reasons": [
|
| 198 |
+
"No hard workflow failure occurred.",
|
| 199 |
+
"DeepWiki analysis failed (non-blocking) and reduced enrichment quality.",
|
| 200 |
+
"Original project tests were not validated as passing (original_project.passed=false, empty details), creating confidence gaps in behavioral parity.",
|
| 201 |
+
"Metrics instrumentation appears incomplete (0 for LOC/size/resource/perf metrics), limiting evidence-based quality assessment."
|
| 202 |
+
],
|
| 203 |
+
"overall_assessment": "good",
|
| 204 |
+
"node_performance": {
|
| 205 |
+
"download_time": "Completed successfully; repo imported via zip fallback (86 files). Exact per-node timing not provided.",
|
| 206 |
+
"analysis_time": "Completed successfully; medium complexity identified with broad AST-discovered API surface. DeepWiki sub-step failed but analysis continued.",
|
| 207 |
+
"generation_time": "Completed successfully; MCP scaffold and adapter files generated, but reported generated LOC/size/tool count fields are inconsistent with actual endpoint list.",
|
| 208 |
+
"test_time": "MCP plugin health checks passed; original project test validation was effectively not executed/recorded (execution_time=0, no test files)."
|
| 209 |
+
},
|
| 210 |
+
"resource_usage": {
|
| 211 |
+
"memory_efficiency": "Undetermined due to missing telemetry (reported 0 MB).",
|
| 212 |
+
"cpu_efficiency": "Undetermined due to missing telemetry (reported 0%).",
|
| 213 |
+
"disk_usage": "Low-to-moderate footprint expected from small generated scaffold; reported size metrics are missing/inaccurate."
|
| 214 |
+
}
|
| 215 |
+
},
|
| 216 |
+
"technical_quality": {
|
| 217 |
+
"code_quality_score": 72,
|
| 218 |
+
"architecture_score": 74,
|
| 219 |
+
"performance_score": 61,
|
| 220 |
+
"maintainability_score": 68,
|
| 221 |
+
"security_score": 85,
|
| 222 |
+
"scalability_score": 66
|
| 223 |
+
}
|
| 224 |
+
}
|
lifelines/source/.DS_Store
ADDED
|
Binary file (8.2 kB). View file
|
|
|
lifelines/source/.coveragerc
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# .coveragerc to control coverage.py
|
| 2 |
+
[run]
|
| 3 |
+
omit =
|
| 4 |
+
lifelines/plotting.py
|
lifelines/source/.pre-commit-config.yaml
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
repos:
|
| 2 |
+
- repo: https://github.com/pre-commit/pre-commit-hooks
|
| 3 |
+
rev: v4.3.0
|
| 4 |
+
hooks:
|
| 5 |
+
- id: trailing-whitespace
|
| 6 |
+
- id: check-ast
|
| 7 |
+
- id: check-yaml
|
| 8 |
+
- id: end-of-file-fixer
|
| 9 |
+
- id: fix-encoding-pragma
|
| 10 |
+
- id: mixed-line-ending
|
| 11 |
+
- id: trailing-whitespace
|
| 12 |
+
- repo: https://github.com/ambv/black
|
| 13 |
+
rev: 22.8.0
|
| 14 |
+
hooks:
|
| 15 |
+
- id: black
|
| 16 |
+
args: ["--line-length", "130"]
|
lifelines/source/.prospector.yaml
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
strictness: medium
|
| 2 |
+
|
| 3 |
+
pylint:
|
| 4 |
+
options:
|
| 5 |
+
bad-names: foo,baz,toto,tutu,tata,data
|
| 6 |
+
# max-args default = 5
|
| 7 |
+
max-args: 15
|
| 8 |
+
# max-locals default = 15
|
| 9 |
+
max-locals: 50
|
| 10 |
+
# max-branches default = 15
|
| 11 |
+
max-branches: 16
|
| 12 |
+
disable:
|
| 13 |
+
- line-too-long
|
| 14 |
+
- protected-access
|
| 15 |
+
- no-value-for-parameter
|
| 16 |
+
- assignment-from-no-return
|
| 17 |
+
- invalid-unary-operand-type
|
| 18 |
+
|
| 19 |
+
pyflakes:
|
| 20 |
+
disable:
|
| 21 |
+
- F401
|
| 22 |
+
- F841
|
| 23 |
+
# let pylint used-before-assignment handle this
|
| 24 |
+
- F821
|
| 25 |
+
|
| 26 |
+
pep8:
|
| 27 |
+
options:
|
| 28 |
+
max-line-length: 130
|
| 29 |
+
disable:
|
| 30 |
+
- E501
|
| 31 |
+
- E241
|
| 32 |
+
|
| 33 |
+
mccabe:
|
| 34 |
+
options:
|
| 35 |
+
# max-complexity default = 10
|
| 36 |
+
max-complexity: 23
|
| 37 |
+
|
| 38 |
+
pyroma:
|
| 39 |
+
run: true
|
| 40 |
+
|
| 41 |
+
pep257:
|
| 42 |
+
run: false
|
| 43 |
+
|
| 44 |
+
ignore-paths:
|
| 45 |
+
- build
|
| 46 |
+
- benchmarks
|
lifelines/source/.readthedocs.yaml
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Read the Docs configuration file for Sphinx projects
|
| 2 |
+
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
|
| 3 |
+
|
| 4 |
+
# Required
|
| 5 |
+
version: 2
|
| 6 |
+
|
| 7 |
+
# Set the OS, Python version and other tools you might need
|
| 8 |
+
build:
|
| 9 |
+
os: ubuntu-22.04
|
| 10 |
+
tools:
|
| 11 |
+
python: "3.11"
|
| 12 |
+
# You can also specify other tool versions:
|
| 13 |
+
# nodejs: "20"
|
| 14 |
+
# rust: "1.70"
|
| 15 |
+
# golang: "1.20"
|
| 16 |
+
|
| 17 |
+
# Build documentation in the "docs/" directory with Sphinx
|
| 18 |
+
sphinx:
|
| 19 |
+
configuration: docs/conf.py
|
| 20 |
+
# You can configure Sphinx to use a different builder, for instance use the dirhtml builder for simpler URLs
|
| 21 |
+
# builder: "dirhtml"
|
| 22 |
+
# Fail on all warnings to avoid broken references
|
| 23 |
+
# fail_on_warning: true
|
| 24 |
+
|
| 25 |
+
# Optionally build your docs in additional formats such as PDF and ePub
|
| 26 |
+
# formats:
|
| 27 |
+
# - pdf
|
| 28 |
+
# - epub
|
| 29 |
+
|
| 30 |
+
# Optional but recommended, declare the Python requirements required
|
| 31 |
+
# to build your documentation
|
| 32 |
+
# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html
|
| 33 |
+
python:
|
| 34 |
+
install:
|
| 35 |
+
- requirements: reqs/docs-requirements.txt
|
lifelines/source/CHANGELOG.md
ADDED
|
@@ -0,0 +1,1310 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
## Changelog
|
| 2 |
+
|
| 3 |
+
#### 0.30.3 - 2026-03-05
|
| 4 |
+
- Revoke the 0.30.2 release and republish as 0.30.3.
|
| 5 |
+
- Require Python >= 3.11 in package metadata.
|
| 6 |
+
|
| 7 |
+
#### 0.30.2 - 2026-03-04
|
| 8 |
+
- Revoke the 0.30.1 release and republish as 0.30.2.
|
| 9 |
+
- Require Python >= 3.10 in package metadata.
|
| 10 |
+
- Update Python trove classifiers to `Python :: 3 :: Only` and add explicit support classifiers for Python 3.12, 3.13, and 3.14.
|
| 11 |
+
|
| 12 |
+
#### 0.30.1 - 2026-02-04
|
| 13 |
+
- Optimize `AalenJohansenFitter` variance calculation using prefix-sum accumulators; add `LinearAccumulator`/`QuadraticAccumulator` utilities and tests.
|
| 14 |
+
- Fix `CoxPHFitter` handling when `event_col=None` (sorting and default event vector).
|
| 15 |
+
- Fix `add_at_risk_counts` for NumPy >= 2.4 scalar conversion; add regression test.
|
| 16 |
+
- Support Python 3.13 and 3.14.
|
| 17 |
+
|
| 18 |
+
#### 0.30.0 - 2024-10-29
|
| 19 |
+
- update dependencies (numpy >= 1.14.0)
|
| 20 |
+
- fix for `decimal` kwarg not working in StatisticalResult
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
#### 0.29.0 - 2024-06-25
|
| 24 |
+
- update dependencies (pandas >= 2.1)
|
| 25 |
+
- update dependencies (scipy >= 1.7)
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
#### 0.28.0 - 2024-01-03
|
| 29 |
+
- Fixes bins that are far into the future with using `survival_table_from_events`, see #1587
|
| 30 |
+
- Removed `sklearn_adapter`. It was a terrible hack, and causing more confusion and support debt than I want. This cleans up our API and simplifies the library. ✨ There's no replacement, and I doubt I'll introduce one ✨
|
| 31 |
+
- Fix pandas>=2.0 compatibility.
|
| 32 |
+
- Fix overflow issue in NelsonAalenFitter, #1585
|
| 33 |
+
- officially drop support for < py3.9
|
| 34 |
+
- update some dependencies (pandas >= 1.2)
|
| 35 |
+
|
| 36 |
+
#### 0.27.8 - 2023-09-13
|
| 37 |
+
- Estimators now have `.label` property
|
| 38 |
+
- Fixed some deprecation warnings
|
| 39 |
+
- Pinned to numpy < 2.0
|
| 40 |
+
|
| 41 |
+
#### 0.27.7 - 2023-05-01
|
| 42 |
+
- `check_assumptions(show_plots=True)` will always show plots, regardless of test outcome. Thanks @nomennominatur!
|
| 43 |
+
- `lifelines.datasets` is now importable.
|
| 44 |
+
|
| 45 |
+
#### 0.27.6 - 2023-04-27
|
| 46 |
+
- Fix for py3.7
|
| 47 |
+
|
| 48 |
+
#### 0.27.5 - 2023-04-27
|
| 49 |
+
- Support pandas 2.0+
|
| 50 |
+
|
| 51 |
+
##### New features
|
| 52 |
+
- Support py3.11
|
| 53 |
+
|
| 54 |
+
#### 0.27.4 - 2022-11-16
|
| 55 |
+
|
| 56 |
+
##### New features
|
| 57 |
+
- Support py3.11
|
| 58 |
+
|
| 59 |
+
#### 0.27.3 - 2022-09-25
|
| 60 |
+
|
| 61 |
+
##### New features
|
| 62 |
+
- Fixed and silenced a lot of warnings
|
| 63 |
+
|
| 64 |
+
##### Bug fixes
|
| 65 |
+
- Migrate to newer Pandas `Styler` for `to_latex`
|
| 66 |
+
|
| 67 |
+
##### API Changes
|
| 68 |
+
- There were way too many functions on the summary objects, so I've hidden `to_*` on them.
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
#### 0.27.2 - 2022-09-07
|
| 72 |
+
|
| 73 |
+
##### Bug fixes
|
| 74 |
+
- Fixed issue in add_at_risk_table when there were very late entries.
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
#### 0.27.1 - 2022-06-25
|
| 78 |
+
|
| 79 |
+
##### New features
|
| 80 |
+
- all `fit_` methods now accept a `fit_options` dict that allows one to pass kwargs to the underlying fitting algorithm.
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
##### API Changes
|
| 84 |
+
- `step_size` is removed from Cox models `fit`. See `fit_options` above.
|
| 85 |
+
|
| 86 |
+
##### Bug fixes
|
| 87 |
+
- fixed Cox models when "trivial" matrix was passed in (one with no covariates)
|
| 88 |
+
|
| 89 |
+
#### 0.27.0 - 2022-03-15
|
| 90 |
+
|
| 91 |
+
Dropping Python3.6 support.
|
| 92 |
+
|
| 93 |
+
##### Bug fixes
|
| 94 |
+
- Fix late entry in `add_at_risk_counts`.
|
| 95 |
+
|
| 96 |
+
##### New features
|
| 97 |
+
- `add_at_risk_counts` has a new flag to determine whether to use start or end-of-period at risk counts.
|
| 98 |
+
- new column in fitter's `summary` that displays the number the parameter is being compared against.
|
| 99 |
+
|
| 100 |
+
##### API Changes
|
| 101 |
+
- `plot_lifetimes`'s `duration` arg has the interpretation of "relative time the subject died (since birth)", instead of the old "time observed for". These interpretations are different when there is late entry.
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
#### 0.26.4 - 2021-11-30
|
| 105 |
+
|
| 106 |
+
##### New features
|
| 107 |
+
- adding `weights` to log rank functions
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
#### 0.26.3 - 2021-09-16
|
| 111 |
+
|
| 112 |
+
##### Bug fixes
|
| 113 |
+
- Fix using formulas with `CoxPHFitter.score`
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
#### 0.26.2 - 2021-09-15
|
| 117 |
+
|
| 118 |
+
Error in v0.26.1 deployment
|
| 119 |
+
|
| 120 |
+
#### 0.26.1 - 2021-09-15
|
| 121 |
+
|
| 122 |
+
##### API Changes
|
| 123 |
+
- `t_0` in `logrank_test` now will not remove data, but will instead censor all subjects that experience the event afterwards.
|
| 124 |
+
- update `status` column in `lifelines.datasets.load_lung` to be more standard coding: 0 is censored, 1 is event.
|
| 125 |
+
|
| 126 |
+
##### Bug fixes
|
| 127 |
+
- Fix using formulas with `AalenAdditiveFitter.predict_cumulative_hazard`
|
| 128 |
+
- Fix using formulas with `CoxPHFitter.score`
|
| 129 |
+
|
| 130 |
+
|
| 131 |
+
#### 0.26.0 - 2021-05-26
|
| 132 |
+
|
| 133 |
+
##### New features
|
| 134 |
+
- `.BIC_` is now present on fitted models.
|
| 135 |
+
- `CoxPHFitter` with spline baseline can accept pre-computed knot locations.
|
| 136 |
+
- Left censoring fitting in KaplanMeierFitter is now "expected". That is, `predict` _always_ predicts the survival function (as does every other model), `confidence_interval_` is _always_ the CI for the survival function (as does every other model), and so on. In summary: the API for estimates doesn't change depending on what kind of censoring your dataset has.
|
| 137 |
+
|
| 138 |
+
##### Bug fixes
|
| 139 |
+
- Fixed an annoying bug where at_risk-table labels were not aligning properly when data spanned large ranges. See merging PR for details.
|
| 140 |
+
- Fixed a bug in `find_best_parametric_model` where the wrong BIC value was being computed.
|
| 141 |
+
- Fixed regression bug when using an array as a penalizer in Cox models.
|
| 142 |
+
|
| 143 |
+
|
| 144 |
+
#### 0.25.11 - 2021-04-06
|
| 145 |
+
|
| 146 |
+
##### Bug fixes
|
| 147 |
+
- Fix integer-valued categorical variables in regression model predictions.
|
| 148 |
+
- numpy > 1.20 is allowed.
|
| 149 |
+
- Bug fix in the elastic-net penalty for Cox models that wasn't weighting the terms correctly.
|
| 150 |
+
|
| 151 |
+
|
| 152 |
+
#### 0.25.10 - 2021-03-03
|
| 153 |
+
|
| 154 |
+
##### New features
|
| 155 |
+
- Better appearance when using a single row to show in `add_at_risk_table`.
|
| 156 |
+
|
| 157 |
+
|
| 158 |
+
#### 0.25.9 - 2021-02-04
|
| 159 |
+
|
| 160 |
+
Small bump in dependencies.
|
| 161 |
+
|
| 162 |
+
|
| 163 |
+
#### 0.25.8 - 2021-01-22
|
| 164 |
+
|
| 165 |
+
Important: we dropped Patsy as our formula framework, and adopted Formulaic. While the latter is less mature than Patsy, we feel the core capabilities are satisfactory and it provides new opportunities.
|
| 166 |
+
|
| 167 |
+
##### New features
|
| 168 |
+
- Parametric models with formulas are able to be serialized now.
|
| 169 |
+
- a `_scipy_callback` function is available to use in fitting algorithms.
|
| 170 |
+
|
| 171 |
+
|
| 172 |
+
#### 0.25.7 - 2020-12-09
|
| 173 |
+
|
| 174 |
+
##### API Changes
|
| 175 |
+
- Adding `cumulative_hazard_at_times` to NelsonAalenFitter
|
| 176 |
+
|
| 177 |
+
|
| 178 |
+
##### Bug fixes
|
| 179 |
+
- Fixed error in `CoxPHFitter` when entry time == event time.
|
| 180 |
+
- Fixed formulas in AFT interval censoring regression.
|
| 181 |
+
- Fixed `concordance_index_` when no events observed
|
| 182 |
+
- Fixed label being overwritten in ParametricUnivariate models
|
| 183 |
+
|
| 184 |
+
|
| 185 |
+
#### 0.25.6 - 2020-10-26
|
| 186 |
+
|
| 187 |
+
##### New features
|
| 188 |
+
- Parametric Cox models can now handle left and interval censoring datasets.
|
| 189 |
+
|
| 190 |
+
##### Bug fixes
|
| 191 |
+
- "improved" the output of `add_at_risk_counts` by removing a call to `plt.tight_layout()` - this works better when you are calling `add_at_risk_counts` on multiple axes, but it is recommended you call `plt.tight_layout()` at the very end of your script.
|
| 192 |
+
- Fix bug in `KaplanMeierFitter`'s interval censoring where max(lower bound) < min(upper bound).
|
| 193 |
+
|
| 194 |
+
|
| 195 |
+
#### 0.25.5 - 2020-09-23
|
| 196 |
+
|
| 197 |
+
##### API Changes
|
| 198 |
+
- `check_assumptions` now returns a list of list of axes that can be manipulated
|
| 199 |
+
|
| 200 |
+
##### Bug fixes
|
| 201 |
+
- fixed error when using `plot_partial_effects` with categorical data in AFT models
|
| 202 |
+
- improved warning when Hessian matrix contains NaNs.
|
| 203 |
+
- fixed performance regression in interval censoring fitting in parametric models
|
| 204 |
+
- `weights` wasn't being applied properly in NPMLE
|
| 205 |
+
|
| 206 |
+
#### 0.25.4 - 2020-08-26
|
| 207 |
+
|
| 208 |
+
##### New features
|
| 209 |
+
- New baseline estimator for Cox models: ``piecewise``
|
| 210 |
+
- Performance improvements for parametric models `log_likelihood_ratio_test()` and `print_summary()`
|
| 211 |
+
- Better step-size defaults for Cox model -> more robust convergence.
|
| 212 |
+
|
| 213 |
+
|
| 214 |
+
##### Bug fixes
|
| 215 |
+
- fix `check_assumptions` when using formulas.
|
| 216 |
+
|
| 217 |
+
|
| 218 |
+
#### 0.25.3 - 2020-08-24
|
| 219 |
+
|
| 220 |
+
##### New features
|
| 221 |
+
- `survival_difference_at_fixed_point_in_time_test` now accepts fitters instead of raw data, meaning that you can use this function on left, right or interval censored data.
|
| 222 |
+
|
| 223 |
+
##### API Changes
|
| 224 |
+
- See note on `survival_difference_at_fixed_point_in_time_test` above.
|
| 225 |
+
|
| 226 |
+
##### Bug fixes
|
| 227 |
+
- fix `StatisticalResult` printing in notebooks
|
| 228 |
+
- fix Python error when calling `plot_covariate_groups`
|
| 229 |
+
- fix dtype mismatches in `plot_partial_effects_on_outcome`.
|
| 230 |
+
|
| 231 |
+
|
| 232 |
+
#### 0.25.2 - 2020-08-08
|
| 233 |
+
|
| 234 |
+
##### New features
|
| 235 |
+
- Spline `CoxPHFitter` can now use `strata`.
|
| 236 |
+
|
| 237 |
+
##### API Changes
|
| 238 |
+
- a small parameterization change of the spline `CoxPHFitter`. The linear term in the spline part was moved to a new `Intercept` term in the `beta_`.
|
| 239 |
+
- `n_baseline_knots` in the spline `CoxPHFitter` now refers to _all_ knots, and not just interior knots (this was confusing to me, the author.). So add 2 to `n_baseline_knots` to recover the identical model as previously.
|
| 240 |
+
|
| 241 |
+
##### Bug fixes
|
| 242 |
+
- fix splines `CoxPHFitter` when `predict_hazard` was called.
|
| 243 |
+
- fix some exception imports I missed.
|
| 244 |
+
- fix log-likelihood p-value in splines `CoxPHFitter`
|
| 245 |
+
|
| 246 |
+
|
| 247 |
+
#### 0.25.1 - 2020-08-01
|
| 248 |
+
|
| 249 |
+
##### Bug fixes
|
| 250 |
+
- ok _actually_ ship the out-of-sample calibration code
|
| 251 |
+
- fix `labels=False` in `add_at_risk_counts`
|
| 252 |
+
- allow for specific rows to be shown in `add_at_risk_counts`
|
| 253 |
+
- put `patsy` as a proper dependency.
|
| 254 |
+
- suppress some Pandas 1.1 warnings.
|
| 255 |
+
|
| 256 |
+
|
| 257 |
+
#### 0.25.0 - 2020-07-27
|
| 258 |
+
|
| 259 |
+
##### New features
|
| 260 |
+
- Formulas! *lifelines* now supports R-like formulas in regression models. See docs [here](https://lifelines.readthedocs.io/en/latest/Survival%20Regression.html#fitting-the-regression).
|
| 261 |
+
- `plot_covariate_group` now can plot other y-values like hazards and cumulative hazards (default: survival function).
|
| 262 |
+
- `CoxPHFitter` now accepts late entries via `entry_col`.
|
| 263 |
+
- `calibration.survival_probability_calibration` now works with out-of-sample data.
|
| 264 |
+
- `print_summary` now accepts a `column` argument to filter down the displayed values. This helps with clutter in notebooks, latex, or on the terminal.
|
| 265 |
+
- `add_at_risk_counts` now follows the cool new KMunicate suggestions
|
| 266 |
+
|
| 267 |
+
|
| 268 |
+
##### API Changes
|
| 269 |
+
- With the introduction of formulas, all models can be using formulas under the hood.
|
| 270 |
+
- For both custom regression models or non-AFT regression models, this means that you no longer need to add a constant column to your DataFrame (instead add a `1` as a formula string in the `regressors` dict). You may also need to remove the T and E columns from `regressors`. I've updated the models in the `\examples` folder with examples of this new model building.
|
| 271 |
+
- Unfortunately, if using formulas, your model will not be able to be pickled. This is a problem with an upstream library, and I hope to have it resolved in the near future.
|
| 272 |
+
- `plot_covariate_groups` has been deprecated in favour of `plot_partial_effects_on_outcome`.
|
| 273 |
+
- The baseline in `plot_covariate_groups` has changed from the *mean* observation (including dummy-encoded categorical variables) to *median* for ordinal (including continuous) and *mode* for categorical.
|
| 274 |
+
- Previously, *lifelines* used the label `"_intercept"` when it added a constant column in regressions. To align with Patsy, we are now using `"Intercept"`.
|
| 275 |
+
- In AFT models, `ancillary_df` kwarg has been renamed to `ancillary`. This reflects the more general use of the kwarg (not always a DataFrame, but could be a boolean or string now, too).
|
| 276 |
+
- Some column names in datasets shipped with lifelines have changed.
|
| 277 |
+
- The never used "lifelines.metrics" is deleted.
|
| 278 |
+
- With the introduction of formulas, `plot_covariate_groups` (now called `plot_partial_effects_on_outcome`) behaves differently for transformed variables. Users no longer need to add "derivatives" features, and encoding is done implicitly. See docs [here](https://lifelines.readthedocs.io/en/latest/Survival%20Regression.html#plotting-the-effect-of-varying-a-covariate).
|
| 279 |
+
- all exceptions and warnings have moved to `lifelines.exceptions`
|
| 280 |
+
|
| 281 |
+
##### Bug fixes
|
| 282 |
+
- The p-value of the log-likelihood ratio test for the CoxPHFitter with splines was returning the wrong result because the degrees of freedom was incorrect.
|
| 283 |
+
- better `print_summary` logic in IDEs and Jupyter exports. Previously it should not be displayed.
|
| 284 |
+
- p-values have been corrected in the `SplineFitter`. Previously, the "null hypothesis" was not coefficient=0, but coefficient=0.01. This is now set to the former.
|
| 285 |
+
- fixed NaN bug in `survival_table_from_events` with intervals when no events would occur in an interval.
|
| 286 |
+
|
| 287 |
+
#### 0.24.16 - 2020-07-09
|
| 288 |
+
|
| 289 |
+
##### New features
|
| 290 |
+
- improved algorithm choice for large DataFrames for Cox models. Should see a significant performance boost.
|
| 291 |
+
|
| 292 |
+
##### Bug fixes
|
| 293 |
+
- fixed `utils.median_survival_time` not accepting Pandas Series.
|
| 294 |
+
|
| 295 |
+
#### 0.24.15 - 2020-07-07
|
| 296 |
+
|
| 297 |
+
##### Bug fixes
|
| 298 |
+
- fixed an edge case in `KaplanMeierFitter` where a really late entry would occur after all other population had died.
|
| 299 |
+
- fixed `plot` in `BreslowFlemingtonHarrisFitter`
|
| 300 |
+
- fixed bug where using `conditional_after` and `times` in `CoxPHFitter("spline")` prediction methods would be ignored.
|
| 301 |
+
|
| 302 |
+
|
| 303 |
+
#### 0.24.14 - 2020-07-02
|
| 304 |
+
|
| 305 |
+
##### Bug fixes
|
| 306 |
+
- fixed a bug where using `conditional_after` and `times` in prediction methods would result in a shape error
|
| 307 |
+
- fixed a bug where `score` was not able to be used in splined `CoxPHFitter`
|
| 308 |
+
- fixed a bug where some columns would not be displayed in `print_summary`
|
| 309 |
+
|
| 310 |
+
#### 0.24.13 - 2020-06-22
|
| 311 |
+
|
| 312 |
+
##### Bug fixes
|
| 313 |
+
- fixed a bug where `CoxPHFitter` would ignore inputted `alpha` levels for confidence intervals
|
| 314 |
+
- fixed a bug where `CoxPHFitter` would fail when working with `sklearn_adapter`
|
| 315 |
+
|
| 316 |
+
|
| 317 |
+
#### 0.24.12 - 2020-06-20
|
| 318 |
+
|
| 319 |
+
##### New features
|
| 320 |
+
- improved convergence of `GeneralizedGamma(Regression)Fitter`.
|
| 321 |
+
|
| 322 |
+
|
| 323 |
+
#### 0.24.11 - 2020-06-17
|
| 324 |
+
|
| 325 |
+
##### New features
|
| 326 |
+
- new spline regression model `CRCSplineFitter` based on the paper "A flexible parametric accelerated failure time model" by Michael J. Crowther, Patrick Royston, Mark Clements.
|
| 327 |
+
- new survival probability calibration tool `lifelines.calibration.survival_probability_calibration` to help validate regression models. Based on “Graphical calibration curves and the integrated calibration index (ICI) for survival models” by P. Austin, F. Harrell, and D. van Klaveren.
|
| 328 |
+
|
| 329 |
+
##### API Changes
|
| 330 |
+
- (and bug fix) scalar parameters in regression models were not being penalized by `penalizer` - we now penalize everything except intercept terms in linear relationships.
|
| 331 |
+
|
| 332 |
+
|
| 333 |
+
#### 0.24.10 - 2020-06-16
|
| 334 |
+
|
| 335 |
+
##### New features
|
| 336 |
+
- New improvements when using splines model in CoxPHFitter - it should offer much better prediction and baseline-hazard estimation, including extrapolation and interpolation.
|
| 337 |
+
|
| 338 |
+
##### API Changes
|
| 339 |
+
- Related to above: the fitted spline parameters are now available in the `.summary` and `.print_summary` methods.
|
| 340 |
+
|
| 341 |
+
##### Bug fixes
|
| 342 |
+
- fixed a bug in initialization of some interval-censoring models -> better convergence.
|
| 343 |
+
|
| 344 |
+
|
| 345 |
+
#### 0.24.9 - 2020-06-05
|
| 346 |
+
|
| 347 |
+
##### New features
|
| 348 |
+
- Faster NPMLE for interval censored data
|
| 349 |
+
- New weightings available in the `logrank_test`: `wilcoxon`, `tarone-ware`, `peto`, `fleming-harrington`. Thanks @sean-reed
|
| 350 |
+
- new interval censored dataset: `lifelines.datasets.load_mice`
|
| 351 |
+
|
| 352 |
+
##### Bug fixes
|
| 353 |
+
- Cleared up some mislabeling in `plot_loglogs`. Thanks @sean-reed!
|
| 354 |
+
- tuples are now able to be used as input in univariate models.
|
| 355 |
+
|
| 356 |
+
#### 0.24.8 - 2020-05-17
|
| 357 |
+
|
| 358 |
+
##### New features
|
| 359 |
+
- Non parametric interval censoring is now available, _experimentally_. Not all edge cases are fully checked, and some features are missing. Try it under `KaplanMeierFitter.fit_interval_censoring`
|
| 360 |
+
|
| 361 |
+
|
| 362 |
+
#### 0.24.7 - 2020-05-17
|
| 363 |
+
|
| 364 |
+
##### New features
|
| 365 |
+
- `find_best_parametric_model` can handle left and interval censoring. Also allows for more fitting options.
|
| 366 |
+
- `AIC_` is a property on parametric models, and `AIC_partial_` is a property on Cox models.
|
| 367 |
+
- `penalizer` in all regression models can now be an array instead of a float. This enables new functionality and better
|
| 368 |
+
control over penalization. This is similar (but not identical) to `penalty.factors` in glmnet in R.
|
| 369 |
+
- some convergence tweaks which should help recent performance regressions.
|
| 370 |
+
|
| 371 |
+
#### 0.24.6 - 2020-05-05
|
| 372 |
+
|
| 373 |
+
##### New features
|
| 374 |
+
- At the cost of some performance, convergence is improved in many models.
|
| 375 |
+
- New `lifelines.plotting.plot_interval_censored_lifetimes` for plotting interval censored data - thanks @sean-reed!
|
| 376 |
+
|
| 377 |
+
##### Bug fixes
|
| 378 |
+
- fixed bug where `cdf_plot` and `qq_plot` were not factoring in the weights correctly.
|
| 379 |
+
|
| 380 |
+
#### 0.24.5 - 2020-05-01
|
| 381 |
+
|
| 382 |
+
##### New features
|
| 383 |
+
- `plot_lifetimes` accepts pandas Series.
|
| 384 |
+
|
| 385 |
+
##### Bug fixes
|
| 386 |
+
- Fixed important bug in interval censoring models. Users using interval censoring are strongly advised to upgrade.
|
| 387 |
+
- Improved `at_risk_counts` for subplots.
|
| 388 |
+
- More data validation checks for `CoxTimeVaryingFitter`
|
| 389 |
+
|
| 390 |
+
#### 0.24.4 - 2020-04-13
|
| 391 |
+
|
| 392 |
+
##### Bug fixes
|
| 393 |
+
- Improved stability of interval censoring in parametric models.
|
| 394 |
+
- setting a dataframe in `ancillary_df` works for interval censoring
|
| 395 |
+
- `.score` works for interval censored models
|
| 396 |
+
|
| 397 |
+
#### 0.24.3 - 2020-03-25
|
| 398 |
+
|
| 399 |
+
##### New features
|
| 400 |
+
- new `logx` kwarg in plotting curves
|
| 401 |
+
- PH models have `compute_followup_hazard_ratios` for simulating what the hazard ratio would be at previous times. This is useful because the final hazard ratio is some weighted average of these.
|
| 402 |
+
|
| 403 |
+
##### Bug fixes
|
| 404 |
+
- Fixed error in HTML printer that was hiding concordance index information.
|
| 405 |
+
|
| 406 |
+
|
| 407 |
+
#### 0.24.2 - 2020-03-15
|
| 408 |
+
|
| 409 |
+
##### Bug fixes
|
| 410 |
+
- Fixed bug when no covariates were passed into `CoxPHFitter`. See #975
|
| 411 |
+
- Fixed error in `StatisticalResult` where the test name was not displayed correctly.
|
| 412 |
+
- Fixed a keyword bug in `plot_covariate_groups` for parametric models.
|
| 413 |
+
|
| 414 |
+
|
| 415 |
+
#### 0.24.1 - 2020-03-05
|
| 416 |
+
|
| 417 |
+
##### New features
|
| 418 |
+
- Stability improvements for GeneralizedGammaRegressionFitter and CoxPHFitter with spline estimation.
|
| 419 |
+
|
| 420 |
+
##### Bug fixes
|
| 421 |
+
- Fixed bug with plotting hazards in NelsonAalenFitter.
|
| 422 |
+
|
| 423 |
+
|
| 424 |
+
#### 0.24.0 - 2020-02-20
|
| 425 |
+
|
| 426 |
+
This version and future versions of lifelines no longer support py35. Pandas 1.0 is fully supported, along with previous versions. Minimum Scipy has been bumped to 1.2.0.
|
| 427 |
+
|
| 428 |
+
##### New features
|
| 429 |
+
- `CoxPHFitter` and `CoxTimeVaryingFitter` have support for an elastic net penalty, which includes L1 and L2 regression.
|
| 430 |
+
- `CoxPHFitter` has new baseline survival estimation methods. Specifically, `spline` now estimates the coefficients and baseline survival using splines. The traditional method, `breslow`, is still the default however.
|
| 431 |
+
- Regression models have a new `score` method that will score your model against a dataset (ex: a testing or validation dataset). The default is to evaluate the log-likelihood, but the concordance index can also be chosen.
|
| 432 |
+
- New `MixtureCureFitter` for quickly creating univariate mixture models.
|
| 433 |
+
- Univariate parametric models have a `plot_density`, `density_at_times`, and property `density_` that computes the probability density function estimates.
|
| 434 |
+
- new dataset for interval regression involving *C. Botulinum*.
|
| 435 |
+
- new `lifelines.fitters.mixins.ProportionalHazardMixin` that implements proportional hazard checks.
|
| 436 |
+
|
| 437 |
+
##### API Changes
|
| 438 |
+
- Models' prediction methods that return a single array now return a Series (used to return a DataFrame). This includes `predict_median`, `predict_percentile`, `predict_expectation`, `predict_log_partial_hazard`, and possibly others.
|
| 439 |
+
- The penalty in Cox models is now scaled by the number of observations. This makes it invariant to changing sample sizes. This change also makes the penalty magnitude behave the same as any parametric regression model.
|
| 440 |
+
- `score_` on models has been renamed `concordance_index_`
|
| 441 |
+
- models' `.variance_matrix_` is now a DataFrame.
|
| 442 |
+
- `CoxTimeVaryingFitter` no longer requires an `id_col`. It's optional, and some checks may be done for integrity if provided.
|
| 443 |
+
- Significant changes to `utils.k_fold_cross_validation`.
|
| 444 |
+
- removed automatically adding `inf` from `PiecewiseExponentialRegressionFitter.breakpoints` and `PiecewiseExponentialFitter.breakpoints`
|
| 445 |
+
- `tie_method` was dropped from Cox models (it was always Efron anyways...)
|
| 446 |
+
- Mixins are moved to `lifelines.fitters.mixins`
|
| 447 |
+
- `find_best_parametric_model` `evaluation` kwarg has been changed to `scoring_method`.
|
| 448 |
+
- removed `_score_` and `path` from Cox model.
|
| 449 |
+
|
| 450 |
+
##### Bug fixes
|
| 451 |
+
- Fixed `show_censors` with `KaplanMeierFitter.plot_cumulative_density` see issue #940.
|
| 452 |
+
- Fixed error in `"BIC"` code path in `find_best_parametric_model`
|
| 453 |
+
- Fixed a bug where left censoring in AFT models was not converging well
|
| 454 |
+
- Cox models now incorporate any penalizers in their `log_likelihood_`
|
| 455 |
+
|
| 456 |
+
|
| 457 |
+
#### 0.23.9 - 2020-01-28
|
| 458 |
+
|
| 459 |
+
##### Bug fixes
|
| 460 |
+
- fixed important error when a parametric regression model would not assign the correct labels to fitted
|
| 461 |
+
parameters' variances. See more here: https://github.com/CamDavidsonPilon/lifelines/issues/931. Users of `GeneralizedGammaRegressionFitter` and any custom regression models should update their code as soon as possible.
|
| 462 |
+
|
| 463 |
+
#### 0.23.8 - 2020-01-21
|
| 464 |
+
|
| 465 |
+
##### Bug fixes
|
| 466 |
+
- fixed important error when a parametric regression model would not assign the correct labels to fitted
|
| 467 |
+
parameters. See more here: https://github.com/CamDavidsonPilon/lifelines/issues/931. Users of `GeneralizedGammaRegressionFitter` and any custom regression models should update their code as soon as possible.
|
| 468 |
+
|
| 469 |
+
#### 0.23.7 - 2020-01-14
|
| 470 |
+
|
| 471 |
+
Bug fixes for py3.5.
|
| 472 |
+
|
| 473 |
+
#### 0.23.6 - 2020-01-07
|
| 474 |
+
|
| 475 |
+
##### New features
|
| 476 |
+
- New univariate model, `SplineFitter`, that uses cubic splines to model the cumulative hazard.
|
| 477 |
+
- To aid users with selecting the best parametric model, there is a new `lifelines.utils.find_best_parametric_model` function that will iterate through the models and return the model with the lowest AIC (by default).
|
| 478 |
+
- custom parametric regression models can now do left and interval censoring.
|
| 479 |
+
|
| 480 |
+
|
| 481 |
+
#### 0.23.5 - 2020-01-05
|
| 482 |
+
|
| 483 |
+
##### New features
|
| 484 |
+
- New `predict_hazard` for parametric regression models.
|
| 485 |
+
- New lymph node cancer dataset, originally from *H.F. for the German Breast Cancer Study Group (GBSG) (1994)*
|
| 486 |
+
|
| 487 |
+
##### Bug fixes
|
| 488 |
+
- fixes error thrown when convergence of regression models fails.
|
| 489 |
+
- `kwargs` is now used in `plot_covariate_groups`
|
| 490 |
+
- fixed bug where large exponential numbers in `print_summary` were not being suppressed correctly.
|
| 491 |
+
|
| 492 |
+
#### 0.23.4 - 2019-12-15
|
| 493 |
+
|
| 494 |
+
- Bug fix for PyPI
|
| 495 |
+
|
| 496 |
+
#### 0.23.3 - 2019-12-11
|
| 497 |
+
|
| 498 |
+
##### New features
|
| 499 |
+
- `StatisticalResult.print_summary` supports html output.
|
| 500 |
+
|
| 501 |
+
##### Bug fixes
|
| 502 |
+
- fix import in `printer.py`
|
| 503 |
+
- fix html printing with Univariate models.
|
| 504 |
+
|
| 505 |
+
|
| 506 |
+
#### 0.23.2 - 2019-12-07
|
| 507 |
+
|
| 508 |
+
##### New features
|
| 509 |
+
- new `lifelines.plotting.rmst_plot` for pretty figures of survival curves and RMSTs.
|
| 510 |
+
- new variance calculations for `lifelines.utils.restricted_mean_survival_time`
|
| 511 |
+
- performance improvements on regression models' preprocessing. Should make datasets with
|
| 512 |
+
high number of columns more performant.
|
| 513 |
+
|
| 514 |
+
##### Bug fixes
|
| 515 |
+
- fixed `print_summary` for AAF class.
|
| 516 |
+
- fixed repr for `sklearn_adapter` classes.
|
| 517 |
+
- fixed `conditional_after` in Cox models when strata was used.
|
| 518 |
+
|
| 519 |
+
|
| 520 |
+
#### 0.23.1 - 2019-11-27
|
| 521 |
+
|
| 522 |
+
##### New features
|
| 523 |
+
- new `print_summary` option `style` to print HTML, LaTeX or ASCII output
|
| 524 |
+
- performance improvements for `CoxPHFitter` - up to 30% performance improvements for some datasets.
|
| 525 |
+
|
| 526 |
+
##### Bug fixes
|
| 527 |
+
- fixed bug where computed statistics were not being shown in `print_summary` for HTML output.
|
| 528 |
+
- fixed bug where "None" was displayed in models' `__repr__`
|
| 529 |
+
- fixed bug in `StatisticalResult.print_summary`
|
| 530 |
+
- fixed bug when using `print_summary` with left censored models.
|
| 531 |
+
- lots of minor bug fixes.
|
| 532 |
+
|
| 533 |
+
#### 0.23.0 - 2019-11-17
|
| 534 |
+
|
| 535 |
+
##### New features
|
| 536 |
+
- new `print_summary` abstraction that allows HTML printing in Jupyter notebooks!
|
| 537 |
+
- silenced some warnings.
|
| 538 |
+
|
| 539 |
+
##### Bug fixes
|
| 540 |
+
- The "comparison" value of some parametric univariate models wasn't standard, so the null hypothesis p-value may have been wrong. This is now fixed.
|
| 541 |
+
- fixed a NaN error in confidence intervals for KaplanMeierFitter
|
| 542 |
+
|
| 543 |
+
##### API Changes
|
| 544 |
+
|
| 545 |
+
- To align values across models, the column names for the confidence intervals in parametric univariate models `summary` have changed.
|
| 546 |
+
- Fixed typo in `ParametricUnivariateFitter` name.
|
| 547 |
+
- `median_` has been removed in favour of `median_survival_time_`.
|
| 548 |
+
- `left_censorship` in `fit` has been removed in favour of `fit_left_censoring`.
|
| 549 |
+
|
| 550 |
+
|
| 551 |
+
#### 0.22.10 - 2019-11-08
|
| 552 |
+
|
| 553 |
+
The tests were re-factored to be shipped with the package. Let me know if this causes problems.
|
| 554 |
+
|
| 555 |
+
|
| 556 |
+
##### Bug fixes
|
| 557 |
+
- fixed error in plotting models when "lower" or "upper" was in the label name.
|
| 558 |
+
- fixed bug in plot_covariate_groups for AFT models when >1d arrays were used for values arg.
|
| 559 |
+
|
| 560 |
+
|
| 561 |
+
#### 0.22.9 - 2019-10-30
|
| 562 |
+
|
| 563 |
+
|
| 564 |
+
##### Bug fixes
|
| 565 |
+
- fixed `predict_` methods in AFT models when `timeline` was not specified.
|
| 566 |
+
- fixed error in `qq_plot`
|
| 567 |
+
- fixed error when submitting a model in `qth_survival_time`
|
| 568 |
+
- `CoxPHFitter` now displays correct columns values when changing alpha param.
|
| 569 |
+
|
| 570 |
+
|
| 571 |
+
#### 0.22.8 - 2019-10-06
|
| 572 |
+
|
| 573 |
+
##### New features
|
| 574 |
+
- Serializing lifelines is better supported. Packages like joblib and pickle are now supported. Thanks @AbdealiJK!
|
| 575 |
+
- `conditional_after` now available in `CoxPHFitter.predict_median`
|
| 576 |
+
- Suppressed some unimportant warnings.
|
| 577 |
+
|
| 578 |
+
##### Bug fixes
|
| 579 |
+
- fixed initial_point being ignored in AFT models.
|
| 580 |
+
|
| 581 |
+
|
| 582 |
+
#### 0.22.7 - 2019-09-29
|
| 583 |
+
|
| 584 |
+
##### New features
|
| 585 |
+
- new `ApproximationWarning` to tell you if the package is making a potentially misleading approximation.
|
| 586 |
+
|
| 587 |
+
##### Bug fixes
|
| 588 |
+
- fixed a bug in parametric prediction for interval censored data.
|
| 589 |
+
- realigned values in `print_summary`.
|
| 590 |
+
- fixed bug in `survival_difference_at_fixed_point_in_time_test`
|
| 591 |
+
|
| 592 |
+
##### API Changes
|
| 593 |
+
|
| 594 |
+
- `utils.qth_survival_time` no longer takes a `cdf` argument - users should take the complement (1-cdf).
|
| 595 |
+
- Some previous `StatisticalWarnings` have been replaced by `ApproximationWarning`
|
| 596 |
+
|
| 597 |
+
#### 0.22.6 - 2019-09-25
|
| 598 |
+
|
| 599 |
+
##### New features
|
| 600 |
+
- `conditional_after` works for `CoxPHFitter` prediction models 😅
|
| 601 |
+
|
| 602 |
+
##### Bug fixes
|
| 603 |
+
|
| 604 |
+
##### API Changes
|
| 605 |
+
- `CoxPHFitter.baseline_cumulative_hazard_`'s column is renamed `"baseline cumulative hazard"` - previously it was `"baseline hazard"`. (Only applies if the model has no strata.)
|
| 606 |
+
- `utils.dataframe_interpolate_at_times` renamed to `utils.interpolate_at_times_and_return_pandas`.
|
| 607 |
+
|
| 608 |
+
|
| 609 |
+
#### 0.22.5 - 2019-09-20
|
| 610 |
+
|
| 611 |
+
##### New features
|
| 612 |
+
- Improvements to the `__repr__` of models that take into account weights.
|
| 613 |
+
- Better support for predicting on Pandas Series
|
| 614 |
+
|
| 615 |
+
##### Bug fixes
|
| 616 |
+
- Fixed issue where `fit_interval_censoring` wouldn't accept lists.
|
| 617 |
+
- Fixed an issue with `AalenJohansenFitter` failing to plot confidence intervals.
|
| 618 |
+
|
| 619 |
+
##### API Changes
|
| 620 |
+
- `_get_initial_value` in parametric univariate models is renamed `_create_initial_point`
|
| 621 |
+
|
| 622 |
+
|
| 623 |
+
#### 0.22.4 - 2019-09-04
|
| 624 |
+
|
| 625 |
+
##### New features
|
| 626 |
+
- Some performance improvements to regression models.
|
| 627 |
+
- lifelines will avoid penalizing the intercept (aka bias) variables in regression models.
|
| 628 |
+
- new `utils.restricted_mean_survival_time` that approximates the RMST using numerical integration against survival functions.
|
| 629 |
+
|
| 630 |
+
##### API changes
|
| 631 |
+
- `KaplanMeierFitter.survival_function_`'s index is no longer given the name "timeline".
|
| 632 |
+
|
| 633 |
+
##### Bug fixes
|
| 634 |
+
- Fixed issue where `concordance_index` would never exit if NaNs in dataset.
|
| 635 |
+
|
| 636 |
+
|
| 637 |
+
#### 0.22.3 - 2019-08-08
|
| 638 |
+
|
| 639 |
+
##### New features
|
| 640 |
+
- models now expose a `log_likelihood_` property.
|
| 641 |
+
- new `conditional_after` argument on `predict_*` methods that make prediction on censored subjects easier.
|
| 642 |
+
- new `lifelines.utils.safe_exp` to make `exp` overflows easier to handle.
|
| 643 |
+
- smarter initial conditions for parametric regression models.
|
| 644 |
+
- New regression model: `GeneralizedGammaRegressionFitter`
|
| 645 |
+
|
| 646 |
+
##### API changes
|
| 647 |
+
- removed `lifelines.utils.gamma` - use `autograd_gamma` library instead.
|
| 648 |
+
- removed bottleneck as a dependency. It offered slight performance gains only in Cox models, and only a small fraction of the API was being used.
|
| 649 |
+
|
| 650 |
+
##### Bug fixes
|
| 651 |
+
- AFT log-likelihood ratio test was not using weights correctly.
|
| 652 |
+
- corrected (by bumping) scipy and autograd dependencies
|
| 653 |
+
- convergence is improved for most models, and many `exp` overflow warnings have been eliminated.
|
| 654 |
+
- Fixed an error in the `predict_percentile` of `LogLogisticAFTFitter`. New tests have been added around this.
|
| 655 |
+
|
| 656 |
+
|
| 657 |
+
#### 0.22.2 - 2019-07-25
|
| 658 |
+
|
| 659 |
+
##### New features
|
| 660 |
+
- lifelines is now compatible with scipy>=1.3.0
|
| 661 |
+
|
| 662 |
+
##### Bug fixes
|
| 663 |
+
- fixed printing error when using robust=True in regression models
|
| 664 |
+
- `GeneralizedGammaFitter` is more stable, maybe.
|
| 665 |
+
- lifelines was allowing old versions of numpy (1.6), but this caused errors when using the library. The correct numpy version has been pinned (to 1.14.0+)
|
| 666 |
+
|
| 667 |
+
|
| 668 |
+
|
| 669 |
+
#### 0.22.1 - 2019-07-14
|
| 670 |
+
|
| 671 |
+
##### New features
|
| 672 |
+
- New univariate model, `GeneralizedGammaFitter`. This model contains many sub-models, so it is a good model to check fits.
|
| 673 |
+
- added a warning when a time-varying dataset had instantaneous deaths.
|
| 674 |
+
- added a `initial_point` option in univariate parametric fitters.
|
| 675 |
+
- `initial_point` kwarg is present in parametric univariate fitters `.fit`
|
| 676 |
+
- `event_table` is now an attribute on all univariate fitters (if right censoring)
|
| 677 |
+
- improvements to `lifelines.utils.gamma`
|
| 678 |
+
|
| 679 |
+
##### API changes
|
| 680 |
+
- In AFT models, the column names in `confidence_intervals_` has changed to include the alpha value.
|
| 681 |
+
- In AFT models, some column names in `.summary` and `.print_summary` has changed to include the alpha value.
|
| 682 |
+
- In AFT models, some column names in `.summary` and `.print_summary` includes confidence intervals for the exponential of the value.
|
| 683 |
+
|
| 684 |
+
##### Bug fixes
|
| 685 |
+
- when using `censors_show` in plotting functions, the censor ticks are now reactive to the estimate being shown.
|
| 686 |
+
- fixed an overflow bug in `KaplanMeierFitter` confidence intervals
|
| 687 |
+
- improvements in data validation for `CoxTimeVaryingFitter`
|
| 688 |
+
|
| 689 |
+
|
| 690 |
+
#### 0.22.0 - 2019-07-03
|
| 691 |
+
|
| 692 |
+
##### New features
|
| 693 |
+
- Ability to create custom parametric regression models by specifying the cumulative hazard. This enables new and extensions of AFT models.
|
| 694 |
+
- `percentile(p)` method added to univariate models that solves the equation `p = S(t)` for `t`
|
| 695 |
+
- for parametric univariate models, the `conditional_time_to_event_` is now exact instead of an approximation.
|
| 696 |
+
|
| 697 |
+
##### API changes
|
| 698 |
+
- In Cox models, the attribute `hazards_` has been renamed to `params_`. This aligns better with the other regression models, and is more clear (what is a hazard anyways?)
|
| 699 |
+
- In Cox models, a new `hazard_ratios_` attribute is available which is the exponentiation of `params_`.
|
| 700 |
+
- In Cox models, the column names in `confidence_intervals_` has changed to include the alpha value.
|
| 701 |
+
- In Cox models, some column names in `.summary` and `.print_summary` has changed to include the alpha value.
|
| 702 |
+
- In Cox models, some column names in `.summary` and `.print_summary` includes confidence intervals for the exponential of the value.
|
| 703 |
+
- Significant changes to internal AFT code.
|
| 704 |
+
- A change to how `fit_intercept` works in AFT models. Previously one could set `fit_intercept` to False and not have to set `ancillary_df` - now one must specify a DataFrame.
|
| 705 |
+
|
| 706 |
+
##### Bug fixes
|
| 707 |
+
- for parametric univariate models, the `conditional_time_to_event_` is now exact instead of an approximation.
|
| 708 |
+
- fixed a name error bug in `CoxTimeVaryingFitter.plot`
|
| 709 |
+
|
| 710 |
+
#### 0.21.5 - 2019-06-22
|
| 711 |
+
|
| 712 |
+
I'm skipping 0.21.4 version because of deployment issues.
|
| 713 |
+
|
| 714 |
+
##### New features
|
| 715 |
+
- `scoring_method` now a kwarg on `sklearn_adapter`
|
| 716 |
+
|
| 717 |
+
##### Bug fixes
|
| 718 |
+
- fixed an implicit import of scikit-learn. scikit-learn is an optional package.
|
| 719 |
+
- fixed visual bug that misaligned x-axis ticks and at-risk counts. Thanks @christopherahern!
|
| 720 |
+
|
| 721 |
+
|
| 722 |
+
#### 0.21.3 - 2019-06-04
|
| 723 |
+
|
| 724 |
+
##### New features
|
| 725 |
+
- included in lifelines is a scikit-learn adapter so lifelines' models can be used with scikit-learn's API. See [documentation here](https://lifelines.readthedocs.io/en/latest/Compatibility%20with%20scikit-learn.html).
|
| 726 |
+
- `CoxPHFitter.plot` now accepts a `hazard_ratios` (boolean) parameter that will plot the hazard ratios (and CIs) instead of the log-hazard ratios.
|
| 727 |
+
- `CoxPHFitter.check_assumptions` now accepts a `columns` parameter to specify only checking a subset of columns.
|
| 728 |
+
|
| 729 |
+
##### Bug fixes
|
| 730 |
+
- `covariates_from_event_matrix` handle nulls better
|
| 731 |
+
|
| 732 |
+
|
| 733 |
+
#### 0.21.2 - 2019-05-16
|
| 734 |
+
|
| 735 |
+
##### New features
|
| 736 |
+
- New regression model: `PiecewiseExponentialRegressionFitter` is available. See blog post here: https://dataorigami.net/blogs/napkin-folding/churn
|
| 737 |
+
- Regression models have a new method `log_likelihood_ratio_test` that computes, you guessed it, the log-likelihood ratio test. Previously this was an internal API that is being exposed.
|
| 738 |
+
|
| 739 |
+
##### API changes
|
| 740 |
+
- The default behavior of the `predict` method on non-parametric estimators (`KaplanMeierFitter`, etc.) has changed from (previous) linear interpolation to (new) return last value. Linear interpolation is still possible with the `interpolate` flag.
|
| 741 |
+
- removing `_compute_likelihood_ratio_test` on regression models. Use `log_likelihood_ratio_test` now.
|
| 742 |
+
|
| 743 |
+
##### Bug fixes
|
| 744 |
+
|
| 745 |
+
|
| 746 |
+
#### 0.21.1 - 2019-04-26
|
| 747 |
+
|
| 748 |
+
##### New features
|
| 749 |
+
- users can provide their own start and stop column names in `add_covariate_to_timeline`
|
| 750 |
+
- PiecewiseExponentialFitter now allows numpy arrays as breakpoints
|
| 751 |
+
|
| 752 |
+
##### API changes
|
| 753 |
+
- output of `survival_table_from_events` when collapsing rows to intervals now removes the "aggregate" column multi-index.
|
| 754 |
+
|
| 755 |
+
##### Bug fixes
|
| 756 |
+
- fixed bug in CoxTimeVaryingFitter when ax is provided, thanks @j-i-l!
|
| 757 |
+
|
| 758 |
+
#### 0.21.0 - 2019-04-12
|
| 759 |
+
|
| 760 |
+
##### New features
|
| 761 |
+
- `weights` is now an optional kwarg for parametric univariate models.
|
| 762 |
+
- all univariate and multivariate parametric models now have ability to handle left, right and interval censored data (the former two being special cases of the latter). Users can use the `fit_right_censoring` (which is an alias for `fit`), `fit_left_censoring` and `fit_interval_censoring`.
|
| 763 |
+
- a new interval censored dataset is available under `lifelines.datasets.load_diabetes`
|
| 764 |
+
|
| 765 |
+
##### API changes
|
| 766 |
+
- `left_censorship` on all univariate fitters has been deprecated. Please use the new
|
| 767 |
+
api `model.fit_left_censoring(...)`.
|
| 768 |
+
- `invert_y_axis` in `model.plot(...` has been removed.
|
| 769 |
+
- `entries` property in multivariate parametric models has a new Series name: `entry`
|
| 770 |
+
|
| 771 |
+
##### Bug fixes
|
| 772 |
+
- lifelines was silently converting any NaNs in the event vector to True. An error is now thrown instead.
|
| 773 |
+
- Fixed an error that didn't let users use Numpy arrays in prediction for AFT models
|
| 774 |
+
|
| 775 |
+
|
| 776 |
+
#### 0.20.5 - 2019-04-08
|
| 777 |
+
|
| 778 |
+
##### New features
|
| 779 |
+
- performance improvements for `print_summary`.
|
| 780 |
+
|
| 781 |
+
##### API changes
|
| 782 |
+
- `utils.survival_events_from_table` returns an integer weight vector as well as durations and censoring vector.
|
| 783 |
+
- in `AalenJohansenFitter`, the `variance` parameter is renamed to `variance_` to align with the usual lifelines convention.
|
| 784 |
+
|
| 785 |
+
##### Bug fixes
|
| 786 |
+
- Fixed an error in the `CoxTimeVaryingFitter`'s likelihood ratio test when using strata.
|
| 787 |
+
- Fixed some plotting bugs with `AalenJohansenFitter`
|
| 788 |
+
|
| 789 |
+
|
| 790 |
+
#### 0.20.4 - 2019-03-27
|
| 791 |
+
|
| 792 |
+
##### New features
|
| 793 |
+
- left-truncation support in AFT models, using the `entry_col` kwarg in `fit()`
|
| 794 |
+
- `generate_datasets.piecewise_exponential_survival_data` for generating piecewise exp. data
|
| 795 |
+
- Faster `print_summary` for AFT models.
|
| 796 |
+
|
| 797 |
+
##### API changes
|
| 798 |
+
- Pandas is now correctly pinned to >= 0.23.0. This was always the case, but not specified in setup.py correctly.
|
| 799 |
+
|
| 800 |
+
##### Bug fixes
|
| 801 |
+
- Better handling for extremely large numbers in `print_summary`
|
| 802 |
+
- `PiecewiseExponentialFitter` is available with `from lifelines import *`.
|
| 803 |
+
|
| 804 |
+
|
| 805 |
+
#### 0.20.3 - 2019-03-23
|
| 806 |
+
|
| 807 |
+
##### New features
|
| 808 |
+
- Now `cumulative_density_` & `survival_function_` are _always_ present on a fitted `KaplanMeierFitter`.
|
| 809 |
+
- New attributes/methods on `KaplanMeierFitter`: `plot_cumulative_density()`, `confidence_interval_cumulative_density_`, `plot_survival_function` and `confidence_interval_survival_function_`.
|
| 810 |
+
|
| 811 |
+
|
| 812 |
+
#### 0.20.2 - 2019-03-21
|
| 813 |
+
|
| 814 |
+
##### New features
|
| 815 |
+
- Left censoring is now supported in univariate parametric models: `.fit(..., left_censorship=True)`. Examples are in the docs.
|
| 816 |
+
- new dataset: `lifelines.datasets.load_nh4()`
|
| 817 |
+
- Univariate parametric models now include, by default, support for the cumulative density function: `.cumulative_density_`, `.confidence_interval_cumulative_density_`, `plot_cumulative_density()`, `cumulative_density_at_times(t)`.
|
| 818 |
+
- add a `lifelines.plotting.qq_plot` for univariate parametric models that handles censored data.
|
| 819 |
+
|
| 820 |
+
##### API changes
|
| 821 |
+
- `plot_lifetimes` no longer reverses the order when plotting. Thanks @vpolimenov!
|
| 822 |
+
- The `C` column in `load_lcd` dataset is renamed to `E`.
|
| 823 |
+
|
| 824 |
+
##### Bug fixes
|
| 825 |
+
- fixed a naming error in `KaplanMeierFitter` when `left_censorship` was set to True, `plot_cumulative_density_()` is now `plot_cumulative_density()`.
|
| 826 |
+
- added some error handling when passing in timedeltas. Ideally, users don't pass in timedeltas, as the scale is ambiguous. However, the error message before was not obvious, so we do some conversion, warn the user, and pass it through.
|
| 827 |
+
- `qth_survival_times` for a truncated CDF would return `np.inf` if the q parameter was below the truncation limit. This should have been `-np.inf`
|
| 828 |
+
|
| 829 |
+
|
| 830 |
+
#### 0.20.1 - 2019-03-16
|
| 831 |
+
|
| 832 |
+
- Some performance improvements to `CoxPHFitter` (about 30%). I know it may seem silly, but we are now about the same or slightly faster than the Cox model in R's `survival` package (for some testing datasets and some configurations). This is a big deal, because 1) lifelines does more error checking prior, 2) R's cox model is written in C, and we are still pure Python/NumPy, 3) R's cox model has decades of development.
|
| 833 |
+
- suppressed unimportant warnings
|
| 834 |
+
|
| 835 |
+
##### API changes
|
| 836 |
+
- Previously, lifelines _always_ added a 0 row to `cph.baseline_hazard_`, even if there were no event at this time. This is no longer the case. A 0 will still be added if a duration (observed or not) occurs at 0, however.
|
| 837 |
+
|
| 838 |
+
|
| 839 |
+
#### 0.20.0 - 2019-03-05
|
| 840 |
+
|
| 841 |
+
- Starting with 0.20.0, only Python3 will be supported. Over 75% of recent installs were Py3.
|
| 842 |
+
- Updated minimum dependencies, specifically Matplotlib and Pandas.
|
| 843 |
+
|
| 844 |
+
##### New features
|
| 845 |
+
- smarter initialization for AFT models which should improve convergence.
|
| 846 |
+
|
| 847 |
+
##### API changes
|
| 848 |
+
- `initial_beta` in Cox model's `.fit` is now `initial_point`.
|
| 849 |
+
- `initial_point` is now available in AFT models and `CoxTimeVaryingFitter`
|
| 850 |
+
- the DataFrame `confidence_intervals_` for univariate models is transposed now (previously parameters were columns, now parameters are rows).
|
| 851 |
+
|
| 852 |
+
##### Bug fixes
|
| 853 |
+
- Fixed a bug with plotting and `check_assumptions`.
|
| 854 |
+
|
| 855 |
+
|
| 856 |
+
|
| 857 |
+
#### 0.19.5 - 2019-02-26
|
| 858 |
+
|
| 859 |
+
##### New features
|
| 860 |
+
- `plot_covariate_group` can accept multiple covariates to plot. This is useful for columns that have implicit correlation like polynomial features or categorical variables.
|
| 861 |
+
- Convergence improvements for AFT models.
|
| 862 |
+
|
| 863 |
+
#### 0.19.4 - 2019-02-25
|
| 864 |
+
|
| 865 |
+
##### Bug fixes
|
| 866 |
+
- remove some bad print statements in `CoxPHFitter`.
|
| 867 |
+
|
| 868 |
+
#### 0.19.3 - 2019-02-25
|
| 869 |
+
|
| 870 |
+
##### New features
|
| 871 |
+
- new AFT models: `LogNormalAFTFitter` and `LogLogisticAFTFitter`.
|
| 872 |
+
- AFT models now accept a `weights_col` argument to `fit`.
|
| 873 |
+
- Robust errors (sandwich errors) are now available in AFT models using the `robust=True` kwarg in `fit`.
|
| 874 |
+
- Performance increase to `print_summary` in the `CoxPHFitter` and `CoxTimeVaryingFitter` model.
|
| 875 |
+
|
| 876 |
+
#### 0.19.2 - 2019-02-22
|
| 877 |
+
|
| 878 |
+
##### New features
|
| 879 |
+
- `ParametricUnivariateFitters`, like `WeibullFitter`, have smoothed plots when plotting (vs stepped plots)
|
| 880 |
+
|
| 881 |
+
##### Bug fixes
|
| 882 |
+
- The `ExponentialFitter` log likelihood _value_ was incorrect - inference was correct however.
|
| 883 |
+
- Univariate fitters are more flexible and can allow 2-d and DataFrames as inputs.
|
| 884 |
+
|
| 885 |
+
#### 0.19.1 - 2019-02-21
|
| 886 |
+
|
| 887 |
+
##### New features
|
| 888 |
+
- improved stability of `LogNormalFitter`
|
| 889 |
+
- Matplotlib for Python3 users is no longer forced to be 2.x.
|
| 890 |
+
|
| 891 |
+
##### API changes
|
| 892 |
+
- **Important**: we changed the parameterization of the `PiecewiseExponential` to the same as `ExponentialFitter` (from `\lambda * t` to `t / \lambda`).
|
| 893 |
+
|
| 894 |
+
|
| 895 |
+
#### 0.19.0 - 2019-02-20
|
| 896 |
+
|
| 897 |
+
##### New features
|
| 898 |
+
- New regression model `WeibullAFTFitter` for fitting accelerated failure time models. Docs have been added to our [documentation](https://lifelines.readthedocs.io/) about how to use `WeibullAFTFitter` (spoiler: its API is similar to the other regression models) and how to interpret the output.
|
| 899 |
+
- `CoxPHFitter` performance improvements (about 10%)
|
| 900 |
+
- `CoxTimeVaryingFitter` performance improvements (about 10%)
|
| 901 |
+
|
| 902 |
+
|
| 903 |
+
##### API changes
|
| 904 |
+
- **Important**: we changed the `.hazards_` and `.standard_errors_` on Cox models to be pandas Series (instead of Dataframes). This felt like a more natural representation of them. You may need to update your code to reflect this. See notes here: https://github.com/CamDavidsonPilon/lifelines/issues/636
|
| 905 |
+
- **Important**: we changed the `.confidence_intervals_` on Cox models to be transposed. This felt like a more natural representation of them. You may need to update your code to reflect this. See notes here: https://github.com/CamDavidsonPilon/lifelines/issues/636
|
| 906 |
+
- **Important**: we changed the parameterization of the `WeibullFitter` and `ExponentialFitter` from `\lambda * t` to `t / \lambda`. This was for a few reasons: 1) it is a more common parameterization in literature, 2) it helps in convergence.
|
| 907 |
+
- **Important**: in models where we add an intercept (currently only `AalenAdditiveModel`), the name of the added column has been changed from `baseline` to `_intercept`
|
| 908 |
+
- **Important**: the meaning of `alpha` in all fitters has changed to be the standard interpretation of alpha in confidence intervals. That means that the _default_ for alpha is set to 0.05 in the latest lifelines, instead of 0.95 in previous versions.
|
| 909 |
+
|
| 910 |
+
##### Bug Fixes
|
| 911 |
+
- Fixed a bug in the `_log_likelihood_` property of `ParametericUnivariateFitter` models. It was showing the "average" log-likelihood (i.e. scaled by 1/n) instead of the total. It now displays the total.
|
| 912 |
+
- In model `print_summary`s, corrected a labelling error. Instead of "Likelihood test", it should have read "Log-likelihood test".
|
| 913 |
+
- Fixed a bug that was too frequently rejecting the dtype of `event` columns.
|
| 914 |
+
- Fixed a calculation bug in the concordance index for stratified Cox models. Thanks @airanmehr!
|
| 915 |
+
- Fixed some Pandas <0.24 bugs.
|
| 916 |
+
|
| 917 |
+
#### 0.18.6 - 2019-02-13
|
| 918 |
+
|
| 919 |
+
- some improvements to the output of `check_assumptions`. `show_plots` is turned to `False` by default now. It only shows `rank` and `km` p-values now.
|
| 920 |
+
- some performance improvements to `qth_survival_time`.
|
| 921 |
+
|
| 922 |
+
#### 0.18.5 - 2019-02-11
|
| 923 |
+
|
| 924 |
+
- added new plotting methods to parametric univariate models: `plot_survival_function`, `plot_hazard` and `plot_cumulative_hazard`. The last one is an alias for `plot`.
|
| 925 |
+
- added new properties to parametric univariate models: `confidence_interval_survival_function_`, `confidence_interval_hazard_`, `confidence_interval_cumulative_hazard_`. The last one is an alias for `confidence_interval_`.
|
| 926 |
+
- Fixed some overflow issues with `AalenJohansenFitter`'s variance calculations when using large datasets.
|
| 927 |
+
- Fixed an edge case in `AalenJohansenFitter` that was causing some datasets to be jittered too often.
|
| 928 |
+
- Add a new kwarg to `AalenJohansenFitter`, `calculate_variance` that can be used to turn off variance calculations since this can take a long time for large datasets. Thanks @pzivich!
|
| 929 |
+
|
| 930 |
+
#### 0.18.4 - 2019-02-10
|
| 931 |
+
|
| 932 |
+
- fixed confidence intervals in cumulative hazards for parametric univariate models. They were previously
|
| 933 |
+
severely depressed.
|
| 934 |
+
- adding left-truncation support to parametric univariate models with the `entry` kwarg in `.fit`
|
| 935 |
+
|
| 936 |
+
#### 0.18.3 - 2019-02-07
|
| 937 |
+
|
| 938 |
+
- Some performance improvements to parametric univariate models.
|
| 939 |
+
- Suppressing some irrelevant NumPy and autograd warnings, so lifeline warnings are more noticeable.
|
| 940 |
+
- Improved some warning and error messages.
|
| 941 |
+
|
| 942 |
+
#### 0.18.2 - 2019-02-05
|
| 943 |
+
|
| 944 |
+
- New univariate fitter `PiecewiseExponentialFitter` for creating a stepwise hazard model. See docs online.
|
| 945 |
+
- Ability to create novel parametric univariate models using the new `ParametericUnivariateFitter` super class. See docs online for how to do this.
|
| 946 |
+
- Unfortunately, parametric univariate fitters are not serializable with `pickle`. The library `dill` is still useable.
|
| 947 |
+
- Complete overhaul of all internals for parametric univariate fitters. Moved them all (most) to use `autograd`.
|
| 948 |
+
- `LogNormalFitter` no longer models `log_sigma`.
|
| 949 |
+
|
| 950 |
+
|
| 951 |
+
#### 0.18.1 - 2019-02-02
|
| 952 |
+
- bug fixes in `LogNormalFitter` variance estimates
|
| 953 |
+
- improve convergence of `LogNormalFitter`. We now model the log of sigma internally, but still expose sigma externally.
|
| 954 |
+
- use the `autograd` lib to help with gradients.
|
| 955 |
+
- New `LogLogisticFitter` univariate fitter available.
|
| 956 |
+
|
| 957 |
+
#### 0.18.0 - 2019-01-31
|
| 958 |
+
|
| 959 |
+
- `LogNormalFitter` is a new univariate fitter you can use.
|
| 960 |
+
- `WeibullFitter` now correctly returns the confidence intervals (previously returned only NaNs)
|
| 961 |
+
- `WeibullFitter.print_summary()` displays p-values associated with its parameters not equal to 1.0 - previously this was (implicitly) comparing against 0, which is trivially always true (the parameters must be greater than 0)
|
| 962 |
+
- `ExponentialFitter.print_summary()` displays p-values associated with its parameters not equal to 1.0 - previously this was (implicitly) comparing against 0, which is trivially always true (the parameters must be greater than 0)
|
| 963 |
+
- `ExponentialFitter.plot` now displays the cumulative hazard, instead of the survival function. This is to make it easier to compare to `WeibullFitter` and `LogNormalFitter`
|
| 964 |
+
- Univariate fitters' `cumulative_hazard_at_times`, `hazard_at_times`, `survival_function_at_times` return pandas Series now (used to be numpy arrays)
|
| 965 |
+
- remove `alpha` keyword from all statistical functions. This was never being used.
|
| 966 |
+
- Gone are asterisks and dots in `print_summary` functions that represent significance thresholds.
|
| 967 |
+
- In models' `summary` (including `print_summary`), the `log(p)` term has changed to `-log2(p)`. This is known as the s-value. See https://lesslikely.com/statistics/s-values/
|
| 968 |
+
- introduce new statistical tests between univariate datasets: `survival_difference_at_fixed_point_in_time_test`,...
|
| 969 |
+
- new warning message when Cox models detect possible non-unique solutions to maximum likelihood.
|
| 970 |
+
- Generally: clean up lifelines exception handling. Ex: catch `LinAlgError: Matrix is singular.` and report back to the user advice.
|
| 971 |
+
|
| 972 |
+
#### 0.17.5 - 2019-01-25
|
| 973 |
+
|
| 974 |
+
- more bugs in `plot_covariate_groups` fixed when using non-numeric strata.
|
| 975 |
+
|
| 976 |
+
#### 0.17.4 - 2019-01-25
|
| 977 |
+
|
| 978 |
+
- Fix bug in `plot_covariate_groups` that wasn't allowing for strata to be used.
|
| 979 |
+
- change name of `multicenter_aids_cohort_study` to `load_multicenter_aids_cohort_study`
|
| 980 |
+
- `groups` is now called `values` in `CoxPHFitter.plot_covariate_groups`
|
| 981 |
+
|
| 982 |
+
#### 0.17.3 - 2019-01-24
|
| 983 |
+
- Fix in `compute_residuals` when using `schoenfeld` and the minimum duration has only censored subjects.
|
| 984 |
+
|
| 985 |
+
#### 0.17.2 - 2019-01-22
|
| 986 |
+
- Another round of serious performance improvements for the Cox models. Up to 2x faster for CoxPHFitter and CoxTimeVaryingFitter. This was mostly the result of using NumPy's `einsum` to simplify a previous `for` loop. The downside is the code is more esoteric now. I've added comments as necessary though 🤞
|
| 987 |
+
|
| 988 |
+
#### 0.17.1 - 2019-01-20
|
| 989 |
+
|
| 990 |
+
- adding bottleneck as a dependency. This library is highly-recommended by Pandas, and in lifelines we see some nice performance improvements with it too. (~15% for `CoxPHFitter`)
|
| 991 |
+
- There was a small bug in `CoxPHFitter` when using `batch_mode` that was causing coefficients to deviate from their MLE value. This bug eluded tests, which means that its discrepancy was less than 0.0001 difference. It's fixed now, and even more accurate tests are added.
|
| 992 |
+
- Faster `CoxPHFitter._compute_likelihood_ratio_test()`
|
| 993 |
+
- Fixes a Pandas performance warning in `CoxTimeVaryingFitter`.
|
| 994 |
+
- Performance improvements to `CoxTimeVaryingFitter`.
|
| 995 |
+
|
| 996 |
+
#### 0.17.0 - 2019-01-11
|
| 997 |
+
|
| 998 |
+
- corrected behaviour in `CoxPHFitter` where `score_` was not being refreshed on every new `fit`.
|
| 999 |
+
- Reimplementation of `AalenAdditiveFitter`. There were significant changes to it:
|
| 1000 |
+
- implementation is at least 10x faster, and possibly up to 100x faster for some datasets.
|
| 1001 |
+
- memory consumption is way down
|
| 1002 |
+
- removed the time-varying component from `AalenAdditiveFitter`. This will return in a future release.
|
| 1003 |
+
- new `print_summary`
|
| 1004 |
+
- `weights_col` is added
|
| 1005 |
+
- `nn_cumulative_hazard` is removed (may add back)
|
| 1006 |
+
- some plotting improvements to `plotting.plot_lifetimes`
|
| 1007 |
+
|
| 1008 |
+
|
| 1009 |
+
#### 0.16.3 - 2019-01-03
|
| 1010 |
+
|
| 1011 |
+
- More `CoxPHFitter` performance improvements. Up to a 40% reduction vs 0.16.2 for some datasets.
|
| 1012 |
+
|
| 1013 |
+
#### 0.16.2 - 2019-01-02
|
| 1014 |
+
|
| 1015 |
+
- Fixed `CoxTimeVaryingFitter` to allow more than one variable to be stratified
|
| 1016 |
+
- Significant performance improvements for `CoxPHFitter` when the dataset has lots of duplicate times. See https://github.com/CamDavidsonPilon/lifelines/issues/591
|
| 1017 |
+
|
| 1018 |
+
#### 0.16.1 - 2019-01-01
|
| 1019 |
+
- Fixed py2 division error in `concordance` method.
|
| 1020 |
+
|
| 1021 |
+
#### 0.16.0 - 2019-01-01
|
| 1022 |
+
|
| 1023 |
+
- Drop Python 3.4 support.
|
| 1024 |
+
- introduction of residual calculations in `CoxPHFitter.compute_residuals`. Residuals include "schoenfeld", "score", "delta_beta", "deviance", "martingale", and "scaled_schoenfeld".
|
| 1025 |
+
- removes `estimation` namespace for fitters. Should be using `from lifelines import xFitter` now. Thanks @usmanatron
|
| 1026 |
+
- removes `predict_log_hazard_relative_to_mean` from Cox model. Thanks @usmanatron
|
| 1027 |
+
- `StatisticalResult` has been generalized to allow for multiple results (ex: from pairwise comparisons). This means a slightly changed API that is mostly backwards compatible. See doc string for how to use it.
|
| 1028 |
+
- `statistics.pairwise_logrank_test` now returns a `StatisticalResult` object instead of a nasty NxN DataFrame 💗
|
| 1029 |
+
- Display log(p-values) as well as p-values in `print_summary`. Also, p-values below thresholds will be truncated. The original p-values are still recoverable using `.summary`.
|
| 1030 |
+
- Floats in `print_summary` are now displayed to 2 decimal places. This can be changed using the `decimal` kwarg.
|
| 1031 |
+
- removed `standardized` from `Cox` model plotting. It was confusing.
|
| 1032 |
+
- visual improvements to Cox models `.plot`
|
| 1033 |
+
- `print_summary` methods accept kwargs to also be displayed.
|
| 1034 |
+
- `CoxPHFitter` has a new human-readable method, `check_assumptions`, to check the assumptions of your Cox proportional hazard model.
|
| 1035 |
+
- A new helper util to "expand" static datasets into long-form: `lifelines.utils.to_episodic_format`.
|
| 1036 |
+
- `CoxTimeVaryingFitter` now accepts `strata`.
|
| 1037 |
+
|
| 1038 |
+
#### 0.15.4
|
| 1039 |
+
|
| 1040 |
+
- bug fix for the Cox model likelihood ratio test when using non-trivial weights.
|
| 1041 |
+
|
| 1042 |
+
#### 0.15.3 - 2018-12-18
|
| 1043 |
+
- Only allow matplotlib less than 3.0.
|
| 1044 |
+
|
| 1045 |
+
#### 0.15.2 - 2018-11-23
|
| 1046 |
+
- API changes to `plotting.plot_lifetimes`
|
| 1047 |
+
- `cluster_col` and `strata` can be used together in `CoxPHFitter`
|
| 1048 |
+
- removed `entry` from `ExponentialFitter` and `WeibullFitter` as it was doing nothing.
|
| 1049 |
+
|
| 1050 |
+
#### 0.15.1 - 2018-11-23
|
| 1051 |
+
- Bug fixes for v0.15.0
|
| 1052 |
+
- Raise NotImplementedError if the `robust` flag is used in `CoxTimeVaryingFitter` - that's not ready yet.
|
| 1053 |
+
|
| 1054 |
+
#### 0.15.0 - 2018-11-22
|
| 1055 |
+
- adding `robust` params to `CoxPHFitter`'s `fit`. This enables at least i) using non-integer weights in the model (these could be sampling weights like IPTW), and ii) mis-specified models (ex: non-proportional hazards). Under the hood it's a sandwich estimator. This does not handle ties, so if there are high number of ties, results may significantly differ from other software.
|
| 1056 |
+
- `standard_errors_` is now a property on fitted `CoxPHFitter` which describes the standard errors of the coefficients.
|
| 1057 |
+
- `variance_matrix_` is now a property on fitted `CoxPHFitter` which describes the variance matrix of the coefficients.
|
| 1058 |
+
- new criteria for convergence of `CoxPHFitter` and `CoxTimeVaryingFitter` called the Newton-decrement. Tests show it is as accurate (w.r.t. previous coefficients) and typically shaves off a single step, resulting in generally faster convergence. See https://www.cs.cmu.edu/~pradeepr/convexopt/Lecture_Slides/Newton_methods.pdf. Details about the Newton-decrement are added to the `show_progress` statements.
|
| 1059 |
+
- Minimum support for scipy is 1.0
|
| 1060 |
+
- Convergence errors in models that use Newton-Raphson methods now throw a `ConvergenceError`, instead of a `ValueError` (the former is a subclass of the latter, however).
|
| 1061 |
+
- `AalenAdditiveModel` raises `ConvergenceWarning` instead of printing a warning.
|
| 1062 |
+
- `KaplanMeierFitter` now has a cumulative plot option. Example `kmf.plot(invert_y_axis=True)`
|
| 1063 |
+
- a `weights_col` option has been added to `CoxTimeVaryingFitter` that allows for time-varying weights.
|
| 1064 |
+
- `WeibullFitter` has a new `show_progress` param and additional information if the convergence fails.
|
| 1065 |
+
- `CoxPHFitter`, `ExponentialFitter`, `WeibullFitter` and `CoxTimeVaryingFitter` method `print_summary` is updated with new fields.
|
| 1066 |
+
- `WeibullFitter` has renamed the incorrect `_jacobian` to `_hessian_`.
|
| 1067 |
+
- `variance_matrix_` is now a property on fitted `WeibullFitter` which describes the variance matrix of the parameters.
|
| 1068 |
+
- The default `WeibullFitter().timeline` has changed from integers between the min and max duration to _n_ floats between the max and min durations, where _n_ is the number of observations.
|
| 1069 |
+
- Performance improvements for `CoxPHFitter` (~20% faster)
|
| 1070 |
+
- Performance improvements for `CoxTimeVaryingFitter` (~100% faster)
|
| 1071 |
+
- In Python3, Univariate models are now serialisable with `pickle`. Thanks @dwilson1988 for the contribution. For Python2, `dill` is still the preferred method.
|
| 1072 |
+
- `baseline_cumulative_hazard_` (and derivatives of that) on `CoxPHFitter` now correctly incorporate the `weights_col`.
|
| 1073 |
+
- Fixed a bug in `KaplanMeierFitter` when late entry times lined up with death events. Thanks @pzivich
|
| 1074 |
+
- Adding `cluster_col` argument to `CoxPHFitter` so users can specify groups of subjects/rows that may be correlated.
|
| 1075 |
+
- Shifting the "significance codes" for p-values down an order of magnitude. (Example, p-values between 0.1 and 0.05 are not noted at all and p-values between 0.05 and 0.1 are noted with `.`, etc.). This deviates with how they are presented in other software. There is an argument to be made to remove p-values from lifelines altogether (_become the changes you want to see in the world_ lol), but I worry that people could compute the p-values by hand incorrectly, a worse outcome I think. So, this is my stance. P-values between 0.1 and 0.05 offer _very_ little information, so they are removed. There is a growing movement in statistics to shift "significant" findings to p-values less than 0.01 anyways.
|
| 1076 |
+
- New fitter for cumulative incidence of multiple risks `AalenJohansenFitter`. Thanks @pzivich! See "Methodologic Issues When Estimating Risks in Pharmacoepidemiology" for a nice overview of the model.
|
| 1077 |
+
|
| 1078 |
+
#### 0.14.6 - 2018-07-02
|
| 1079 |
+
- fix for n > 2 groups in `multivariate_logrank_test` (again).
|
| 1080 |
+
- fix bug for when `event_observed` column was not boolean.
|
| 1081 |
+
|
| 1082 |
+
#### 0.14.5 - 2018-06-29
|
| 1083 |
+
- fix for n > 2 groups in `multivariate_logrank_test`
|
| 1084 |
+
- fix weights in KaplanMeierFitter when using a pandas Series.
|
| 1085 |
+
|
| 1086 |
+
#### 0.14.4 - 2018-06-14
|
| 1087 |
+
- Adds `baseline_cumulative_hazard_` and `baseline_survival_` to `CoxTimeVaryingFitter`. Because of this, new prediction methods are available.
|
| 1088 |
+
- fixed a bug in `add_covariate_to_timeline` when using `cumulative_sum` with multiple columns.
|
| 1089 |
+
- Added `Likelihood ratio test` to `CoxPHFitter.print_summary` and `CoxTimeVaryingFitter.print_summary`
|
| 1090 |
+
- New checks in `CoxTimeVaryingFitter` that check for immediate deaths and redundant rows.
|
| 1091 |
+
- New `delay` parameter in `add_covariate_to_timeline`
|
| 1092 |
+
- removed `two_sided_z_test` from `statistics`
|
| 1093 |
+
|
| 1094 |
+
#### 0.14.3 - 2018-05-24
|
| 1095 |
+
- fixes a bug when subtracting or dividing two `UnivariateFitters` with labels.
|
| 1096 |
+
- fixes an import error with using `CoxTimeVaryingFitter` predict methods.
|
| 1097 |
+
- adds a `column` argument to `CoxTimeVaryingFitter` and `CoxPHFitter` `plot` method to plot only a subset of columns.
|
| 1098 |
+
|
| 1099 |
+
#### 0.14.2 - 2018-05-18
|
| 1100 |
+
- some quality of life improvements for working with `CoxTimeVaryingFitter` including new `predict_` methods.
|
| 1101 |
+
|
| 1102 |
+
#### 0.14.1 - 2018-04-01
|
| 1103 |
+
- fixed bug with using weights and strata in `CoxPHFitter`
|
| 1104 |
+
- fixed bug in using non-integer weights in `KaplanMeierFitter`
|
| 1105 |
+
- Performance optimizations in `CoxPHFitter` for up to 40% faster completion of `fit`.
|
| 1106 |
+
- even smarter `step_size` calculations for iterative optimizations.
|
| 1107 |
+
- simple code optimizations & cleanup in specific hot spots.
|
| 1108 |
+
- Performance optimizations in `AalenAdditiveFitter` for up to 50% faster completion of `fit` for large dataframes, and up to 10% faster for small dataframes.
|
| 1109 |
+
|
| 1110 |
+
|
| 1111 |
+
#### 0.14.0 - 2018-03-03
|
| 1112 |
+
- adding `plot_covariate_groups` to `CoxPHFitter` to visualize what happens to survival as we vary a covariate, all else being equal.
|
| 1113 |
+
- `utils` functions like `qth_survival_times` and `median_survival_times` now return the transpose of the DataFrame compared to previous version of lifelines. The reason for this is that we often treat survival curves as columns in DataFrames, and functions of the survival curve as index (ex: KaplanMeierFitter.survival_function_ returns a survival curve _at_ time _t_).
|
| 1114 |
+
- `KaplanMeierFitter.fit` and `NelsonAalenFitter.fit` accept a `weights` vector that can be used for pre-aggregated datasets. See this [issue](https://github.com/CamDavidsonPilon/lifelines/issues/396).
|
| 1115 |
+
- Convergence errors now return a custom `ConvergenceWarning` instead of a `RuntimeWarning`
|
| 1116 |
+
- New checks for complete separation in the dataset for regressions.
|
| 1117 |
+
|
| 1118 |
+
#### 0.13.0 - 2017-12-22
|
| 1119 |
+
- removes `is_significant` and `test_result` from `StatisticalResult`. Users can instead choose their significance level by comparing to `p_value`. The string representation of this class has changed as well.
|
| 1120 |
+
- `CoxPHFitter` and `AalenAdditiveFitter` now have a `score_` property that is the concordance-index of the dataset to the fitted model.
|
| 1121 |
+
- `CoxPHFitter` and `AalenAdditiveFitter` no longer have the `data` property. It was an _almost_ duplicate of the training data, but was causing the model to be very large when serialized.
|
| 1122 |
+
- Implements a new fitter `CoxTimeVaryingFitter` available under the `lifelines` namespace. This model implements the Cox model for time-varying covariates.
|
| 1123 |
+
- Utils for creating time varying datasets available in `utils`.
|
| 1124 |
+
- less noisy check for complete separation.
|
| 1125 |
+
- removed `datasets` namespace from the main `lifelines` namespace
|
| 1126 |
+
- `CoxPHFitter` has a slightly more intelligent (barely...) way to pick a step size, so convergence should generally be faster.
|
| 1127 |
+
- `CoxPHFitter.fit` now accepts a `weight_col` kwarg so one can pass in weights per observation. This is very useful if you have many subjects, and the space of covariates is not large. Thus you can group the same subjects together and give that observation a weight equal to the count. Altogether, this means a much faster regression.
|
| 1128 |
+
|
| 1129 |
+
#### 0.12.0
|
| 1130 |
+
- removes `include_likelihood` from `CoxPHFitter.fit` - it was not slowing things down much (empirically), and often I wanted it for debugging (I suppose others do too). It's also another exit condition, so we may exit from the NR iterations faster.
|
| 1131 |
+
- added `step_size` param to `CoxPHFitter.fit` - the default is good, but for extremely large or small datasets this may want to be set manually.
|
| 1132 |
+
- added a warning to `CoxPHFitter` to check for complete separation: https://stats.idre.ucla.edu/other/mult-pkg/faq/general/faqwhat-is-complete-or-quasi-complete-separation-in-logisticprobit-regression-and-how-do-we-deal-with-them/
|
| 1133 |
+
- Additional functionality to `utils.survival_table_from_events` to bin the index to make the resulting table more readable.
|
| 1134 |
+
|
| 1135 |
+
#### 0.11.3
|
| 1136 |
+
- No longer support matplotlib 1.X
|
| 1137 |
+
- Adding `times` argument to `CoxPHFitter`'s `predict_survival_function` and `predict_cumulative_hazard` to specify the times to predict the estimates at, instead of using the default times of observation or censorship.
|
| 1138 |
+
- More accurate prediction methods for parametric univariate models.
|
| 1139 |
+
|
| 1140 |
+
#### 0.11.2
|
| 1141 |
+
- Changing license to vanilla MIT.
|
| 1142 |
+
- Speed up `NelsonAalenFitter.fit` considerably.
|
| 1143 |
+
|
| 1144 |
+
#### 0.11.1 - 2017-06-22
|
| 1145 |
+
- Python3 fix for `CoxPHFitter.plot`.
|
| 1146 |
+
|
| 1147 |
+
#### 0.11.0 - 2017-06-21
|
| 1148 |
+
- fixes regression in `KaplanMeierFitter.plot` when using Seaborn and lifelines.
|
| 1149 |
+
- introduce a new `.plot` function to a fitted `CoxPHFitter` instance. This plots the hazard coefficients and their confidence intervals.
|
| 1150 |
+
- in all plot methods, the `ix` kwarg has been deprecated in favour of a new `loc` kwarg. This is to align with Pandas deprecating `ix`
|
| 1151 |
+
|
| 1152 |
+
#### 0.10.1 - 2017-06-05
|
| 1153 |
+
- fix in internal normalization for `CoxPHFitter` predict methods.
|
| 1154 |
+
|
| 1155 |
+
#### 0.10.0
|
| 1156 |
+
- corrected bug that was returning the wrong baseline survival and hazard values in `CoxPHFitter` when `normalize=True`.
|
| 1157 |
+
- removed `normalize` kwarg in `CoxPHFitter`. This was causing lots of confusion for users, and added code complexity. It's really nice to be able to remove it.
|
| 1158 |
+
- correcting column name in `CoxPHFitter.baseline_survival_`
|
| 1159 |
+
- `CoxPHFitter.baseline_cumulative_hazard_` is always centered, to mimic R's `basehaz` API.
|
| 1160 |
+
- new `predict_log_partial_hazards` to `CoxPHFitter`
|
| 1161 |
+
|
| 1162 |
+
#### 0.9.4
|
| 1163 |
+
- adding `plot_loglogs` to `KaplanMeierFitter`
|
| 1164 |
+
- added a (correct) check to see if some columns in a dataset will cause convergence problems.
|
| 1165 |
+
- removing `flat` argument in `plot` methods. It was causing confusion. To replicate it, one can set `ci_force_lines=True` and `show_censors=True`.
|
| 1166 |
+
- adding `strata` keyword argument to `CoxPHFitter` on initialization (ex: `CoxPHFitter(strata=['v1', 'v2'])`). Why? Fitters initialized with `strata` can now be passed into `k_fold_cross_validation`, plus it makes unit testing `strata` fitters easier.
|
| 1167 |
+
- If using `strata` in `CoxPHFitter`, access to strata specific baseline hazards and survival functions is available (previously it was a blended value). Prediction also uses the specific baseline hazards/survivals.
|
| 1168 |
+
- performance improvements in `CoxPHFitter` - should see at least a 10% speed improvement in `fit`.
|
| 1169 |
+
|
| 1170 |
+
#### 0.9.2
|
| 1171 |
+
- deprecates Pandas versions before 0.18.
|
| 1172 |
+
- throw an error if no admissible pairs in the c-index calculation. Previously a NaN was returned.
|
| 1173 |
+
|
| 1174 |
+
#### 0.9.1
|
| 1175 |
+
- add two summary functions to Weibull and Exponential fitter, solves #224
|
| 1176 |
+
|
| 1177 |
+
#### 0.9.0
|
| 1178 |
+
- new prediction function in `CoxPHFitter`, `predict_log_hazard_relative_to_mean`, that mimics what R's `predict.coxph` does.
|
| 1179 |
+
- removing the `predict` method in CoxPHFitter and AalenAdditiveFitter. This is because the choice of `predict_median` as a default was causing too much confusion, and no other natural choice as a default was available. All other `predict_` methods remain.
|
| 1180 |
+
- Default predict method in `k_fold_cross_validation` is now `predict_expectation`
|
| 1181 |
+
|
| 1182 |
+
#### 0.8.1 - 2015-08-01
|
| 1183 |
+
- supports matplotlib 1.5.
|
| 1184 |
+
- introduction of a param `nn_cumulative_hazards` in AalenAdditiveModel's `__init__` (default True). This parameter will truncate all negative cumulative hazards in prediction methods to 0, so that the returned cumulative hazards are non-negative.
|
| 1185 |
+
- bug fixes including:
|
| 1186 |
+
- fixed issue where the while loop in `_newton_rhaphson` would break too early causing a variable not to be set properly.
|
| 1187 |
+
- scaling of smooth hazards in NelsonAalenFitter was off by a factor of 0.5.
|
| 1188 |
+
|
| 1189 |
+
|
| 1190 |
+
#### 0.8.0
|
| 1191 |
+
- reorganized lifelines directories:
|
| 1192 |
+
- moved test files out of main directory.
|
| 1193 |
+
- moved `utils.py` into its own directory.
|
| 1194 |
+
- moved all estimators into the `fitters` directory.
|
| 1195 |
+
- added a `at_risk` column to the output of `group_survival_table_from_events` and `survival_table_from_events`
|
| 1196 |
+
- added sample size and power calculations for statistical tests. See `lifelines.statistics.sample_size_necessary_under_cph` and `lifelines.statistics.power_under_cph`.
|
| 1197 |
+
- fixed a bug when using KaplanMeierFitter for left-censored data.
|
| 1198 |
+
|
| 1199 |
+
|
| 1200 |
+
#### 0.7.1
|
| 1201 |
+
- addition of a l2 `penalizer` to `CoxPHFitter`.
|
| 1202 |
+
- dropped Fortran implementation in favour of an efficient Python version. Lifelines is pure python once again!
|
| 1203 |
+
- addition of `strata` keyword argument to `CoxPHFitter` to allow for stratification of a single or set of
|
| 1204 |
+
categorical variables in your dataset.
|
| 1205 |
+
- `datetimes_to_durations` now accepts a list as `na_values`, so multiple values can be checked.
|
| 1206 |
+
- fixed a bug in `datetimes_to_durations` where `fill_date` was not properly being applied.
|
| 1207 |
+
- Changed warning in `datetimes_to_durations` to be correct.
|
| 1208 |
+
- refactor each fitter into its own submodule. For now, the tests are still in the same file. This will also *not* break the API.
|
| 1209 |
+
|
| 1210 |
+
|
| 1211 |
+
#### 0.7.0 - 2015-03-01
|
| 1212 |
+
- allow for multiple fitters to be passed into `k_fold_cross_validation`.
|
| 1213 |
+
- statistical tests in `lifelines.statistics` now return a `StatisticalResult` object with properties like `p_value`, `test_results`, and `summary`.
|
| 1214 |
+
- fixed a bug in how log-rank statistical tests are performed. The covariance matrix was not being correctly calculated. This resulted in slightly different p-values.
|
| 1215 |
+
- `WeibullFitter`, `ExponentialFitter`, `KaplanMeierFitter` and `BreslowFlemingHarringtonFitter` all have a `conditional_time_to_event_` property that measures the median duration remaining until the death event, given survival up until time t.
|
| 1216 |
+
|
| 1217 |
+
#### 0.6.1
|
| 1218 |
+
|
| 1219 |
+
- addition of `median_` property to `WeibullFitter` and `ExponentialFitter`.
|
| 1220 |
+
- `WeibullFitter` and `ExponentialFitter` will use integer timelines instead of float provided by `linspace`. This is
|
| 1221 |
+
so if your work is to sum up the survival function (for expected values or something similar), it's more difficult to
|
| 1222 |
+
make a mistake.
|
| 1223 |
+
|
| 1224 |
+
#### 0.6.0 - 2015-02-04
|
| 1225 |
+
|
| 1226 |
+
- Inclusion of the univariate fitters `WeibullFitter` and `ExponentialFitter`.
|
| 1227 |
+
- Removing `BayesianFitter` from lifelines.
|
| 1228 |
+
- Added new penalization scheme to AalenAdditiveFitter. You can now add a smoothing penalizer
|
| 1229 |
+
that will try to keep subsequent values of a hazard curve close together. The penalizing coefficient
|
| 1230 |
+
is `smoothing_penalizer`.
|
| 1231 |
+
- Changed `penalizer` keyword arg to `coef_penalizer` in AalenAdditiveFitter.
|
| 1232 |
+
- new `ridge_regression` function in `utils.py` to perform linear regression with l2 penalizer terms.
|
| 1233 |
+
- Matplotlib is no longer a mandatory dependency.
|
| 1234 |
+
- `.predict(time)` method on univariate fitters can now accept a scalar (and returns a scalar) and an iterable (and returns a numpy array)
|
| 1235 |
+
- In `KaplanMeierFitter`, `epsilon` has been renamed to `precision`.
|
| 1236 |
+
|
| 1237 |
+
|
| 1238 |
+
#### 0.5.1 - 2014-12-24
|
| 1239 |
+
|
| 1240 |
+
- New API for `CoxPHFitter` and `AalenAdditiveFitter`: the default arguments for `event_col` and `duration_col`. `duration_col` is now mandatory, and `event_col` now accepts a column, or by default, `None`, which assumes all events are observed (non-censored).
|
| 1241 |
+
- Fix statistical tests.
|
| 1242 |
+
- Allow negative durations in Fitters.
|
| 1243 |
+
- New API in `survival_table_from_events`: `min_observations` is replaced by `birth_times` (default `None`).
|
| 1244 |
+
- New API in `CoxPHFitter` for summary: `summary` will return a dataframe with statistics, `print_summary()` will print the dataframe (plus some other statistics) in a pretty manner.
|
| 1245 |
+
- Adding "At Risk" counts option to univariate fitter `plot` methods, `.plot(at_risk_counts=True)`, and the function `lifelines.plotting.add_at_risk_counts`.
|
| 1246 |
+
- Fix bug Epanechnikov kernel.
|
| 1247 |
+
|
| 1248 |
+
#### 0.5.0 - 2014-12-07
|
| 1249 |
+
|
| 1250 |
+
- move testing to py.test
|
| 1251 |
+
- refactor tests into smaller files
|
| 1252 |
+
- make `test_pairwise_logrank_test_with_identical_data_returns_inconclusive` a better test
|
| 1253 |
+
- add test for summary()
|
| 1254 |
+
- Alternate metrics can be used for `k_fold_cross_validation`.
|
| 1255 |
+
|
| 1256 |
+
|
| 1257 |
+
#### 0.4.4 - 2014-11-27
|
| 1258 |
+
|
| 1259 |
+
- Lots of improvements to numerical stability (but some things still need work)
|
| 1260 |
+
- Additions to `summary` in CoxPHFitter.
|
| 1261 |
+
- Make all prediction methods output a DataFrame
|
| 1262 |
+
- Fixes bug in 1-d input not returning in CoxPHFitter
|
| 1263 |
+
- Lots of new tests.
|
| 1264 |
+
|
| 1265 |
+
#### 0.4.3 - 2014-07-23
|
| 1266 |
+
- refactoring of `qth_survival_times`: it can now accept an iterable (or a scalar still) of probabilities in the q argument, and will return a DataFrame with these as columns. If len(q)==1 and a single survival function is given, will return a scalar, not a DataFrame. Also some good speed improvements.
|
| 1267 |
+
- KaplanMeierFitter and NelsonAalenFitter now have a `_label` property that is passed in during the fit.
|
| 1268 |
+
- KaplanMeierFitter/NelsonAalenFitter's initial `alpha` value is overwritten if a new `alpha` value is passed
|
| 1269 |
+
in during the `fit`.
|
| 1270 |
+
- New method for KaplanMeierFitter: `conditional_time_to`. This returns a DataFrame of the estimate:
|
| 1271 |
+
med(S(t | T>s)) - s, human readable: the estimated time left of living, given an individual is aged s.
|
| 1272 |
+
- Adds option `include_likelihood` to CoxPHFitter fit method to save the final log-likelihood value.
|
| 1273 |
+
|
| 1274 |
+
#### 0.4.2 - 2014-06-19
|
| 1275 |
+
|
| 1276 |
+
- Massive speed improvements to CoxPHFitter.
|
| 1277 |
+
- Additional prediction method: `predict_percentile` is available on CoxPHFitter and AalenAdditiveFitter. Given a percentile, p, this function returns the value t such that *S(t | x) = p*. It is a generalization of `predict_median`.
|
| 1278 |
+
- Additional kwargs in `k_fold_cross_validation` that will accept different prediction methods (default is `predict_median`).
|
| 1279 |
+
- Bug fix in CoxPHFitter `predict_expectation` function.
|
| 1280 |
+
- Correct spelling mistake in newton-rhapson algorithm.
|
| 1281 |
+
- `datasets` now contains functions for generating the respective datasets, ex: `generate_waltons_dataset`.
|
| 1282 |
+
- Bumping up the number of samples in statistical tests to prevent them from failing so often (this is a stop-gap)
|
| 1283 |
+
- pep8 everything
|
| 1284 |
+
|
| 1285 |
+
#### 0.4.1.1
|
| 1286 |
+
|
| 1287 |
+
- Ability to specify default printing in statistical tests with the `suppress_print` keyword argument (default False).
|
| 1288 |
+
- For the multivariate log rank test, the inverse step has been replaced with the generalized inverse. This seems to be what other packages use.
|
| 1289 |
+
- Adding more robust cross validation scheme based on issue #67.
|
| 1290 |
+
- fixing `regression_dataset` in `datasets`.
|
| 1291 |
+
|
| 1292 |
+
|
| 1293 |
+
#### 0.4.1 - 2014-06-11
|
| 1294 |
+
|
| 1295 |
+
- `CoxFitter` is now known as `CoxPHFitter`
|
| 1296 |
+
- refactoring some tests that used redundant data from `lifelines.datasets`.
|
| 1297 |
+
- Adding cross validation: in `utils` is a new `k_fold_cross_validation` for model selection in regression problems.
|
| 1298 |
+
- Change CoxPHFitter's fit method's `display_output` to `False`.
|
| 1299 |
+
- fixing bug in CoxPHFitter's `_compute_baseline_hazard` that errored when sending Series objects to
|
| 1300 |
+
`survival_table_from_events`.
|
| 1301 |
+
- CoxPHFitter's `fit` now looks to columns with too low variance, and halts NR algorithm if a NaN is found.
|
| 1302 |
+
- Adding a Changelog.
|
| 1303 |
+
- more sanitizing for the statistical tests =)
|
| 1304 |
+
|
| 1305 |
+
#### 0.4.0 - 2014-06-08
|
| 1306 |
+
|
| 1307 |
+
- `CoxFitter` implements Cox Proportional Hazards model in lifelines.
|
| 1308 |
+
- lifelines moves to wheel distributions.
|
| 1309 |
+
- tests in the `statistics` module now print the summary (and still return the regular values)
|
| 1310 |
+
- new `BaseFitter` class is inherited by all fitters.
|
lifelines/source/CITATION.cff
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# YAML 1.2
|
| 2 |
+
---
|
| 3 |
+
authors:
|
| 4 |
+
-
|
| 5 |
+
family-names: "Davidson-Pilon"
|
| 6 |
+
given-names: Cameron
|
| 7 |
+
orcid: "https://orcid.org/0000-0003-1794-9143"
|
| 8 |
+
cff-version: "1.1.0"
|
| 9 |
+
doi: "https://doi.org/10.21105/joss.01317"
|
| 10 |
+
license: MIT
|
| 11 |
+
message: "If you use this software, please cite it using these metadata."
|
| 12 |
+
repository-code: "https://github.com/camDavidsonPilon/lifelines"
|
| 13 |
+
title: lifelines, survival analysis in Python
|
| 14 |
+
...
|
lifelines/source/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
MIT License
|
| 2 |
+
|
| 3 |
+
Copyright (c) 2017 Cameron Davidson-Pilon
|
| 4 |
+
|
| 5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 6 |
+
of this software and associated documentation files (the "Software"), to deal
|
| 7 |
+
in the Software without restriction, including without limitation the rights
|
| 8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 9 |
+
copies of the Software, and to permit persons to whom the Software is
|
| 10 |
+
furnished to do so, subject to the following conditions:
|
| 11 |
+
|
| 12 |
+
The above copyright notice and this permission notice shall be included in all
|
| 13 |
+
copies or substantial portions of the Software.
|
| 14 |
+
|
| 15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 21 |
+
SOFTWARE.
|
lifelines/source/MANIFEST.in
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
include README.md
|
| 2 |
+
include LICENSE
|
| 3 |
+
include MANIFEST.in
|
| 4 |
+
|
| 5 |
+
include *.ipynb
|
| 6 |
+
|
| 7 |
+
recursive-include lifelines *
|
| 8 |
+
recursive-include datasets *
|
| 9 |
+
recursive-include styles *
|
| 10 |
+
recursive-include reqs *
|
| 11 |
+
|
| 12 |
+
recursive-exclude * *.py[co]
|
lifelines/source/Makefile
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
init:
|
| 2 |
+
ifeq ($(TRAVIS), true)
|
| 3 |
+
pip install -r reqs/travis-requirements.txt
|
| 4 |
+
pip install pandas==${PANDAS_VERSION}
|
| 5 |
+
pip install numpy==${NUMPY_VERSION}
|
| 6 |
+
pip freeze --local
|
| 7 |
+
else
|
| 8 |
+
pip install -r reqs/dev-requirements.txt
|
| 9 |
+
pre-commit install
|
| 10 |
+
endif
|
| 11 |
+
|
| 12 |
+
test:
|
| 13 |
+
py.test lifelines/ -rfs --cov=lifelines --block=False --cov-report term-missing
|
| 14 |
+
|
| 15 |
+
lint:
|
| 16 |
+
ifeq ($(TRAVIS_PYTHON_VERSION), 2.7)
|
| 17 |
+
echo "Skip linting for Python2.7"
|
| 18 |
+
else
|
| 19 |
+
make black
|
| 20 |
+
prospector --output-format grouped
|
| 21 |
+
endif
|
| 22 |
+
|
| 23 |
+
black:
|
| 24 |
+
ifeq ($(TRAVIS_PYTHON_VERSION), 2.7)
|
| 25 |
+
echo "Skip linting for Python2.7"
|
| 26 |
+
else
|
| 27 |
+
black lifelines/ -l 120 --fast
|
| 28 |
+
endif
|
| 29 |
+
|
| 30 |
+
check_format:
|
| 31 |
+
ifeq ($(TRAVIS_PYTHON_VERSION), 3.6)
|
| 32 |
+
black . --check --line-length 120
|
| 33 |
+
else
|
| 34 |
+
echo "Only check format on Python3.6"
|
| 35 |
+
endif
|
| 36 |
+
|
| 37 |
+
pre:
|
| 38 |
+
pre-commit run --all-files
|
lifelines/source/README.md
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+

|
| 2 |
+
|
| 3 |
+
[PyPI version](https://badge.fury.io/py/lifelines)
|
| 4 |
+
[conda-forge](https://conda.anaconda.org/conda-forge)
|
| 6 |
+
[DOI](https://zenodo.org/badge/latestdoi/12420595)
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
[What is survival analysis and why should I learn it?](http://lifelines.readthedocs.org/en/latest/Survival%20Analysis%20intro.html)
|
| 10 |
+
Survival analysis was originally developed and applied heavily by the actuarial and medical community. Its purpose was to answer *why do events occur now versus later* under uncertainty (where *events* might refer to deaths, disease remission, etc.). This is great for researchers who are interested in measuring lifetimes: they can answer questions like *what factors might influence deaths?*
|
| 11 |
+
|
| 12 |
+
But outside of medicine and actuarial science, there are many other interesting and exciting applications of survival analysis. For example:
|
| 13 |
+
- SaaS providers are interested in measuring subscriber lifetimes, or time to some first action
|
| 14 |
+
- inventory stock out is a censoring event for true "demand" of a good.
|
| 15 |
+
- sociologists are interested in measuring political parties' lifetimes, or relationships, or marriages
|
| 16 |
+
- A/B tests to determine how long it takes different groups to perform an action.
|
| 17 |
+
|
| 18 |
+
*lifelines* is a pure Python implementation of the best parts of survival analysis.
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
## Documentation and intro to survival analysis
|
| 22 |
+
|
| 23 |
+
If you are new to survival analysis, wondering why it is useful, or are interested in *lifelines* examples, API, and syntax, please read the [Documentation and Tutorials page](http://lifelines.readthedocs.org/en/latest/index.html)
|
| 24 |
+
|
| 25 |
+
## Contact
|
| 26 |
+
- Start a conversation in our [Discussions room](https://github.com/CamDavidsonPilon/lifelines/discussions).
|
| 27 |
+
- Some users have posted common questions at [stats.stackexchange.com](https://stats.stackexchange.com/search?tab=votes&q=%22lifelines%22%20is%3aquestion).
|
| 28 |
+
- Create an issue in the [GitHub repository](https://github.com/camdavidsonpilon/lifelines).
|
| 29 |
+
|
| 30 |
+
## Development
|
| 31 |
+
|
| 32 |
+
See our [Contributing](https://github.com/CamDavidsonPilon/lifelines/blob/master/.github/CONTRIBUTING.md) guidelines.
|
lifelines/source/__init__.py
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
"""
|
| 3 |
+
lifelines Project Package Initialization File
|
| 4 |
+
"""
|
lifelines/source/conftest.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
import numpy as np
|
| 3 |
+
import pytest
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def pytest_runtest_setup(item):
    """Reseed NumPy's global random generator before a test runs.

    A seed in ``[0, 1000)`` is drawn from the global generator, printed, and then
    installed with ``np.random.seed`` so the value shown in the output is the one
    the test actually ran with. ``item`` is accepted for the hook signature but
    is not used.
    """
    seed = np.random.randint(1000)
    print("Seed used in np.random.seed(): %d" % seed)
    np.random.seed(seed)
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def pytest_addoption(parser):
    """Register the ``--block`` command-line option on ``parser``.

    The option stores whatever value is given on the command line and defaults
    to ``True`` when it is omitted.
    """
    parser.addoption(
        "--block",
        action="store",
        default=True,
        help="Should plotting block or not.",
    )
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
@pytest.fixture
def block(request):
    """Fixture exposing the value of the ``--block`` option as a boolean.

    Returns ``False`` only when the option value is exactly one of ``False``,
    ``false``, ``no`` or ``0``; every other value yields ``True``. A
    ``ValueError`` raised while resolving the option also yields ``True``.
    """
    falsy_spellings = "False,false,no,0".split(",")
    try:
        option = request.config.getoption("--block")
        return option not in falsy_spellings
    except ValueError:
        return True
|
lifelines/source/docs/Changelog.rst
ADDED
|
@@ -0,0 +1,2822 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Changelog
|
| 2 |
+
=========
|
| 3 |
+
|
| 4 |
+
0.28.0 - Upcoming
|
| 5 |
+
-----------------
|
| 6 |
+
|
| 7 |
+
- Fixes bins that are far into the future when using
|
| 8 |
+
``survival_table_from_events``, see #1587
|
| 9 |
+
- Removed ``sklearn_adapter``. It was a terrible hack, and causing more
|
| 10 |
+
confusion and support debt than I want. This cleans up our API and
|
| 11 |
+
simplifies the library. ✨ There’s no replacement, and I doubt I’ll
|
| 12 |
+
introduce one ✨
|
| 13 |
+
- Fix Pandas 2.0 compatibility.
|
| 14 |
+
- Fix overflow issue in NelsonAalenFitter, #1585
|
| 15 |
+
|
| 16 |
+
0.27.8 - 2023-09-13
|
| 17 |
+
-------------------
|
| 18 |
+
|
| 19 |
+
- Estimators now have ``.label`` property
|
| 20 |
+
- Fixed some deprecation warnings
|
| 21 |
+
- Pinned to numpy < 2.0
|
| 22 |
+
|
| 23 |
+
.. _section-1:
|
| 24 |
+
|
| 25 |
+
0.27.7 - 2023-05-01
|
| 26 |
+
-------------------
|
| 27 |
+
|
| 28 |
+
- ``check_assumptions(show_plots=True)`` will always show plots,
|
| 29 |
+
regardless of test outcome. Thanks @nomennominatur!
|
| 30 |
+
- ``lifelines.datasets`` is now importable.
|
| 31 |
+
|
| 32 |
+
.. _section-2:
|
| 33 |
+
|
| 34 |
+
0.27.6 - 2023-04-27
|
| 35 |
+
-------------------
|
| 36 |
+
|
| 37 |
+
- Fix for py3.7
|
| 38 |
+
|
| 39 |
+
.. _section-3:
|
| 40 |
+
|
| 41 |
+
0.27.5 - 2023-04-27
|
| 42 |
+
-------------------
|
| 43 |
+
|
| 44 |
+
- Support pandas 2.0+
|
| 45 |
+
|
| 46 |
+
New features
|
| 47 |
+
~~~~~~~~~~~~
|
| 48 |
+
|
| 49 |
+
- Support py3.11
|
| 50 |
+
|
| 51 |
+
.. _section-4:
|
| 52 |
+
|
| 53 |
+
0.27.4 - 2022-11-16
|
| 54 |
+
-------------------
|
| 55 |
+
|
| 56 |
+
.. _new-features-1:
|
| 57 |
+
|
| 58 |
+
New features
|
| 59 |
+
~~~~~~~~~~~~
|
| 60 |
+
|
| 61 |
+
- Support py3.11
|
| 62 |
+
|
| 63 |
+
.. _section-5:
|
| 64 |
+
|
| 65 |
+
0.27.3 - 2022-09-25
|
| 66 |
+
-------------------
|
| 67 |
+
|
| 68 |
+
.. _new-features-2:
|
| 69 |
+
|
| 70 |
+
New features
|
| 71 |
+
~~~~~~~~~~~~
|
| 72 |
+
|
| 73 |
+
- Fixed and silenced a lot of warnings
|
| 74 |
+
|
| 75 |
+
Bug fixes
|
| 76 |
+
~~~~~~~~~
|
| 77 |
+
|
| 78 |
+
- Migrate to newer Pandas ``Styler`` for ``to_latex``
|
| 79 |
+
|
| 80 |
+
API Changes
|
| 81 |
+
~~~~~~~~~~~
|
| 82 |
+
|
| 83 |
+
- There were way too many functions on the summary objects, so I’ve
|
| 84 |
+
hidden ``to_*`` on them.
|
| 85 |
+
|
| 86 |
+
.. _section-6:
|
| 87 |
+
|
| 88 |
+
0.27.2 - 2022-09-07
|
| 89 |
+
-------------------
|
| 90 |
+
|
| 91 |
+
.. _bug-fixes-1:
|
| 92 |
+
|
| 93 |
+
Bug fixes
|
| 94 |
+
~~~~~~~~~
|
| 95 |
+
|
| 96 |
+
- Fixed issue in add_at_risk_table when there were very late entries.
|
| 97 |
+
|
| 98 |
+
.. _section-7:
|
| 99 |
+
|
| 100 |
+
0.27.1 - 2022-06-25
|
| 101 |
+
-------------------
|
| 102 |
+
|
| 103 |
+
.. _new-features-3:
|
| 104 |
+
|
| 105 |
+
New features
|
| 106 |
+
~~~~~~~~~~~~
|
| 107 |
+
|
| 108 |
+
- all ``fit_`` methods now accept a ``fit_options`` dict that allows
|
| 109 |
+
one to pass kwargs to the underlying fitting algorithm.
|
| 110 |
+
|
| 111 |
+
.. _api-changes-1:
|
| 112 |
+
|
| 113 |
+
API Changes
|
| 114 |
+
~~~~~~~~~~~
|
| 115 |
+
|
| 116 |
+
- ``step_size`` is removed from Cox models ``fit``. See ``fit_options``
|
| 117 |
+
above.
|
| 118 |
+
|
| 119 |
+
.. _bug-fixes-2:
|
| 120 |
+
|
| 121 |
+
Bug fixes
|
| 122 |
+
~~~~~~~~~
|
| 123 |
+
|
| 124 |
+
- fixed Cox models when “trivial” matrix was passed in (one with no
|
| 125 |
+
covariates)
|
| 126 |
+
|
| 127 |
+
.. _section-8:
|
| 128 |
+
|
| 129 |
+
0.27.0 - 2022-03-15
|
| 130 |
+
-------------------
|
| 131 |
+
|
| 132 |
+
Dropping Python3.6 support.
|
| 133 |
+
|
| 134 |
+
.. _bug-fixes-3:
|
| 135 |
+
|
| 136 |
+
Bug fixes
|
| 137 |
+
~~~~~~~~~
|
| 138 |
+
|
| 139 |
+
- Fix late entry in ``add_at_risk_counts``.
|
| 140 |
+
|
| 141 |
+
.. _new-features-4:
|
| 142 |
+
|
| 143 |
+
New features
|
| 144 |
+
~~~~~~~~~~~~
|
| 145 |
+
|
| 146 |
+
- ``add_at_risk_counts`` has a new flag to determine whether to use start or
|
| 147 |
+
end-of-period at risk counts.
|
| 148 |
+
- new column in fitter’s ``summary`` that displays the number the
|
| 149 |
+
parameter is being compared against.
|
| 150 |
+
|
| 151 |
+
.. _api-changes-2:
|
| 152 |
+
|
| 153 |
+
API Changes
|
| 154 |
+
~~~~~~~~~~~
|
| 155 |
+
|
| 156 |
+
- ``plot_lifetimes``\ ’s ``duration`` arg has the interpretation of
|
| 157 |
+
“relative time the subject died (since birth)”, instead of the old
|
| 158 |
+
“time observed for”. These interpretations are different when there
|
| 159 |
+
is late entry.
|
| 160 |
+
|
| 161 |
+
.. _section-9:
|
| 162 |
+
|
| 163 |
+
0.26.4 - 2021-11-30
|
| 164 |
+
-------------------
|
| 165 |
+
|
| 166 |
+
.. _new-features-5:
|
| 167 |
+
|
| 168 |
+
New features
|
| 169 |
+
~~~~~~~~~~~~
|
| 170 |
+
|
| 171 |
+
- adding ``weights`` to log rank functions
|
| 172 |
+
|
| 173 |
+
.. _section-10:
|
| 174 |
+
|
| 175 |
+
0.26.3 - 2021-09-16
|
| 176 |
+
-------------------
|
| 177 |
+
|
| 178 |
+
.. _bug-fixes-4:
|
| 179 |
+
|
| 180 |
+
Bug fixes
|
| 181 |
+
~~~~~~~~~
|
| 182 |
+
|
| 183 |
+
- Fix using formulas with ``CoxPHFitter.score``
|
| 184 |
+
|
| 185 |
+
.. _section-11:
|
| 186 |
+
|
| 187 |
+
0.26.2 - 2021-09-15
|
| 188 |
+
-------------------
|
| 189 |
+
|
| 190 |
+
Error in v0.26.1 deployment
|
| 191 |
+
|
| 192 |
+
.. _section-12:
|
| 193 |
+
|
| 194 |
+
0.26.1 - 2021-09-15
|
| 195 |
+
-------------------
|
| 196 |
+
|
| 197 |
+
.. _api-changes-3:
|
| 198 |
+
|
| 199 |
+
API Changes
|
| 200 |
+
~~~~~~~~~~~
|
| 201 |
+
|
| 202 |
+
- ``t_0`` in ``logrank_test`` now will not remove data, but will
|
| 203 |
+
instead censor all subjects that experience the event afterwards.
|
| 204 |
+
- update ``status`` column in ``lifelines.datasets.load_lung`` to be
|
| 205 |
+
more standard coding: 0 is censored, 1 is event.
|
| 206 |
+
|
| 207 |
+
.. _bug-fixes-5:
|
| 208 |
+
|
| 209 |
+
Bug fixes
|
| 210 |
+
~~~~~~~~~
|
| 211 |
+
|
| 212 |
+
- Fix using formulas with
|
| 213 |
+
``AalenAdditiveFitter.predict_cumulative_hazard``
|
| 214 |
+
- Fix using formulas with ``CoxPHFitter.score``
|
| 215 |
+
|
| 216 |
+
.. _section-13:
|
| 217 |
+
|
| 218 |
+
0.26.0 - 2021-05-26
|
| 219 |
+
-------------------
|
| 220 |
+
|
| 221 |
+
.. _new-features-6:
|
| 222 |
+
|
| 223 |
+
New features
|
| 224 |
+
~~~~~~~~~~~~
|
| 225 |
+
|
| 226 |
+
- ``.BIC_`` is now present on fitted models.
|
| 227 |
+
- ``CoxPHFitter`` with spline baseline can accept pre-computed knot
|
| 228 |
+
locations.
|
| 229 |
+
- Left censoring fitting in KaplanMeierFitter is now “expected”. That
|
| 230 |
+
is, ``predict`` *always* predicts the survival function (as does
|
| 231 |
+
every other model), ``confidence_interval_`` is *always* the CI for
|
| 232 |
+
the survival function (as does every other model), and so on. In
|
| 233 |
+
summary: the API for estimates doesn’t change depending on what type of
|
| 234 |
+
censoring your dataset has.
|
| 235 |
+
|
| 236 |
+
.. _bug-fixes-6:
|
| 237 |
+
|
| 238 |
+
Bug fixes
|
| 239 |
+
~~~~~~~~~
|
| 240 |
+
|
| 241 |
+
- Fixed an annoying bug where at_risk-table labels were not aligning
|
| 242 |
+
properly when data spanned large ranges. See merging PR for details.
|
| 243 |
+
- Fixed a bug in ``find_best_parametric_model`` where the wrong BIC
|
| 244 |
+
value was being computed.
|
| 245 |
+
- Fixed regression bug when using an array as a penalizer in Cox
|
| 246 |
+
models.
|
| 247 |
+
|
| 248 |
+
.. _section-14:
|
| 249 |
+
|
| 250 |
+
0.25.11 - 2021-04-06
|
| 251 |
+
--------------------
|
| 252 |
+
|
| 253 |
+
.. _bug-fixes-7:
|
| 254 |
+
|
| 255 |
+
Bug fixes
|
| 256 |
+
~~~~~~~~~
|
| 257 |
+
|
| 258 |
+
- Fix integer-valued categorical variables in regression model
|
| 259 |
+
predictions.
|
| 260 |
+
- numpy > 1.20 is allowed.
|
| 261 |
+
- Bug fix in the elastic-net penalty for Cox models that wasn’t
|
| 262 |
+
weighting the terms correctly.
|
| 263 |
+
|
| 264 |
+
.. _section-15:
|
| 265 |
+
|
| 266 |
+
0.25.10 - 2021-03-03
|
| 267 |
+
--------------------
|
| 268 |
+
|
| 269 |
+
.. _new-features-7:
|
| 270 |
+
|
| 271 |
+
New features
|
| 272 |
+
~~~~~~~~~~~~
|
| 273 |
+
|
| 274 |
+
- Better appearance when using a single row to show in
|
| 275 |
+
``add_at_risk_table``.
|
| 276 |
+
|
| 277 |
+
.. _section-16:
|
| 278 |
+
|
| 279 |
+
0.25.9 - 2021-02-04
|
| 280 |
+
-------------------
|
| 281 |
+
|
| 282 |
+
Small bump in dependencies.
|
| 283 |
+
|
| 284 |
+
.. _section-17:
|
| 285 |
+
|
| 286 |
+
0.25.8 - 2021-01-22
|
| 287 |
+
-------------------
|
| 288 |
+
|
| 289 |
+
Important: we dropped Patsy as our formula framework, and adopted
|
| 290 |
+
Formulaic. While the latter is less mature than Patsy, we feel the core
|
| 291 |
+
capabilities are satisfactory and it provides new opportunities.
|
| 292 |
+
|
| 293 |
+
.. _new-features-8:
|
| 294 |
+
|
| 295 |
+
New features
|
| 296 |
+
~~~~~~~~~~~~
|
| 297 |
+
|
| 298 |
+
- Parametric models with formulas are able to be serialized now.
|
| 299 |
+
- a ``_scipy_callback`` function is available to use in fitting
|
| 300 |
+
algorithms.
|
| 301 |
+
|
| 302 |
+
.. _section-18:
|
| 303 |
+
|
| 304 |
+
0.25.7 - 2020-12-09
|
| 305 |
+
-------------------
|
| 306 |
+
|
| 307 |
+
.. _api-changes-4:
|
| 308 |
+
|
| 309 |
+
API Changes
|
| 310 |
+
~~~~~~~~~~~
|
| 311 |
+
|
| 312 |
+
- Adding ``cumulative_hazard_at_times`` to NelsonAalenFitter
|
| 313 |
+
|
| 314 |
+
.. _bug-fixes-8:
|
| 315 |
+
|
| 316 |
+
Bug fixes
|
| 317 |
+
~~~~~~~~~
|
| 318 |
+
|
| 319 |
+
- Fixed error in ``CoxPHFitter`` when entry time == event time.
|
| 320 |
+
- Fixed formulas in AFT interval censoring regression.
|
| 321 |
+
- Fixed ``concordance_index_`` when no events observed
|
| 322 |
+
- Fixed label being overwritten in ParametricUnivariate models
|
| 323 |
+
|
| 324 |
+
.. _section-19:
|
| 325 |
+
|
| 326 |
+
0.25.6 - 2020-10-26
|
| 327 |
+
-------------------
|
| 328 |
+
|
| 329 |
+
.. _new-features-9:
|
| 330 |
+
|
| 331 |
+
New features
|
| 332 |
+
~~~~~~~~~~~~
|
| 333 |
+
|
| 334 |
+
- Parametric Cox models can now handle left and interval censoring
|
| 335 |
+
datasets.
|
| 336 |
+
|
| 337 |
+
.. _bug-fixes-9:
|
| 338 |
+
|
| 339 |
+
Bug fixes
|
| 340 |
+
~~~~~~~~~
|
| 341 |
+
|
| 342 |
+
- “improved” the output of ``add_at_risk_counts`` by removing a call to
|
| 343 |
+
``plt.tight_layout()`` - this works better when you are calling
|
| 344 |
+
``add_at_risk_counts`` on multiple axes, but it is recommended you
|
| 345 |
+
call ``plt.tight_layout()`` at the very end of your script.
|
| 346 |
+
- Fix bug in ``KaplanMeierFitter``\ ’s interval censoring where
|
| 347 |
+
max(lower bound) < min(upper bound).
|
| 348 |
+
|
| 349 |
+
.. _section-20:
|
| 350 |
+
|
| 351 |
+
0.25.5 - 2020-09-23
|
| 352 |
+
-------------------
|
| 353 |
+
|
| 354 |
+
.. _api-changes-5:
|
| 355 |
+
|
| 356 |
+
API Changes
|
| 357 |
+
~~~~~~~~~~~
|
| 358 |
+
|
| 359 |
+
- ``check_assumptions`` now returns a list of lists of axes that can be
|
| 360 |
+
manipulated
|
| 361 |
+
|
| 362 |
+
.. _bug-fixes-10:
|
| 363 |
+
|
| 364 |
+
Bug fixes
|
| 365 |
+
~~~~~~~~~
|
| 366 |
+
|
| 367 |
+
- fixed error when using ``plot_partial_effects`` with categorical data
|
| 368 |
+
in AFT models
|
| 369 |
+
- improved warning when Hessian matrix contains NaNs.
|
| 370 |
+
- fixed performance regression in interval censoring fitting in
|
| 371 |
+
parametric models
|
| 372 |
+
- ``weights`` wasn’t being applied properly in NPMLE
|
| 373 |
+
|
| 374 |
+
.. _section-21:
|
| 375 |
+
|
| 376 |
+
0.25.4 - 2020-08-26
|
| 377 |
+
-------------------
|
| 378 |
+
|
| 379 |
+
.. _new-features-10:
|
| 380 |
+
|
| 381 |
+
New features
|
| 382 |
+
~~~~~~~~~~~~
|
| 383 |
+
|
| 384 |
+
- New baseline estimator for Cox models: ``piecewise``
|
| 385 |
+
- Performance improvements for parametric models
|
| 386 |
+
``log_likelihood_ratio_test()`` and ``print_summary()``
|
| 387 |
+
- Better step-size defaults for Cox model -> more robust convergence.
|
| 388 |
+
|
| 389 |
+
.. _bug-fixes-11:
|
| 390 |
+
|
| 391 |
+
Bug fixes
|
| 392 |
+
~~~~~~~~~
|
| 393 |
+
|
| 394 |
+
- fix ``check_assumptions`` when using formulas.
|
| 395 |
+
|
| 396 |
+
.. _section-22:
|
| 397 |
+
|
| 398 |
+
0.25.3 - 2020-08-24
|
| 399 |
+
-------------------
|
| 400 |
+
|
| 401 |
+
.. _new-features-11:
|
| 402 |
+
|
| 403 |
+
New features
|
| 404 |
+
~~~~~~~~~~~~
|
| 405 |
+
|
| 406 |
+
- ``survival_difference_at_fixed_point_in_time_test`` now accepts
|
| 407 |
+
fitters instead of raw data, meaning that you can use this function
|
| 408 |
+
on left, right or interval censored data.
|
| 409 |
+
|
| 410 |
+
.. _api-changes-6:
|
| 411 |
+
|
| 412 |
+
API Changes
|
| 413 |
+
~~~~~~~~~~~
|
| 414 |
+
|
| 415 |
+
- See note on ``survival_difference_at_fixed_point_in_time_test``
|
| 416 |
+
above.
|
| 417 |
+
|
| 418 |
+
.. _bug-fixes-12:
|
| 419 |
+
|
| 420 |
+
Bug fixes
|
| 421 |
+
~~~~~~~~~
|
| 422 |
+
|
| 423 |
+
- fix ``StatisticalResult`` printing in notebooks
|
| 424 |
+
- fix Python error when calling ``plot_covariate_groups``
|
| 425 |
+
- fix dtype mismatches in ``plot_partial_effects_on_outcome``.
|
| 426 |
+
|
| 427 |
+
.. _section-23:
|
| 428 |
+
|
| 429 |
+
0.25.2 - 2020-08-08
|
| 430 |
+
-------------------
|
| 431 |
+
|
| 432 |
+
.. _new-features-12:
|
| 433 |
+
|
| 434 |
+
New features
|
| 435 |
+
~~~~~~~~~~~~
|
| 436 |
+
|
| 437 |
+
- Spline ``CoxPHFitter`` can now use ``strata``.
|
| 438 |
+
|
| 439 |
+
.. _api-changes-7:
|
| 440 |
+
|
| 441 |
+
API Changes
|
| 442 |
+
~~~~~~~~~~~
|
| 443 |
+
|
| 444 |
+
- a small parameterization change of the spline ``CoxPHFitter``. The
|
| 445 |
+
linear term in the spline part was moved to a new ``Intercept`` term
|
| 446 |
+
in the ``beta_``.
|
| 447 |
+
- ``n_baseline_knots`` in the spline ``CoxPHFitter`` now refers to
|
| 448 |
+
*all* knots, and not just interior knots (this was confusing to me,
|
| 449 |
+
the author.). So add 2 to ``n_baseline_knots`` to recover the
|
| 450 |
+
identical model as previously.
|
| 451 |
+
|
| 452 |
+
.. _bug-fixes-13:
|
| 453 |
+
|
| 454 |
+
Bug fixes
|
| 455 |
+
~~~~~~~~~
|
| 456 |
+
|
| 457 |
+
- fix splines ``CoxPHFitter`` when ``predict_hazard`` was called.
|
| 458 |
+
- fix some exception imports I missed.
|
| 459 |
+
- fix log-likelihood p-value in splines ``CoxPHFitter``
|
| 460 |
+
|
| 461 |
+
.. _section-24:
|
| 462 |
+
|
| 463 |
+
0.25.1 - 2020-08-01
|
| 464 |
+
-------------------
|
| 465 |
+
|
| 466 |
+
.. _bug-fixes-14:
|
| 467 |
+
|
| 468 |
+
Bug fixes
|
| 469 |
+
~~~~~~~~~
|
| 470 |
+
|
| 471 |
+
- ok *actually* ship the out-of-sample calibration code
|
| 472 |
+
- fix ``labels=False`` in ``add_at_risk_counts``
|
| 473 |
+
- allow for specific rows to be shown in ``add_at_risk_counts``
|
| 474 |
+
- put ``patsy`` as a proper dependency.
|
| 475 |
+
- suppress some Pandas 1.1 warnings.
|
| 476 |
+
|
| 477 |
+
.. _section-25:
|
| 478 |
+
|
| 479 |
+
0.25.0 - 2020-07-27
|
| 480 |
+
-------------------
|
| 481 |
+
|
| 482 |
+
.. _new-features-13:
|
| 483 |
+
|
| 484 |
+
New features
|
| 485 |
+
~~~~~~~~~~~~
|
| 486 |
+
|
| 487 |
+
- Formulas! *lifelines* now supports R-like formulas in regression
|
| 488 |
+
models. See docs
|
| 489 |
+
`here <https://lifelines.readthedocs.io/en/latest/Survival%20Regression.html#fitting-the-regression>`__.
|
| 490 |
+
- ``plot_covariate_group`` now can plot other y-values like hazards and
|
| 491 |
+
cumulative hazards (default: survival function).
|
| 492 |
+
- ``CoxPHFitter`` now accepts late entries via ``entry_col``.
|
| 493 |
+
- ``calibration.survival_probability_calibration`` now works with
|
| 494 |
+
out-of-sample data.
|
| 495 |
+
- ``print_summary`` now accepts a ``column`` argument to filter down
|
| 496 |
+
the displayed values. This helps with clutter in notebooks, latex, or
|
| 497 |
+
on the terminal.
|
| 498 |
+
- ``add_at_risk_counts`` now follows the cool new KMunicate suggestions
|
| 499 |
+
|
| 500 |
+
.. _api-changes-8:
|
| 501 |
+
|
| 502 |
+
API Changes
|
| 503 |
+
~~~~~~~~~~~
|
| 504 |
+
|
| 505 |
+
- With the introduction of formulas, all models can be using formulas
|
| 506 |
+
under the hood.
|
| 507 |
+
|
| 508 |
+
- For both custom regression models or non-AFT regression models,
|
| 509 |
+
this means that you no longer need to add a constant column to
|
| 510 |
+
your DataFrame (instead add a ``1`` as a formula string in the
|
| 511 |
+
``regressors`` dict). You may also need to remove the T and E
|
| 512 |
+
columns from ``regressors``. I’ve updated the models in the
|
| 513 |
+
``\examples`` folder with examples of this new model building.
|
| 514 |
+
|
| 515 |
+
- Unfortunately, if using formulas, your model will not be able to be
|
| 516 |
+
pickled. This is a problem with an upstream library, and I hope to
|
| 517 |
+
have it resolved in the near future.
|
| 518 |
+
- ``plot_covariate_groups`` has been deprecated in favour of
|
| 519 |
+
``plot_partial_effects_on_outcome``.
|
| 520 |
+
- The baseline in ``plot_covariate_groups`` has changed from the *mean*
|
| 521 |
+
observation (including dummy-encoded categorical variables) to
|
| 522 |
+
*median* for ordinal (including continuous) and *mode* for
|
| 523 |
+
categorical.
|
| 524 |
+
- Previously, *lifelines* used the label ``"_intercept"`` when it
|
| 525 |
+
added a constant column in regressions. To align with Patsy, we are
|
| 526 |
+
now using ``"Intercept"``.
|
| 527 |
+
- In AFT models, ``ancillary_df`` kwarg has been renamed to
|
| 528 |
+
``ancillary``. This reflects the more general use of the kwarg (not
|
| 529 |
+
always a DataFrame, but could be a boolean or string now, too).
|
| 530 |
+
- Some column names in datasets shipped with lifelines have changed.
|
| 531 |
+
- The never used “lifelines.metrics” is deleted.
|
| 532 |
+
- With the introduction of formulas, ``plot_covariate_groups`` (now
|
| 533 |
+
called ``plot_partial_effects_on_outcome``) behaves differently for
|
| 534 |
+
transformed variables. Users no longer need to add “derivatives”
|
| 535 |
+
features, and encoding is done implicitly. See docs
|
| 536 |
+
`here <https://lifelines.readthedocs.io/en/latest/Survival%20Regression.html#plotting-the-effect-of-varying-a-covariate>`__.
|
| 537 |
+
- all exceptions and warnings have moved to ``lifelines.exceptions``
|
| 538 |
+
|
| 539 |
+
.. _bug-fixes-15:
|
| 540 |
+
|
| 541 |
+
Bug fixes
|
| 542 |
+
~~~~~~~~~
|
| 543 |
+
|
| 544 |
+
- The p-value of the log-likelihood ratio test for the CoxPHFitter with
|
| 545 |
+
splines was returning the wrong result because the degrees of freedom
|
| 546 |
+
was incorrect.
|
| 547 |
+
- better ``print_summary`` logic in IDEs and Jupyter exports.
|
| 548 |
+
Previously it would not be displayed.
|
| 549 |
+
- p-values have been corrected in the ``SplineFitter``. Previously, the
|
| 550 |
+
“null hypothesis” was not coefficient=0, but coefficient=0.01. This is
|
| 551 |
+
now set to the former.
|
| 552 |
+
- fixed NaN bug in ``survival_table_from_events`` with intervals when
|
| 553 |
+
no events would occur in an interval.
|
| 554 |
+
|
| 555 |
+
.. _section-26:
|
| 556 |
+
|
| 557 |
+
0.24.16 - 2020-07-09
|
| 558 |
+
--------------------
|
| 559 |
+
|
| 560 |
+
.. _new-features-14:
|
| 561 |
+
|
| 562 |
+
New features
|
| 563 |
+
~~~~~~~~~~~~
|
| 564 |
+
|
| 565 |
+
- improved algorithm choice for large DataFrames for Cox models. Should
|
| 566 |
+
see a significant performance boost.
|
| 567 |
+
|
| 568 |
+
.. _bug-fixes-16:
|
| 569 |
+
|
| 570 |
+
Bug fixes
|
| 571 |
+
~~~~~~~~~
|
| 572 |
+
|
| 573 |
+
- fixed ``utils.median_survival_time`` not accepting Pandas Series.
|
| 574 |
+
|
| 575 |
+
.. _section-27:
|
| 576 |
+
|
| 577 |
+
0.24.15 - 2020-07-07
|
| 578 |
+
--------------------
|
| 579 |
+
|
| 580 |
+
.. _bug-fixes-17:
|
| 581 |
+
|
| 582 |
+
Bug fixes
|
| 583 |
+
~~~~~~~~~
|
| 584 |
+
|
| 585 |
+
- fixed an edge case in ``KaplanMeierFitter`` where a really late entry
|
| 586 |
+
would occur after the rest of the population had died.
|
| 587 |
+
- fixed ``plot`` in ``BreslowFlemingtonHarrisFitter``
|
| 588 |
+
- fixed bug where using ``conditional_after`` and ``times`` in
|
| 589 |
+
``CoxPHFitter("spline")`` prediction methods would be ignored.
|
| 590 |
+
|
| 591 |
+
.. _section-28:
|
| 592 |
+
|
| 593 |
+
0.24.14 - 2020-07-02
|
| 594 |
+
--------------------
|
| 595 |
+
|
| 596 |
+
.. _bug-fixes-18:
|
| 597 |
+
|
| 598 |
+
Bug fixes
|
| 599 |
+
~~~~~~~~~
|
| 600 |
+
|
| 601 |
+
- fixed a bug where using ``conditional_after`` and ``times`` in
|
| 602 |
+
prediction methods would result in a shape error
|
| 603 |
+
- fixed a bug where ``score`` was not able to be used in splined
|
| 604 |
+
``CoxPHFitter``
|
| 605 |
+
- fixed a bug where some columns would not be displayed in
|
| 606 |
+
``print_summary``
|
| 607 |
+
|
| 608 |
+
.. _section-29:
|
| 609 |
+
|
| 610 |
+
0.24.13 - 2020-06-22
|
| 611 |
+
--------------------
|
| 612 |
+
|
| 613 |
+
.. _bug-fixes-19:
|
| 614 |
+
|
| 615 |
+
Bug fixes
|
| 616 |
+
~~~~~~~~~
|
| 617 |
+
|
| 618 |
+
- fixed a bug where ``CoxPHFitter`` would ignore inputted ``alpha``
|
| 619 |
+
levels for confidence intervals
|
| 620 |
+
- fixed a bug where ``CoxPHFitter`` would fail when working with
|
| 621 |
+
``sklearn_adapter``
|
| 622 |
+
|
| 623 |
+
.. _section-30:
|
| 624 |
+
|
| 625 |
+
0.24.12 - 2020-06-20
|
| 626 |
+
--------------------
|
| 627 |
+
|
| 628 |
+
.. _new-features-15:
|
| 629 |
+
|
| 630 |
+
New features
|
| 631 |
+
~~~~~~~~~~~~
|
| 632 |
+
|
| 633 |
+
- improved convergence of ``GeneralizedGamma(Regression)Fitter``.
|
| 634 |
+
|
| 635 |
+
.. _section-31:
|
| 636 |
+
|
| 637 |
+
0.24.11 - 2020-06-17
|
| 638 |
+
--------------------
|
| 639 |
+
|
| 640 |
+
.. _new-features-16:
|
| 641 |
+
|
| 642 |
+
New features
|
| 643 |
+
~~~~~~~~~~~~
|
| 644 |
+
|
| 645 |
+
- new spline regression model ``CRCSplineFitter`` based on the paper “A
|
| 646 |
+
flexible parametric accelerated failure time model” by Michael J.
|
| 647 |
+
Crowther, Patrick Royston, Mark Clements.
|
| 648 |
+
- new survival probability calibration tool
|
| 649 |
+
``lifelines.calibration.survival_probability_calibration`` to help
|
| 650 |
+
validate regression models. Based on “Graphical calibration curves
|
| 651 |
+
and the integrated calibration index (ICI) for survival models” by P.
|
| 652 |
+
Austin, F. Harrell, and D. van Klaveren.
|
| 653 |
+
|
| 654 |
+
.. _api-changes-9:
|
| 655 |
+
|
| 656 |
+
API Changes
|
| 657 |
+
~~~~~~~~~~~
|
| 658 |
+
|
| 659 |
+
- (and bug fix) scalar parameters in regression models were not being
|
| 660 |
+
penalized by ``penalizer`` - we are now penalizing everything except
|
| 661 |
+
intercept terms in linear relationships.
|
| 662 |
+
|
| 663 |
+
.. _section-32:
|
| 664 |
+
|
| 665 |
+
0.24.10 - 2020-06-16
|
| 666 |
+
--------------------
|
| 667 |
+
|
| 668 |
+
.. _new-features-17:
|
| 669 |
+
|
| 670 |
+
New features
|
| 671 |
+
~~~~~~~~~~~~
|
| 672 |
+
|
| 673 |
+
- New improvements when using splines model in CoxPHFitter - it should
|
| 674 |
+
offer much better prediction and baseline-hazard estimation,
|
| 675 |
+
including extrapolation and interpolation.
|
| 676 |
+
|
| 677 |
+
.. _api-changes-10:
|
| 678 |
+
|
| 679 |
+
API Changes
|
| 680 |
+
~~~~~~~~~~~
|
| 681 |
+
|
| 682 |
+
- Related to above: the fitted spline parameters are now available in
|
| 683 |
+
the ``.summary`` and ``.print_summary`` methods.
|
| 684 |
+
|
| 685 |
+
.. _bug-fixes-20:
|
| 686 |
+
|
| 687 |
+
Bug fixes
|
| 688 |
+
~~~~~~~~~
|
| 689 |
+
|
| 690 |
+
- fixed a bug in initialization of some interval-censoring models ->
|
| 691 |
+
better convergence.
|
| 692 |
+
|
| 693 |
+
.. _section-33:
|
| 694 |
+
|
| 695 |
+
0.24.9 - 2020-06-05
|
| 696 |
+
-------------------
|
| 697 |
+
|
| 698 |
+
.. _new-features-18:
|
| 699 |
+
|
| 700 |
+
New features
|
| 701 |
+
~~~~~~~~~~~~
|
| 702 |
+
|
| 703 |
+
- Faster NPMLE for interval censored data
|
| 704 |
+
- New weightings available in the ``logrank_test``: ``wilcoxon``,
|
| 705 |
+
``tarone-ware``, ``peto``, ``fleming-harrington``. Thanks @sean-reed
|
| 706 |
+
- new interval censored dataset: ``lifelines.datasets.load_mice``
|
| 707 |
+
|
| 708 |
+
.. _bug-fixes-21:
|
| 709 |
+
|
| 710 |
+
Bug fixes
|
| 711 |
+
~~~~~~~~~
|
| 712 |
+
|
| 713 |
+
- Cleared up some mislabeling in ``plot_loglogs``. Thanks @sean-reed!
|
| 714 |
+
- tuples are now able to be used as input in univariate models.
|
| 715 |
+
|
| 716 |
+
.. _section-34:
|
| 717 |
+
|
| 718 |
+
0.24.8 - 2020-05-17
|
| 719 |
+
-------------------
|
| 720 |
+
|
| 721 |
+
.. _new-features-19:
|
| 722 |
+
|
| 723 |
+
New features
|
| 724 |
+
~~~~~~~~~~~~
|
| 725 |
+
|
| 726 |
+
- Non parametric interval censoring is now available, *experimentally*.
|
| 727 |
+
Not all edge cases are fully checked, and some features are missing.
|
| 728 |
+
Try it under ``KaplanMeierFitter.fit_interval_censoring``
|
| 729 |
+
|
| 730 |
+
.. _section-35:
|
| 731 |
+
|
| 732 |
+
0.24.7 - 2020-05-17
|
| 733 |
+
-------------------
|
| 734 |
+
|
| 735 |
+
.. _new-features-20:
|
| 736 |
+
|
| 737 |
+
New features
|
| 738 |
+
~~~~~~~~~~~~
|
| 739 |
+
|
| 740 |
+
- ``find_best_parametric_model`` can handle left and interval
|
| 741 |
+
censoring. Also allows for more fitting options.
|
| 742 |
+
- ``AIC_`` is a property on parametric models, and ``AIC_partial_`` is
|
| 743 |
+
a property on Cox models.
|
| 744 |
+
- ``penalizer`` in all regression models can now be an array instead of
|
| 745 |
+
a float. This enables new functionality and better control over
|
| 746 |
+
penalization. This is similar (but not identical) to
|
| 747 |
+
``penalty.factors`` in glmnet in R.
|
| 748 |
+
- some convergence tweaks which should help recent performance
|
| 749 |
+
regressions.
|
| 750 |
+
|
| 751 |
+
.. _section-36:
|
| 752 |
+
|
| 753 |
+
0.24.6 - 2020-05-05
|
| 754 |
+
-------------------
|
| 755 |
+
|
| 756 |
+
.. _new-features-21:
|
| 757 |
+
|
| 758 |
+
New features
|
| 759 |
+
~~~~~~~~~~~~
|
| 760 |
+
|
| 761 |
+
- At the cost of some performance, convergence is improved in many
|
| 762 |
+
models.
|
| 763 |
+
- New ``lifelines.plotting.plot_interval_censored_lifetimes`` for
|
| 764 |
+
plotting interval censored data - thanks @sean-reed!
|
| 765 |
+
|
| 766 |
+
.. _bug-fixes-22:
|
| 767 |
+
|
| 768 |
+
Bug fixes
|
| 769 |
+
~~~~~~~~~
|
| 770 |
+
|
| 771 |
+
- fixed bug where ``cdf_plot`` and ``qq_plot`` were not factoring in
|
| 772 |
+
the weights correctly.
|
| 773 |
+
|
| 774 |
+
.. _section-37:
|
| 775 |
+
|
| 776 |
+
0.24.5 - 2020-05-01
|
| 777 |
+
-------------------
|
| 778 |
+
|
| 779 |
+
.. _new-features-22:
|
| 780 |
+
|
| 781 |
+
New features
|
| 782 |
+
~~~~~~~~~~~~
|
| 783 |
+
|
| 784 |
+
- ``plot_lifetimes`` accepts pandas Series.
|
| 785 |
+
|
| 786 |
+
.. _bug-fixes-23:
|
| 787 |
+
|
| 788 |
+
Bug fixes
|
| 789 |
+
~~~~~~~~~
|
| 790 |
+
|
| 791 |
+
- Fixed important bug in interval censoring models. Users using
|
| 792 |
+
interval censoring are strongly advised to upgrade.
|
| 793 |
+
- Improved ``at_risk_counts`` for subplots.
|
| 794 |
+
- More data validation checks for ``CoxTimeVaryingFitter``
|
| 795 |
+
|
| 796 |
+
.. _section-38:
|
| 797 |
+
|
| 798 |
+
0.24.4 - 2020-04-13
|
| 799 |
+
-------------------
|
| 800 |
+
|
| 801 |
+
.. _bug-fixes-24:
|
| 802 |
+
|
| 803 |
+
Bug fixes
|
| 804 |
+
~~~~~~~~~
|
| 805 |
+
|
| 806 |
+
- Improved stability of interval censoring in parametric models.
|
| 807 |
+
- setting a dataframe in ``ancillary_df`` works for interval censoring
|
| 808 |
+
- ``.score`` works for interval censored models
|
| 809 |
+
|
| 810 |
+
.. _section-39:
|
| 811 |
+
|
| 812 |
+
0.24.3 - 2020-03-25
|
| 813 |
+
-------------------
|
| 814 |
+
|
| 815 |
+
.. _new-features-23:
|
| 816 |
+
|
| 817 |
+
New features
|
| 818 |
+
~~~~~~~~~~~~
|
| 819 |
+
|
| 820 |
+
- new ``logx`` kwarg in plotting curves
|
| 821 |
+
- PH models have ``compute_followup_hazard_ratios`` for simulating what
|
| 822 |
+
the hazard ratio would be at previous times. This is useful because
|
| 823 |
+
the final hazard ratio is some weighted average of these.
|
| 824 |
+
|
| 825 |
+
.. _bug-fixes-25:
|
| 826 |
+
|
| 827 |
+
Bug fixes
|
| 828 |
+
~~~~~~~~~
|
| 829 |
+
|
| 830 |
+
- Fixed error in HTML printer that was hiding concordance index
|
| 831 |
+
information.
|
| 832 |
+
|
| 833 |
+
.. _section-40:
|
| 834 |
+
|
| 835 |
+
0.24.2 - 2020-03-15
|
| 836 |
+
-------------------
|
| 837 |
+
|
| 838 |
+
.. _bug-fixes-26:
|
| 839 |
+
|
| 840 |
+
Bug fixes
|
| 841 |
+
~~~~~~~~~
|
| 842 |
+
|
| 843 |
+
- Fixed bug when no covariates were passed into ``CoxPHFitter``. See
|
| 844 |
+
#975
|
| 845 |
+
- Fixed error in ``StatisticalResult`` where the test name was not
|
| 846 |
+
displayed correctly.
|
| 847 |
+
- Fixed a keyword bug in ``plot_covariate_groups`` for parametric
|
| 848 |
+
models.
|
| 849 |
+
|
| 850 |
+
.. _section-41:
|
| 851 |
+
|
| 852 |
+
0.24.1 - 2020-03-05
|
| 853 |
+
-------------------
|
| 854 |
+
|
| 855 |
+
.. _new-features-24:
|
| 856 |
+
|
| 857 |
+
New features
|
| 858 |
+
~~~~~~~~~~~~
|
| 859 |
+
|
| 860 |
+
- Stability improvements for GeneralizedGammaRegressionFitter and
|
| 861 |
+
CoxPHFitter with spline estimation.
|
| 862 |
+
|
| 863 |
+
.. _bug-fixes-27:
|
| 864 |
+
|
| 865 |
+
Bug fixes
|
| 866 |
+
~~~~~~~~~
|
| 867 |
+
|
| 868 |
+
- Fixed bug with plotting hazards in NelsonAalenFitter.
|
| 869 |
+
|
| 870 |
+
.. _section-42:
|
| 871 |
+
|
| 872 |
+
0.24.0 - 2020-02-20
|
| 873 |
+
-------------------
|
| 874 |
+
|
| 875 |
+
This version and future versions of lifelines no longer support py35.
|
| 876 |
+
Pandas 1.0 is fully supported, along with previous versions. Minimum
|
| 877 |
+
Scipy has been bumped to 1.2.0.
|
| 878 |
+
|
| 879 |
+
.. _new-features-25:
|
| 880 |
+
|
| 881 |
+
New features
|
| 882 |
+
~~~~~~~~~~~~
|
| 883 |
+
|
| 884 |
+
- ``CoxPHFitter`` and ``CoxTimeVaryingFitter`` have support for an
|
| 885 |
+
elastic net penalty, which includes L1 and L2 regression.
|
| 886 |
+
- ``CoxPHFitter`` has new baseline survival estimation methods.
|
| 887 |
+
Specifically, ``spline`` now estimates the coefficients and baseline
|
| 888 |
+
survival using splines. The traditional method, ``breslow``, is still
|
| 889 |
+
the default however.
|
| 890 |
+
- Regression models have a new ``score`` method that will score your
|
| 891 |
+
model against a dataset (ex: a testing or validation dataset). The
|
| 892 |
+
default is to evaluate the log-likelihood, but also the concordance
|
| 893 |
+
index can be chosen.
|
| 894 |
+
- New ``MixtureCureFitter`` for quickly creating univariate mixture
|
| 895 |
+
models.
|
| 896 |
+
- Univariate parametric models have a ``plot_density``,
|
| 897 |
+
``density_at_times``, and property ``density_`` that computes the
|
| 898 |
+
probability density function estimates.
|
| 899 |
+
- new dataset for interval regression involving *C. Botulinum*.
|
| 900 |
+
- new ``lifelines.fitters.mixins.ProportionalHazardMixin`` that
|
| 901 |
+
implements proportional hazard checks.
|
| 902 |
+
|
| 903 |
+
.. _api-changes-11:
|
| 904 |
+
|
| 905 |
+
API Changes
|
| 906 |
+
~~~~~~~~~~~
|
| 907 |
+
|
| 908 |
+
- Models’ prediction methods that return a single array now return a
|
| 909 |
+
Series (used to return a DataFrame). This includes ``predict_median``,
|
| 910 |
+
``predict_percentile``, ``predict_expectation``,
|
| 911 |
+
``predict_log_partial_hazard``, and possibly others.
|
| 912 |
+
- The penalty in Cox models is now scaled by the number of
|
| 913 |
+
observations. This makes it invariant to changing sample sizes. This
|
| 914 |
+
change also makes the penalty magnitude behave the same as any
|
| 915 |
+
parametric regression model.
|
| 916 |
+
- ``score_`` on models has been renamed ``concordance_index_``
|
| 917 |
+
- models’ ``.variance_matrix_`` is now a DataFrame.
|
| 918 |
+
- ``CoxTimeVaryingFitter`` no longer requires an ``id_col``. It’s
|
| 919 |
+
optional, and some checks may be done for integrity if provided.
|
| 920 |
+
- Significant changes to ``utils.k_fold_cross_validation``.
|
| 921 |
+
- removed automatically adding ``inf`` from
|
| 922 |
+
``PiecewiseExponentialRegressionFitter.breakpoints`` and
|
| 923 |
+
``PiecewiseExponentialFitter.breakpoints``
|
| 924 |
+
- ``tie_method`` was dropped from Cox models (it was always Efron
|
| 925 |
+
anyways…)
|
| 926 |
+
- Mixins are moved to ``lifelines.fitters.mixins``
|
| 927 |
+
- ``find_best_parametric_model`` ``evaluation`` kwarg has been changed
|
| 928 |
+
to ``scoring_method``.
|
| 929 |
+
- removed ``_score_`` and ``path`` from Cox model.
|
| 930 |
+
|
| 931 |
+
.. _bug-fixes-28:
|
| 932 |
+
|
| 933 |
+
Bug fixes
|
| 934 |
+
~~~~~~~~~
|
| 935 |
+
|
| 936 |
+
- Fixed ``show_censors`` with
|
| 937 |
+
``KaplanMeierFitter.plot_cumulative_density`` see issue #940.
|
| 938 |
+
- Fixed error in ``"BIC"`` code path in ``find_best_parametric_model``
|
| 939 |
+
- Fixed a bug where left censoring in AFT models was not converging
|
| 940 |
+
well
|
| 941 |
+
- Cox models now incorporate any penalizers in their
|
| 942 |
+
``log_likelihood_``
|
| 943 |
+
|
| 944 |
+
.. _section-43:
|
| 945 |
+
|
| 946 |
+
0.23.9 - 2020-01-28
|
| 947 |
+
-------------------
|
| 948 |
+
|
| 949 |
+
.. _bug-fixes-29:
|
| 950 |
+
|
| 951 |
+
Bug fixes
|
| 952 |
+
~~~~~~~~~
|
| 953 |
+
|
| 954 |
+
- fixed important error when a parametric regression model would not
|
| 955 |
+
assign the correct labels to fitted parameters’ variances. See more
|
| 956 |
+
here: https://github.com/CamDavidsonPilon/lifelines/issues/931. Users
|
| 957 |
+
of ``GeneralizedGammaRegressionFitter`` and any custom regression
|
| 958 |
+
models should update their code as soon as possible.
|
| 959 |
+
|
| 960 |
+
.. _section-44:
|
| 961 |
+
|
| 962 |
+
0.23.8 - 2020-01-21
|
| 963 |
+
-------------------
|
| 964 |
+
|
| 965 |
+
.. _bug-fixes-30:
|
| 966 |
+
|
| 967 |
+
Bug fixes
|
| 968 |
+
~~~~~~~~~
|
| 969 |
+
|
| 970 |
+
- fixed important error when a parametric regression model would not
|
| 971 |
+
assign the correct labels to fitted parameters. See more here:
|
| 972 |
+
https://github.com/CamDavidsonPilon/lifelines/issues/931. Users of
|
| 973 |
+
``GeneralizedGammaRegressionFitter`` and any custom regression models
|
| 974 |
+
should update their code as soon as possible.
|
| 975 |
+
|
| 976 |
+
.. _section-45:
|
| 977 |
+
|
| 978 |
+
0.23.7 - 2020-01-14
|
| 979 |
+
-------------------
|
| 980 |
+
|
| 981 |
+
Bug fixes for py3.5.
|
| 982 |
+
|
| 983 |
+
.. _section-46:
|
| 984 |
+
|
| 985 |
+
0.23.6 - 2020-01-07
|
| 986 |
+
-------------------
|
| 987 |
+
|
| 988 |
+
.. _new-features-26:
|
| 989 |
+
|
| 990 |
+
New features
|
| 991 |
+
~~~~~~~~~~~~
|
| 992 |
+
|
| 993 |
+
- New univariate model, ``SplineFitter``, that uses cubic splines to
|
| 994 |
+
model the cumulative hazard.
|
| 995 |
+
- To aid users with selecting the best parametric model, there is a new
|
| 996 |
+
``lifelines.utils.find_best_parametric_model`` function that will
|
| 997 |
+
iterate through the models and return the model with the lowest AIC
|
| 998 |
+
(by default).
|
| 999 |
+
- custom parametric regression models can now do left and interval
|
| 1000 |
+
censoring.
|
| 1001 |
+
|
| 1002 |
+
.. _section-47:
|
| 1003 |
+
|
| 1004 |
+
0.23.5 - 2020-01-05
|
| 1005 |
+
-------------------
|
| 1006 |
+
|
| 1007 |
+
.. _new-features-27:
|
| 1008 |
+
|
| 1009 |
+
New features
|
| 1010 |
+
~~~~~~~~~~~~
|
| 1011 |
+
|
| 1012 |
+
- New ``predict_hazard`` for parametric regression models.
|
| 1013 |
+
- New lymph node cancer dataset, originally from *H.F. for the German
|
| 1014 |
+
Breast Cancer Study Group (GBSG) (1994)*
|
| 1015 |
+
|
| 1016 |
+
.. _bug-fixes-31:
|
| 1017 |
+
|
| 1018 |
+
Bug fixes
|
| 1019 |
+
~~~~~~~~~
|
| 1020 |
+
|
| 1021 |
+
- fixes error thrown when convergence of regression models fails.
|
| 1022 |
+
- ``kwargs`` is now used in ``plot_covariate_groups``
|
| 1023 |
+
- fixed bug where large exponential numbers in ``print_summary`` were
|
| 1024 |
+
not being suppressed correctly.
|
| 1025 |
+
|
| 1026 |
+
.. _section-48:
|
| 1027 |
+
|
| 1028 |
+
0.23.4 - 2019-12-15
|
| 1029 |
+
-------------------
|
| 1030 |
+
|
| 1031 |
+
- Bug fix for PyPI
|
| 1032 |
+
|
| 1033 |
+
.. _section-49:
|
| 1034 |
+
|
| 1035 |
+
0.23.3 - 2019-12-11
|
| 1036 |
+
-------------------
|
| 1037 |
+
|
| 1038 |
+
.. _new-features-28:
|
| 1039 |
+
|
| 1040 |
+
New features
|
| 1041 |
+
~~~~~~~~~~~~
|
| 1042 |
+
|
| 1043 |
+
- ``StatisticalResult.print_summary`` supports html output.
|
| 1044 |
+
|
| 1045 |
+
.. _bug-fixes-32:
|
| 1046 |
+
|
| 1047 |
+
Bug fixes
|
| 1048 |
+
~~~~~~~~~
|
| 1049 |
+
|
| 1050 |
+
- fix import in ``printer.py``
|
| 1051 |
+
- fix html printing with Univariate models.
|
| 1052 |
+
|
| 1053 |
+
.. _section-50:
|
| 1054 |
+
|
| 1055 |
+
0.23.2 - 2019-12-07
|
| 1056 |
+
-------------------
|
| 1057 |
+
|
| 1058 |
+
.. _new-features-29:
|
| 1059 |
+
|
| 1060 |
+
New features
|
| 1061 |
+
~~~~~~~~~~~~
|
| 1062 |
+
|
| 1063 |
+
- new ``lifelines.plotting.rmst_plot`` for pretty figures of survival
|
| 1064 |
+
curves and RMSTs.
|
| 1065 |
+
- new variance calculations for
|
| 1066 |
+
``lifelines.utils.restricted_mean_survival_time``
|
| 1067 |
+
- performance improvements on regression models’ preprocessing. Should
|
| 1068 |
+
make datasets with high number of columns more performant.
|
| 1069 |
+
|
| 1070 |
+
.. _bug-fixes-33:
|
| 1071 |
+
|
| 1072 |
+
Bug fixes
|
| 1073 |
+
~~~~~~~~~
|
| 1074 |
+
|
| 1075 |
+
- fixed ``print_summary`` for AAF class.
|
| 1076 |
+
- fixed repr for ``sklearn_adapter`` classes.
|
| 1077 |
+
- fixed ``conditional_after`` in Cox model when strata was used.
|
| 1078 |
+
|
| 1079 |
+
.. _section-51:
|
| 1080 |
+
|
| 1081 |
+
0.23.1 - 2019-11-27
|
| 1082 |
+
-------------------
|
| 1083 |
+
|
| 1084 |
+
.. _new-features-30:
|
| 1085 |
+
|
| 1086 |
+
New features
|
| 1087 |
+
~~~~~~~~~~~~
|
| 1088 |
+
|
| 1089 |
+
- new ``print_summary`` option ``style`` to print HTML, LaTeX or ASCII
|
| 1090 |
+
output
|
| 1091 |
+
- performance improvements for ``CoxPHFitter`` - up to 30% performance
|
| 1092 |
+
improvements for some datasets.
|
| 1093 |
+
|
| 1094 |
+
.. _bug-fixes-34:
|
| 1095 |
+
|
| 1096 |
+
Bug fixes
|
| 1097 |
+
~~~~~~~~~
|
| 1098 |
+
|
| 1099 |
+
- fixed bug where computed statistics were not being shown in
|
| 1100 |
+
``print_summary`` for HTML output.
|
| 1101 |
+
- fixed bug where “None” was displayed in models’ ``__repr__``
|
| 1102 |
+
- fixed bug in ``StatisticalResult.print_summary``
|
| 1103 |
+
- fixed bug when using ``print_summary`` with left censored models.
|
| 1104 |
+
- lots of minor bug fixes.
|
| 1105 |
+
|
| 1106 |
+
.. _section-52:
|
| 1107 |
+
|
| 1108 |
+
0.23.0 - 2019-11-17
|
| 1109 |
+
-------------------
|
| 1110 |
+
|
| 1111 |
+
.. _new-features-31:
|
| 1112 |
+
|
| 1113 |
+
New features
|
| 1114 |
+
~~~~~~~~~~~~
|
| 1115 |
+
|
| 1116 |
+
- new ``print_summary`` abstraction that allows HTML printing in
|
| 1117 |
+
Jupyter notebooks!
|
| 1118 |
+
- silenced some warnings.
|
| 1119 |
+
|
| 1120 |
+
.. _bug-fixes-35:
|
| 1121 |
+
|
| 1122 |
+
Bug fixes
|
| 1123 |
+
~~~~~~~~~
|
| 1124 |
+
|
| 1125 |
+
- The “comparison” value of some parametric univariate models wasn’t
|
| 1126 |
+
standard, so the null hypothesis p-value may have been wrong. This is
|
| 1127 |
+
now fixed.
|
| 1128 |
+
- fixed a NaN error in confidence intervals for KaplanMeierFitter
|
| 1129 |
+
|
| 1130 |
+
.. _api-changes-12:
|
| 1131 |
+
|
| 1132 |
+
API Changes
|
| 1133 |
+
~~~~~~~~~~~
|
| 1134 |
+
|
| 1135 |
+
- To align values across models, the column names for the confidence
|
| 1136 |
+
intervals in parametric univariate models ``summary`` have changed.
|
| 1137 |
+
- Fixed typo in ``ParametricUnivariateFitter`` name.
|
| 1138 |
+
- ``median_`` has been removed in favour of ``median_survival_time_``.
|
| 1139 |
+
- ``left_censorship`` in ``fit`` has been removed in favour of
|
| 1140 |
+
``fit_left_censoring``.
|
| 1141 |
+
|
| 1142 |
+
.. _section-53:
|
| 1143 |
+
|
| 1144 |
+
0.22.10 - 2019-11-08
|
| 1145 |
+
--------------------
|
| 1146 |
+
|
| 1147 |
+
The tests were re-factored to be shipped with the package. Let me know
|
| 1148 |
+
if this causes problems.
|
| 1149 |
+
|
| 1150 |
+
.. _bug-fixes-36:
|
| 1151 |
+
|
| 1152 |
+
Bug fixes
|
| 1153 |
+
~~~~~~~~~
|
| 1154 |
+
|
| 1155 |
+
- fixed error in plotting models when “lower” or “upper” was in the
|
| 1156 |
+
label name.
|
| 1157 |
+
- fixed bug in plot_covariate_groups for AFT models when >1d arrays
|
| 1158 |
+
were used for values arg.
|
| 1159 |
+
|
| 1160 |
+
.. _section-54:
|
| 1161 |
+
|
| 1162 |
+
0.22.9 - 2019-10-30
|
| 1163 |
+
-------------------
|
| 1164 |
+
|
| 1165 |
+
.. _bug-fixes-37:
|
| 1166 |
+
|
| 1167 |
+
Bug fixes
|
| 1168 |
+
~~~~~~~~~
|
| 1169 |
+
|
| 1170 |
+
- fixed ``predict_`` methods in AFT models when ``timeline`` was not
|
| 1171 |
+
specified.
|
| 1172 |
+
- fixed error in ``qq_plot``
|
| 1173 |
+
- fixed error when submitting a model in ``qth_survival_time``
|
| 1174 |
+
- ``CoxPHFitter`` now displays correct columns values when changing
|
| 1175 |
+
alpha param.
|
| 1176 |
+
|
| 1177 |
+
.. _section-55:
|
| 1178 |
+
|
| 1179 |
+
0.22.8 - 2019-10-06
|
| 1180 |
+
-------------------
|
| 1181 |
+
|
| 1182 |
+
.. _new-features-32:
|
| 1183 |
+
|
| 1184 |
+
New features
|
| 1185 |
+
~~~~~~~~~~~~
|
| 1186 |
+
|
| 1187 |
+
- Serializing lifelines is better supported. Packages like joblib and
|
| 1188 |
+
pickle are now supported. Thanks @AbdealiJK!
|
| 1189 |
+
- ``conditional_after`` now available in ``CoxPHFitter.predict_median``
|
| 1190 |
+
- Suppressed some unimportant warnings.
|
| 1191 |
+
|
| 1192 |
+
.. _bug-fixes-38:
|
| 1193 |
+
|
| 1194 |
+
Bug fixes
|
| 1195 |
+
~~~~~~~~~
|
| 1196 |
+
|
| 1197 |
+
- fixed initial_point being ignored in AFT models.
|
| 1198 |
+
|
| 1199 |
+
.. _section-56:
|
| 1200 |
+
|
| 1201 |
+
0.22.7 - 2019-09-29
|
| 1202 |
+
-------------------
|
| 1203 |
+
|
| 1204 |
+
.. _new-features-33:
|
| 1205 |
+
|
| 1206 |
+
New features
|
| 1207 |
+
~~~~~~~~~~~~
|
| 1208 |
+
|
| 1209 |
+
- new ``ApproximationWarning`` to tell you if the package is making an
|
| 1210 |
+
potentially misleading approximation.
|
| 1211 |
+
|
| 1212 |
+
.. _bug-fixes-39:
|
| 1213 |
+
|
| 1214 |
+
Bug fixes
|
| 1215 |
+
~~~~~~~~~
|
| 1216 |
+
|
| 1217 |
+
- fixed a bug in parametric prediction for interval censored data.
|
| 1218 |
+
- realigned values in ``print_summary``.
|
| 1219 |
+
- fixed bug in ``survival_difference_at_fixed_point_in_time_test``
|
| 1220 |
+
|
| 1221 |
+
.. _api-changes-13:
|
| 1222 |
+
|
| 1223 |
+
API Changes
|
| 1224 |
+
~~~~~~~~~~~
|
| 1225 |
+
|
| 1226 |
+
- ``utils.qth_survival_time`` no longer takes a ``cdf`` argument -
|
| 1227 |
+
users should take the complement (1-cdf).
|
| 1228 |
+
- Some previous ``StatisticalWarnings`` have been replaced by
|
| 1229 |
+
``ApproximationWarning``
|
| 1230 |
+
|
| 1231 |
+
.. _section-57:
|
| 1232 |
+
|
| 1233 |
+
0.22.6 - 2019-09-25
|
| 1234 |
+
-------------------
|
| 1235 |
+
|
| 1236 |
+
.. _new-features-34:
|
| 1237 |
+
|
| 1238 |
+
New features
|
| 1239 |
+
~~~~~~~~~~~~
|
| 1240 |
+
|
| 1241 |
+
- ``conditional_after`` works for ``CoxPHFitter`` prediction models 😅
|
| 1242 |
+
|
| 1243 |
+
.. _bug-fixes-40:
|
| 1244 |
+
|
| 1245 |
+
Bug fixes
|
| 1246 |
+
~~~~~~~~~
|
| 1247 |
+
|
| 1248 |
+
.. _api-changes-14:
|
| 1249 |
+
|
| 1250 |
+
API Changes
|
| 1251 |
+
~~~~~~~~~~~
|
| 1252 |
+
|
| 1253 |
+
- ``CoxPHFitter.baseline_cumulative_hazard_``\ ’s column is renamed
|
| 1254 |
+
``"baseline cumulative hazard"`` - previously it was
|
| 1255 |
+
``"baseline hazard"``. (Only applies if the model has no strata.)
|
| 1256 |
+
- ``utils.dataframe_interpolate_at_times`` renamed to
|
| 1257 |
+
``utils.interpolate_at_times_and_return_pandas``.
|
| 1258 |
+
|
| 1259 |
+
.. _section-58:
|
| 1260 |
+
|
| 1261 |
+
0.22.5 - 2019-09-20
|
| 1262 |
+
-------------------
|
| 1263 |
+
|
| 1264 |
+
.. _new-features-35:
|
| 1265 |
+
|
| 1266 |
+
New features
|
| 1267 |
+
~~~~~~~~~~~~
|
| 1268 |
+
|
| 1269 |
+
- Improvements to the ``__repr__`` of models that take into account
|
| 1270 |
+
weights.
|
| 1271 |
+
- Better support for predicting on Pandas Series
|
| 1272 |
+
|
| 1273 |
+
.. _bug-fixes-41:
|
| 1274 |
+
|
| 1275 |
+
Bug fixes
|
| 1276 |
+
~~~~~~~~~
|
| 1277 |
+
|
| 1278 |
+
- Fixed issue where ``fit_interval_censoring`` wouldn’t accept lists.
|
| 1279 |
+
- Fixed an issue with ``AalenJohansenFitter`` failing to plot
|
| 1280 |
+
confidence intervals.
|
| 1281 |
+
|
| 1282 |
+
.. _api-changes-15:
|
| 1283 |
+
|
| 1284 |
+
API Changes
|
| 1285 |
+
~~~~~~~~~~~
|
| 1286 |
+
|
| 1287 |
+
- ``_get_initial_value`` in parametric univariate models is renamed
|
| 1288 |
+
``_create_initial_point``
|
| 1289 |
+
|
| 1290 |
+
.. _section-59:
|
| 1291 |
+
|
| 1292 |
+
0.22.4 - 2019-09-04
|
| 1293 |
+
-------------------
|
| 1294 |
+
|
| 1295 |
+
.. _new-features-36:
|
| 1296 |
+
|
| 1297 |
+
New features
|
| 1298 |
+
~~~~~~~~~~~~
|
| 1299 |
+
|
| 1300 |
+
- Some performance improvements to regression models.
|
| 1301 |
+
- lifelines will avoid penalizing the intercept (aka bias) variables in
|
| 1302 |
+
regression models.
|
| 1303 |
+
- new ``utils.restricted_mean_survival_time`` that approximates the
|
| 1304 |
+
RMST using numerical integration against survival functions.
|
| 1305 |
+
|
| 1306 |
+
.. _api-changes-16:
|
| 1307 |
+
|
| 1308 |
+
API changes
|
| 1309 |
+
~~~~~~~~~~~
|
| 1310 |
+
|
| 1311 |
+
- ``KaplanMeierFitter.survival_function_``\ ’s index is no longer
|
| 1312 |
+
given the name “timeline”.
|
| 1313 |
+
|
| 1314 |
+
.. _bug-fixes-42:
|
| 1315 |
+
|
| 1316 |
+
Bug fixes
|
| 1317 |
+
~~~~~~~~~
|
| 1318 |
+
|
| 1319 |
+
- Fixed issue where ``concordance_index`` would never exit if NaNs in
|
| 1320 |
+
dataset.
|
| 1321 |
+
|
| 1322 |
+
.. _section-60:
|
| 1323 |
+
|
| 1324 |
+
0.22.3 - 2019-08-08
|
| 1325 |
+
-------------------
|
| 1326 |
+
|
| 1327 |
+
.. _new-features-37:
|
| 1328 |
+
|
| 1329 |
+
New features
|
| 1330 |
+
~~~~~~~~~~~~
|
| 1331 |
+
|
| 1332 |
+
- models now expose a ``log_likelihood_`` property.
|
| 1333 |
+
- new ``conditional_after`` argument on ``predict_*`` methods that make
|
| 1334 |
+
prediction on censored subjects easier.
|
| 1335 |
+
- new ``lifelines.utils.safe_exp`` to make ``exp`` overflows easier to
|
| 1336 |
+
handle.
|
| 1337 |
+
- smarter initial conditions for parametric regression models.
|
| 1338 |
+
- New regression model: ``GeneralizedGammaRegressionFitter``
|
| 1339 |
+
|
| 1340 |
+
.. _api-changes-17:
|
| 1341 |
+
|
| 1342 |
+
API changes
|
| 1343 |
+
~~~~~~~~~~~
|
| 1344 |
+
|
| 1345 |
+
- removed ``lifelines.utils.gamma`` - use ``autograd_gamma`` library
|
| 1346 |
+
instead.
|
| 1347 |
+
- removed bottleneck as a dependency. It offered slight performance
|
| 1348 |
+
gains only in Cox models, and only a small fraction of the API was
|
| 1349 |
+
being used.
|
| 1350 |
+
|
| 1351 |
+
.. _bug-fixes-43:
|
| 1352 |
+
|
| 1353 |
+
Bug fixes
|
| 1354 |
+
~~~~~~~~~
|
| 1355 |
+
|
| 1356 |
+
- AFT log-likelihood ratio test was not using weights correctly.
|
| 1357 |
+
- corrected (by bumping) scipy and autograd dependencies
|
| 1358 |
+
- convergence is improved for most models, and many ``exp`` overflow
|
| 1359 |
+
warnings have been eliminated.
|
| 1360 |
+
- Fixed an error in the ``predict_percentile`` of
|
| 1361 |
+
``LogLogisticAFTFitter``. New tests have been added around this.
|
| 1362 |
+
|
| 1363 |
+
.. _section-61:
|
| 1364 |
+
|
| 1365 |
+
0.22.2 - 2019-07-25
|
| 1366 |
+
-------------------
|
| 1367 |
+
|
| 1368 |
+
.. _new-features-38:
|
| 1369 |
+
|
| 1370 |
+
New features
|
| 1371 |
+
~~~~~~~~~~~~
|
| 1372 |
+
|
| 1373 |
+
- lifelines is now compatible with scipy>=1.3.0
|
| 1374 |
+
|
| 1375 |
+
.. _bug-fixes-44:
|
| 1376 |
+
|
| 1377 |
+
Bug fixes
|
| 1378 |
+
~~~~~~~~~
|
| 1379 |
+
|
| 1380 |
+
- fixed printing error when using robust=True in regression models
|
| 1381 |
+
- ``GeneralizedGammaFitter`` is more stable, maybe.
|
| 1382 |
+
- lifelines was allowing an old version of numpy (1.6), but this caused
|
| 1383 |
+
errors when using the library. The correct numpy version has been pinned
|
| 1384 |
+
(to 1.14.0+)
|
| 1385 |
+
|
| 1386 |
+
.. _section-62:
|
| 1387 |
+
|
| 1388 |
+
0.22.1 - 2019-07-14
|
| 1389 |
+
-------------------
|
| 1390 |
+
|
| 1391 |
+
.. _new-features-39:
|
| 1392 |
+
|
| 1393 |
+
New features
|
| 1394 |
+
~~~~~~~~~~~~
|
| 1395 |
+
|
| 1396 |
+
- New univariate model, ``GeneralizedGammaFitter``. This model contains
|
| 1397 |
+
many sub-models, so it is a good model to check fits.
|
| 1398 |
+
- added a warning when a time-varying dataset had instantaneous deaths.
|
| 1399 |
+
- added an ``initial_point`` option in univariate parametric fitters.
|
| 1400 |
+
- ``initial_point`` kwarg is present in parametric univariate fitters
|
| 1401 |
+
``.fit``
|
| 1402 |
+
- ``event_table`` is now an attribute on all univariate fitters (if
|
| 1403 |
+
right censoring)
|
| 1404 |
+
- improvements to ``lifelines.utils.gamma``
|
| 1405 |
+
|
| 1406 |
+
.. _api-changes-18:
|
| 1407 |
+
|
| 1408 |
+
API changes
|
| 1409 |
+
~~~~~~~~~~~
|
| 1410 |
+
|
| 1411 |
+
- In AFT models, the column names in ``confidence_intervals_`` have
|
| 1412 |
+
changed to include the alpha value.
|
| 1413 |
+
- In AFT models, some column names in ``.summary`` and
|
| 1414 |
+
``.print_summary`` have changed to include the alpha value.
|
| 1415 |
+
- In AFT models, some column names in ``.summary`` and
|
| 1416 |
+
``.print_summary`` include confidence intervals for the exponential
|
| 1417 |
+
of the value.
|
| 1418 |
+
|
| 1419 |
+
.. _bug-fixes-45:
|
| 1420 |
+
|
| 1421 |
+
Bug fixes
|
| 1422 |
+
~~~~~~~~~
|
| 1423 |
+
|
| 1424 |
+
- when using ``censors_show`` in plotting functions, the censor ticks
|
| 1425 |
+
are now reactive to the estimate being shown.
|
| 1426 |
+
- fixed an overflow bug in ``KaplanMeierFitter`` confidence intervals
|
| 1427 |
+
- improvements in data validation for ``CoxTimeVaryingFitter``
|
| 1428 |
+
|
| 1429 |
+
.. _section-63:
|
| 1430 |
+
|
| 1431 |
+
0.22.0 - 2019-07-03
|
| 1432 |
+
-------------------
|
| 1433 |
+
|
| 1434 |
+
.. _new-features-40:
|
| 1435 |
+
|
| 1436 |
+
New features
|
| 1437 |
+
~~~~~~~~~~~~
|
| 1438 |
+
|
| 1439 |
+
- Ability to create custom parametric regression models by specifying
|
| 1440 |
+
the cumulative hazard. This enables new AFT models and extensions of existing ones.
|
| 1441 |
+
- ``percentile(p)`` method added to univariate models that solves the
|
| 1442 |
+
equation ``p = S(t)`` for ``t``
|
| 1443 |
+
- for parametric univariate models, the ``conditional_time_to_event_``
|
| 1444 |
+
is now exact instead of an approximation.
|
| 1445 |
+
|
| 1446 |
+
.. _api-changes-19:
|
| 1447 |
+
|
| 1448 |
+
API changes
|
| 1449 |
+
~~~~~~~~~~~
|
| 1450 |
+
|
| 1451 |
+
- In Cox models, the attribute ``hazards_`` has been renamed to
|
| 1452 |
+
``params_``. This aligns better with the other regression models, and
|
| 1453 |
+
is more clear (what is a hazard anyways?)
|
| 1454 |
+
- In Cox models, a new ``hazard_ratios_`` attribute is available which
|
| 1455 |
+
is the exponentiation of ``params_``.
|
| 1456 |
+
- In Cox models, the column names in ``confidence_intervals_`` have
|
| 1457 |
+
changed to include the alpha value.
|
| 1458 |
+
- In Cox models, some column names in ``.summary`` and
|
| 1459 |
+
``.print_summary`` have changed to include the alpha value.
|
| 1460 |
+
- In Cox models, some column names in ``.summary`` and
|
| 1461 |
+
``.print_summary`` include confidence intervals for the exponential
|
| 1462 |
+
of the value.
|
| 1463 |
+
- Significant changes to internal AFT code.
|
| 1464 |
+
- A change to how ``fit_intercept`` works in AFT models. Previously one
|
| 1465 |
+
could set ``fit_intercept`` to False and not have to set
|
| 1466 |
+
``ancillary_df`` - now one must specify a DataFrame.
|
| 1467 |
+
|
| 1468 |
+
.. _bug-fixes-46:
|
| 1469 |
+
|
| 1470 |
+
Bug fixes
|
| 1471 |
+
~~~~~~~~~
|
| 1472 |
+
|
| 1473 |
+
- for parametric univariate models, the ``conditional_time_to_event_``
|
| 1474 |
+
is now exact instead of an approximation.
|
| 1475 |
+
- fixed a name error bug in ``CoxTimeVaryingFitter.plot``
|
| 1476 |
+
|
| 1477 |
+
.. _section-64:
|
| 1478 |
+
|
| 1479 |
+
0.21.5 - 2019-06-22
|
| 1480 |
+
-------------------
|
| 1481 |
+
|
| 1482 |
+
I’m skipping 0.21.4 version because of deployment issues.
|
| 1483 |
+
|
| 1484 |
+
.. _new-features-41:
|
| 1485 |
+
|
| 1486 |
+
New features
|
| 1487 |
+
~~~~~~~~~~~~
|
| 1488 |
+
|
| 1489 |
+
- ``scoring_method`` now a kwarg on ``sklearn_adapter``
|
| 1490 |
+
|
| 1491 |
+
.. _bug-fixes-47:
|
| 1492 |
+
|
| 1493 |
+
Bug fixes
|
| 1494 |
+
~~~~~~~~~
|
| 1495 |
+
|
| 1496 |
+
- fixed an implicit import of scikit-learn. scikit-learn is an optional
|
| 1497 |
+
package.
|
| 1498 |
+
- fixed visual bug that misaligned x-axis ticks and at-risk counts.
|
| 1499 |
+
Thanks @christopherahern!
|
| 1500 |
+
|
| 1501 |
+
.. _section-65:
|
| 1502 |
+
|
| 1503 |
+
0.21.3 - 2019-06-04
|
| 1504 |
+
-------------------
|
| 1505 |
+
|
| 1506 |
+
.. _new-features-42:
|
| 1507 |
+
|
| 1508 |
+
New features
|
| 1509 |
+
~~~~~~~~~~~~
|
| 1510 |
+
|
| 1511 |
+
- included in lifelines is a scikit-learn adapter so lifelines’ models
|
| 1512 |
+
can be used with scikit-learn’s API. See `documentation
|
| 1513 |
+
here <https://lifelines.readthedocs.io/en/latest/Compatibility%20with%20scikit-learn.html>`__.
|
| 1514 |
+
- ``CoxPHFitter.plot`` now accepts a ``hazard_ratios`` (boolean)
|
| 1515 |
+
parameter that will plot the hazard ratios (and CIs) instead of the
|
| 1516 |
+
log-hazard ratios.
|
| 1517 |
+
- ``CoxPHFitter.check_assumptions`` now accepts a ``columns`` parameter
|
| 1518 |
+
to specify only checking a subset of columns.
|
| 1519 |
+
|
| 1520 |
+
.. _bug-fixes-48:
|
| 1521 |
+
|
| 1522 |
+
Bug fixes
|
| 1523 |
+
~~~~~~~~~
|
| 1524 |
+
|
| 1525 |
+
- ``covariates_from_event_matrix`` handle nulls better
|
| 1526 |
+
|
| 1527 |
+
.. _section-66:
|
| 1528 |
+
|
| 1529 |
+
0.21.2 - 2019-05-16
|
| 1530 |
+
-------------------
|
| 1531 |
+
|
| 1532 |
+
.. _new-features-43:
|
| 1533 |
+
|
| 1534 |
+
New features
|
| 1535 |
+
~~~~~~~~~~~~
|
| 1536 |
+
|
| 1537 |
+
- New regression model: ``PiecewiseExponentialRegressionFitter`` is
|
| 1538 |
+
available. See blog post here:
|
| 1539 |
+
https://dataorigami.net/blogs/napkin-folding/churn
|
| 1540 |
+
- Regression models have a new method ``log_likelihood_ratio_test``
|
| 1541 |
+
that computes, you guessed it, the log-likelihood ratio test.
|
| 1542 |
+
Previously this was an internal API that is being exposed.
|
| 1543 |
+
|
| 1544 |
+
.. _api-changes-20:
|
| 1545 |
+
|
| 1546 |
+
API changes
|
| 1547 |
+
~~~~~~~~~~~
|
| 1548 |
+
|
| 1549 |
+
- The default behavior of the ``predict`` method on non-parametric
|
| 1550 |
+
estimators (``KaplanMeierFitter``, etc.) has changed from (previous)
|
| 1551 |
+
linear interpolation to (new) return last value. Linear interpolation
|
| 1552 |
+
is still possible with the ``interpolate`` flag.
|
| 1553 |
+
- removing ``_compute_likelihood_ratio_test`` on regression models. Use
|
| 1554 |
+
``log_likelihood_ratio_test`` now.
|
| 1555 |
+
|
| 1556 |
+
.. _bug-fixes-49:
|
| 1557 |
+
|
| 1558 |
+
Bug fixes
|
| 1559 |
+
~~~~~~~~~
|
| 1560 |
+
|
| 1561 |
+
.. _section-67:
|
| 1562 |
+
|
| 1563 |
+
0.21.1 - 2019-04-26
|
| 1564 |
+
-------------------
|
| 1565 |
+
|
| 1566 |
+
.. _new-features-44:
|
| 1567 |
+
|
| 1568 |
+
New features
|
| 1569 |
+
~~~~~~~~~~~~
|
| 1570 |
+
|
| 1571 |
+
- users can provide their own start and stop column names in
|
| 1572 |
+
``add_covariate_to_timeline``
|
| 1573 |
+
- PiecewiseExponentialFitter now allows numpy arrays as breakpoints
|
| 1574 |
+
|
| 1575 |
+
.. _api-changes-21:
|
| 1576 |
+
|
| 1577 |
+
API changes
|
| 1578 |
+
~~~~~~~~~~~
|
| 1579 |
+
|
| 1580 |
+
- output of ``survival_table_from_events`` when collapsing rows to
|
| 1581 |
+
intervals now removes the “aggregate” column multi-index.
|
| 1582 |
+
|
| 1583 |
+
.. _bug-fixes-50:
|
| 1584 |
+
|
| 1585 |
+
Bug fixes
|
| 1586 |
+
~~~~~~~~~
|
| 1587 |
+
|
| 1588 |
+
- fixed bug in CoxTimeVaryingFitter when ax is provided, thanks @j-i-l!
|
| 1589 |
+
|
| 1590 |
+
.. _section-68:
|
| 1591 |
+
|
| 1592 |
+
0.21.0 - 2019-04-12
|
| 1593 |
+
-------------------
|
| 1594 |
+
|
| 1595 |
+
.. _new-features-45:
|
| 1596 |
+
|
| 1597 |
+
New features
|
| 1598 |
+
~~~~~~~~~~~~
|
| 1599 |
+
|
| 1600 |
+
- ``weights`` is now an optional kwarg for parametric univariate models.
|
| 1601 |
+
- all univariate and multivariate parametric models now have ability to
|
| 1602 |
+
handle left, right and interval censored data (the former two being
|
| 1603 |
+
special cases of the latter). Users can use the
|
| 1604 |
+
``fit_right_censoring`` (which is an alias for ``fit``),
|
| 1605 |
+
``fit_left_censoring`` and ``fit_interval_censoring``.
|
| 1606 |
+
- a new interval censored dataset is available under
|
| 1607 |
+
``lifelines.datasets.load_diabetes``
|
| 1608 |
+
|
| 1609 |
+
.. _api-changes-22:
|
| 1610 |
+
|
| 1611 |
+
API changes
|
| 1612 |
+
~~~~~~~~~~~
|
| 1613 |
+
|
| 1614 |
+
- ``left_censorship`` on all univariate fitters has been deprecated.
|
| 1615 |
+
Please use the new api ``model.fit_left_censoring(...)``.
|
| 1616 |
+
- ``invert_y_axis`` in ``model.plot(...)`` has been removed.
|
| 1617 |
+
- ``entries`` property in multivariate parametric models has a new
|
| 1618 |
+
Series name: ``entry``
|
| 1619 |
+
|
| 1620 |
+
.. _bug-fixes-51:
|
| 1621 |
+
|
| 1622 |
+
Bug fixes
|
| 1623 |
+
~~~~~~~~~
|
| 1624 |
+
|
| 1625 |
+
- lifelines was silently converting any NaNs in the event vector to
|
| 1626 |
+
True. An error is now thrown instead.
|
| 1627 |
+
- Fixed an error that didn’t let users use Numpy arrays in prediction
|
| 1628 |
+
for AFT models
|
| 1629 |
+
|
| 1630 |
+
.. _section-69:
|
| 1631 |
+
|
| 1632 |
+
0.20.5 - 2019-04-08
|
| 1633 |
+
-------------------
|
| 1634 |
+
|
| 1635 |
+
.. _new-features-46:
|
| 1636 |
+
|
| 1637 |
+
New features
|
| 1638 |
+
~~~~~~~~~~~~
|
| 1639 |
+
|
| 1640 |
+
- performance improvements for ``print_summary``.
|
| 1641 |
+
|
| 1642 |
+
.. _api-changes-23:
|
| 1643 |
+
|
| 1644 |
+
API changes
|
| 1645 |
+
~~~~~~~~~~~
|
| 1646 |
+
|
| 1647 |
+
- ``utils.survival_events_from_table`` returns an integer weight vector
|
| 1648 |
+
as well as durations and censoring vector.
|
| 1649 |
+
- in ``AalenJohansenFitter``, the ``variance`` parameter is renamed to
|
| 1650 |
+
``variance_`` to align with the usual lifelines convention.
|
| 1651 |
+
|
| 1652 |
+
.. _bug-fixes-52:
|
| 1653 |
+
|
| 1654 |
+
Bug fixes
|
| 1655 |
+
~~~~~~~~~
|
| 1656 |
+
|
| 1657 |
+
- Fixed an error in the ``CoxTimeVaryingFitter``\ ’s likelihood ratio
|
| 1658 |
+
test when using strata.
|
| 1659 |
+
- Fixed some plotting bugs with ``AalenJohansenFitter``
|
| 1660 |
+
|
| 1661 |
+
.. _section-70:
|
| 1662 |
+
|
| 1663 |
+
0.20.4 - 2019-03-27
|
| 1664 |
+
-------------------
|
| 1665 |
+
|
| 1666 |
+
.. _new-features-47:
|
| 1667 |
+
|
| 1668 |
+
New features
|
| 1669 |
+
~~~~~~~~~~~~
|
| 1670 |
+
|
| 1671 |
+
- left-truncation support in AFT models, using the ``entry_col`` kwarg
|
| 1672 |
+
in ``fit()``
|
| 1673 |
+
- ``generate_datasets.piecewise_exponential_survival_data`` for
|
| 1674 |
+
generating piecewise exp. data
|
| 1675 |
+
- Faster ``print_summary`` for AFT models.
|
| 1676 |
+
|
| 1677 |
+
.. _api-changes-24:
|
| 1678 |
+
|
| 1679 |
+
API changes
|
| 1680 |
+
~~~~~~~~~~~
|
| 1681 |
+
|
| 1682 |
+
- Pandas is now correctly pinned to >= 0.23.0. This was always the
|
| 1683 |
+
case, but not specified in setup.py correctly.
|
| 1684 |
+
|
| 1685 |
+
.. _bug-fixes-53:
|
| 1686 |
+
|
| 1687 |
+
Bug fixes
|
| 1688 |
+
~~~~~~~~~
|
| 1689 |
+
|
| 1690 |
+
- Better handling for extremely large numbers in ``print_summary``
|
| 1691 |
+
- ``PiecewiseExponentialFitter`` is available with
|
| 1692 |
+
``from lifelines import *``.
|
| 1693 |
+
|
| 1694 |
+
.. _section-71:
|
| 1695 |
+
|
| 1696 |
+
0.20.3 - 2019-03-23
|
| 1697 |
+
-------------------
|
| 1698 |
+
|
| 1699 |
+
.. _new-features-48:
|
| 1700 |
+
|
| 1701 |
+
New features
|
| 1702 |
+
~~~~~~~~~~~~
|
| 1703 |
+
|
| 1704 |
+
- Now ``cumulative_density_`` & ``survival_function_`` are *always*
|
| 1705 |
+
present on a fitted ``KaplanMeierFitter``.
|
| 1706 |
+
- New attributes/methods on ``KaplanMeierFitter``:
|
| 1707 |
+
``plot_cumulative_density()``,
|
| 1708 |
+
``confidence_interval_cumulative_density_``,
|
| 1709 |
+
``plot_survival_function`` and
|
| 1710 |
+
``confidence_interval_survival_function_``.
|
| 1711 |
+
|
| 1712 |
+
.. _section-72:
|
| 1713 |
+
|
| 1714 |
+
0.20.2 - 2019-03-21
|
| 1715 |
+
-------------------
|
| 1716 |
+
|
| 1717 |
+
.. _new-features-49:
|
| 1718 |
+
|
| 1719 |
+
New features
|
| 1720 |
+
~~~~~~~~~~~~
|
| 1721 |
+
|
| 1722 |
+
- Left censoring is now supported in univariate parametric models:
|
| 1723 |
+
``.fit(..., left_censorship=True)``. Examples are in the docs.
|
| 1724 |
+
- new dataset: ``lifelines.datasets.load_nh4()``
|
| 1725 |
+
- Univariate parametric models now include, by default, support for the
|
| 1726 |
+
cumulative density function: ``.cumulative_density_``,
|
| 1727 |
+
``.confidence_interval_cumulative_density_``,
|
| 1728 |
+
``plot_cumulative_density()``, ``cumulative_density_at_times(t)``.
|
| 1729 |
+
- add a ``lifelines.plotting.qq_plot`` for univariate parametric models
|
| 1730 |
+
that handles censored data.
|
| 1731 |
+
|
| 1732 |
+
.. _api-changes-25:
|
| 1733 |
+
|
| 1734 |
+
API changes
|
| 1735 |
+
~~~~~~~~~~~
|
| 1736 |
+
|
| 1737 |
+
- ``plot_lifetimes`` no longer reverses the order when plotting. Thanks
|
| 1738 |
+
@vpolimenov!
|
| 1739 |
+
- The ``C`` column in ``load_lcd`` dataset is renamed to ``E``.
|
| 1740 |
+
|
| 1741 |
+
.. _bug-fixes-54:
|
| 1742 |
+
|
| 1743 |
+
Bug fixes
|
| 1744 |
+
~~~~~~~~~
|
| 1745 |
+
|
| 1746 |
+
- fixed a naming error in ``KaplanMeierFitter`` when
|
| 1747 |
+
``left_censorship`` was set to True, ``plot_cumulative_density_()``
|
| 1748 |
+
is now ``plot_cumulative_density()``.
|
| 1749 |
+
- added some error handling when passing in timedeltas. Ideally, users
|
| 1750 |
+
don’t pass in timedeltas, as the scale is ambiguous. However, the
|
| 1751 |
+
error message before was not obvious, so we do some conversion, warn
|
| 1752 |
+
the user, and pass it through.
|
| 1753 |
+
- ``qth_survival_times`` for a truncated CDF would return ``np.inf`` if
|
| 1754 |
+
the q parameter was below the truncation limit. This should have been
|
| 1755 |
+
``-np.inf``
|
| 1756 |
+
|
| 1757 |
+
.. _section-73:
|
| 1758 |
+
|
| 1759 |
+
0.20.1 - 2019-03-16
|
| 1760 |
+
-------------------
|
| 1761 |
+
|
| 1762 |
+
- Some performance improvements to ``CoxPHFitter`` (about 30%). I know
|
| 1763 |
+
it may seem silly, but we are now about the same or slightly faster
|
| 1764 |
+
than the Cox model in R’s ``survival`` package (for some testing
|
| 1765 |
+
datasets and some configurations). This is a big deal, because 1)
|
| 1766 |
+
lifelines does more error checking prior, 2) R’s cox model is written
|
| 1767 |
+
in C, and we are still pure Python/NumPy, 3) R’s cox model has
|
| 1768 |
+
decades of development.
|
| 1769 |
+
- suppressed unimportant warnings
|
| 1770 |
+
|
| 1771 |
+
.. _api-changes-26:
|
| 1772 |
+
|
| 1773 |
+
API changes
|
| 1774 |
+
~~~~~~~~~~~
|
| 1775 |
+
|
| 1776 |
+
- Previously, lifelines *always* added a 0 row to
|
| 1777 |
+
``cph.baseline_hazard_``, even if there were no event at this time.
|
| 1778 |
+
This is no longer the case. A 0 will still be added if there is a
|
| 1779 |
+
duration (observed or not) at 0, however.
|
| 1780 |
+
|
| 1781 |
+
.. _section-74:
|
| 1782 |
+
|
| 1783 |
+
0.20.0 - 2019-03-05
|
| 1784 |
+
-------------------
|
| 1785 |
+
|
| 1786 |
+
- Starting with 0.20.0, only Python3 will be supported. Over 75% of
|
| 1787 |
+
recent installs were Py3.
|
| 1788 |
+
- Updated minimum dependencies, specifically Matplotlib and Pandas.
|
| 1789 |
+
|
| 1790 |
+
.. _new-features-50:
|
| 1791 |
+
|
| 1792 |
+
New features
|
| 1793 |
+
~~~~~~~~~~~~
|
| 1794 |
+
|
| 1795 |
+
- smarter initialization for AFT models which should improve
|
| 1796 |
+
convergence.
|
| 1797 |
+
|
| 1798 |
+
.. _api-changes-27:
|
| 1799 |
+
|
| 1800 |
+
API changes
|
| 1801 |
+
~~~~~~~~~~~
|
| 1802 |
+
|
| 1803 |
+
- ``initial_beta`` in Cox model’s ``.fit`` is now ``initial_point``.
|
| 1804 |
+
- ``initial_point`` is now available in AFT models and
|
| 1805 |
+
``CoxTimeVaryingFitter``
|
| 1806 |
+
- the DataFrame ``confidence_intervals_`` for univariate models is
|
| 1807 |
+
transposed now (previously parameters were columns, now parameters are
|
| 1808 |
+
rows).
|
| 1809 |
+
|
| 1810 |
+
.. _bug-fixes-55:
|
| 1811 |
+
|
| 1812 |
+
Bug fixes
|
| 1813 |
+
~~~~~~~~~
|
| 1814 |
+
|
| 1815 |
+
- Fixed a bug with plotting and ``check_assumptions``.
|
| 1816 |
+
|
| 1817 |
+
.. _section-75:
|
| 1818 |
+
|
| 1819 |
+
0.19.5 - 2019-02-26
|
| 1820 |
+
-------------------
|
| 1821 |
+
|
| 1822 |
+
.. _new-features-51:
|
| 1823 |
+
|
| 1824 |
+
New features
|
| 1825 |
+
~~~~~~~~~~~~
|
| 1826 |
+
|
| 1827 |
+
- ``plot_covariate_group`` can accept multiple covariates to plot. This
|
| 1828 |
+
is useful for columns that have implicit correlation like polynomial
|
| 1829 |
+
features or categorical variables.
|
| 1830 |
+
- Convergence improvements for AFT models.
|
| 1831 |
+
|
| 1832 |
+
.. _section-76:
|
| 1833 |
+
|
| 1834 |
+
0.19.4 - 2019-02-25
|
| 1835 |
+
-------------------
|
| 1836 |
+
|
| 1837 |
+
.. _bug-fixes-56:
|
| 1838 |
+
|
| 1839 |
+
Bug fixes
|
| 1840 |
+
~~~~~~~~~
|
| 1841 |
+
|
| 1842 |
+
- remove some bad print statements in ``CoxPHFitter``.
|
| 1843 |
+
|
| 1844 |
+
.. _section-77:
|
| 1845 |
+
|
| 1846 |
+
0.19.3 - 2019-02-25
|
| 1847 |
+
-------------------
|
| 1848 |
+
|
| 1849 |
+
.. _new-features-52:
|
| 1850 |
+
|
| 1851 |
+
New features
|
| 1852 |
+
~~~~~~~~~~~~
|
| 1853 |
+
|
| 1854 |
+
- new AFT models: ``LogNormalAFTFitter`` and ``LogLogisticAFTFitter``.
|
| 1855 |
+
- AFT models now accept a ``weights_col`` argument to ``fit``.
|
| 1856 |
+
- Robust errors (sandwich errors) are now available in AFT models using
|
| 1857 |
+
the ``robust=True`` kwarg in ``fit``.
|
| 1858 |
+
- Performance increase to ``print_summary`` in the ``CoxPHFitter`` and
|
| 1859 |
+
``CoxTimeVaryingFitter`` model.
|
| 1860 |
+
|
| 1861 |
+
.. _section-78:
|
| 1862 |
+
|
| 1863 |
+
0.19.2 - 2019-02-22
|
| 1864 |
+
-------------------
|
| 1865 |
+
|
| 1866 |
+
.. _new-features-53:
|
| 1867 |
+
|
| 1868 |
+
New features
|
| 1869 |
+
~~~~~~~~~~~~
|
| 1870 |
+
|
| 1871 |
+
- ``ParametricUnivariateFitters``, like ``WeibullFitter``, have
|
| 1872 |
+
smoothed plots when plotting (vs stepped plots)
|
| 1873 |
+
|
| 1874 |
+
.. _bug-fixes-57:
|
| 1875 |
+
|
| 1876 |
+
Bug fixes
|
| 1877 |
+
~~~~~~~~~
|
| 1878 |
+
|
| 1879 |
+
- The ``ExponentialFitter`` log likelihood *value* was incorrect -
|
| 1880 |
+
inference was correct however.
|
| 1881 |
+
- Univariate fitters are more flexible and can allow 2-d and
|
| 1882 |
+
DataFrames as inputs.
|
| 1883 |
+
|
| 1884 |
+
.. _section-79:
|
| 1885 |
+
|
| 1886 |
+
0.19.1 - 2019-02-21
|
| 1887 |
+
-------------------
|
| 1888 |
+
|
| 1889 |
+
.. _new-features-54:
|
| 1890 |
+
|
| 1891 |
+
New features
|
| 1892 |
+
~~~~~~~~~~~~
|
| 1893 |
+
|
| 1894 |
+
- improved stability of ``LogNormalFitter``
|
| 1895 |
+
- Python3 users are no longer forced to use Matplotlib 2.x.
|
| 1896 |
+
|
| 1897 |
+
.. _api-changes-28:
|
| 1898 |
+
|
| 1899 |
+
API changes
|
| 1900 |
+
~~~~~~~~~~~
|
| 1901 |
+
|
| 1902 |
+
- **Important**: we changed the parameterization of the
|
| 1903 |
+
``PiecewiseExponential`` to the same as ``ExponentialFitter`` (from
|
| 1904 |
+
``\lambda * t`` to ``t / \lambda``).
|
| 1905 |
+
|
| 1906 |
+
.. _section-80:
|
| 1907 |
+
|
| 1908 |
+
0.19.0 - 2019-02-20
|
| 1909 |
+
-------------------
|
| 1910 |
+
|
| 1911 |
+
.. _new-features-55:
|
| 1912 |
+
|
| 1913 |
+
New features
|
| 1914 |
+
~~~~~~~~~~~~
|
| 1915 |
+
|
| 1916 |
+
- New regression model ``WeibullAFTFitter`` for fitting accelerated
|
| 1917 |
+
failure time models. Docs have been added to our
|
| 1918 |
+
`documentation <https://lifelines.readthedocs.io/>`__ about how to
|
| 1919 |
+
use ``WeibullAFTFitter`` (spoiler: its API is similar to the other
|
| 1920 |
+
regression models) and how to interpret the output.
|
| 1921 |
+
- ``CoxPHFitter`` performance improvements (about 10%)
|
| 1922 |
+
- ``CoxTimeVaryingFitter`` performance improvements (about 10%)
|
| 1923 |
+
|
| 1924 |
+
.. _api-changes-29:
|
| 1925 |
+
|
| 1926 |
+
API changes
|
| 1927 |
+
~~~~~~~~~~~
|
| 1928 |
+
|
| 1929 |
+
- **Important**: we changed the ``.hazards_`` and ``.standard_errors_``
|
| 1930 |
+
on Cox models to be pandas Series (instead of Dataframes). This felt
|
| 1931 |
+
like a more natural representation of them. You may need to update
|
| 1932 |
+
your code to reflect this. See notes here:
|
| 1933 |
+
https://github.com/CamDavidsonPilon/lifelines/issues/636
|
| 1934 |
+
- **Important**: we changed the ``.confidence_intervals_`` on Cox
|
| 1935 |
+
models to be transposed. This felt like a more natural representation
|
| 1936 |
+
of them. You may need to update your code to reflect this. See notes
|
| 1937 |
+
here: https://github.com/CamDavidsonPilon/lifelines/issues/636
|
| 1938 |
+
- **Important**: we changed the parameterization of the
|
| 1939 |
+
``WeibullFitter`` and ``ExponentialFitter`` from ``\lambda * t`` to
|
| 1940 |
+
``t / \lambda``. This was for a few reasons: 1) it is a more common
|
| 1941 |
+
parameterization in literature, 2) it helps in convergence.
|
| 1942 |
+
- **Important**: in models where we add an intercept (currently only
|
| 1943 |
+
``AalenAdditiveModel``), the name of the added column has been
|
| 1944 |
+
changed from ``baseline`` to ``_intercept``
|
| 1945 |
+
- **Important**: the meaning of ``alpha`` in all fitters has changed to
|
| 1946 |
+
be the standard interpretation of alpha in confidence intervals. That
|
| 1947 |
+
means that the *default* for alpha is set to 0.05 in the latest
|
| 1948 |
+
lifelines, instead of 0.95 in previous versions.
|
| 1949 |
+
|
| 1950 |
+
.. _bug-fixes-58:
|
| 1951 |
+
|
| 1952 |
+
Bug Fixes
|
| 1953 |
+
~~~~~~~~~
|
| 1954 |
+
|
| 1955 |
+
- Fixed a bug in the ``_log_likelihood_`` property of
|
| 1956 |
+
``ParametericUnivariateFitter`` models. It was showing the “average”
|
| 1957 |
+
log-likelihood (i.e. scaled by 1/n) instead of the total. It now
|
| 1958 |
+
displays the total.
|
| 1959 |
+
- In model ``print_summary``\ s, corrected a labelling error. Instead of
|
| 1960 |
+
“Likelihood test”, it should have read “Log-likelihood test”.
|
| 1961 |
+
- Fixed a bug that was too frequently rejecting the dtype of ``event``
|
| 1962 |
+
columns.
|
| 1963 |
+
- Fixed a calculation bug in the concordance index for stratified Cox
|
| 1964 |
+
models. Thanks @airanmehr!
|
| 1965 |
+
- Fixed some Pandas <0.24 bugs.
|
| 1966 |
+
|
| 1967 |
+
.. _section-81:
|
| 1968 |
+
|
| 1969 |
+
0.18.6 - 2019-02-13
|
| 1970 |
+
-------------------
|
| 1971 |
+
|
| 1972 |
+
- some improvements to the output of ``check_assumptions``.
|
| 1973 |
+
``show_plots`` is turned to ``False`` by default now. It only shows
|
| 1974 |
+
``rank`` and ``km`` p-values now.
|
| 1975 |
+
- some performance improvements to ``qth_survival_time``.
|
| 1976 |
+
|
| 1977 |
+
.. _section-82:
|
| 1978 |
+
|
| 1979 |
+
0.18.5 - 2019-02-11
|
| 1980 |
+
-------------------
|
| 1981 |
+
|
| 1982 |
+
- added new plotting methods to parametric univariate models:
|
| 1983 |
+
``plot_survival_function``, ``plot_hazard`` and
|
| 1984 |
+
``plot_cumulative_hazard``. The last one is an alias for ``plot``.
|
| 1985 |
+
- added new properties to parametric univariate models:
|
| 1986 |
+
``confidence_interval_survival_function_``,
|
| 1987 |
+
``confidence_interval_hazard_``,
|
| 1988 |
+
``confidence_interval_cumulative_hazard_``. The last one is an alias
|
| 1989 |
+
for ``confidence_interval_``.
|
| 1990 |
+
- Fixed some overflow issues with ``AalenJohansenFitter``\ ’s variance
|
| 1991 |
+
calculations when using large datasets.
|
| 1992 |
+
- Fixed an edge case in ``AalenJohansenFitter`` that was causing some
|
| 1993 |
+
datasets to be jittered too often.
|
| 1994 |
+
- Add a new kwarg to ``AalenJohansenFitter``, ``calculate_variance``
|
| 1995 |
+
that can be used to turn off variance calculations since this can
|
| 1996 |
+
take a long time for large datasets. Thanks @pzivich!
|
| 1997 |
+
|
| 1998 |
+
.. _section-83:
|
| 1999 |
+
|
| 2000 |
+
0.18.4 - 2019-02-10
|
| 2001 |
+
-------------------
|
| 2002 |
+
|
| 2003 |
+
- fixed confidence intervals in cumulative hazards for parametric
|
| 2004 |
+
univariate models. They were previously severely depressed.
|
| 2005 |
+
- adding left-truncation support to parametric univariate models with
|
| 2006 |
+
the ``entry`` kwarg in ``.fit``
|
| 2007 |
+
|
| 2008 |
+
.. _section-84:
|
| 2009 |
+
|
| 2010 |
+
0.18.3 - 2019-02-07
|
| 2011 |
+
-------------------
|
| 2012 |
+
|
| 2013 |
+
- Some performance improvements to parametric univariate models.
|
| 2014 |
+
- Suppressing some irrelevant NumPy and autograd warnings, so lifelines
|
| 2015 |
+
warnings are more noticeable.
|
| 2016 |
+
- Improved some warning and error messages.
|
| 2017 |
+
|
| 2018 |
+
.. _section-85:
|
| 2019 |
+
|
| 2020 |
+
0.18.2 - 2019-02-05
|
| 2021 |
+
-------------------
|
| 2022 |
+
|
| 2023 |
+
- New univariate fitter ``PiecewiseExponentialFitter`` for creating a
|
| 2024 |
+
stepwise hazard model. See docs online.
|
| 2025 |
+
- Ability to create novel parametric univariate models using the new
|
| 2026 |
+
``ParametericUnivariateFitter`` super class. See docs online for how
|
| 2027 |
+
to do this.
|
| 2028 |
+
- Unfortunately, parametric univariate fitters are not serializable
|
| 2029 |
+
with ``pickle``. The library ``dill`` is still useable.
|
| 2030 |
+
- Complete overhaul of all internals for parametric univariate fitters.
|
| 2031 |
+
Moved them all (most) to use ``autograd``.
|
| 2032 |
+
- ``LogNormalFitter`` no longer models ``log_sigma``.
|
| 2033 |
+
|
| 2034 |
+
.. _section-86:
|
| 2035 |
+
|
| 2036 |
+
0.18.1 - 2019-02-02
|
| 2037 |
+
-------------------
|
| 2038 |
+
|
| 2039 |
+
- bug fixes in ``LogNormalFitter`` variance estimates
|
| 2040 |
+
- improve convergence of ``LogNormalFitter``. We now model the log of
|
| 2041 |
+
sigma internally, but still expose sigma externally.
|
| 2042 |
+
- use the ``autograd`` lib to help with gradients.
|
| 2043 |
+
- New ``LogLogisticFitter`` univariate fitter available.
|
| 2044 |
+
|
| 2045 |
+
.. _section-87:
|
| 2046 |
+
|
| 2047 |
+
0.18.0 - 2019-01-31
|
| 2048 |
+
-------------------
|
| 2049 |
+
|
| 2050 |
+
- ``LogNormalFitter`` is a new univariate fitter you can use.
|
| 2051 |
+
- ``WeibullFitter`` now correctly returns the confidence intervals
|
| 2052 |
+
(previously returned only NaNs)
|
| 2053 |
+
- ``WeibullFitter.print_summary()`` displays p-values associated with
|
| 2054 |
+
its parameters not equal to 1.0 - previously this was (implicitly)
|
| 2055 |
+
comparing against 0, which is trivially always true (the parameters
|
| 2056 |
+
must be greater than 0)
|
| 2057 |
+
- ``ExponentialFitter.print_summary()`` displays p-values associated
|
| 2058 |
+
with its parameters not equal to 1.0 - previously this was
|
| 2059 |
+
(implicitly) comparing against 0, which is trivially always true (the
|
| 2060 |
+
parameters must be greater than 0)
|
| 2061 |
+
- ``ExponentialFitter.plot`` now displays the cumulative hazard,
|
| 2062 |
+
instead of the survival function. This is to make it easier to
|
| 2063 |
+
compare to ``WeibullFitter`` and ``LogNormalFitter``
|
| 2064 |
+
- Univariate fitters’ ``cumulative_hazard_at_times``,
|
| 2065 |
+
``hazard_at_times``, ``survival_function_at_times`` return pandas
|
| 2066 |
+
Series now (used to be numpy arrays)
|
| 2067 |
+
- remove ``alpha`` keyword from all statistical functions. This was
|
| 2068 |
+
never being used.
|
| 2069 |
+
- Gone are asterisks and dots in ``print_summary`` functions that
|
| 2070 |
+
represent significance thresholds.
|
| 2071 |
+
- In models’ ``summary`` (including ``print_summary``), the ``log(p)``
|
| 2072 |
+
term has changed to ``-log2(p)``. This is known as the s-value. See
|
| 2073 |
+
https://lesslikely.com/statistics/s-values/
|
| 2074 |
+
- introduce new statistical tests between univariate datasets:
|
| 2075 |
+
``survival_difference_at_fixed_point_in_time_test``,…
|
| 2076 |
+
- new warning message when Cox models detect possible non-unique
|
| 2077 |
+
solutions to maximum likelihood.
|
| 2078 |
+
- Generally: clean up lifelines exception handling. Ex: catch
|
| 2079 |
+
``LinAlgError: Matrix is singular.`` and report back to the user
|
| 2080 |
+
advice.
|
| 2081 |
+
|
| 2082 |
+
.. _section-88:
|
| 2083 |
+
|
| 2084 |
+
0.17.5 - 2019-01-25
|
| 2085 |
+
-------------------
|
| 2086 |
+
|
| 2087 |
+
- more bugs in ``plot_covariate_groups`` fixed when using non-numeric
|
| 2088 |
+
strata.
|
| 2089 |
+
|
| 2090 |
+
.. _section-89:
|
| 2091 |
+
|
| 2092 |
+
0.17.4 - 2019-01-25
|
| 2093 |
+
-------------------
|
| 2094 |
+
|
| 2095 |
+
- Fix bug in ``plot_covariate_groups`` that wasn’t allowing for strata
|
| 2096 |
+
to be used.
|
| 2097 |
+
- change name of ``multicenter_aids_cohort_study`` to
|
| 2098 |
+
``load_multicenter_aids_cohort_study``
|
| 2099 |
+
- ``groups`` is now called ``values`` in
|
| 2100 |
+
``CoxPHFitter.plot_covariate_groups``
|
| 2101 |
+
|
| 2102 |
+
.. _section-90:
|
| 2103 |
+
|
| 2104 |
+
0.17.3 - 2019-01-24
|
| 2105 |
+
-------------------
|
| 2106 |
+
|
| 2107 |
+
- Fix in ``compute_residuals`` when using ``schoenfeld`` and the
|
| 2108 |
+
minimum duration has only censored subjects.
|
| 2109 |
+
|
| 2110 |
+
.. _section-91:
|
| 2111 |
+
|
| 2112 |
+
0.17.2 - 2019-01-22
|
| 2113 |
+
-------------------
|
| 2114 |
+
|
| 2115 |
+
- Another round of serious performance improvements for the Cox models.
|
| 2116 |
+
Up to 2x faster for CoxPHFitter and CoxTimeVaryingFitter. This was
|
| 2117 |
+
mostly the result of using NumPy’s ``einsum`` to simplify a previous
|
| 2118 |
+
``for`` loop. The downside is the code is more esoteric now. I’ve
|
| 2119 |
+
added comments as necessary though 🤞
|
| 2120 |
+
|
| 2121 |
+
.. _section-92:
|
| 2122 |
+
|
| 2123 |
+
0.17.1 - 2019-01-20
|
| 2124 |
+
-------------------
|
| 2125 |
+
|
| 2126 |
+
- adding bottleneck as a dependency. This library is highly-recommended
|
| 2127 |
+
by Pandas, and in lifelines we see some nice performance improvements
|
| 2128 |
+
with it too. (~15% for ``CoxPHFitter``)
|
| 2129 |
+
- There was a small bug in ``CoxPHFitter`` when using ``batch_mode``
|
| 2130 |
+
that was causing coefficients to deviate from their MLE value. This
|
| 2131 |
+
bug eluded tests, which means that its discrepancy was less than
|
| 2132 |
+
0.0001 difference. It’s fixed now, and even more accurate tests are
|
| 2133 |
+
added.
|
| 2134 |
+
- Faster ``CoxPHFitter._compute_likelihood_ratio_test()``
|
| 2135 |
+
- Fixes a Pandas performance warning in ``CoxTimeVaryingFitter``.
|
| 2136 |
+
- Performance improvements to ``CoxTimeVaryingFitter``.
|
| 2137 |
+
|
| 2138 |
+
.. _section-93:
|
| 2139 |
+
|
| 2140 |
+
0.17.0 - 2019-01-11
|
| 2141 |
+
-------------------
|
| 2142 |
+
|
| 2143 |
+
- corrected behaviour in ``CoxPHFitter`` where ``score_`` was not being
|
| 2144 |
+
refreshed on every new ``fit``.
|
| 2145 |
+
- Reimplementation of ``AalenAdditiveFitter``. There were significant
|
| 2146 |
+
changes to it:
|
| 2147 |
+
|
| 2148 |
+
- implementation is at least 10x faster, and possibly up to 100x
|
| 2149 |
+
faster for some datasets.
|
| 2150 |
+
- memory consumption is way down
|
| 2151 |
+
- removed the time-varying component from ``AalenAdditiveFitter``.
|
| 2152 |
+
This will return in a future release.
|
| 2153 |
+
- new ``print_summary``
|
| 2154 |
+
- ``weights_col`` is added
|
| 2155 |
+
- ``nn_cumulative_hazard`` is removed (may add back)
|
| 2156 |
+
|
| 2157 |
+
- some plotting improvements to ``plotting.plot_lifetimes``
|
| 2158 |
+
|
| 2159 |
+
.. _section-94:
|
| 2160 |
+
|
| 2161 |
+
0.16.3 - 2019-01-03
|
| 2162 |
+
-------------------
|
| 2163 |
+
|
| 2164 |
+
- More ``CoxPHFitter`` performance improvements. Up to a 40% reduction
|
| 2165 |
+
vs 0.16.2 for some datasets.
|
| 2166 |
+
|
| 2167 |
+
.. _section-95:
|
| 2168 |
+
|
| 2169 |
+
0.16.2 - 2019-01-02
|
| 2170 |
+
-------------------
|
| 2171 |
+
|
| 2172 |
+
- Fixed ``CoxTimeVaryingFitter`` to allow more than one variable to be
|
| 2173 |
+
stratified
|
| 2174 |
+
- Significant performance improvements for ``CoxPHFitter`` when the dataset
|
| 2175 |
+
has lots of duplicate times. See
|
| 2176 |
+
https://github.com/CamDavidsonPilon/lifelines/issues/591
|
| 2177 |
+
|
| 2178 |
+
.. _section-96:
|
| 2179 |
+
|
| 2180 |
+
0.16.1 - 2019-01-01
|
| 2181 |
+
-------------------
|
| 2182 |
+
|
| 2183 |
+
- Fixed py2 division error in ``concordance`` method.
|
| 2184 |
+
|
| 2185 |
+
.. _section-97:
|
| 2186 |
+
|
| 2187 |
+
0.16.0 - 2019-01-01
|
| 2188 |
+
-------------------
|
| 2189 |
+
|
| 2190 |
+
- Drop Python 3.4 support.
|
| 2191 |
+
- introduction of residual calculations in
|
| 2192 |
+
``CoxPHFitter.compute_residuals``. Residuals include “schoenfeld”,
|
| 2193 |
+
“score”, “delta_beta”, “deviance”, “martingale”, and
|
| 2194 |
+
“scaled_schoenfeld”.
|
| 2195 |
+
- removes ``estimation`` namespace for fitters. Should be using
|
| 2196 |
+
``from lifelines import xFitter`` now. Thanks @usmanatron
|
| 2197 |
+
- removes ``predict_log_hazard_relative_to_mean`` from Cox model.
|
| 2198 |
+
Thanks @usmanatron
|
| 2199 |
+
- ``StatisticalResult`` has been generalized to allow for multiple
|
| 2200 |
+
results (ex: from pairwise comparisons). This means a slightly
|
| 2201 |
+
changed API that is mostly backwards compatible. See doc string for
|
| 2202 |
+
how to use it.
|
| 2203 |
+
- ``statistics.pairwise_logrank_test`` now returns a
|
| 2204 |
+
``StatisticalResult`` object instead of a nasty NxN DataFrame 💗
|
| 2205 |
+
- Display log(p-values) as well as p-values in ``print_summary``. Also,
|
| 2206 |
+
p-values below thresholds will be truncated. The original p-values are
|
| 2207 |
+
still recoverable using ``.summary``.
|
| 2208 |
+
- Floats in ``print_summary`` are now displayed to 2 decimal places. This
|
| 2209 |
+
can be changed using the ``decimal`` kwarg.
|
| 2210 |
+
- removed ``standardized`` from ``Cox`` model plotting. It was
|
| 2211 |
+
confusing.
|
| 2212 |
+
- visual improvements to Cox models ``.plot``
|
| 2213 |
+
- ``print_summary`` methods accept kwargs to also be displayed.
|
| 2214 |
+
- ``CoxPHFitter`` has a new human-readable method,
|
| 2215 |
+
``check_assumptions``, to check the assumptions of your Cox
|
| 2216 |
+
proportional hazard model.
|
| 2217 |
+
- A new helper util to “expand” static datasets into long-form:
|
| 2218 |
+
``lifelines.utils.to_episodic_format``.
|
| 2219 |
+
- ``CoxTimeVaryingFitter`` now accepts ``strata``.
|
| 2220 |
+
|
| 2221 |
+
.. _section-98:
|
| 2222 |
+
|
| 2223 |
+
0.15.4
|
| 2224 |
+
------
|
| 2225 |
+
|
| 2226 |
+
- bug fix for the Cox model likelihood ratio test when using
|
| 2227 |
+
non-trivial weights.
|
| 2228 |
+
|
| 2229 |
+
.. _section-99:
|
| 2230 |
+
|
| 2231 |
+
0.15.3 - 2018-12-18
|
| 2232 |
+
-------------------
|
| 2233 |
+
|
| 2234 |
+
- Only allow matplotlib less than 3.0.
|
| 2235 |
+
|
| 2236 |
+
.. _section-100:
|
| 2237 |
+
|
| 2238 |
+
0.15.2 - 2018-11-23
|
| 2239 |
+
-------------------
|
| 2240 |
+
|
| 2241 |
+
- API changes to ``plotting.plot_lifetimes``
|
| 2242 |
+
- ``cluster_col`` and ``strata`` can be used together in
|
| 2243 |
+
``CoxPHFitter``
|
| 2244 |
+
- removed ``entry`` from ``ExponentialFitter`` and ``WeibullFitter`` as
|
| 2245 |
+
it was doing nothing.
|
| 2246 |
+
|
| 2247 |
+
.. _section-101:
|
| 2248 |
+
|
| 2249 |
+
0.15.1 - 2018-11-23
|
| 2250 |
+
-------------------
|
| 2251 |
+
|
| 2252 |
+
- Bug fixes for v0.15.0
|
| 2253 |
+
- Raise NotImplementedError if the ``robust`` flag is used in
|
| 2254 |
+
``CoxTimeVaryingFitter`` - that’s not ready yet.
|
| 2255 |
+
|
| 2256 |
+
.. _section-102:
|
| 2257 |
+
|
| 2258 |
+
0.15.0 - 2018-11-22
|
| 2259 |
+
-------------------
|
| 2260 |
+
|
| 2261 |
+
- adding ``robust`` params to ``CoxPHFitter``\ ’s ``fit``. This enables
|
| 2262 |
+
at least i) using non-integer weights in the model (these could be
|
| 2263 |
+
sampling weights like IPTW), and ii) mis-specified models (ex:
|
| 2264 |
+
non-proportional hazards). Under the hood it’s a sandwich estimator.
|
| 2265 |
+
This does not handle ties, so if there are a high number of ties,
|
| 2266 |
+
results may significantly differ from other software.
|
| 2267 |
+
- ``standard_errors_`` is now a property on fitted ``CoxPHFitter``
|
| 2268 |
+
which describes the standard errors of the coefficients.
|
| 2269 |
+
- ``variance_matrix_`` is now a property on fitted ``CoxPHFitter``
|
| 2270 |
+
which describes the variance matrix of the coefficients.
|
| 2271 |
+
- new criteria for convergence of ``CoxPHFitter`` and
|
| 2272 |
+
``CoxTimeVaryingFitter`` called the Newton-decrement. Tests show it
|
| 2273 |
+
is as accurate (w.r.t. previous coefficients) and typically shaves
|
| 2274 |
+
off a single step, resulting in generally faster convergence. See
|
| 2275 |
+
https://www.cs.cmu.edu/~pradeepr/convexopt/Lecture_Slides/Newton_methods.pdf.
|
| 2276 |
+
Details about the Newton-decrement are added to the ``show_progress``
|
| 2277 |
+
statements.
|
| 2278 |
+
- Minimum support for scipy is 1.0
|
| 2279 |
+
- Convergence errors in models that use Newton-Raphson methods now
|
| 2280 |
+
throw a ``ConvergenceError``, instead of a ``ValueError`` (the former
|
| 2281 |
+
is a subclass of the latter, however).
|
| 2282 |
+
- ``AalenAdditiveModel`` raises ``ConvergenceWarning`` instead of
|
| 2283 |
+
printing a warning.
|
| 2284 |
+
- ``KaplanMeierFitter`` now has a cumulative plot option. Example
|
| 2285 |
+
``kmf.plot(invert_y_axis=True)``
|
| 2286 |
+
- a ``weights_col`` option has been added to ``CoxTimeVaryingFitter``
|
| 2287 |
+
that allows for time-varying weights.
|
| 2288 |
+
- ``WeibullFitter`` has a new ``show_progress`` param and additional
|
| 2289 |
+
information if the convergence fails.
|
| 2290 |
+
- ``CoxPHFitter``, ``ExponentialFitter``, ``WeibullFitter`` and
|
| 2291 |
+
``CoxTimeVaryingFitter`` method ``print_summary`` is updated with new
|
| 2292 |
+
fields.
|
| 2293 |
+
- ``WeibullFitter`` has renamed the incorrect ``_jacobian`` to
|
| 2294 |
+
``_hessian_``.
|
| 2295 |
+
- ``variance_matrix_`` is now a property on fitted ``WeibullFitter``
|
| 2296 |
+
which describes the variance matrix of the parameters.
|
| 2297 |
+
- The default ``WeibullFitter().timeline`` has changed from integers
|
| 2298 |
+
between the min and max duration to *n* floats between the max and
|
| 2299 |
+
min durations, where *n* is the number of observations.
|
| 2300 |
+
- Performance improvements for ``CoxPHFitter`` (~20% faster)
|
| 2301 |
+
- Performance improvements for ``CoxTimeVaryingFitter`` (~100% faster)
|
| 2302 |
+
- In Python3, Univariate models are now serialisable with ``pickle``.
|
| 2303 |
+
Thanks @dwilson1988 for the contribution. For Python2, ``dill`` is
|
| 2304 |
+
still the preferred method.
|
| 2305 |
+
- ``baseline_cumulative_hazard_`` (and derivatives of that) on
|
| 2306 |
+
``CoxPHFitter`` now correctly incorporate the ``weights_col``.
|
| 2307 |
+
- Fixed a bug in ``KaplanMeierFitter`` when late entry times lined up
|
| 2308 |
+
with death events. Thanks @pzivich
|
| 2309 |
+
- Adding ``cluster_col`` argument to ``CoxPHFitter`` so users can
|
| 2310 |
+
specify groups of subjects/rows that may be correlated.
|
| 2311 |
+
- Shifting the “significance codes” for p-values down an order of
|
| 2312 |
+
magnitude. (Example, p-values between 0.1 and 0.05 are not noted at
|
| 2313 |
+
all and p-values between 0.05 and 0.01 are noted with ``.``, etc.).
|
| 2314 |
+
This deviates from how they are presented in other software. There is
|
| 2315 |
+
an argument to be made to remove p-values from lifelines altogether
|
| 2316 |
+
(*become the changes you want to see in the world* lol), but I worry
|
| 2317 |
+
that people could compute the p-values by hand incorrectly, a worse
|
| 2318 |
+
outcome I think. So, this is my stance. P-values between 0.1 and 0.05
|
| 2319 |
+
offer *very* little information, so they are removed. There is a
|
| 2320 |
+
growing movement in statistics to shift “significant” findings to
|
| 2321 |
+
p-values less than 0.01 anyways.
|
| 2322 |
+
- New fitter for cumulative incidence of multiple risks
|
| 2323 |
+
``AalenJohansenFitter``. Thanks @pzivich! See “Methodologic Issues
|
| 2324 |
+
When Estimating Risks in Pharmacoepidemiology” for a nice overview of
|
| 2325 |
+
the model.
|
| 2326 |
+
|
| 2327 |
+
.. _section-103:
|
| 2328 |
+
|
| 2329 |
+
0.14.6 - 2018-07-02
|
| 2330 |
+
-------------------
|
| 2331 |
+
|
| 2332 |
+
- fix for n > 2 groups in ``multivariate_logrank_test`` (again).
|
| 2333 |
+
- fix bug for when ``event_observed`` column was not boolean.
|
| 2334 |
+
|
| 2335 |
+
.. _section-104:
|
| 2336 |
+
|
| 2337 |
+
0.14.5 - 2018-06-29
|
| 2338 |
+
-------------------
|
| 2339 |
+
|
| 2340 |
+
- fix for n > 2 groups in ``multivariate_logrank_test``
|
| 2341 |
+
- fix weights in KaplanMeierFitter when using a pandas Series.
|
| 2342 |
+
|
| 2343 |
+
.. _section-105:
|
| 2344 |
+
|
| 2345 |
+
0.14.4 - 2018-06-14
|
| 2346 |
+
-------------------
|
| 2347 |
+
|
| 2348 |
+
- Adds ``baseline_cumulative_hazard_`` and ``baseline_survival_`` to
|
| 2349 |
+
``CoxTimeVaryingFitter``. Because of this, new prediction methods are
|
| 2350 |
+
available.
|
| 2351 |
+
- fixed a bug in ``add_covariate_to_timeline`` when using
|
| 2352 |
+
``cumulative_sum`` with multiple columns.
|
| 2353 |
+
- Added ``Likelihood ratio test`` to ``CoxPHFitter.print_summary`` and
|
| 2354 |
+
``CoxTimeVaryingFitter.print_summary``
|
| 2355 |
+
- New checks in ``CoxTimeVaryingFitter`` that check for immediate
|
| 2356 |
+
deaths and redundant rows.
|
| 2357 |
+
- New ``delay`` parameter in ``add_covariate_to_timeline``
|
| 2358 |
+
- removed ``two_sided_z_test`` from ``statistics``
|
| 2359 |
+
|
| 2360 |
+
.. _section-106:
|
| 2361 |
+
|
| 2362 |
+
0.14.3 - 2018-05-24
|
| 2363 |
+
-------------------
|
| 2364 |
+
|
| 2365 |
+
- fixes a bug when subtracting or dividing two ``UnivariateFitters``
|
| 2366 |
+
with labels.
|
| 2367 |
+
- fixes an import error with using ``CoxTimeVaryingFitter`` predict
|
| 2368 |
+
methods.
|
| 2369 |
+
- adds a ``column`` argument to ``CoxTimeVaryingFitter`` and
|
| 2370 |
+
``CoxPHFitter`` ``plot`` method to plot only a subset of columns.
|
| 2371 |
+
|
| 2372 |
+
.. _section-107:
|
| 2373 |
+
|
| 2374 |
+
0.14.2 - 2018-05-18
|
| 2375 |
+
-------------------
|
| 2376 |
+
|
| 2377 |
+
- some quality of life improvements for working with
|
| 2378 |
+
``CoxTimeVaryingFitter`` including new ``predict_`` methods.
|
| 2379 |
+
|
| 2380 |
+
.. _section-108:
|
| 2381 |
+
|
| 2382 |
+
0.14.1 - 2018-04-01
|
| 2383 |
+
-------------------
|
| 2384 |
+
|
| 2385 |
+
- fixed bug with using weights and strata in ``CoxPHFitter``
|
| 2386 |
+
- fixed bug in using non-integer weights in ``KaplanMeierFitter``
|
| 2387 |
+
- Performance optimizations in ``CoxPHFitter`` for up to 40% faster
|
| 2388 |
+
completion of ``fit``.
|
| 2389 |
+
|
| 2390 |
+
- even smarter ``step_size`` calculations for iterative
|
| 2391 |
+
optimizations.
|
| 2392 |
+
- simple code optimizations & cleanup in specific hot spots.
|
| 2393 |
+
|
| 2394 |
+
- Performance optimizations in ``AalenAdditiveFitter`` for up to 50%
|
| 2395 |
+
faster completion of ``fit`` for large dataframes, and up to 10%
|
| 2396 |
+
faster for small dataframes.
|
| 2397 |
+
|
| 2398 |
+
.. _section-109:
|
| 2399 |
+
|
| 2400 |
+
0.14.0 - 2018-03-03
|
| 2401 |
+
-------------------
|
| 2402 |
+
|
| 2403 |
+
- adding ``plot_covariate_groups`` to ``CoxPHFitter`` to visualize what
|
| 2404 |
+
happens to survival as we vary a covariate, all else being equal.
|
| 2405 |
+
- ``utils`` functions like ``qth_survival_times`` and
|
| 2406 |
+
``median_survival_times`` now return the transpose of the DataFrame
|
| 2407 |
+
compared to previous version of lifelines. The reason for this is
|
| 2408 |
+
that we often treat survival curves as columns in DataFrames, and
|
| 2409 |
+
functions of the survival curve as index (ex:
|
| 2410 |
+
KaplanMeierFitter.survival_function\_ returns a survival curve *at*
|
| 2411 |
+
time *t*).
|
| 2412 |
+
- ``KaplanMeierFitter.fit`` and ``NelsonAalenFitter.fit`` accept a
|
| 2413 |
+
``weights`` vector that can be used for pre-aggregated datasets. See
|
| 2414 |
+
this
|
| 2415 |
+
`issue <https://github.com/CamDavidsonPilon/lifelines/issues/396>`__.
|
| 2416 |
+
- Convergence errors now return a custom ``ConvergenceWarning`` instead
|
| 2417 |
+
of a ``RuntimeWarning``
|
| 2418 |
+
- New checks for complete separation in the dataset for regressions.
|
| 2419 |
+
|
| 2420 |
+
.. _section-110:
|
| 2421 |
+
|
| 2422 |
+
0.13.0 - 2017-12-22
|
| 2423 |
+
-------------------
|
| 2424 |
+
|
| 2425 |
+
- removes ``is_significant`` and ``test_result`` from
|
| 2426 |
+
``StatisticalResult``. Users can instead choose their significance
|
| 2427 |
+
level by comparing to ``p_value``. The string representation of this
|
| 2428 |
+
class has changed as well.
|
| 2429 |
+
- ``CoxPHFitter`` and ``AalenAdditiveFitter`` now have a ``score_``
|
| 2430 |
+
property that is the concordance-index of the dataset to the fitted
|
| 2431 |
+
model.
|
| 2432 |
+
- ``CoxPHFitter`` and ``AalenAdditiveFitter`` no longer have the
|
| 2433 |
+
``data`` property. It was an *almost* duplicate of the training data,
|
| 2434 |
+
but was causing the model to be very large when serialized.
|
| 2435 |
+
- Implements a new fitter ``CoxTimeVaryingFitter`` available under the
|
| 2436 |
+
``lifelines`` namespace. This model implements the Cox model for
|
| 2437 |
+
time-varying covariates.
|
| 2438 |
+
- Utils for creating time varying datasets available in ``utils``.
|
| 2439 |
+
- less noisy check for complete separation.
|
| 2440 |
+
- removed ``datasets`` namespace from the main ``lifelines`` namespace
|
| 2441 |
+
- ``CoxPHFitter`` has a slightly more intelligent (barely…) way to pick
|
| 2442 |
+
a step size, so convergence should generally be faster.
|
| 2443 |
+
- ``CoxPHFitter.fit`` now accepts a ``weight_col`` kwarg so one can
|
| 2444 |
+
pass in weights per observation. This is very useful if you have many
|
| 2445 |
+
subjects, and the space of covariates is not large. Thus you can
|
| 2446 |
+
group the same subjects together and give that observation a weight
|
| 2447 |
+
equal to the count. Altogether, this means a much faster regression.
|
| 2448 |
+
|
| 2449 |
+
.. _section-111:
|
| 2450 |
+
|
| 2451 |
+
0.12.0
|
| 2452 |
+
------
|
| 2453 |
+
|
| 2454 |
+
- removes ``include_likelihood`` from ``CoxPHFitter.fit`` - it was not
|
| 2455 |
+
slowing things down much (empirically), and often I wanted it for
|
| 2456 |
+
debugging (I suppose others do too). It’s also another exit
|
| 2457 |
+
condition, so we may exit from the NR iterations faster.
|
| 2458 |
+
- added ``step_size`` param to ``CoxPHFitter.fit`` - the default is
|
| 2459 |
+
good, but for extremely large or small datasets this may want to be
|
| 2460 |
+
set manually.
|
| 2461 |
+
- added a warning to ``CoxPHFitter`` to check for complete separation:
|
| 2462 |
+
https://stats.idre.ucla.edu/other/mult-pkg/faq/general/faqwhat-is-complete-or-quasi-complete-separation-in-logisticprobit-regression-and-how-do-we-deal-with-them/
|
| 2463 |
+
- Additional functionality to ``utils.survival_table_from_events`` to
|
| 2464 |
+
bin the index to make the resulting table more readable.
|
| 2465 |
+
|
| 2466 |
+
.. _section-112:
|
| 2467 |
+
|
| 2468 |
+
0.11.3
|
| 2469 |
+
------
|
| 2470 |
+
|
| 2471 |
+
- No longer support matplotlib 1.X
|
| 2472 |
+
- Adding ``times`` argument to ``CoxPHFitter``\ ’s
|
| 2473 |
+
``predict_survival_function`` and ``predict_cumulative_hazard`` to
|
| 2474 |
+
predict the estimates at, instead of using the default times of
|
| 2475 |
+
observation or censorship.
|
| 2476 |
+
- More accurate prediction methods for parametric univariate models.
|
| 2477 |
+
|
| 2478 |
+
.. _section-113:
|
| 2479 |
+
|
| 2480 |
+
0.11.2
|
| 2481 |
+
------
|
| 2482 |
+
|
| 2483 |
+
- Changing license to vanilla MIT.
|
| 2484 |
+
- Speed up ``NelsonAalenFitter.fit`` considerably.
|
| 2485 |
+
|
| 2486 |
+
.. _section-114:
|
| 2487 |
+
|
| 2488 |
+
0.11.1 - 2017-06-22
|
| 2489 |
+
-------------------
|
| 2490 |
+
|
| 2491 |
+
- Python3 fix for ``CoxPHFitter.plot``.
|
| 2492 |
+
|
| 2493 |
+
.. _section-115:
|
| 2494 |
+
|
| 2495 |
+
0.11.0 - 2017-06-21
|
| 2496 |
+
-------------------
|
| 2497 |
+
|
| 2498 |
+
- fixes regression in ``KaplanMeierFitter.plot`` when using Seaborn and
|
| 2499 |
+
lifelines.
|
| 2500 |
+
- introduce a new ``.plot`` function to a fitted ``CoxPHFitter``
|
| 2501 |
+
instance. This plots the hazard coefficients and their confidence
|
| 2502 |
+
intervals.
|
| 2503 |
+
- in all plot methods, the ``ix`` kwarg has been deprecated in favour
|
| 2504 |
+
of a new ``loc`` kwarg. This is to align with Pandas deprecating
|
| 2505 |
+
``ix``
|
| 2506 |
+
|
| 2507 |
+
.. _section-116:
|
| 2508 |
+
|
| 2509 |
+
0.10.1 - 2017-06-05
|
| 2510 |
+
-------------------
|
| 2511 |
+
|
| 2512 |
+
- fix in internal normalization for ``CoxPHFitter`` predict methods.
|
| 2513 |
+
|
| 2514 |
+
.. _section-117:
|
| 2515 |
+
|
| 2516 |
+
0.10.0
|
| 2517 |
+
------
|
| 2518 |
+
|
| 2519 |
+
- corrected bug that was returning the wrong baseline survival and
|
| 2520 |
+
hazard values in ``CoxPHFitter`` when ``normalize=True``.
|
| 2521 |
+
- removed ``normalize`` kwarg in ``CoxPHFitter``. This was causing lots
|
| 2522 |
+
of confusion for users, and added code complexity. It’s really nice
|
| 2523 |
+
to be able to remove it.
|
| 2524 |
+
- correcting column name in ``CoxPHFitter.baseline_survival_``
|
| 2525 |
+
- ``CoxPHFitter.baseline_cumulative_hazard_`` is always centered, to
|
| 2526 |
+
mimic R’s ``basehaz`` API.
|
| 2527 |
+
- new ``predict_log_partial_hazards`` to ``CoxPHFitter``
|
| 2528 |
+
|
| 2529 |
+
.. _section-118:
|
| 2530 |
+
|
| 2531 |
+
0.9.4
|
| 2532 |
+
-----
|
| 2533 |
+
|
| 2534 |
+
- adding ``plot_loglogs`` to ``KaplanMeierFitter``
|
| 2535 |
+
- added a (correct) check to see if some columns in a dataset will
|
| 2536 |
+
cause convergence problems.
|
| 2537 |
+
- removing ``flat`` argument in ``plot`` methods. It was causing
|
| 2538 |
+
confusion. To replicate it, one can set ``ci_force_lines=True`` and
|
| 2539 |
+
``show_censors=True``.
|
| 2540 |
+
- adding ``strata`` keyword argument to ``CoxPHFitter`` on
|
| 2541 |
+
initialization (ex: ``CoxPHFitter(strata=['v1', 'v2'])``). Why?
|
| 2542 |
+
Fitters initialized with ``strata`` can now be passed into
|
| 2543 |
+
``k_fold_cross_validation``, plus it makes unit testing ``strata``
|
| 2544 |
+
fitters easier.
|
| 2545 |
+
- If using ``strata`` in ``CoxPHFitter``, access to strata specific
|
| 2546 |
+
baseline hazards and survival functions are available (previously it
|
| 2547 |
+
was a blended value). Prediction also uses the specific baseline
|
| 2548 |
+
hazards/survivals.
|
| 2549 |
+
- performance improvements in ``CoxPHFitter`` - should see at least a
|
| 2550 |
+
10% speed improvement in ``fit``.
|
| 2551 |
+
|
| 2552 |
+
.. _section-119:
|
| 2553 |
+
|
| 2554 |
+
0.9.2
|
| 2555 |
+
-----
|
| 2556 |
+
|
| 2557 |
+
- deprecates Pandas versions before 0.18.
|
| 2558 |
+
- throw an error if no admissible pairs in the c-index calculation.
|
| 2559 |
+
Previously a NaN was returned.
|
| 2560 |
+
|
| 2561 |
+
.. _section-120:
|
| 2562 |
+
|
| 2563 |
+
0.9.1
|
| 2564 |
+
-----
|
| 2565 |
+
|
| 2566 |
+
- add two summary functions to Weibull and Exponential fitter, solves
|
| 2567 |
+
#224
|
| 2568 |
+
|
| 2569 |
+
.. _section-121:
|
| 2570 |
+
|
| 2571 |
+
0.9.0
|
| 2572 |
+
-----
|
| 2573 |
+
|
| 2574 |
+
- new prediction function in ``CoxPHFitter``,
|
| 2575 |
+
``predict_log_hazard_relative_to_mean``, that mimics what R’s
|
| 2576 |
+
``predict.coxph`` does.
|
| 2577 |
+
- removing the ``predict`` method in CoxPHFitter and
|
| 2578 |
+
AalenAdditiveFitter. This is because the choice of ``predict_median``
|
| 2579 |
+
as a default was causing too much confusion, and no other natural
|
| 2580 |
+
choice as a default was available. All other ``predict_`` methods
|
| 2581 |
+
remain.
|
| 2582 |
+
- Default predict method in ``k_fold_cross_validation`` is now
|
| 2583 |
+
``predict_expectation``
|
| 2584 |
+
|
| 2585 |
+
.. _section-122:
|
| 2586 |
+
|
| 2587 |
+
0.8.1 - 2015-08-01
|
| 2588 |
+
------------------
|
| 2589 |
+
|
| 2590 |
+
- supports matplotlib 1.5.
|
| 2591 |
+
- introduction of a param ``nn_cumulative_hazards`` in
|
| 2592 |
+
AalenAdditiveModel’s ``__init__`` (default True). This parameter will
|
| 2593 |
+
truncate all negative cumulative hazards in prediction methods to
|
| 2594 |
+
0.
|
| 2595 |
+
- bug fixes including:
|
| 2596 |
+
|
| 2597 |
+
- fixed issue where the while loop in ``_newton_rhaphson`` would
|
| 2598 |
+
break too early causing a variable not to be set properly.
|
| 2599 |
+
- scaling of smooth hazards in NelsonAalenFitter was off by a factor
|
| 2600 |
+
of 0.5.
|
| 2601 |
+
|
| 2602 |
+
.. _section-123:
|
| 2603 |
+
|
| 2604 |
+
0.8.0
|
| 2605 |
+
-----
|
| 2606 |
+
|
| 2607 |
+
- reorganized lifelines directories:
|
| 2608 |
+
|
| 2609 |
+
- moved test files out of main directory.
|
| 2610 |
+
- moved ``utils.py`` into its own directory.
|
| 2611 |
+
- moved all estimators to the ``fitters`` directory.
|
| 2612 |
+
|
| 2613 |
+
- added a ``at_risk`` column to the output of
|
| 2614 |
+
``group_survival_table_from_events`` and
|
| 2615 |
+
``survival_table_from_events``
|
| 2616 |
+
- added sample size and power calculations for statistical tests. See
|
| 2617 |
+
``lifelines.statistics.sample_size_necessary_under_cph`` and
|
| 2618 |
+
``lifelines.statistics.power_under_cph``.
|
| 2619 |
+
- fixed a bug when using KaplanMeierFitter for left-censored data.
|
| 2620 |
+
|
| 2621 |
+
.. _section-124:
|
| 2622 |
+
|
| 2623 |
+
0.7.1
|
| 2624 |
+
-----
|
| 2625 |
+
|
| 2626 |
+
- addition of a l2 ``penalizer`` to ``CoxPHFitter``.
|
| 2627 |
+
- dropped Fortran implementation in favor of an efficient Python version. Lifelines
|
| 2628 |
+
is pure python once again!
|
| 2629 |
+
- addition of ``strata`` keyword argument to ``CoxPHFitter`` to allow
|
| 2630 |
+
for stratification of a single or set of categorical variables in
|
| 2631 |
+
your dataset.
|
| 2632 |
+
- ``datetimes_to_durations`` now accepts a list as ``na_values``, so
|
| 2633 |
+
multiple values can be checked.
|
| 2634 |
+
- fixed a bug in ``datetimes_to_durations`` where ``fill_date`` was not
|
| 2635 |
+
properly being applied.
|
| 2636 |
+
- Changed warning in ``datetimes_to_durations`` to be correct.
|
| 2637 |
+
- refactor each fitter into its own submodule. For now, the tests are
|
| 2638 |
+
still in the same file. This will also *not* break the API.
|
| 2639 |
+
|
| 2640 |
+
.. _section-125:
|
| 2641 |
+
|
| 2642 |
+
0.7.0 - 2015-03-01
|
| 2643 |
+
------------------
|
| 2644 |
+
|
| 2645 |
+
- allow for multiple fitters to be passed into
|
| 2646 |
+
``k_fold_cross_validation``.
|
| 2647 |
+
- statistical tests in ``lifelines.statistics`` now return a
|
| 2648 |
+
``StatisticalResult`` object with properties like ``p_value``,
|
| 2649 |
+
``test_results``, and ``summary``.
|
| 2650 |
+
- fixed a bug in how log-rank statistical tests are performed. The
|
| 2651 |
+
covariance matrix was not being correctly calculated. This resulted
|
| 2652 |
+
in slightly different p-values.
|
| 2653 |
+
- ``WeibullFitter``, ``ExponentialFitter``, ``KaplanMeierFitter`` and
|
| 2654 |
+
``BreslowFlemingHarringtonFitter`` all have a
|
| 2655 |
+
``conditional_time_to_event_`` property that measures the median
|
| 2656 |
+
duration remaining until the death event, given survival up until
|
| 2657 |
+
time t.
|
| 2658 |
+
|
| 2659 |
+
.. _section-126:
|
| 2660 |
+
|
| 2661 |
+
0.6.1
|
| 2662 |
+
-----
|
| 2663 |
+
|
| 2664 |
+
- addition of ``median_`` property to ``WeibullFitter`` and
|
| 2665 |
+
``ExponentialFitter``.
|
| 2666 |
+
- ``WeibullFitter`` and ``ExponentialFitter`` will use integer
|
| 2667 |
+
timelines instead of float provided by ``linspace``. This is so if
|
| 2668 |
+
your work is to sum up the survival function (for expected values or
|
| 2669 |
+
something similar), it’s more difficult to make a mistake.
|
| 2670 |
+
|
| 2671 |
+
.. _section-127:
|
| 2672 |
+
|
| 2673 |
+
0.6.0 - 2015-02-04
|
| 2674 |
+
------------------
|
| 2675 |
+
|
| 2676 |
+
- Inclusion of the univariate fitters ``WeibullFitter`` and
|
| 2677 |
+
``ExponentialFitter``.
|
| 2678 |
+
- Removing ``BayesianFitter`` from lifelines.
|
| 2679 |
+
- Added new penalization scheme to AalenAdditiveFitter. You can now add
|
| 2680 |
+
a smoothing penalizer that will try to keep subsequent values of a
|
| 2681 |
+
hazard curve close together. The penalizing coefficient is
|
| 2682 |
+
``smoothing_penalizer``.
|
| 2683 |
+
- Changed ``penalizer`` keyword arg to ``coef_penalizer`` in
|
| 2684 |
+
AalenAdditiveFitter.
|
| 2685 |
+
- new ``ridge_regression`` function in ``utils.py`` to perform linear
|
| 2686 |
+
regression with l2 penalizer terms.
|
| 2687 |
+
- Matplotlib is no longer a mandatory dependency.
|
| 2688 |
+
- ``.predict(time)`` method on univariate fitters can now accept a
|
| 2689 |
+
scalar (and returns a scalar) and an iterable (and returns a numpy
|
| 2690 |
+
array)
|
| 2691 |
+
- In ``KaplanMeierFitter``, ``epsilon`` has been renamed to
|
| 2692 |
+
``precision``.
|
| 2693 |
+
|
| 2694 |
+
.. _section-128:
|
| 2695 |
+
|
| 2696 |
+
0.5.1 - 2014-12-24
|
| 2697 |
+
------------------
|
| 2698 |
+
|
| 2699 |
+
- New API for ``CoxPHFitter`` and ``AalenAdditiveFitter``: the default
|
| 2700 |
+
arguments for ``event_col`` and ``duration_col``. ``duration_col`` is
|
| 2701 |
+
now mandatory, and ``event_col`` now accepts a column, or by default,
|
| 2702 |
+
``None``, which assumes all events are observed (non-censored).
|
| 2703 |
+
- Fix statistical tests.
|
| 2704 |
+
- Allow negative durations in Fitters.
|
| 2705 |
+
- New API in ``survival_table_from_events``: ``min_observations`` is
|
| 2706 |
+
replaced by ``birth_times`` (default ``None``).
|
| 2707 |
+
- New API in ``CoxPHFitter`` for summary: ``summary`` will return a
|
| 2708 |
+
dataframe with statistics, ``print_summary()`` will print the
|
| 2709 |
+
dataframe (plus some other statistics) in a pretty manner.
|
| 2710 |
+
- Adding “At Risk” counts option to univariate fitter ``plot`` methods,
|
| 2711 |
+
``.plot(at_risk_counts=True)``, and the function
|
| 2712 |
+
``lifelines.plotting.add_at_risk_counts``.
|
| 2713 |
+
- Fix bug in Epanechnikov kernel.
|
| 2714 |
+
|
| 2715 |
+
.. _section-129:
|
| 2716 |
+
|
| 2717 |
+
0.5.0 - 2014-12-07
|
| 2718 |
+
------------------
|
| 2719 |
+
|
| 2720 |
+
- move testing to py.test
|
| 2721 |
+
- refactor tests into smaller files
|
| 2722 |
+
- make
|
| 2723 |
+
``test_pairwise_logrank_test_with_identical_data_returns_inconclusive``
|
| 2724 |
+
a better test
|
| 2725 |
+
- add test for summary()
|
| 2726 |
+
- Alternate metrics can be used for ``k_fold_cross_validation``.
|
| 2727 |
+
|
| 2728 |
+
.. _section-130:
|
| 2729 |
+
|
| 2730 |
+
0.4.4 - 2014-11-27
|
| 2731 |
+
------------------
|
| 2732 |
+
|
| 2733 |
+
- Lots of improvements to numerical stability (but some things
|
| 2734 |
+
still need work)
|
| 2735 |
+
- Additions to ``summary`` in CoxPHFitter.
|
| 2736 |
+
- Make all prediction methods output a DataFrame
|
| 2737 |
+
- Fixes bug in 1-d input not returning in CoxPHFitter
|
| 2738 |
+
- Lots of new tests.
|
| 2739 |
+
|
| 2740 |
+
.. _section-131:
|
| 2741 |
+
|
| 2742 |
+
0.4.3 - 2014-07-23
|
| 2743 |
+
------------------
|
| 2744 |
+
|
| 2745 |
+
- refactoring of ``qth_survival_times``: it can now accept an iterable
|
| 2746 |
+
(or a scalar still) of probabilities in the q argument, and will
|
| 2747 |
+
return a DataFrame with these as columns. If len(q)==1 and a single
|
| 2748 |
+
survival function is given, will return a scalar, not a DataFrame.
|
| 2749 |
+
Also some good speed improvements.
|
| 2750 |
+
- KaplanMeierFitter and NelsonAalenFitter now have a ``_label``
|
| 2751 |
+
property that is passed in during the fit.
|
| 2752 |
+
- KaplanMeierFitter/NelsonAalenFitter’s initial ``alpha`` value is
|
| 2753 |
+
overwritten if a new ``alpha`` value is passed in during the ``fit``.
|
| 2754 |
+
- New method for KaplanMeierFitter: ``conditional_time_to``. This
|
| 2755 |
+
returns a DataFrame of the estimate: med(S(t \| T>s)) - s, human
|
| 2756 |
+
readable: the estimated time left of living, given an individual is
|
| 2757 |
+
aged s.
|
| 2758 |
+
- Adds option ``include_likelihood`` to CoxPHFitter fit method to save
|
| 2759 |
+
the final log-likelihood value.
|
| 2760 |
+
|
| 2761 |
+
.. _section-132:
|
| 2762 |
+
|
| 2763 |
+
0.4.2 - 2014-06-19
|
| 2764 |
+
------------------
|
| 2765 |
+
|
| 2766 |
+
- Massive speed improvements to CoxPHFitter.
|
| 2767 |
+
- Additional prediction method: ``predict_percentile`` is available on
|
| 2768 |
+
CoxPHFitter and AalenAdditiveFitter. Given a percentile, p, this
|
| 2769 |
+
function returns the value t such that *S(t \| x) = p*. It is a
|
| 2770 |
+
generalization of ``predict_median``.
|
| 2771 |
+
- Additional kwargs in ``k_fold_cross_validation`` that will accept
|
| 2772 |
+
different prediction methods (default is ``predict_median``).
|
| 2773 |
+
- Bug fix in CoxPHFitter ``predict_expectation`` function.
|
| 2774 |
+
- Correct spelling mistake in Newton-Raphson algorithm.
|
| 2775 |
+
- ``datasets`` now contains functions for generating the respective
|
| 2776 |
+
datasets, ex: ``generate_waltons_dataset``.
|
| 2777 |
+
- Bumping up the number of samples in statistical tests to prevent them
|
| 2778 |
+
from failing so often (this is a stop-gap)
|
| 2779 |
+
- pep8 everything
|
| 2780 |
+
|
| 2781 |
+
.. _section-133:
|
| 2782 |
+
|
| 2783 |
+
0.4.1.1
|
| 2784 |
+
-------
|
| 2785 |
+
|
| 2786 |
+
- Ability to specify default printing in statistical tests with the
|
| 2787 |
+
``suppress_print`` keyword argument (default False).
|
| 2788 |
+
- For the multivariate log rank test, the inverse step has been
|
| 2789 |
+
replaced with the generalized inverse. This seems to be what other
|
| 2790 |
+
packages use.
|
| 2791 |
+
- Adding more robust cross validation scheme based on issue #67.
|
| 2792 |
+
- fixing ``regression_dataset`` in ``datasets``.
|
| 2793 |
+
|
| 2794 |
+
.. _section-134:
|
| 2795 |
+
|
| 2796 |
+
0.4.1 - 2014-06-11
|
| 2797 |
+
------------------
|
| 2798 |
+
|
| 2799 |
+
- ``CoxFitter`` is now known as ``CoxPHFitter``
|
| 2800 |
+
- refactoring some tests that used redundant data from
|
| 2801 |
+
``lifelines.datasets``.
|
| 2802 |
+
- Adding cross validation: in ``utils`` is a new
|
| 2803 |
+
``k_fold_cross_validation`` for model selection in regression
|
| 2804 |
+
problems.
|
| 2805 |
+
- Change CoxPHFitter’s fit method’s ``display_output`` to ``False``.
|
| 2806 |
+
- fixing bug in CoxPHFitter’s ``_compute_baseline_hazard`` that errored
|
| 2807 |
+
when sending Series objects to ``survival_table_from_events``.
|
| 2808 |
+
- CoxPHFitter’s ``fit`` now looks for columns with too low variance, and
|
| 2809 |
+
halts NR algorithm if a NaN is found.
|
| 2810 |
+
- Adding a Changelog.
|
| 2811 |
+
- more sanitizing for the statistical tests =)
|
| 2812 |
+
|
| 2813 |
+
.. _section-135:
|
| 2814 |
+
|
| 2815 |
+
0.4.0 - 2014-06-08
|
| 2816 |
+
------------------
|
| 2817 |
+
|
| 2818 |
+
- ``CoxFitter`` implements Cox Proportional Hazards model in lifelines.
|
| 2819 |
+
- lifelines moves to wheel distributions.
|
| 2820 |
+
- tests in the ``statistics`` module now prints the summary (and still
|
| 2821 |
+
return the regular values)
|
| 2822 |
+
- new ``BaseFitter`` class is inherited by all fitters.
|
lifelines/source/docs/Citing lifelines.rst
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.. image:: https://i.imgur.com/EOowdSD.png
|
| 2 |
+
|
| 3 |
+
-------------------------------------
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
Citing lifelines
|
| 7 |
+
==================================
|
| 8 |
+
|
| 9 |
+
*lifelines* is published in JOSS (August 2019):
|
| 10 |
+
|
| 11 |
+
.. code-block:: python
|
| 12 |
+
|
| 13 |
+
Davidson-Pilon, (2019). lifelines: survival analysis in Python. Journal of Open Source Software, 4(40), 1317, https://doi.org/10.21105/joss.01317
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
.. code-block:: python
|
| 17 |
+
|
| 18 |
+
@article{Davidson-Pilon2019,
|
| 19 |
+
doi = {10.21105/joss.01317},
|
| 20 |
+
url = {https://doi.org/10.21105/joss.01317},
|
| 21 |
+
year = {2019},
|
| 22 |
+
publisher = {The Open Journal},
|
| 23 |
+
volume = {4},
|
| 24 |
+
number = {40},
|
| 25 |
+
pages = {1317},
|
| 26 |
+
author = {Cameron Davidson-Pilon},
|
| 27 |
+
title = {lifelines: survival analysis in Python},
|
| 28 |
+
journal = {Journal of Open Source Software}
|
| 29 |
+
}
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
See also the `Zenodo webpage <https://zenodo.org/record/4816284#.YR0RH9NKgr0>`_ for an up-to-date DOI for the software releases.
|
lifelines/source/docs/Contributing.rst
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Contributing to lifelines
|
| 2 |
+
-------------------------
|
| 3 |
+
|
| 4 |
+
Questions about survival analysis?
|
| 5 |
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
| 6 |
+
|
| 7 |
+
If you are using lifelines for survival analysis and have a question
|
| 8 |
+
about “how do I do X?” or “what does Y do?”, the best place to ask that
|
| 9 |
+
is either in our `discussions
|
| 10 |
+
channel <https://github.com/camdavidsonpilon/lifelines/discussions>`__ or at
|
| 11 |
+
`stats.stackexchange.com <https://stats.stackexchange.com/>`__.
|
| 12 |
+
|
| 13 |
+
Submitting bugs or other errors observed
|
| 14 |
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
| 15 |
+
|
| 16 |
+
We appreciate all bug reports submitted, as this will help the entire
|
| 17 |
+
community get a better product. Please open up an issue in the Github
|
| 18 |
+
Repository. If possible, please provide a code snippet, and what version
|
| 19 |
+
of lifelines you are using.
|
| 20 |
+
|
| 21 |
+
Submitting new feature requests
|
| 22 |
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
| 23 |
+
|
| 24 |
+
Please open up an issue in the Github Repository with as much context as
|
| 25 |
+
possible about the feature you would like to see. Also useful is to link
|
| 26 |
+
to other libraries/software that have that feature.
|
| 27 |
+
|
| 28 |
+
Submitting code, or other changes
|
| 29 |
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
| 30 |
+
|
| 31 |
+
If you are interested in contributing to lifelines (and we thank you for
|
| 32 |
+
the interest!), we recommend first opening up an issue in the GitHub
|
| 33 |
+
repository to discuss the changes. From there, we can together plan how
|
| 34 |
+
to execute the changes. See the Development section below for how to
|
| 35 |
+
set up a local environment.
|
| 36 |
+
|
| 37 |
+
Development
|
| 38 |
+
-----------
|
| 39 |
+
|
| 40 |
+
Setting up a lifelines development environment
|
| 41 |
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
| 42 |
+
|
| 43 |
+
1. From the root directory of ``lifelines`` activate your `virtual
|
| 44 |
+
environment <https://realpython.com/python-virtual-environments-a-primer/>`__
|
| 45 |
+
(if you plan to use one).
|
| 46 |
+
2. Install the development requirements and
|
| 47 |
+
`pre-commit <https://pre-commit.com>`__ hooks. If you are on Mac,
|
| 48 |
+
Linux, or `Windows
|
| 49 |
+
WSL <https://docs.microsoft.com/en-us/windows/wsl/faq>`__ you can
|
| 50 |
+
use the provided
|
| 51 |
+
`Makefile <https://github.com/CamDavidsonPilon/lifelines/blob/master/Makefile>`__.
|
| 52 |
+
Just type ``make`` into the console and you’re ready to start
|
| 53 |
+
developing. This will also install the dev-requirements.
|
| 54 |
+
|
| 55 |
+
Formatting
|
| 56 |
+
~~~~~~~~~~
|
| 57 |
+
|
| 58 |
+
``lifelines`` uses the `black <https://github.com/ambv/black>`__
|
| 59 |
+
python formatter. There are 3 different ways to format your code.
|
| 60 |
+
|
| 61 |
+
1. Use the
|
| 62 |
+
`Makefile <https://github.com/CamDavidsonPilon/lifelines/blob/master/Makefile>`__.
|
| 63 |
+
|
| 64 |
+
``make lint``
|
| 65 |
+
|
| 66 |
+
2. Call ``black`` directly and pass the correct line
|
| 67 |
+
length.
|
| 68 |
+
|
| 69 |
+
``black . -l 120``
|
| 70 |
+
|
| 71 |
+
3. Have your code formatted automatically
|
| 72 |
+
during commit with the ``pre-commit`` hook.
|
| 73 |
+
|
| 74 |
+
* Stage and commit your unformatted changes:
|
| 75 |
+
|
| 76 |
+
``git commit -m "your_commit_message"``
|
| 77 |
+
|
| 78 |
+
* Code that needs to be formatted will “fail” the commit hooks and be
|
| 79 |
+
formatted for you.
|
| 80 |
+
* Stage the newly formatted python code:
|
| 81 |
+
|
| 82 |
+
``git add *.py``
|
| 83 |
+
|
| 84 |
+
* Recall your original commit command and commit again:
|
| 85 |
+
|
| 86 |
+
``git commit -m "your_commit_message"``
|
| 87 |
+
|
| 88 |
+
Running the tests
|
| 89 |
+
~~~~~~~~~~~~~~~~~
|
| 90 |
+
|
| 91 |
+
You can optionally run the test suite after install with
|
| 92 |
+
|
| 93 |
+
``py.test``
|
lifelines/source/docs/Examples.rst
ADDED
|
@@ -0,0 +1,1097 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.. image:: https://i.imgur.com/EOowdSD.png
|
| 2 |
+
|
| 3 |
+
-------------------------------------
|
| 4 |
+
|
| 5 |
+
More examples and recipes
|
| 6 |
+
==================================
|
| 7 |
+
|
| 8 |
+
This section goes through some examples and recipes to help you use *lifelines*.
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
Worked Examples
|
| 13 |
+
####################
|
| 14 |
+
|
| 15 |
+
If you are looking for some full examples of *lifelines*, there are `full Jupyter notebooks and scripts here <https://github.com/CamDavidsonPilon/lifelines/tree/master/examples>`_ and examples and ideas on the `development blog <https://dataorigami.net/blogs/napkin-folding/tagged/lifelines>`_.
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
Statistically compare two populations
|
| 19 |
+
##############################################
|
| 20 |
+
|
| 21 |
+
Often researchers want to compare survival-ness between different populations. Here are some techniques to do that:
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
Logrank test
|
| 26 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 27 |
+
|
| 28 |
+
.. note:: The logrank test has maximum power when the assumption of proportional hazards is true. As a consequence, if the survival functions cross, the logrank test will give an inaccurate assessment of differences.
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
The :func:`lifelines.statistics.logrank_test` function tests whether the "death" generation processes of the two populations are equal:
|
| 32 |
+
|
| 33 |
+
.. code-block:: python
|
| 34 |
+
|
| 35 |
+
from lifelines.statistics import logrank_test
|
| 36 |
+
from lifelines.datasets import load_waltons
|
| 37 |
+
|
| 38 |
+
df = load_waltons()
|
| 39 |
+
ix = df['group'] == 'miR-137'
|
| 40 |
+
T_exp, E_exp = df.loc[ix, 'T'], df.loc[ix, 'E']
|
| 41 |
+
T_con, E_con = df.loc[~ix, 'T'], df.loc[~ix, 'E']
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
results = logrank_test(T_exp, T_con, event_observed_A=E_exp, event_observed_B=E_con)
|
| 45 |
+
results.print_summary()
|
| 46 |
+
|
| 47 |
+
"""
|
| 48 |
+
t_0 = -1
|
| 49 |
+
alpha = 0.95
|
| 50 |
+
null_distribution = chi squared
|
| 51 |
+
df = 1
|
| 52 |
+
use_bonferroni = True
|
| 53 |
+
|
| 54 |
+
---
|
| 55 |
+
test_statistic p
|
| 56 |
+
3.528 0.00034 **
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
"""
|
| 60 |
+
|
| 61 |
+
print(results.p_value) # 0.46759
|
| 62 |
+
print(results.test_statistic) # 0.528
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
If you have more than two populations, you can use :func:`~lifelines.statistics.pairwise_logrank_test` (which compares
|
| 66 |
+
each pair in the same manner as above), or :func:`~lifelines.statistics.multivariate_logrank_test` (which tests the
|
| 67 |
+
hypothesis that all the populations have the same "death" generation process).
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
.. code-block:: python
|
| 71 |
+
|
| 72 |
+
import pandas as pd
|
| 73 |
+
from lifelines.statistics import multivariate_logrank_test
|
| 74 |
+
|
| 75 |
+
df = pd.DataFrame({
|
| 76 |
+
'durations': [5, 3, 9, 8, 7, 4, 4, 3, 2, 5, 6, 7],
|
| 77 |
+
'groups': [0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2], # could be strings too
|
| 78 |
+
'events': [1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0],
|
| 79 |
+
})
|
| 80 |
+
|
| 81 |
+
results = multivariate_logrank_test(df['durations'], df['groups'], df['events'])
|
| 82 |
+
results.print_summary()
|
| 83 |
+
|
| 84 |
+
"""
|
| 85 |
+
t_0 = -1
|
| 86 |
+
alpha = 0.95
|
| 87 |
+
null_distribution = chi squared
|
| 88 |
+
df = 2
|
| 89 |
+
|
| 90 |
+
---
|
| 91 |
+
test_statistic p
|
| 92 |
+
1.0800 0.5827
|
| 93 |
+
---
|
| 94 |
+
"""
|
| 95 |
+
|
| 96 |
+
The logrank test statistic is calculated from the differences between the observed deaths for a group and expected
|
| 97 |
+
deaths, under the null hypothesis that all groups share the same survival curve, summed across all ordered death times.
|
| 98 |
+
It therefore weights differences between the survival curves equally at each death time, resulting in maximum power
|
| 99 |
+
when the assumption of proportional hazards is true. To test for early or late differences in survival between
|
| 100 |
+
groups, a weighted logrank test that is more sensitive to non-proportional hazards might be a better choice.
|
| 101 |
+
|
| 102 |
+
Four types of weighted logrank test are currently available in lifelines through the ``weightings`` argument:
|
| 103 |
+
the Wilcoxon (``weightings='wilcoxon'``), Tarone-Ware (``weightings='tarone-ware'``), Peto (``weightings='peto'``)
|
| 104 |
+
and Fleming-Harrington (``weightings='fleming-harrington'``) tests.
|
| 105 |
+
The following weightings are applied at the ith ordered failure time, :math:`t_{i}`:
|
| 106 |
+
|
| 107 |
+
.. math:: \text{Wilcoxon:}\quad n_i
|
| 108 |
+
.. math:: \text{Tarone-Ware:}\quad \sqrt{n_i}
|
| 109 |
+
.. math:: \text{Peto:}\quad \bar{S}(t_i)
|
| 110 |
+
.. math:: \text{Fleming-Harrington:}\quad \hat{S}(t_i)^p \times (1 - \hat{S}(t_i))^q
|
| 111 |
+
|
| 112 |
+
where :math:`n_i` is the number at risk just prior to time :math:`t_{i}`, :math:`\bar{S}(t_i)` is
|
| 113 |
+
Peto-Peto's modified survival estimate and :math:`\hat{S}(t_i)` is the left-continuous
|
| 114 |
+
Kaplan-Meier survival estimate at time :math:`t_{i}`.
|
| 115 |
+
|
| 116 |
+
The Wilcoxon, Tarone-Ware and Peto tests apply more weight to earlier death times. The Peto test is more robust than
|
| 117 |
+
the Wilcoxon or Tarone-Ware tests when many observations are censored. When p > q, the Fleming-Harrington test
|
| 118 |
+
applies more weight to earlier death times whilst when p < q, it is more sensitive to late differences (for p=q=0 it
|
| 119 |
+
reduces to the unweighted logrank test). The choice of which test to perform should be made in advance and not
|
| 120 |
+
retrospectively to avoid introducing bias.
|
| 121 |
+
|
| 122 |
+
.. code-block:: python
|
| 123 |
+
|
| 124 |
+
import pandas as pd
|
| 125 |
+
from lifelines.statistics import multivariate_logrank_test
|
| 126 |
+
|
| 127 |
+
df = pd.DataFrame({
|
| 128 |
+
'durations': [5, 3, 9, 8, 7, 4, 4, 3, 2, 5, 6, 7],
|
| 129 |
+
'groups': [0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2], # could be strings too
|
| 130 |
+
'events': [1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0],
|
| 131 |
+
})
|
| 132 |
+
|
| 133 |
+
results = multivariate_logrank_test(df['durations'], df['groups'], df['events'], weightings='peto')
|
| 134 |
+
results.print_summary()
|
| 135 |
+
|
| 136 |
+
"""
|
| 137 |
+
t_0 = -1
|
| 138 |
+
null_distribution = chi squared
|
| 139 |
+
degrees_of_freedom = 2
|
| 140 |
+
test_name = multivariate_Peto_test
|
| 141 |
+
---
|
| 142 |
+
test_statistic p -log2(p)
|
| 143 |
+
0.95 0.62 0.68
|
| 144 |
+
"""
|
| 145 |
+
|
| 146 |
+
Survival differences at a point in time
|
| 147 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 148 |
+
|
| 149 |
+
Often analysts want to compare the survival-ness of groups at specific times, rather than comparing the entire survival curves against each other. For example, analysts may be interested in 5-year survival. Statistically comparing the naive Kaplan-Meier points at a specific time
|
| 150 |
+
actually has reduced power. By transforming the Kaplan-Meier curve, we can recover more power. The function :func:`lifelines.statistics.survival_difference_at_fixed_point_in_time_test` uses
|
| 151 |
+
the log(-log) transformation implicitly and compares the survival-ness of populations at a specific point in time using a chi-squared test.
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
|
| 155 |
+
.. code-block:: python
|
| 156 |
+
|
| 157 |
+
from lifelines.statistics import survival_difference_at_fixed_point_in_time_test
|
| 158 |
+
from lifelines.datasets import load_waltons
|
| 159 |
+
|
| 160 |
+
df = load_waltons()
|
| 161 |
+
ix = df['group'] == 'miR-137'
|
| 162 |
+
T_exp, E_exp = df.loc[ix, 'T'], df.loc[ix, 'E']
|
| 163 |
+
T_con, E_con = df.loc[~ix, 'T'], df.loc[~ix, 'E']
|
| 164 |
+
|
| 165 |
+
kmf_exp = KaplanMeierFitter(label="exp").fit(T_exp, E_exp)
|
| 166 |
+
kmf_con = KaplanMeierFitter(label="con").fit(T_con, E_con)
|
| 167 |
+
|
| 168 |
+
point_in_time = 10.
|
| 169 |
+
results = survival_difference_at_fixed_point_in_time_test(point_in_time, kmf_exp, kmf_con)
|
| 170 |
+
results.print_summary()
|
| 171 |
+
|
| 172 |
+
"""
|
| 173 |
+
t_0 = -1
|
| 174 |
+
null_distribution = chi squared
|
| 175 |
+
degrees_of_freedom = 1
|
| 176 |
+
point_in_time = 10.0
|
| 177 |
+
test_name = survival_difference_at_fixed_point_in_time_test
|
| 178 |
+
---
|
| 179 |
+
test_statistic p -log2(p)
|
| 180 |
+
4.77 0.03 5.11
|
| 181 |
+
"""
|
| 182 |
+
|
| 183 |
+
|
| 184 |
+
Moreover, we can plot the two survival curves and compare them at the fixed point in time:
|
| 185 |
+
|
| 186 |
+
|
| 187 |
+
.. code-block:: python
|
| 188 |
+
|
| 189 |
+
kmf_exp.plot_survival_function(point_in_time=point_in_time)
|
| 190 |
+
kmf_con.plot_survival_function(point_in_time=point_in_time)
|
| 191 |
+
|
| 192 |
+
.. image:: images/plot_survival_difference_at_fixed_point_in_time_test.png
|
| 193 |
+
|
| 194 |
+
|
| 195 |
+
We can see that the experimental group's survival function value (blue) is lower than the control group's value (orange).
|
| 196 |
+
It is worth observing that at that particular point, the confidence intervals for both groups overlap to some extent, which is not consistently observed at all other time points.
|
| 197 |
+
|
| 198 |
+
|
| 199 |
+
Restricted mean survival times (RMST)
|
| 200 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 201 |
+
*lifelines* has a function to accurately compute the restricted mean survival time, defined as
|
| 202 |
+
|
| 203 |
+
|
| 204 |
+
.. math:: \text{RMST}(t) = \int_0^t S(\tau) d\tau
|
| 205 |
+
|
| 206 |
+
|
| 207 |
+
This is a good metric for comparing two survival curves, as their difference represents the area between the curves (see figure below) which is a measure of "time lost". The upper limit of the integral above is often finite because the tail of the estimated survival curve has high variance and can strongly influence the integral.
|
| 208 |
+
|
| 209 |
+
.. code-block:: python
|
| 210 |
+
|
| 211 |
+
from lifelines.utils import restricted_mean_survival_time
|
| 212 |
+
from lifelines.datasets import load_waltons
|
| 213 |
+
from lifelines import KaplanMeierFitter
|
| 214 |
+
|
| 215 |
+
df = load_waltons()
|
| 216 |
+
ix = df['group'] == 'miR-137'
|
| 217 |
+
T, E = df['T'], df['E']
|
| 218 |
+
|
| 219 |
+
time_limit = 50
|
| 220 |
+
|
| 221 |
+
kmf_exp = KaplanMeierFitter().fit(T[ix], E[ix], label='exp')
|
| 222 |
+
rmst_exp = restricted_mean_survival_time(kmf_exp, t=time_limit)
|
| 223 |
+
|
| 224 |
+
kmf_con = KaplanMeierFitter().fit(T[~ix], E[~ix], label='control')
|
| 225 |
+
rmst_con = restricted_mean_survival_time(kmf_con, t=time_limit)
|
| 226 |
+
|
| 227 |
+
|
| 228 |
+
|
| 229 |
+
Furthermore, there exist plotting functions to plot the RMST:
|
| 230 |
+
|
| 231 |
+
.. code-block:: python
|
| 232 |
+
|
| 233 |
+
from matplotlib import pyplot as plt
|
| 234 |
+
from lifelines.plotting import rmst_plot
|
| 235 |
+
|
| 236 |
+
ax = plt.subplot(311)
|
| 237 |
+
rmst_plot(kmf_exp, t=time_limit, ax=ax)
|
| 238 |
+
|
| 239 |
+
|
| 240 |
+
ax = plt.subplot(312)
|
| 241 |
+
rmst_plot(kmf_con, t=time_limit, ax=ax)
|
| 242 |
+
|
| 243 |
+
|
| 244 |
+
ax = plt.subplot(313)
|
| 245 |
+
rmst_plot(kmf_exp, model2=kmf_con, t=time_limit, ax=ax)
|
| 246 |
+
|
| 247 |
+
|
| 248 |
+
|
| 249 |
+
.. image:: images/rmst_example.png
|
| 250 |
+
|
| 251 |
+
|
| 252 |
+
|
| 253 |
+
Model selection using lifelines
|
| 254 |
+
#####################################################
|
| 255 |
+
|
| 256 |
+
If using *lifelines* for prediction work, it's ideal that you perform some type of cross-validation scheme. This cross-validation allows you to be confident that your out-of-sample predictions will work well in practice. It also allows you to choose between multiple models.
|
| 257 |
+
|
| 258 |
+
*lifelines* has a built-in k-fold cross-validation function. For example, consider the following:
|
| 259 |
+
|
| 260 |
+
.. code-block:: python
|
| 261 |
+
|
| 262 |
+
import numpy as np
|
| 263 |
+
from lifelines import AalenAdditiveFitter, CoxPHFitter
|
| 264 |
+
from lifelines.datasets import load_regression_dataset
|
| 265 |
+
from lifelines.utils import k_fold_cross_validation
|
| 266 |
+
|
| 267 |
+
df = load_regression_dataset()
|
| 268 |
+
|
| 269 |
+
#create the three models we'd like to compare.
|
| 270 |
+
aaf_1 = AalenAdditiveFitter(coef_penalizer=0.5)
|
| 271 |
+
aaf_2 = AalenAdditiveFitter(coef_penalizer=10)
|
| 272 |
+
cph = CoxPHFitter()
|
| 273 |
+
|
| 274 |
+
print(np.mean(k_fold_cross_validation(cph, df, duration_col='T', event_col='E', scoring_method="concordance_index")))
|
| 275 |
+
print(np.mean(k_fold_cross_validation(aaf_1, df, duration_col='T', event_col='E', scoring_method="concordance_index")))
|
| 276 |
+
print(np.mean(k_fold_cross_validation(aaf_2, df, duration_col='T', event_col='E', scoring_method="concordance_index")))
|
| 277 |
+
|
| 278 |
+
From these results, Aalen's Additive model with a penalizer of 10 is the best model for predicting future survival times.
|
| 279 |
+
|
| 280 |
+
*lifelines* also has wrappers to use scikit-learn's cross validation and grid search tools. See `how to use lifelines with scikit learn <https://lifelines.readthedocs.io/en/latest/Compatibility%20with%20scikit-learn.html>`_.
|
| 281 |
+
|
| 282 |
+
Selecting a parametric model using QQ plots
|
| 283 |
+
###############################################
|
| 284 |
+
|
| 285 |
+
QQ plots normally are constructed by sorting the values. However, this isn't appropriate when there is censored data. In *lifelines*, there are routines to still create QQ plots with censored data. These are available under :func:`lifelines.plotting.qq_plot`, which accepts a fitted parametric lifelines model.
|
| 286 |
+
|
| 287 |
+
.. code-block:: python
|
| 288 |
+
|
| 289 |
+
from lifelines import *
|
| 290 |
+
from lifelines.plotting import qq_plot
|
| 291 |
+
|
| 292 |
+
# generate some fake log-normal data
|
| 293 |
+
N = 1000
|
| 294 |
+
T_actual = np.exp(np.random.randn(N))
|
| 295 |
+
C = np.exp(np.random.randn(N))
|
| 296 |
+
E = T_actual < C
|
| 297 |
+
T = np.minimum(T_actual, C)
|
| 298 |
+
|
| 299 |
+
fig, axes = plt.subplots(2, 2, figsize=(8, 6))
|
| 300 |
+
axes = axes.reshape(4,)
|
| 301 |
+
|
| 302 |
+
for i, model in enumerate([WeibullFitter(), LogNormalFitter(), LogLogisticFitter(), ExponentialFitter()]):
|
| 303 |
+
model.fit(T, E)
|
| 304 |
+
qq_plot(model, ax=axes[i])
|
| 305 |
+
|
| 306 |
+
.. image:: images/qq_plot.png
|
| 307 |
+
|
| 308 |
+
|
| 309 |
+
This graphical test can be used to invalidate models. For example, in the above figure, we can see that only the log-normal parametric model is appropriate (we expect some deviation in the tails, but not too much). Another use case is choosing the correct parametric AFT model.
|
| 310 |
+
|
| 311 |
+
The :func:`~lifelines.plotting.qq_plot` function also works with left-censored data.
|
| 312 |
+
|
| 313 |
+
Selecting a parametric model using AIC
|
| 314 |
+
###############################################
|
| 315 |
+
|
| 316 |
+
|
| 317 |
+
A natural way to compare different models is the AIC:
|
| 318 |
+
|
| 319 |
+
.. math:: \text{AIC}(\text{model}) = -2 \text{ll} + 2k
|
| 320 |
+
|
| 321 |
+
where :math:`k` is the number of parameters (degrees-of-freedom) of the model and :math:`\text{ll}` is the maximum log-likelihood. The model with the lowest AIC is desirable, since it represents a trade-off between maximizing the log-likelihood and using as few parameters as possible.
|
| 322 |
+
|
| 323 |
+
All lifelines models have the `AIC_` property after fitting.
|
| 324 |
+
|
| 325 |
+
|
| 326 |
+
Furthermore, *lifelines* has a built-in function to automate AIC comparisons between univariate parametric models:
|
| 327 |
+
|
| 328 |
+
.. code:: python
|
| 329 |
+
|
| 330 |
+
from lifelines.utils import find_best_parametric_model
|
| 331 |
+
from lifelines.datasets import load_lymph_node
|
| 332 |
+
|
| 333 |
+
T = load_lymph_node()['rectime']
|
| 334 |
+
E = load_lymph_node()['censrec']
|
| 335 |
+
|
| 336 |
+
best_model, best_aic_ = find_best_parametric_model(T, E, scoring_method="AIC")
|
| 337 |
+
|
| 338 |
+
print(best_model)
|
| 339 |
+
# <lifelines.SplineFitter:"Spline_estimate", fitted with 686 total observations, 387 right-censored observations>
|
| 340 |
+
|
| 341 |
+
best_model.plot_hazard()
|
| 342 |
+
|
| 343 |
+
.. image:: images/best_parametric_model.png
|
| 344 |
+
:width: 500px
|
| 345 |
+
:align: center
|
| 346 |
+
|
| 347 |
+
Plotting multiple figures on a plot
|
| 348 |
+
##############################################
|
| 349 |
+
|
| 350 |
+
When ``.plot`` is called, an ``axis`` object is returned which can be passed into future calls of ``.plot``:
|
| 351 |
+
|
| 352 |
+
.. code-block:: python
|
| 353 |
+
|
| 354 |
+
kmf.fit(data1)
|
| 355 |
+
ax = kmf.plot_survival_function()
|
| 356 |
+
|
| 357 |
+
kmf.fit(data2)
|
| 358 |
+
ax = kmf.plot_survival_function(ax=ax)
|
| 359 |
+
|
| 360 |
+
|
| 361 |
+
If you have a pandas DataFrame with columns "T", "E", and some categorical variable, then something like the following would work:
|
| 362 |
+
|
| 363 |
+
.. code-block:: python
|
| 364 |
+
|
| 365 |
+
from matplotlib import pyplot as plt
|
| 366 |
+
|
| 367 |
+
from lifelines.datasets import load_waltons
|
| 368 |
+
from lifelines import KaplanMeierFitter
|
| 369 |
+
df = load_waltons()
|
| 370 |
+
|
| 371 |
+
ax = plt.subplot(111)
|
| 372 |
+
kmf = KaplanMeierFitter()
|
| 373 |
+
|
| 374 |
+
for name, grouped_df in df.groupby('group'):
|
| 375 |
+
kmf.fit(grouped_df["T"], grouped_df["E"], label=name)
|
| 376 |
+
kmf.plot_survival_function(ax=ax)
|
| 377 |
+
|
| 378 |
+
|
| 379 |
+
Plotting interval censored data
|
| 380 |
+
##############################################
|
| 381 |
+
|
| 382 |
+
.. note:: New in *lifelines* v0.24.6
|
| 383 |
+
|
| 384 |
+
.. code-block:: python
|
| 385 |
+
|
| 386 |
+
from lifelines.datasets import load_diabetes
|
| 387 |
+
from lifelines.plotting import plot_interval_censored_lifetimes
|
| 388 |
+
|
| 389 |
+
df_sample = load_diabetes().sample(frac=0.02)
|
| 390 |
+
ax = plot_interval_censored_lifetimes(df_sample['left'], df_sample['right'])
|
| 391 |
+
|
| 392 |
+
|
| 393 |
+
.. image:: /images/interval_censored_viz.png
|
| 394 |
+
:width: 500px
|
| 395 |
+
:align: center
|
| 396 |
+
|
| 397 |
+
|
| 398 |
+
Plotting options and styles
|
| 399 |
+
##############################################
|
| 400 |
+
|
| 401 |
+
Let's load some data
|
| 402 |
+
|
| 403 |
+
|
| 404 |
+
.. code-block:: python
|
| 405 |
+
|
| 406 |
+
from lifelines.datasets import load_waltons
|
| 407 |
+
|
| 408 |
+
waltons = load_waltons()
|
| 409 |
+
T = waltons['T']
|
| 410 |
+
E = waltons['E']
|
| 411 |
+
|
| 412 |
+
|
| 413 |
+
Standard
|
| 414 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 415 |
+
|
| 416 |
+
.. code-block:: python
|
| 417 |
+
|
| 418 |
+
|
| 419 |
+
kmf = KaplanMeierFitter()
|
| 420 |
+
kmf.fit(T, E, label="kmf.plot_survival_function()")
|
| 421 |
+
kmf.plot_survival_function()
|
| 422 |
+
|
| 423 |
+
.. image:: /images/normal_plot.png
|
| 424 |
+
:width: 500px
|
| 425 |
+
:align: center
|
| 426 |
+
|
| 427 |
+
Show censors and edit markers
|
| 428 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 429 |
+
|
| 430 |
+
.. code-block:: python
|
| 431 |
+
|
| 432 |
+
kmf.fit(T, E, label="kmf.plot_survival_function(show_censors=True, \ncensor_styles={'ms': 6, 'marker': 's'})")
|
| 433 |
+
kmf.plot_survival_function(show_censors=True, censor_styles={'ms': 6, 'marker': 's'})
|
| 434 |
+
|
| 435 |
+
.. image:: images/flat_plot.png
|
| 436 |
+
:width: 500px
|
| 437 |
+
:align: center
|
| 438 |
+
|
| 439 |
+
|
| 440 |
+
Hide confidence intervals
|
| 441 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 442 |
+
|
| 443 |
+
.. code-block:: python
|
| 444 |
+
|
| 445 |
+
kmf.fit(T, E, label="kmf.plot_survival_function(ci_show=False)")
|
| 446 |
+
kmf.plot_survival_function(ci_show=False)
|
| 447 |
+
|
| 448 |
+
.. image:: /images/ci_show_plot.png
|
| 449 |
+
:width: 500px
|
| 450 |
+
:align: center
|
| 451 |
+
|
| 452 |
+
|
| 453 |
+
Displaying at-risk counts below plots
|
| 454 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 455 |
+
|
| 456 |
+
.. code-block:: python
|
| 457 |
+
|
| 458 |
+
kmf.fit(T, E, label="label name")
|
| 459 |
+
kmf.plot_survival_function(at_risk_counts=True)
|
| 460 |
+
plt.tight_layout()
|
| 461 |
+
|
| 462 |
+
|
| 463 |
+
|
| 464 |
+
.. image:: /images/single_at_risk_plots.png
|
| 465 |
+
:width: 500px
|
| 466 |
+
:align: center
|
| 467 |
+
|
| 468 |
+
Displaying multiple at-risk counts below plots
|
| 469 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 470 |
+
|
| 471 |
+
The function :func:`lifelines.plotting.add_at_risk_counts` allows you to add counts at the bottom of your figures. For example:
|
| 472 |
+
|
| 473 |
+
.. code-block:: python
|
| 474 |
+
|
| 475 |
+
from lifelines import KaplanMeierFitter
|
| 476 |
+
from lifelines.datasets import load_waltons
|
| 477 |
+
|
| 478 |
+
waltons = load_waltons()
|
| 479 |
+
ix = waltons['group'] == 'control'
|
| 480 |
+
|
| 481 |
+
ax = plt.subplot(111)
|
| 482 |
+
|
| 483 |
+
kmf_control = KaplanMeierFitter()
|
| 484 |
+
ax = kmf_control.fit(waltons.loc[ix]['T'], waltons.loc[ix]['E'], label='control').plot_survival_function(ax=ax)
|
| 485 |
+
|
| 486 |
+
kmf_exp = KaplanMeierFitter()
|
| 487 |
+
ax = kmf_exp.fit(waltons.loc[~ix]['T'], waltons.loc[~ix]['E'], label='exp').plot_survival_function(ax=ax)
|
| 488 |
+
|
| 489 |
+
|
| 490 |
+
from lifelines.plotting import add_at_risk_counts
|
| 491 |
+
add_at_risk_counts(kmf_exp, kmf_control, ax=ax)
|
| 492 |
+
plt.tight_layout()
|
| 493 |
+
|
| 494 |
+
will display
|
| 495 |
+
|
| 496 |
+
.. image:: /images/add_at_risk.png
|
| 497 |
+
:width: 500px
|
| 498 |
+
:align: center
|
| 499 |
+
|
| 500 |
+
Transforming survival-table data into *lifelines* format
|
| 501 |
+
#########################################################
|
| 502 |
+
|
| 503 |
+
Some *lifelines* classes are designed for lists or arrays that represent one individual per row. If you instead have data in a *survival table* format, there exists a utility method to get it into *lifelines* format.
|
| 504 |
+
|
| 505 |
+
**Example:** Suppose you have a CSV file with data that looks like this:
|
| 506 |
+
|
| 507 |
+
========================= ================== ============
|
| 508 |
+
time observed deaths censored
|
| 509 |
+
========================= ================== ============
|
| 510 |
+
0 7 0
|
| 511 |
+
1 1 1
|
| 512 |
+
2 2 0
|
| 513 |
+
3 1 2
|
| 514 |
+
4 5 2
|
| 515 |
+
... ... ...
|
| 516 |
+
========================= ================== ============
|
| 517 |
+
|
| 518 |
+
|
| 519 |
+
.. code-block:: python
|
| 520 |
+
|
| 521 |
+
import pandas as pd
|
| 522 |
+
from lifelines.utils import survival_events_from_table
|
| 523 |
+
|
| 524 |
+
df = pd.read_csv('file.csv')
|
| 525 |
+
df = df.set_index('time')
|
| 526 |
+
|
| 527 |
+
T, E, W = survival_events_from_table(df, observed_deaths_col='observed deaths', censored_col='censored')
|
| 528 |
+
# weights, W, is the number of occurrences of each observation - helps with data compression.
|
| 529 |
+
|
| 530 |
+
kmf = KaplanMeierFitter().fit(T, E, weights=W)
|
| 531 |
+
|
| 532 |
+
|
| 533 |
+
Transforming observational data into survival-table format
|
| 534 |
+
##########################################################
|
| 535 |
+
|
| 536 |
+
Perhaps you are interested in viewing the survival table given some durations and censoring vectors.
|
| 537 |
+
|
| 538 |
+
|
| 539 |
+
.. code:: python
|
| 540 |
+
|
| 541 |
+
from lifelines.utils import survival_table_from_events
|
| 542 |
+
|
| 543 |
+
table = survival_table_from_events(T, E)
|
| 544 |
+
print(table.head())
|
| 545 |
+
|
| 546 |
+
"""
|
| 547 |
+
removed observed censored entrance at_risk
|
| 548 |
+
event_at
|
| 549 |
+
0 0 0 0 60 60
|
| 550 |
+
2 2 1 1 0 60
|
| 551 |
+
3 3 1 2 0 58
|
| 552 |
+
4 5 3 2 0 55
|
| 553 |
+
5 12 6 6 0 50
|
| 554 |
+
"""
|
| 555 |
+
|
| 556 |
+
|
| 557 |
+
|
| 558 |
+
Set the index/timeline of an estimate
|
| 559 |
+
##############################################
|
| 560 |
+
|
| 561 |
+
Suppose your dataset has lifetimes grouped near time 60, thus after fitting
|
| 562 |
+
:class:`lifelines.fitters.kaplan_meier_fitter.KaplanMeierFitter`, your survival function might look something like:
|
| 563 |
+
|
| 564 |
+
.. code-block:: python
|
| 565 |
+
|
| 566 |
+
print(kmf.survival_function_)
|
| 567 |
+
|
| 568 |
+
"""
|
| 569 |
+
KM-estimate
|
| 570 |
+
0 1.00
|
| 571 |
+
47 0.99
|
| 572 |
+
49 0.97
|
| 573 |
+
50 0.96
|
| 574 |
+
51 0.95
|
| 575 |
+
52 0.91
|
| 576 |
+
53 0.86
|
| 577 |
+
54 0.84
|
| 578 |
+
55 0.79
|
| 579 |
+
56 0.74
|
| 580 |
+
57 0.71
|
| 581 |
+
58 0.67
|
| 582 |
+
59 0.58
|
| 583 |
+
60 0.49
|
| 584 |
+
61 0.41
|
| 585 |
+
62 0.31
|
| 586 |
+
63 0.24
|
| 587 |
+
64 0.19
|
| 588 |
+
65 0.14
|
| 589 |
+
66 0.10
|
| 590 |
+
68 0.07
|
| 591 |
+
69 0.04
|
| 592 |
+
70 0.02
|
| 593 |
+
71 0.01
|
| 594 |
+
74 0.00
|
| 595 |
+
"""
|
| 596 |
+
|
| 597 |
+
|
| 598 |
+
What you would like is to have a predictable and full index from 40 to 74. (Notice that
|
| 599 |
+
in the above index, the last two time points are not adjacent -- this is because no lifetimes
|
| 599 |
+
were observed at times 72 or 73). This is especially useful for comparing multiple survival functions at specific time points. To do this, all fitter methods accept a ``timeline`` argument:
|
| 601 |
+
|
| 602 |
+
.. code-block:: python
|
| 603 |
+
|
| 604 |
+
kmf.fit(T, timeline=range(40,75))
|
| 605 |
+
print(kmf.survival_function_)
|
| 606 |
+
|
| 607 |
+
"""
|
| 608 |
+
KM-estimate
|
| 609 |
+
40 1.00
|
| 610 |
+
41 1.00
|
| 611 |
+
42 1.00
|
| 612 |
+
43 1.00
|
| 613 |
+
44 1.00
|
| 614 |
+
45 1.00
|
| 615 |
+
46 1.00
|
| 616 |
+
47 0.99
|
| 617 |
+
48 0.99
|
| 618 |
+
49 0.97
|
| 619 |
+
50 0.96
|
| 620 |
+
51 0.95
|
| 621 |
+
52 0.91
|
| 622 |
+
53 0.86
|
| 623 |
+
54 0.84
|
| 624 |
+
55 0.79
|
| 625 |
+
56 0.74
|
| 626 |
+
57 0.71
|
| 627 |
+
58 0.67
|
| 628 |
+
59 0.58
|
| 629 |
+
60 0.49
|
| 630 |
+
61 0.41
|
| 631 |
+
62 0.31
|
| 632 |
+
63 0.24
|
| 633 |
+
64 0.19
|
| 634 |
+
65 0.14
|
| 635 |
+
66 0.10
|
| 636 |
+
67 0.10
|
| 637 |
+
68 0.07
|
| 638 |
+
69 0.04
|
| 639 |
+
70 0.02
|
| 640 |
+
71 0.01
|
| 641 |
+
72 0.01
|
| 642 |
+
73 0.01
|
| 643 |
+
74 0.00
|
| 644 |
+
"""
|
| 645 |
+
|
| 646 |
+
|
| 647 |
+
*lifelines* will intelligently forward-fill the estimates to unseen time points.
|
| 648 |
+
|
| 649 |
+
|
| 650 |
+
Example SQL query to get survival data from a table
|
| 651 |
+
#####################################################
|
| 652 |
+
|
| 653 |
+
Below is a way to get an example dataset from a relational database (this may vary depending on your database):
|
| 654 |
+
|
| 655 |
+
.. code-block:: mysql
|
| 656 |
+
|
| 657 |
+
SELECT
|
| 658 |
+
id,
|
| 659 |
+
DATEDIFF('dd', started_at, COALESCE(ended_at, CURRENT_DATE)) AS "T",
|
| 660 |
+
(ended_at IS NOT NULL) AS "E"
|
| 661 |
+
FROM table
|
| 662 |
+
|
| 663 |
+
Explanation
|
| 664 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 665 |
+
|
| 666 |
+
Each row is an ``id``, a duration, and a boolean indicating whether the event occurred or not. Recall that we denote a
|
| 667 |
+
"True" if the event *did* occur, that is, ``ended_at`` is filled in (we observed the ``ended_at``). Ex:
|
| 668 |
+
|
| 669 |
+
================== ============ ============
|
| 670 |
+
id T E
|
| 671 |
+
================== ============ ============
|
| 672 |
+
10 40 True
|
| 673 |
+
11 42 False
|
| 674 |
+
12 42 False
|
| 675 |
+
13 36 True
|
| 676 |
+
14 33 True
|
| 677 |
+
================== ============ ============
|
| 678 |
+
|
| 679 |
+
|
| 680 |
+
Example SQL queries and transformations to get time varying data
|
| 681 |
+
####################################################################
|
| 682 |
+
|
| 683 |
+
For Cox time-varying models, we discussed what the dataset should look like in :ref:`Dataset creation for time-varying regression`. Typically we have a base dataset, and then we fold in the covariate datasets. Below are some SQL queries and Python transformations from end-to-end.
|
| 684 |
+
|
| 685 |
+
|
| 686 |
+
Base dataset: ``base_df``
|
| 687 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 688 |
+
|
| 689 |
+
.. code-block:: mysql
|
| 690 |
+
|
| 691 |
+
SELECT
|
| 692 |
+
id,
|
| 693 |
+
group,
|
| 694 |
+
DATEDIFF('dd', dt.started_at, COALESCE(dt.ended_at, CURRENT_DATE)) AS "T",
|
| 695 |
+
(ended_at IS NOT NULL) AS "E"
|
| 696 |
+
FROM dimension_table dt
|
| 697 |
+
|
| 698 |
+
|
| 699 |
+
Time-varying variables: ``cv``
|
| 700 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 701 |
+
|
| 702 |
+
.. code-block:: mysql
|
| 703 |
+
|
| 704 |
+
-- this could produce more than 1 row per subject
|
| 705 |
+
SELECT
|
| 706 |
+
id,
|
| 707 |
+
DATEDIFF('dd', dt.started_at, ft.event_at) AS "time",
|
| 708 |
+
ft.var1
|
| 709 |
+
FROM fact_table ft
|
| 710 |
+
JOIN dimension_table dt
|
| 711 |
+
USING(id)
|
| 712 |
+
|
| 713 |
+
|
| 714 |
+
.. code-block:: python
|
| 715 |
+
|
| 716 |
+
from lifelines.utils import to_long_format
|
| 717 |
+
from lifelines.utils import add_covariate_to_timeline
|
| 718 |
+
|
| 719 |
+
base_df = to_long_format(base_df, duration_col="T")
|
| 720 |
+
df = add_covariate_to_timeline(base_df, cv, duration_col="time", id_col="id", event_col="E")
|
| 721 |
+
|
| 722 |
+
|
| 723 |
+
Event variables: ``event_df``
|
| 724 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 725 |
+
|
| 726 |
+
Another very common operation is to add event data to our time-varying dataset. For example, a dataset/SQL table that contains information about the dates of an event (and NULLS if the event didn't occur). An example SQL query may look like:
|
| 727 |
+
|
| 728 |
+
.. code-block:: mysql
|
| 729 |
+
|
| 730 |
+
SELECT
|
| 731 |
+
id,
|
| 732 |
+
DATEDIFF('dd', dt.started_at, ft.event1_at) AS "E1",
|
| 733 |
+
DATEDIFF('dd', dt.started_at, ft.event2_at) AS "E2",
|
| 734 |
+
DATEDIFF('dd', dt.started_at, ft.event3_at) AS "E3"
|
| 735 |
+
...
|
| 736 |
+
FROM dimension_table dt
|
| 737 |
+
|
| 738 |
+
|
| 739 |
+
In Pandas, this may look like:
|
| 740 |
+
|
| 741 |
+
.. code-block:: python
|
| 742 |
+
|
| 743 |
+
"""
|
| 744 |
+
id E1 E2 E3
|
| 745 |
+
0 1 1.0 NaN 2.0
|
| 746 |
+
1 2 NaN 5.0 NaN
|
| 747 |
+
2 3 3.0 5.0 7.0
|
| 748 |
+
...
|
| 749 |
+
"""
|
| 750 |
+
|
| 751 |
+
Initially, this can't be added to our baseline time-varying dataset. Using :func:`lifelines.utils.covariates_from_event_matrix` we can convert a DataFrame like this into one that can be easily added.
|
| 752 |
+
|
| 753 |
+
.. code-block:: python
|
| 754 |
+
|
| 755 |
+
from lifelines.utils import covariates_from_event_matrix
|
| 756 |
+
|
| 757 |
+
cv = covariates_from_event_matrix(event_df, id_col='id')
|
| 758 |
+
print(cv)
|
| 759 |
+
|
| 760 |
+
"""
|
| 761 |
+
id duration E1 E2 E3
|
| 762 |
+
0 1 1.0 1 0 0
|
| 763 |
+
1 1 2.0 0 1 0
|
| 764 |
+
2 2 5.0 0 1 0
|
| 765 |
+
3 3 3.0 1 0 0
|
| 766 |
+
4 3 5.0 0 1 0
|
| 767 |
+
5 3 7.0 0 0 1
|
| 768 |
+
"""
|
| 769 |
+
|
| 770 |
+
base_df = add_covariate_to_timeline(base_df, cv, duration_col="time", id_col="id", event_col="E")
|
| 771 |
+
|
| 772 |
+
|
| 773 |
+
Example cumulative sums over time-varying covariates
|
| 774 |
+
############################################################
|
| 775 |
+
|
| 776 |
+
Often we have either transactional covariate datasets or state covariate datasets. In a transactional dataset, it may make sense to sum up the covariates to represent administration of a treatment over time. For example, in the risky world of start-ups, we may want to sum up the funding amount received at a certain time. We also may be interested in the amount of the last round of funding. Below is an example to do just that:
|
| 777 |
+
|
| 778 |
+
Suppose we have an initial DataFrame of start-ups like:
|
| 779 |
+
|
| 780 |
+
.. code-block:: python
|
| 781 |
+
|
| 782 |
+
seed_df = pd.DataFrame([
|
| 783 |
+
{'id': 'FB', 'E': True, 'T': 12, 'funding': 0},
|
| 784 |
+
{'id': 'SU', 'E': True, 'T': 10, 'funding': 0},
|
| 785 |
+
])
|
| 786 |
+
|
| 787 |
+
|
| 788 |
+
And a covariate DataFrame representing funding rounds like:
|
| 789 |
+
|
| 790 |
+
|
| 791 |
+
.. code-block:: python
|
| 792 |
+
|
| 793 |
+
cv = pd.DataFrame([
|
| 794 |
+
{'id': 'FB', 'funding': 30, 't': 5},
|
| 795 |
+
{'id': 'FB', 'funding': 15, 't': 10},
|
| 796 |
+
{'id': 'FB', 'funding': 50, 't': 15},
|
| 797 |
+
{'id': 'SU', 'funding': 10, 't': 6},
|
| 798 |
+
{'id': 'SU', 'funding': 9, 't': 10},
|
| 799 |
+
])
|
| 800 |
+
|
| 801 |
+
|
| 802 |
+
We can do the following to get both the cumulative funding received and the latest round of funding:
|
| 803 |
+
|
| 804 |
+
.. code-block:: python
|
| 805 |
+
|
| 806 |
+
from lifelines.utils import to_long_format
|
| 807 |
+
from lifelines.utils import add_covariate_to_timeline
|
| 808 |
+
|
| 809 |
+
df = seed_df.pipe(to_long_format, 'T')\
|
| 810 |
+
.pipe(add_covariate_to_timeline, cv, 'id', 't', 'E', cumulative_sum=True)\
|
| 811 |
+
.pipe(add_covariate_to_timeline, cv, 'id', 't', 'E', cumulative_sum=False)
|
| 812 |
+
|
| 813 |
+
|
| 814 |
+
"""
|
| 815 |
+
start cumsum_funding funding stop id E
|
| 816 |
+
0 0 0.0 0.0 5.0 FB False
|
| 817 |
+
1 5 30.0 30.0 10.0 FB False
|
| 818 |
+
2 10 45.0 15.0 12.0 FB True
|
| 819 |
+
3 0 0.0 0.0 6.0 SU False
|
| 820 |
+
4 6 10.0 10.0 10.0 SU False
|
| 821 |
+
5 10 19.0 9.0 10.0 SU True
|
| 822 |
+
"""
|
| 823 |
+
|
| 824 |
+
|
| 825 |
+
Sample size determination under a CoxPH model
|
| 826 |
+
##############################################
|
| 827 |
+
|
| 828 |
+
Suppose you wish to measure the hazard ratio between two populations under the CoxPH model. That is, we want to evaluate the hypothesis
|
| 829 |
+
H0: relative hazard ratio = 1 vs H1: relative hazard ratio != 1, where the relative hazard ratio is :math:`\exp{\left(\beta\right)}` for the experiment group vs the control group. A priori, we are interested in the sample sizes of the two groups necessary to achieve a certain statistical power. To do this in lifelines, there is the :func:`lifelines.statistics.sample_size_necessary_under_cph` function. For example:
|
| 830 |
+
|
| 831 |
+
.. code-block:: python
|
| 832 |
+
|
| 833 |
+
from lifelines.statistics import sample_size_necessary_under_cph
|
| 834 |
+
|
| 835 |
+
desired_power = 0.8
|
| 836 |
+
ratio_of_participants = 1.
|
| 837 |
+
p_exp = 0.25
|
| 838 |
+
p_con = 0.35
|
| 839 |
+
postulated_hazard_ratio = 0.7
|
| 840 |
+
n_exp, n_con = sample_size_necessary_under_cph(desired_power, ratio_of_participants, p_exp, p_con, postulated_hazard_ratio)
|
| 841 |
+
# (421, 421)
|
| 842 |
+
|
| 843 |
+
This assumes you have estimates of the probability of the event occurring for both the experiment and control groups. This could be determined from previous experiments.
|
| 844 |
+
|
| 845 |
+
Power determination under a CoxPH model
|
| 846 |
+
##############################################
|
| 847 |
+
|
| 848 |
+
Suppose you wish to measure the hazard ratio between two populations under the CoxPH model. To determine the statistical power of a hazard ratio hypothesis test, under the CoxPH model, we can use :func:`lifelines.statistics.power_under_cph`. That is, suppose we want to know the probability that we reject the null hypothesis that the relative hazard ratio is 1, assuming the relative hazard ratio is truly different from 1. This function will give you that probability.
|
| 849 |
+
|
| 850 |
+
|
| 851 |
+
.. code-block:: python
|
| 852 |
+
|
| 853 |
+
from lifelines.statistics import power_under_cph
|
| 854 |
+
|
| 855 |
+
n_exp = 50
|
| 856 |
+
n_con = 100
|
| 857 |
+
p_exp = 0.25
|
| 858 |
+
p_con = 0.35
|
| 859 |
+
postulated_hazard_ratio = 0.5
|
| 860 |
+
power = power_under_cph(n_exp, n_con, p_exp, p_con, postulated_hazard_ratio)
|
| 861 |
+
# 0.4957
|
| 862 |
+
|
| 863 |
+
Problems with convergence in the Cox proportional hazard model
|
| 864 |
+
################################################################
|
| 865 |
+
Since the estimation of the coefficients in the Cox proportional hazard model is done using the Newton-Raphson algorithm, there are sometimes problems with convergence. Here are some common symptoms and resolutions:
|
| 866 |
+
|
| 867 |
+
1. First check: look for ``ConvergenceWarning`` in the output. Most often problems in convergence are the result of problems in the dataset. *lifelines* has checks it runs against the dataset before fitting and warnings are outputted to the user.
|
| 868 |
+
|
| 869 |
+
2. ``delta contains nan value(s).``: First try adding ``show_progress=True`` in the ``fit`` function. If the values in ``delta`` grow unbounded, it's possible the ``step_size`` is too large. Try setting it to a small value (0.1-0.5).
|
| 870 |
+
|
| 871 |
+
3. ``Convergence halted due to matrix inversion problems``: This means that there is high collinearity in your dataset. That is, a column is equal to the linear combination of 1 or more other columns. A common cause of this error is dummying categorical variables but not dropping a column, or some hierarchical structure in your dataset. Try to find the relationship by:
|
| 872 |
+
|
| 873 |
+
1. adding a penalizer to the model, ex: `CoxPHFitter(penalizer=0.1).fit(...)` until the model converges. In the `print_summary()`, the coefficients that have high collinearity will have large (absolute) magnitude in the `coefs` column.
|
| 874 |
+
2. using the variance inflation factor (VIF) to find redundant variables.
|
| 875 |
+
3. looking at the correlation matrix of your dataset.
|
| 876 |
+
|
| 877 |
+
4. Some coefficients are many orders of magnitude larger than others, and the standard error of the coefficient is also large *or* there are ``nan``'s in the results. This can be seen using the ``print_summary`` method on a fitted :class:`~lifelines.fitters.coxph_fitter.CoxPHFitter` object.
|
| 878 |
+
|
| 879 |
+
1. Look for a ``ConvergenceWarning`` about variances being too small. The dataset may contain a constant column, which provides no information for the regression (Cox model doesn't have a traditional "intercept" term like other regression models).
|
| 880 |
+
|
| 881 |
+
2. The data is completely separable, which means that there exists a covariate that completely determines whether an event occurred or not. For example, for all "death" events in the dataset, there exists a covariate that is constant amongst all of them. Look for a ``ConvergenceWarning`` after the ``fit`` call. See https://stats.stackexchange.com/questions/11109/how-to-deal-with-perfect-separation-in-logistic-regression
|
| 882 |
+
|
| 883 |
+
3. Related to above, the relationship between a covariate and the duration may be completely determined. For example, if the rank correlation between a covariate and the duration is very close to 1 or -1, then the log-likelihood can be increased arbitrarily using just that covariate. Look for a ``ConvergenceWarning`` after the ``fit`` call.
|
| 884 |
+
|
| 885 |
+
4. Another problem may be a collinear relationship in your dataset. See point 3. above.
|
| 886 |
+
|
| 887 |
+
5. If adding a very small ``penalizer`` significantly changes the results (``CoxPHFitter(penalizer=0.0001)``), then this probably means that the step size in the iterative algorithm is too large. Try decreasing it (``.fit(..., step_size=0.50)`` or smaller), and returning the ``penalizer`` term to 0.
|
| 888 |
+
|
| 889 |
+
6. If using the ``strata`` argument, make sure your stratification group sizes are not too small. Try ``df.groupby(strata).size()``.
|
| 890 |
+
|
| 891 |
+
Adding weights to observations in a Cox model
|
| 892 |
+
##############################################
|
| 893 |
+
|
| 894 |
+
There are two common uses for weights in a model. The first is as a data size reduction technique (known as case weights). If the dataset has more than one subject with identical attributes, including duration and event, then their likelihood contribution is the same as well. Thus, instead of computing the log-likelihood for each individual, we can compute it once and multiply it by the count of users with identical attributes. In practice, this involves first grouping subjects by covariates and counting. For example, using the Rossi dataset, we will use Pandas to group by the attributes (but other data processing tools, like Spark, could do this as well):
|
| 895 |
+
|
| 896 |
+
.. code-block:: python
|
| 897 |
+
|
| 898 |
+
from lifelines.datasets import load_rossi
|
| 899 |
+
|
| 900 |
+
rossi = load_rossi()
|
| 901 |
+
|
| 902 |
+
rossi_weights = rossi.copy()
|
| 903 |
+
rossi_weights['weights'] = 1.
|
| 904 |
+
rossi_weights = rossi_weights.groupby(rossi.columns.tolist())['weights'].sum()\
|
| 905 |
+
.reset_index()
|
| 906 |
+
|
| 907 |
+
|
| 908 |
+
The original dataset has 432 rows, while the grouped dataset has 387 rows plus an additional ``weights`` column. :class:`~lifelines.fitters.coxph_fitter.CoxPHFitter` has an additional parameter to specify which column is the weight column.
|
| 909 |
+
|
| 910 |
+
.. code-block:: python
|
| 911 |
+
|
| 912 |
+
from lifelines import CoxPHFitter
|
| 913 |
+
|
| 914 |
+
cph = CoxPHFitter()
|
| 915 |
+
cph.fit(rossi_weights, 'week', 'arrest', weights_col='weights')
|
| 916 |
+
|
| 917 |
+
|
| 918 |
+
The fitting should be faster, and the results identical to the unweighted dataset. This option is also available in the :class:`~lifelines.fitters.cox_time_varying_fitter.CoxTimeVaryingFitter`.
|
| 919 |
+
|
| 920 |
+
|
| 921 |
+
The second use of weights is sampling weights. These are typically positive, non-integer weights that represent some artificial under/over sampling of observations (ex: inverse probability of treatment weights). It is recommended to set ``robust=True`` in the call to the ``fit`` as the usual standard error is incorrect for sampling weights. The ``robust`` flag will use the sandwich estimator for the standard error.
|
| 922 |
+
|
| 923 |
+
.. warning:: The implementation of the sandwich estimator does not handle ties correctly (under the Efron handling of ties), and will give slightly or significantly different results from other software depending on the frequency of ties.
|
| 924 |
+
|
| 925 |
+
|
| 926 |
+
Correlations between subjects in a Cox model
|
| 927 |
+
###################################################
|
| 928 |
+
|
| 929 |
+
There are cases when your dataset contains correlated subjects, which breaks the independent-and-identically-distributed assumption. What are some cases when this may happen?
|
| 930 |
+
|
| 931 |
+
1. If a subject appears more than once in the dataset (common when subjects can have the event more than once)
|
| 932 |
+
2. If using a matching technique, like propensity-score matching, there is a correlation between pairs.
|
| 933 |
+
|
| 934 |
+
In both cases, the reported standard errors from an unadjusted Cox model will be wrong. In order to adjust for these correlations, there is a ``cluster_col`` keyword in :meth:`~lifelines.fitters.coxph_fitter.CoxPHFitter.fit` that allows you to specify the column in the DataFrame that contains designations for correlated subjects. For example, if subjects in rows 1 & 2 are correlated, but no other subjects are correlated, then the ``cluster_col`` column should have the same value for rows 1 & 2, and all others unique. Another example: for matched pairs, each subject in the pair should have the same value.
|
| 935 |
+
|
| 936 |
+
.. code-block:: python
|
| 937 |
+
|
| 938 |
+
from lifelines.datasets import load_rossi
|
| 939 |
+
from lifelines import CoxPHFitter
|
| 940 |
+
|
| 941 |
+
rossi = load_rossi()
|
| 942 |
+
|
| 943 |
+
# this may come from a database, or other libraries that specialize in matching
|
| 944 |
+
matched_pairs = [
|
| 945 |
+
(156, 230),
|
| 946 |
+
(275, 228),
|
| 947 |
+
(61, 252),
|
| 948 |
+
(364, 201),
|
| 949 |
+
(54, 340),
|
| 950 |
+
(130, 33),
|
| 951 |
+
(183, 145),
|
| 952 |
+
(268, 140),
|
| 953 |
+
(332, 259),
|
| 954 |
+
(314, 413),
|
| 955 |
+
(330, 211),
|
| 956 |
+
(372, 255),
|
| 957 |
+
# ...
|
| 958 |
+
]
|
| 959 |
+
|
| 960 |
+
rossi['id'] = None # we will populate this column
|
| 961 |
+
|
| 962 |
+
for i, pair in enumerate(matched_pairs):
|
| 963 |
+
subjectA, subjectB = pair
|
| 964 |
+
rossi.loc[subjectA, 'id'] = i
|
| 965 |
+
rossi.loc[subjectB, 'id'] = i
|
| 966 |
+
|
| 967 |
+
rossi = rossi.dropna(subset=['id'])
|
| 968 |
+
|
| 969 |
+
cph = CoxPHFitter()
|
| 970 |
+
cph.fit(rossi, 'week', 'arrest', cluster_col='id')
|
| 971 |
+
|
| 972 |
+
Specifying ``cluster_col`` will handle correlations, and invoke the robust sandwich estimator for standard errors (the same as setting ``robust=True``).
|
| 973 |
+
|
| 974 |
+
|
| 975 |
+
|
| 976 |
+
Serialize a *lifelines* model to disk
|
| 977 |
+
##########################################
|
| 978 |
+
|
| 979 |
+
When you want to save (and later load) a *lifelines* model to disk, you can use the `loads` and `dumps` API from the most popular serialization libraries (dill, pickle, joblib):
|
| 980 |
+
|
| 981 |
+
.. code-block:: python
|
| 982 |
+
|
| 983 |
+
from dill import loads, dumps
|
| 984 |
+
from pickle import loads, dumps
|
| 985 |
+
|
| 986 |
+
s_cph = dumps(cph)
|
| 987 |
+
cph_new = loads(s_cph)
|
| 988 |
+
cph_new.summary
|
| 989 |
+
|
| 990 |
+
|
| 991 |
+
s_kmf = dumps(kmf)
|
| 992 |
+
kmf_new = loads(s_kmf)
|
| 993 |
+
kmf_new.survival_function_
|
| 994 |
+
|
| 995 |
+
|
| 996 |
+
The code above saves the trained models as binary objects in memory. To serialize a *lifelines* model to a given path on disk:
|
| 997 |
+
|
| 998 |
+
.. code-block:: python
|
| 999 |
+
|
| 1000 |
+
import pickle
|
| 1001 |
+
|
| 1002 |
+
with open('/path/my.pickle', 'wb') as f:
|
| 1003 |
+
pickle.dump(cph, f) # saving my trained cph model as my.pickle
|
| 1004 |
+
|
| 1005 |
+
with open('/path/my.pickle', 'rb') as f:
|
| 1006 |
+
cph_new = pickle.load(f)
|
| 1007 |
+
|
| 1008 |
+
cph_new.summary # should produce the same output as cph.summary
|
| 1009 |
+
|
| 1010 |
+
|
| 1011 |
+
Produce a LaTeX or HTML table
|
| 1012 |
+
##########################################
|
| 1013 |
+
|
| 1014 |
+
New in version 0.23.1, *lifelines* models now have the ability to output a LaTeX or HTML table from the ``print_summary`` option:
|
| 1015 |
+
|
| 1016 |
+
|
| 1017 |
+
.. code-block:: python
|
| 1018 |
+
|
| 1019 |
+
from lifelines.datasets import load_rossi
|
| 1020 |
+
from lifelines import CoxPHFitter
|
| 1021 |
+
|
| 1022 |
+
rossi = load_rossi()
|
| 1023 |
+
|
| 1024 |
+
cph = CoxPHFitter().fit(rossi, 'week', 'arrest')
|
| 1025 |
+
|
| 1026 |
+
# print a LaTeX table:
|
| 1027 |
+
cph.print_summary(style="latex")
|
| 1028 |
+
|
| 1029 |
+
# print a HTML summary and table:
|
| 1030 |
+
cph.print_summary(style="html")
|
| 1031 |
+
|
| 1032 |
+
|
| 1033 |
+
In order to use the produced table summary in LaTeX, make sure you import the package ``booktabs`` in your preamble (``\usepackage{booktabs}``), since it is required to `display the table properly. <https://en.wikibooks.org/wiki/LaTeX/Tables#Using_booktabs>`_
|
| 1034 |
+
|
| 1035 |
+
|
| 1036 |
+
Filter a ``print_summary`` table
|
| 1037 |
+
##########################################
|
| 1038 |
+
|
| 1039 |
+
The information provided by ``print_summary`` can be a lot, and even too much for some screens. You can filter to specific columns using the ``columns`` kwarg (default is to display all columns):
|
| 1040 |
+
|
| 1041 |
+
.. code-block:: python
|
| 1042 |
+
|
| 1043 |
+
from lifelines.datasets import load_rossi
|
| 1044 |
+
from lifelines import CoxPHFitter
|
| 1045 |
+
|
| 1046 |
+
rossi = load_rossi()
|
| 1047 |
+
|
| 1048 |
+
cph = CoxPHFitter().fit(rossi, 'week', 'arrest')
|
| 1049 |
+
|
| 1050 |
+
cph.print_summary(columns=["coef", "se(coef)", "p"])
|
| 1051 |
+
|
| 1052 |
+
|
| 1053 |
+
|
| 1054 |
+
Fixing a ``FormulaSyntaxError``
|
| 1055 |
+
##############################################
|
| 1056 |
+
|
| 1057 |
+
As of *lifelines* v0.25.0, formulas can be used to model your dataframe. This may cause problems if your dataframe has column names with spaces, periods, or other characters. The cheapest way to fix this is to change your column names:
|
| 1058 |
+
|
| 1059 |
+
|
| 1060 |
+
.. code-block:: python
|
| 1061 |
+
|
| 1062 |
+
df = pd.DataFrame({
|
| 1063 |
+
'T': [1, 2, 3, 4],
|
| 1064 |
+
'column with spaces': [1.5, 1.0, 2.5, 1.0],
|
| 1065 |
+
'column.with.periods': [2.5, -1.0, -2.5, 1.0],
|
| 1066 |
+
'column': [2.0, 1.0, 3.0, 4.0]
|
| 1067 |
+
})
|
| 1068 |
+
|
| 1069 |
+
cph = CoxPHFitter().fit(df, 'T')
|
| 1070 |
+
|
| 1071 |
+
"""
|
| 1072 |
+
FormulaSyntaxError:
|
| 1073 |
+
...
|
| 1074 |
+
"""
|
| 1075 |
+
|
| 1076 |
+
df.columns = df.columns.str.replace(' ', '')
|
| 1077 |
+
df.columns = df.columns.str.replace('.', '')
|
| 1078 |
+
cph = CoxPHFitter().fit(df, 'T')
|
| 1079 |
+
|
| 1080 |
+
"""
|
| 1081 |
+
👍
|
| 1082 |
+
"""
|
| 1083 |
+
|
| 1084 |
+
|
| 1085 |
+
Another option is to use the formula syntax to handle this:
|
| 1086 |
+
|
| 1087 |
+
|
| 1088 |
+
.. code-block:: python
|
| 1089 |
+
|
| 1090 |
+
df = pd.DataFrame({
|
| 1091 |
+
'T': [1, 2, 3, 4],
|
| 1092 |
+
'column with spaces': [1.5, 1.0, 2.5, 1.0],
|
| 1093 |
+
'column.with.periods': [2.5, -1.0, -2.5, 1.0],
|
| 1094 |
+
'column': [2.0, 1.0, 3.0, 4.0]
|
| 1095 |
+
})
|
| 1096 |
+
|
| 1097 |
+
cph = CoxPHFitter().fit(df, 'T', formula="column + Q('column with spaces') + Q('column.with.periods')")
|
lifelines/source/docs/Makefile
ADDED
|
@@ -0,0 +1,177 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Makefile for Sphinx documentation
|
| 2 |
+
#
|
| 3 |
+
|
| 4 |
+
# You can set these variables from the command line.
|
| 5 |
+
SPHINXOPTS =
|
| 6 |
+
SPHINXBUILD = sphinx-build
|
| 7 |
+
PAPER =
|
| 8 |
+
BUILDDIR = _build
|
| 9 |
+
|
| 10 |
+
# User-friendly check for sphinx-build
|
| 11 |
+
ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
|
| 12 |
+
$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
|
| 13 |
+
endif
|
| 14 |
+
|
| 15 |
+
# Internal variables.
|
| 16 |
+
PAPEROPT_a4 = -D latex_paper_size=a4
|
| 17 |
+
PAPEROPT_letter = -D latex_paper_size=letter
|
| 18 |
+
ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
|
| 19 |
+
# the i18n builder cannot share the environment and doctrees with the others
|
| 20 |
+
I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
|
| 21 |
+
|
| 22 |
+
.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext
|
| 23 |
+
|
| 24 |
+
help:
|
| 25 |
+
@echo "Please use \`make <target>' where <target> is one of"
|
| 26 |
+
@echo " html to make standalone HTML files"
|
| 27 |
+
@echo " dirhtml to make HTML files named index.html in directories"
|
| 28 |
+
@echo " singlehtml to make a single large HTML file"
|
| 29 |
+
@echo " pickle to make pickle files"
|
| 30 |
+
@echo " json to make JSON files"
|
| 31 |
+
@echo " htmlhelp to make HTML files and a HTML help project"
|
| 32 |
+
@echo " qthelp to make HTML files and a qthelp project"
|
| 33 |
+
@echo " devhelp to make HTML files and a Devhelp project"
|
| 34 |
+
@echo " epub to make an epub"
|
| 35 |
+
@echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
|
| 36 |
+
@echo " latexpdf to make LaTeX files and run them through pdflatex"
|
| 37 |
+
@echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
|
| 38 |
+
@echo " text to make text files"
|
| 39 |
+
@echo " man to make manual pages"
|
| 40 |
+
@echo " texinfo to make Texinfo files"
|
| 41 |
+
@echo " info to make Texinfo files and run them through makeinfo"
|
| 42 |
+
@echo " gettext to make PO message catalogs"
|
| 43 |
+
@echo " changes to make an overview of all changed/added/deprecated items"
|
| 44 |
+
@echo " xml to make Docutils-native XML files"
|
| 45 |
+
@echo " pseudoxml to make pseudoxml-XML files for display purposes"
|
| 46 |
+
@echo " linkcheck to check all external links for integrity"
|
| 47 |
+
@echo " doctest to run all doctests embedded in the documentation (if enabled)"
|
| 48 |
+
|
| 49 |
+
clean:
|
| 50 |
+
rm -rf $(BUILDDIR)/*
|
| 51 |
+
|
| 52 |
+
html:
|
| 53 |
+
$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
|
| 54 |
+
@echo
|
| 55 |
+
@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
|
| 56 |
+
|
| 57 |
+
dirhtml:
|
| 58 |
+
$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
|
| 59 |
+
@echo
|
| 60 |
+
@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
|
| 61 |
+
|
| 62 |
+
singlehtml:
|
| 63 |
+
$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
|
| 64 |
+
@echo
|
| 65 |
+
@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
|
| 66 |
+
|
| 67 |
+
pickle:
|
| 68 |
+
$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
|
| 69 |
+
@echo
|
| 70 |
+
@echo "Build finished; now you can process the pickle files."
|
| 71 |
+
|
| 72 |
+
json:
|
| 73 |
+
$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
|
| 74 |
+
@echo
|
| 75 |
+
@echo "Build finished; now you can process the JSON files."
|
| 76 |
+
|
| 77 |
+
htmlhelp:
|
| 78 |
+
$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
|
| 79 |
+
@echo
|
| 80 |
+
@echo "Build finished; now you can run HTML Help Workshop with the" \
|
| 81 |
+
".hhp project file in $(BUILDDIR)/htmlhelp."
|
| 82 |
+
|
| 83 |
+
qthelp:
|
| 84 |
+
$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
|
| 85 |
+
@echo
|
| 86 |
+
@echo "Build finished; now you can run "qcollectiongenerator" with the" \
|
| 87 |
+
".qhcp project file in $(BUILDDIR)/qthelp, like this:"
|
| 88 |
+
@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/lifelines.qhcp"
|
| 89 |
+
@echo "To view the help file:"
|
| 90 |
+
@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/lifelines.qhc"
|
| 91 |
+
|
| 92 |
+
devhelp:
|
| 93 |
+
$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
|
| 94 |
+
@echo
|
| 95 |
+
@echo "Build finished."
|
| 96 |
+
@echo "To view the help file:"
|
| 97 |
+
@echo "# mkdir -p $$HOME/.local/share/devhelp/lifelines"
|
| 98 |
+
@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/lifelines"
|
| 99 |
+
@echo "# devhelp"
|
| 100 |
+
|
| 101 |
+
epub:
|
| 102 |
+
$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
|
| 103 |
+
@echo
|
| 104 |
+
@echo "Build finished. The epub file is in $(BUILDDIR)/epub."
|
| 105 |
+
|
| 106 |
+
latex:
|
| 107 |
+
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
|
| 108 |
+
@echo
|
| 109 |
+
@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
|
| 110 |
+
@echo "Run \`make' in that directory to run these through (pdf)latex" \
|
| 111 |
+
"(use \`make latexpdf' here to do that automatically)."
|
| 112 |
+
|
| 113 |
+
latexpdf:
|
| 114 |
+
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
|
| 115 |
+
@echo "Running LaTeX files through pdflatex..."
|
| 116 |
+
$(MAKE) -C $(BUILDDIR)/latex all-pdf
|
| 117 |
+
@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
|
| 118 |
+
|
| 119 |
+
latexpdfja:
|
| 120 |
+
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
|
| 121 |
+
@echo "Running LaTeX files through platex and dvipdfmx..."
|
| 122 |
+
$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
|
| 123 |
+
@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
|
| 124 |
+
|
| 125 |
+
text:
|
| 126 |
+
$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
|
| 127 |
+
@echo
|
| 128 |
+
@echo "Build finished. The text files are in $(BUILDDIR)/text."
|
| 129 |
+
|
| 130 |
+
man:
|
| 131 |
+
$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
|
| 132 |
+
@echo
|
| 133 |
+
@echo "Build finished. The manual pages are in $(BUILDDIR)/man."
|
| 134 |
+
|
| 135 |
+
texinfo:
|
| 136 |
+
$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
|
| 137 |
+
@echo
|
| 138 |
+
@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
|
| 139 |
+
@echo "Run \`make' in that directory to run these through makeinfo" \
|
| 140 |
+
"(use \`make info' here to do that automatically)."
|
| 141 |
+
|
| 142 |
+
info:
|
| 143 |
+
$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
|
| 144 |
+
@echo "Running Texinfo files through makeinfo..."
|
| 145 |
+
make -C $(BUILDDIR)/texinfo info
|
| 146 |
+
@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
|
| 147 |
+
|
| 148 |
+
gettext:
|
| 149 |
+
$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
|
| 150 |
+
@echo
|
| 151 |
+
@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
|
| 152 |
+
|
| 153 |
+
changes:
|
| 154 |
+
$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
|
| 155 |
+
@echo
|
| 156 |
+
@echo "The overview file is in $(BUILDDIR)/changes."
|
| 157 |
+
|
| 158 |
+
linkcheck:
|
| 159 |
+
$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
|
| 160 |
+
@echo
|
| 161 |
+
@echo "Link check complete; look for any errors in the above output " \
|
| 162 |
+
"or in $(BUILDDIR)/linkcheck/output.txt."
|
| 163 |
+
|
| 164 |
+
doctest:
|
| 165 |
+
$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
|
| 166 |
+
@echo "Testing of doctests in the sources finished, look at the " \
|
| 167 |
+
"results in $(BUILDDIR)/doctest/output.txt."
|
| 168 |
+
|
| 169 |
+
xml:
|
| 170 |
+
$(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
|
| 171 |
+
@echo
|
| 172 |
+
@echo "Build finished. The XML files are in $(BUILDDIR)/xml."
|
| 173 |
+
|
| 174 |
+
pseudoxml:
|
| 175 |
+
$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
|
| 176 |
+
@echo
|
| 177 |
+
@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
|
lifelines/source/docs/Quickstart.rst
ADDED
|
@@ -0,0 +1,366 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.. _code_directive:
|
| 2 |
+
|
| 3 |
+
.. image:: https://i.imgur.com/EOowdSD.png
|
| 4 |
+
|
| 5 |
+
-------------------------------------
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
Quickstart
|
| 9 |
+
''''''''''
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
Installation
|
| 13 |
+
------------
|
| 14 |
+
|
| 15 |
+
Install via ``pip``:
|
| 16 |
+
|
| 17 |
+
.. code-block:: console
|
| 18 |
+
|
| 19 |
+
pip install lifelines
|
| 20 |
+
|
| 21 |
+
OR
|
| 22 |
+
|
| 23 |
+
Install via `conda <https://anaconda.org/conda-forge/lifelines>`_:
|
| 24 |
+
|
| 25 |
+
.. code-block:: console
|
| 26 |
+
|
| 27 |
+
conda install -c conda-forge lifelines
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
Kaplan-Meier, Nelson-Aalen, and parametric models
|
| 31 |
+
---------------------------------------------------
|
| 32 |
+
|
| 33 |
+
.. note:: For readers looking for an introduction to survival analysis, it's recommended to start at :ref:`Introduction to Survival Analysis`
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
Let's start by importing some data. We need the durations that individuals are observed for, and whether they "died" or not.
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
.. code:: python
|
| 40 |
+
|
| 41 |
+
from lifelines.datasets import load_waltons
|
| 42 |
+
df = load_waltons() # returns a Pandas DataFrame
|
| 43 |
+
|
| 44 |
+
print(df.head())
|
| 45 |
+
"""
|
| 46 |
+
T E group
|
| 47 |
+
0 6 1 miR-137
|
| 48 |
+
1 13 1 miR-137
|
| 49 |
+
2 13 1 miR-137
|
| 50 |
+
3 13 1 miR-137
|
| 51 |
+
4 19 1 miR-137
|
| 52 |
+
"""
|
| 53 |
+
|
| 54 |
+
T = df['T']
|
| 55 |
+
E = df['E']
|
| 56 |
+
|
| 57 |
+
``T`` is an array of durations, ``E`` is either a boolean or binary array representing whether the "death" was observed or not (alternatively an individual can be censored). We will fit a Kaplan Meier model to this, implemented as :class:`~lifelines.fitters.kaplan_meier_fitter.KaplanMeierFitter`:
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
.. code:: python
|
| 62 |
+
|
| 63 |
+
from lifelines import KaplanMeierFitter
|
| 64 |
+
kmf = KaplanMeierFitter()
|
| 65 |
+
kmf.fit(T, event_observed=E) # or, more succinctly, kmf.fit(T, E)
|
| 66 |
+
|
| 67 |
+
After calling the :meth:`~lifelines.fitters.kaplan_meier_fitter.KaplanMeierFitter.fit` method, we have access to new properties like :attr:`~lifelines.fitters.kaplan_meier_fitter.KaplanMeierFitter.survival_function_` and methods like :meth:`~lifelines.fitters.kaplan_meier_fitter.KaplanMeierFitter.plot`. The latter is a wrapper around Pandas' internal plotting library.
|
| 68 |
+
|
| 69 |
+
.. code:: python
|
| 70 |
+
|
| 71 |
+
kmf.survival_function_
|
| 72 |
+
kmf.cumulative_density_
|
| 73 |
+
kmf.plot_survival_function()
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
.. image:: images/quickstart_kmf.png
|
| 77 |
+
:width: 620px
|
| 78 |
+
:align: center
|
| 79 |
+
|
| 80 |
+
Alternatively, you can plot the cumulative density function:
|
| 81 |
+
|
| 82 |
+
.. code:: python
|
| 83 |
+
|
| 84 |
+
kmf.plot_cumulative_density()
|
| 85 |
+
|
| 86 |
+
.. image:: images/quickstart_kmf_cdf.png
|
| 87 |
+
:width: 620px
|
| 88 |
+
:align: center
|
| 89 |
+
|
| 90 |
+
By specifying the ``timeline`` keyword argument in :meth:`~lifelines.fitters.kaplan_meier_fitter.KaplanMeierFitter.fit`, we can change how the above models are indexed:
|
| 91 |
+
|
| 92 |
+
.. code:: python
|
| 93 |
+
|
| 94 |
+
kmf.fit(T, E, timeline=range(0, 100, 2))
|
| 95 |
+
|
| 96 |
+
kmf.survival_function_ # index is now the same as range(0, 100, 2)
|
| 97 |
+
kmf.confidence_interval_ # index is now the same as range(0, 100, 2)
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
A useful summary stat is the median survival time, which represents when 50% of the population has died:
|
| 101 |
+
|
| 102 |
+
.. code:: python
|
| 103 |
+
|
| 104 |
+
from lifelines.utils import median_survival_times
|
| 105 |
+
|
| 106 |
+
median_ = kmf.median_survival_time_
|
| 107 |
+
median_confidence_interval_ = median_survival_times(kmf.confidence_interval_)
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
Instead of the Kaplan-Meier estimator, you may be interested in a parametric model. *lifelines* has builtin parametric models. For example, Weibull, Log-Normal, Log-Logistic, and more.
|
| 111 |
+
|
| 112 |
+
.. code:: python
|
| 113 |
+
|
| 114 |
+
import matplotlib.pyplot as plt
|
| 115 |
+
import numpy as np
|
| 116 |
+
from lifelines import *
|
| 117 |
+
|
| 118 |
+
fig, axes = plt.subplots(3, 3, figsize=(13.5, 7.5))
|
| 119 |
+
|
| 120 |
+
kmf = KaplanMeierFitter().fit(T, E, label='KaplanMeierFitter')
|
| 121 |
+
wbf = WeibullFitter().fit(T, E, label='WeibullFitter')
|
| 122 |
+
exf = ExponentialFitter().fit(T, E, label='ExponentialFitter')
|
| 123 |
+
lnf = LogNormalFitter().fit(T, E, label='LogNormalFitter')
|
| 124 |
+
llf = LogLogisticFitter().fit(T, E, label='LogLogisticFitter')
|
| 125 |
+
pwf = PiecewiseExponentialFitter([40, 60]).fit(T, E, label='PiecewiseExponentialFitter')
|
| 126 |
+
ggf = GeneralizedGammaFitter().fit(T, E, label='GeneralizedGammaFitter')
|
| 127 |
+
sf = SplineFitter(np.percentile(T.loc[E.astype(bool)], [0, 50, 100])).fit(T, E, label='SplineFitter')
|
| 128 |
+
|
| 129 |
+
wbf.plot_survival_function(ax=axes[0][0])
|
| 130 |
+
exf.plot_survival_function(ax=axes[0][1])
|
| 131 |
+
lnf.plot_survival_function(ax=axes[0][2])
|
| 132 |
+
kmf.plot_survival_function(ax=axes[1][0])
|
| 133 |
+
llf.plot_survival_function(ax=axes[1][1])
|
| 134 |
+
pwf.plot_survival_function(ax=axes[1][2])
|
| 135 |
+
ggf.plot_survival_function(ax=axes[2][0])
|
| 136 |
+
sf.plot_survival_function(ax=axes[2][1])
|
| 137 |
+
|
| 138 |
+
.. image:: images/waltons_survival_function.png
|
| 139 |
+
|
| 140 |
+
|
| 141 |
+
Multiple groups
|
| 142 |
+
^^^^^^^^^^^^^^^
|
| 143 |
+
|
| 144 |
+
.. code:: python
|
| 145 |
+
|
| 146 |
+
groups = df['group']
|
| 147 |
+
ix = (groups == 'miR-137')
|
| 148 |
+
|
| 149 |
+
kmf.fit(T[~ix], E[~ix], label='control')
|
| 150 |
+
ax = kmf.plot_survival_function()
|
| 151 |
+
|
| 152 |
+
kmf.fit(T[ix], E[ix], label='miR-137')
|
| 153 |
+
ax = kmf.plot_survival_function(ax=ax)
|
| 154 |
+
|
| 155 |
+
|
| 156 |
+
.. image:: images/quickstart_multi.png
|
| 157 |
+
:width: 620px
|
| 158 |
+
:align: center
|
| 159 |
+
|
| 160 |
+
Alternatively, for many more groups and more "pandas-esque":
|
| 161 |
+
|
| 162 |
+
.. code:: python
|
| 163 |
+
|
| 164 |
+
|
| 165 |
+
ax = plt.subplot(111)
|
| 166 |
+
|
| 167 |
+
kmf = KaplanMeierFitter()
|
| 168 |
+
|
| 169 |
+
for name, grouped_df in df.groupby('group'):
|
| 170 |
+
kmf.fit(grouped_df["T"], grouped_df["E"], label=name)
|
| 171 |
+
kmf.plot_survival_function(ax=ax)
|
| 172 |
+
|
| 173 |
+
|
| 174 |
+
Similar functionality exists for the :class:`~lifelines.fitters.nelson_aalen_fitter.NelsonAalenFitter`:
|
| 175 |
+
|
| 176 |
+
.. code:: python
|
| 177 |
+
|
| 178 |
+
from lifelines import NelsonAalenFitter
|
| 179 |
+
naf = NelsonAalenFitter()
|
| 180 |
+
naf.fit(T, event_observed=E)
|
| 181 |
+
|
| 182 |
+
but instead of a ``survival_function_`` being exposed, a ``cumulative_hazard_`` is.
|
| 183 |
+
|
| 184 |
+
.. note:: Similar to `Scikit-Learn <http://scikit-learn.org>`_, all statistically estimated quantities append an underscore to the property name.
|
| 185 |
+
|
| 186 |
+
.. note:: More detailed docs about estimating the survival function and cumulative hazard are available in `Survival analysis with lifelines`_.
|
| 187 |
+
|
| 188 |
+
|
| 189 |
+
Getting data in the right format
|
| 190 |
+
--------------------------------
|
| 191 |
+
|
| 192 |
+
Often you'll have data that looks like::
|
| 193 |
+
|
| 194 |
+
*start_time1*, *end_time1*
|
| 195 |
+
*start_time2*, *end_time2*
|
| 196 |
+
*start_time3*, None
|
| 197 |
+
*start_time4*, *end_time4*
|
| 198 |
+
|
| 199 |
+
*lifelines* has some utility functions to transform this dataset into duration and censoring vectors. The most common one is :func:`lifelines.utils.datetimes_to_durations`.
|
| 200 |
+
|
| 201 |
+
.. code:: python
|
| 202 |
+
|
| 203 |
+
from lifelines.utils import datetimes_to_durations
|
| 204 |
+
|
| 205 |
+
# start_times is a vector or list of datetime objects or datetime strings
|
| 206 |
+
# end_times is a vector or list of (possibly missing) datetime objects or datetime strings
|
| 207 |
+
T, E = datetimes_to_durations(start_times, end_times, freq='h')
|
| 208 |
+
|
| 209 |
+
|
| 210 |
+
Perhaps you are interested in viewing the survival table given some durations and censoring vectors. The function :func:`lifelines.utils.survival_table_from_events` will help with that:
|
| 211 |
+
|
| 212 |
+
|
| 213 |
+
.. code:: python
|
| 214 |
+
|
| 215 |
+
from lifelines.utils import survival_table_from_events
|
| 216 |
+
|
| 217 |
+
table = survival_table_from_events(T, E)
|
| 218 |
+
print(table.head())
|
| 219 |
+
|
| 220 |
+
"""
|
| 221 |
+
removed observed censored entrance at_risk
|
| 222 |
+
event_at
|
| 223 |
+
0 0 0 0 163 163
|
| 224 |
+
6 1 1 0 0 163
|
| 225 |
+
7 2 1 1 0 162
|
| 226 |
+
9 3 3 0 0 160
|
| 227 |
+
13 3 3 0 0 157
|
| 228 |
+
"""
|
| 229 |
+
|
| 230 |
+
|
| 231 |
+
Survival regression
|
| 232 |
+
-------------------
|
| 233 |
+
|
| 234 |
+
While the above :class:`~lifelines.fitters.kaplan_meier_fitter.KaplanMeierFitter` model is useful, it only gives us an "average" view of the population. Often we have specific data at the individual level that we would like to use. For this, we turn to **survival regression**.
|
| 235 |
+
|
| 236 |
+
.. note:: More detailed documentation and tutorials are available in `Survival Regression`_.
|
| 237 |
+
|
| 238 |
+
|
| 239 |
+
The dataset for regression models is different than the datasets above. All the data, including durations, censored indicators and covariates must be contained in **a Pandas DataFrame**.
|
| 240 |
+
|
| 241 |
+
.. code:: python
|
| 242 |
+
|
| 243 |
+
from lifelines.datasets import load_regression_dataset
|
| 244 |
+
regression_dataset = load_regression_dataset() # a Pandas DataFrame
|
| 245 |
+
|
| 246 |
+
|
| 247 |
+
A regression model is instantiated, and a model is fit to a dataset using ``fit``. The duration column and event column are specified in the call to ``fit``. Below we model our regression dataset using the Cox proportional hazard model, full docs `here <https://lifelines.readthedocs.io/en/latest/Survival%20Regression.html#cox-s-proportional-hazard-model>`_.
|
| 248 |
+
|
| 249 |
+
.. code:: python
|
| 250 |
+
|
| 251 |
+
from lifelines import CoxPHFitter
|
| 252 |
+
|
| 253 |
+
# Using Cox Proportional Hazards model
|
| 254 |
+
cph = CoxPHFitter()
|
| 255 |
+
cph.fit(regression_dataset, 'T', event_col='E')
|
| 256 |
+
cph.print_summary()
|
| 257 |
+
|
| 258 |
+
"""
|
| 259 |
+
<lifelines.CoxPHFitter: fitted with 200 total observations, 11 right-censored observations>
|
| 260 |
+
duration col = 'T'
|
| 261 |
+
event col = 'E'
|
| 262 |
+
baseline estimation = breslow
|
| 263 |
+
number of observations = 200
|
| 264 |
+
number of events observed = 189
|
| 265 |
+
partial log-likelihood = -807.62
|
| 266 |
+
time fit was run = 2020-06-21 12:26:28 UTC
|
| 267 |
+
|
| 268 |
+
---
|
| 269 |
+
coef exp(coef) se(coef) coef lower 95% coef upper 95% exp(coef) lower 95% exp(coef) upper 95%
|
| 270 |
+
var1 0.22 1.25 0.07 0.08 0.37 1.08 1.44
|
| 271 |
+
var2 0.05 1.05 0.08 -0.11 0.21 0.89 1.24
|
| 272 |
+
var3 0.22 1.24 0.08 0.07 0.37 1.07 1.44
|
| 273 |
+
|
| 274 |
+
z p -log2(p)
|
| 275 |
+
var1 2.99 <0.005 8.49
|
| 276 |
+
var2 0.61 0.54 0.89
|
| 277 |
+
var3 2.88 <0.005 7.97
|
| 278 |
+
---
|
| 279 |
+
Concordance = 0.58
|
| 280 |
+
Partial AIC = 1621.24
|
| 281 |
+
log-likelihood ratio test = 15.54 on 3 df
|
| 282 |
+
-log2(p) of ll-ratio test = 9.47
|
| 283 |
+
"""
|
| 284 |
+
|
| 285 |
+
cph.plot()
|
| 286 |
+
|
| 287 |
+
.. image:: images/coxph_plot_quickstart.png
|
| 288 |
+
:width: 600px
|
| 289 |
+
:align: center
|
| 290 |
+
|
| 291 |
+
The same dataset, but with a *Weibull accelerated failure time model*. This model has two parameters (see docs `here <https://lifelines.readthedocs.io/en/latest/fitters/regression/WeibullAFTFitter.html>`_), and we can choose to model both using our covariates or just one. Below we model just the scale parameter, ``lambda_``.
|
| 292 |
+
|
| 293 |
+
.. code:: python
|
| 294 |
+
|
| 295 |
+
from lifelines import WeibullAFTFitter
|
| 296 |
+
|
| 297 |
+
wft = WeibullAFTFitter()
|
| 298 |
+
wft.fit(regression_dataset, 'T', event_col='E')
|
| 299 |
+
wft.print_summary()
|
| 300 |
+
|
| 301 |
+
"""
|
| 302 |
+
<lifelines.WeibullAFTFitter: fitted with 200 total observations, 11 right-censored observations>
|
| 303 |
+
duration col = 'T'
|
| 304 |
+
event col = 'E'
|
| 305 |
+
number of observations = 200
|
| 306 |
+
number of events observed = 189
|
| 307 |
+
log-likelihood = -504.48
|
| 308 |
+
time fit was run = 2020-06-21 12:27:05 UTC
|
| 309 |
+
|
| 310 |
+
---
|
| 311 |
+
coef exp(coef) se(coef) coef lower 95% coef upper 95% exp(coef) lower 95% exp(coef) upper 95%
|
| 312 |
+
lambda_ var1 -0.08 0.92 0.02 -0.13 -0.04 0.88 0.97
|
| 313 |
+
var2 -0.02 0.98 0.03 -0.07 0.04 0.93 1.04
|
| 314 |
+
var3 -0.08 0.92 0.02 -0.13 -0.03 0.88 0.97
|
| 315 |
+
Intercept 2.53 12.57 0.05 2.43 2.63 11.41 13.85
|
| 316 |
+
rho_ Intercept 1.09 2.98 0.05 0.99 1.20 2.68 3.32
|
| 317 |
+
|
| 318 |
+
z p -log2(p)
|
| 319 |
+
lambda_ var1 -3.45 <0.005 10.78
|
| 320 |
+
var2 -0.56 0.57 0.80
|
| 321 |
+
var3 -3.33 <0.005 10.15
|
| 322 |
+
Intercept 51.12 <0.005 inf
|
| 323 |
+
rho_ Intercept 20.12 <0.005 296.66
|
| 324 |
+
---
|
| 325 |
+
Concordance = 0.58
|
| 326 |
+
AIC = 1018.97
|
| 327 |
+
log-likelihood ratio test = 19.73 on 3 df
|
| 328 |
+
-log2(p) of ll-ratio test = 12.34
|
| 329 |
+
"""
|
| 330 |
+
|
| 331 |
+
wft.plot()
|
| 332 |
+
|
| 333 |
+
.. image:: images/waft_plot_quickstart.png
|
| 334 |
+
:width: 600px
|
| 335 |
+
:align: center
|
| 336 |
+
|
| 337 |
+
Other AFT models are available as well, see `here <https://lifelines.readthedocs.io/en/latest/Survival%20Regression.html#the-log-normal-and-log-logistic-aft-models>`_. An alternative regression model is Aalen's Additive model, which has time-varying hazards:
|
| 338 |
+
|
| 339 |
+
.. code:: python
|
| 340 |
+
|
| 341 |
+
# Using Aalen's Additive model
|
| 342 |
+
from lifelines import AalenAdditiveFitter
|
| 343 |
+
aaf = AalenAdditiveFitter(fit_intercept=False)
|
| 344 |
+
aaf.fit(regression_dataset, 'T', event_col='E')
|
| 345 |
+
|
| 346 |
+
|
| 347 |
+
Along with :class:`~lifelines.fitters.coxph_fitter.CoxPHFitter` and :class:`~lifelines.fitters.weibull_aft_fitter.WeibullAFTFitter`, after fitting you'll have access to properties like ``summary`` and methods like ``plot``, ``predict_cumulative_hazard``, and ``predict_survival_function``. The latter two methods require an additional argument of covariates:
|
| 348 |
+
|
| 349 |
+
.. code:: python
|
| 350 |
+
|
| 351 |
+
X = regression_dataset.loc[0]
|
| 352 |
+
|
| 353 |
+
ax = wft.predict_survival_function(X).rename(columns={0:'WeibullAFT'}).plot()
|
| 354 |
+
cph.predict_survival_function(X).rename(columns={0:'CoxPHFitter'}).plot(ax=ax)
|
| 355 |
+
aaf.predict_survival_function(X).rename(columns={0:'AalenAdditive'}).plot(ax=ax)
|
| 356 |
+
|
| 357 |
+
.. image:: images/quickstart_predict_aaf.png
|
| 358 |
+
:width: 620px
|
| 359 |
+
:align: center
|
| 360 |
+
|
| 361 |
+
|
| 362 |
+
.. note:: More detailed documentation and tutorials are available in `Survival Regression`_.
|
| 363 |
+
|
| 364 |
+
|
| 365 |
+
.. _Survival Regression: Survival%20Regression.html
|
| 366 |
+
.. _Survival analysis with lifelines: Survival%20analysis%20with%20lifelines.html
|
lifelines/source/docs/References.rst
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
API Reference
|
| 2 |
+
==================================
|
| 3 |
+
|
| 4 |
+
.. toctree::
|
| 5 |
+
|
| 6 |
+
lifelines.fitters
|
| 7 |
+
lifelines.utils
|
| 8 |
+
lifelines.statistics
|
| 9 |
+
lifelines.plotting
|
| 10 |
+
lifelines.datasets
|
| 11 |
+
lifelines.calibration
|
lifelines/source/docs/Survival Analysis intro.rst
ADDED
|
@@ -0,0 +1,232 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.. image:: https://i.imgur.com/EOowdSD.png
|
| 2 |
+
|
| 3 |
+
-------------------------------------
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
Introduction to survival analysis
|
| 7 |
+
'''''''''''''''''''''''''''''''''
|
| 8 |
+
|
| 9 |
+
Applications
|
| 10 |
+
------------
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
Traditionally, survival analysis was developed to measure lifespans of individuals.
|
| 14 |
+
An actuary or health professional would ask questions like
|
| 15 |
+
"how long does this population live for?", and answer it using survival analysis.
|
| 16 |
+
For example, the population may be a nation's population (for actuaries),
|
| 17 |
+
or a population stricken by a disease (in the medical professional's case).
|
| 18 |
+
Traditionally, sort of a morbid subject.
|
| 19 |
+
|
| 20 |
+
But survival analysis can be applied to not only *births and
|
| 21 |
+
deaths*, but *any* duration. Medical professionals might be interested in
|
| 22 |
+
the *time between childbirths*, where a birth in this case is the event
|
| 23 |
+
of having a child, and a death is becoming pregnant again! (obviously,
|
| 24 |
+
we are loose with our definitions of *birth and death*) Another example
|
| 25 |
+
is users subscribing to a service: a birth is a user who joins the
|
| 26 |
+
service, and a death is when the user leaves the service.
|
| 27 |
+
|
| 28 |
+
Censoring
|
| 29 |
+
----------
|
| 30 |
+
|
| 31 |
+
At the time you want to make inferences about durations, it is possible that not all the death events have occurred yet. For example, a
|
| 32 |
+
medical professional will not wait 50 years for each individual in the
|
| 33 |
+
study to pass away before investigating -- he or she is interested in
|
| 34 |
+
making decisions after only a few years, or months possibly.
|
| 35 |
+
|
| 36 |
+
The individuals in a population who have not been subject to the death
|
| 37 |
+
event are labeled as *right-censored*, i.e.,
|
| 38 |
+
we did not (or can not) view the rest of their life history
|
| 39 |
+
due to some external circumstances. All the information we have on
|
| 40 |
+
these individuals is their current lifetime durations (which are
|
| 41 |
+
naturally *less* than their actual lifetimes).
|
| 42 |
+
|
| 43 |
+
.. note:: There is also left-censoring and interval censoring, which are expanded on later.
|
| 44 |
+
|
| 45 |
+
A common mistake data analysts make is choosing to ignore the
|
| 46 |
+
right-censored individuals. We will see why this is a mistake next.
|
| 47 |
+
|
| 48 |
+
Consider a case where the population is actually made up of two
|
| 49 |
+
subpopulations, :math:`A` and :math:`B`. Population :math:`A` has a very
|
| 50 |
+
small lifespan, say 2 months on average, and population :math:`B`
|
| 51 |
+
enjoys a much larger lifespan, say 12 months on average. We don't
|
| 52 |
+
know this distinction beforehand. At :math:`t=10`, we
|
| 53 |
+
wish to investigate the average lifespan for the entire population.
|
| 54 |
+
|
| 55 |
+
In the figure below, the red lines denote the lifespan of individuals where the death event
|
| 56 |
+
has been observed, and the blue lines denote the lifespan of the
|
| 57 |
+
right-censored individuals (deaths have not been observed). If we are
|
| 58 |
+
asked to estimate the average lifetime of our population, and we naively
|
| 59 |
+
decided to *not* include the right-censored individuals, it is clear
|
| 60 |
+
that we would be severely underestimating the true average lifespan.
|
| 61 |
+
|
| 62 |
+
.. code:: python
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
from lifelines.plotting import plot_lifetimes
|
| 66 |
+
import numpy as np
|
| 67 |
+
from numpy.random import uniform, exponential
|
| 68 |
+
|
| 69 |
+
N = 25
|
| 70 |
+
|
| 71 |
+
CURRENT_TIME = 10
|
| 72 |
+
|
| 73 |
+
actual_lifetimes = np.array([
|
| 74 |
+
exponential(12) if (uniform() < 0.5) else exponential(2) for i in range(N)
|
| 75 |
+
])
|
| 76 |
+
observed_lifetimes = np.minimum(actual_lifetimes, CURRENT_TIME)
|
| 77 |
+
death_observed = actual_lifetimes < CURRENT_TIME
|
| 78 |
+
|
| 79 |
+
ax = plot_lifetimes(observed_lifetimes, event_observed=death_observed)
|
| 80 |
+
|
| 81 |
+
ax.set_xlim(0, 25)
|
| 82 |
+
ax.vlines(10, 0, 30, lw=2, linestyles='--')
|
| 83 |
+
ax.set_xlabel("time")
|
| 84 |
+
ax.set_title("Births and deaths of our population, at $t=10$")
|
| 85 |
+
print("Observed lifetimes at time %d:\n" % (CURRENT_TIME), observed_lifetimes)
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
.. figure:: images/survival_analysis_intro_censoring.png
|
| 89 |
+
:width: 650px
|
| 90 |
+
:align: center
|
| 91 |
+
:figclass: align-center
|
| 92 |
+
|
| 93 |
+
Example lifetimes of individuals. We only observe up to time 10, but the blue individuals have not died yet (i.e. they are censored).
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
.. parsed-literal::
|
| 97 |
+
|
| 98 |
+
Observed lifetimes at time 10:
|
| 99 |
+
[10. 1.1 8. 10. 3.43 0.63 6.28 1.03 2.37 6.17 10.
|
| 100 |
+
0.21 2.71 1.25 10. 3.4 0.62 1.94 0.22 7.43 6.16 10.
|
| 101 |
+
9.41 10. 10.]
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
Furthermore, if we instead simply took the mean of *all*
|
| 105 |
+
lifespans, including the current lifespans of right-censored instances,
|
| 106 |
+
we would *still* be underestimating the true average lifespan. Below we
|
| 107 |
+
plot the actual lifetimes of all instances (recall we do not see this
|
| 108 |
+
information at :math:`t=10`).
|
| 109 |
+
|
| 110 |
+
.. code:: python
|
| 111 |
+
|
| 112 |
+
ax = plot_lifetimes(actual_lifetimes, event_observed=death_observed)
|
| 113 |
+
ax.vlines(10, 0, 30, lw=2, linestyles='--')
|
| 114 |
+
ax.set_xlim(0, 25)
|
| 115 |
+
|
| 116 |
+
|
| 117 |
+
.. figure:: images/survival_analysis_intro_censoring_revealed.png
|
| 118 |
+
:width: 650px
|
| 119 |
+
:align: center
|
| 120 |
+
:figclass: align-center
|
| 121 |
+
|
| 122 |
+
Revealing the actual lifetimes of individuals.
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
Survival analysis was originally developed to solve this type of
|
| 126 |
+
problem, that is, to deal with estimation when our data is
|
| 127 |
+
right-censored. However, even in the case where all events have been
|
| 128 |
+
observed, i.e. there is no censoring, survival analysis is still a very useful tool
|
| 129 |
+
to understand durations and rates.
|
| 130 |
+
|
| 131 |
+
The observations need not always start at zero, either. This was done
|
| 132 |
+
only for understanding in the above example. Consider the example where
|
| 133 |
+
a customer entering a store is a birth: a customer can enter at
|
| 134 |
+
any time, and not necessarily at time zero. In survival analysis, durations
|
| 135 |
+
are relative: individuals may start at different times.
|
| 136 |
+
(We actually only need the *duration* of the observation, and not
|
| 137 |
+
necessarily the start and end time.)
|
| 138 |
+
|
| 139 |
+
We next introduce the three fundamental objects in survival analysis, the
|
| 140 |
+
*survival function*, *hazard function* and the *cumulative hazard function*.
|
| 141 |
+
|
| 142 |
+
--------------
|
| 143 |
+
|
| 144 |
+
Survival function
|
| 145 |
+
-----------------
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
Let :math:`T` be a (possibly infinite, but always non-negative) random
|
| 149 |
+
lifetime taken from the population under study. For example, the
|
| 150 |
+
amount of time a couple is married. Or the time it takes a user to enter
|
| 151 |
+
a webpage (an infinite time if they never do). The survival function -
|
| 152 |
+
:math:`S(t)` - of a population is defined as
|
| 153 |
+
|
| 154 |
+
.. math:: S(t) = Pr(T > t)
|
| 155 |
+
|
| 156 |
+
Simply, the survival function defines the probability the death event has not occurred yet at time
|
| 157 |
+
:math:`t`, or equivalently, the probability of surviving past time
|
| 158 |
+
:math:`t`. Note the following properties of the survival function:
|
| 159 |
+
|
| 160 |
+
1. :math:`0 \le S(t) \le 1`
|
| 161 |
+
2. :math:`F_T(t) = 1 - S(t)`, where :math:`F_T(t)` is the CDF of :math:`T`, which implies
|
| 162 |
+
3. :math:`S(t)` is a non-increasing function of :math:`t`.
|
| 163 |
+
|
| 164 |
+
Here's an example of a survival function:
|
| 165 |
+
|
| 166 |
+
.. image:: images/intro_survival_function.png
|
| 167 |
+
:width: 550px
|
| 168 |
+
:align: center
|
| 169 |
+
|
| 170 |
+
Reading from this graph, we can see that at time 40, about 75% of the population is still alive.
|
| 171 |
+
|
| 172 |
+
Hazard function
|
| 173 |
+
-----------------
|
| 174 |
+
|
| 175 |
+
|
| 176 |
+
We are also interested in the probability of the death event occurring at time :math:`t`,
|
| 177 |
+
given that the death event has not occurred yet. Mathematically, that is:
|
| 178 |
+
|
| 179 |
+
.. math:: \lim_{\delta t \rightarrow 0 } \; Pr( t \le T \le t + \delta t | T > t)
|
| 180 |
+
|
| 181 |
+
This quantity goes to 0 as :math:`\delta t` shrinks, so we divide this
|
| 182 |
+
by the interval :math:`\delta t` (like we might do in calculus). This
|
| 183 |
+
defines the hazard function at time :math:`t`, :math:`h(t)`:
|
| 184 |
+
|
| 185 |
+
.. math:: h(t) = \lim_{\delta t \rightarrow 0 } \; \frac{Pr( t \le T \le t + \delta t | T > t)}{\delta t}
|
| 186 |
+
|
| 187 |
+
It can be shown that this is equal to:
|
| 188 |
+
|
| 189 |
+
.. math:: h(t) = \frac{-S'(t)}{S(t)}
|
| 190 |
+
|
| 191 |
+
and solving this differential equation (cool, it is a differential
|
| 192 |
+
equation!), we get:
|
| 193 |
+
|
| 194 |
+
.. math:: S(t) = \exp\left( -\int_0^t h(z) \mathrm{d}z \right)
|
| 195 |
+
|
| 196 |
+
The integral has a more common name: the *cumulative hazard function*, denoted :math:`H(t)`. We can rewrite the above as:
|
| 197 |
+
|
| 198 |
+
|
| 199 |
+
.. math:: S(t) = \exp\left(-H(t) \right)
|
| 200 |
+
|
| 201 |
+
|
| 202 |
+
With that, the two figures below represent the hazard and the cumulative hazard, respectively, of the survival function in the figure above.
|
| 203 |
+
|
| 204 |
+
.. image:: images/intro_hazards.png
|
| 205 |
+
:width: 550px
|
| 206 |
+
:align: center
|
| 207 |
+
|
| 208 |
+
|
| 209 |
+
|
| 210 |
+
What I like about the above relationships is that they define **all** survival
|
| 211 |
+
functions. Notice that we can now speak either about the
|
| 212 |
+
survival function, :math:`S(t)`, the hazard, :math:`h(t)`, or the cumulative hazard function,
|
| 213 |
+
:math:`H(t)`, and we can convert back and forth quite easily. Below is a graphic of all the relationships between the quantities.
|
| 214 |
+
|
| 215 |
+
|
| 216 |
+
.. figure:: images/map.png
|
| 217 |
+
:width: 550px
|
| 218 |
+
:figwidth: 600px
|
| 219 |
+
:align: center
|
| 220 |
+
:figclass: align-center
|
| 221 |
+
|
| 222 |
+
Map of the mathematical entities used in survival analysis and the transforms between them.
|
| 223 |
+
Don't panic: *lifelines* does this all for you.
|
| 224 |
+
|
| 225 |
+
|
| 226 |
+
|
| 227 |
+
|
| 228 |
+
Next steps
|
| 229 |
+
-----------------
|
| 230 |
+
|
| 231 |
+
Of course, we do not observe the true survival function or hazard of a population. We
|
| 232 |
+
must use the observed data to estimate it. There are many ways to estimate the survival function and the hazard functions, which brings us to :doc:`estimation using lifelines</Survival analysis with lifelines>`.
|
lifelines/source/docs/Survival Regression.rst
ADDED
|
@@ -0,0 +1,1298 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.. image:: https://i.imgur.com/EOowdSD.png
|
| 2 |
+
|
| 3 |
+
-------------------------------------
|
| 4 |
+
|
| 5 |
+
Survival regression
|
| 6 |
+
#######################
|
| 7 |
+
|
| 8 |
+
Often we have additional data aside from the duration that we want to use.
|
| 9 |
+
The technique is called *survival regression* -- the name implies
|
| 10 |
+
we regress covariates (e.g., age, country, etc.) against
|
| 11 |
+
another variable -- in this case durations. Similar to the
|
| 12 |
+
logic in the first part of this tutorial, we cannot use traditional
|
| 13 |
+
methods like linear regression because of censoring.
|
| 14 |
+
|
| 15 |
+
There are a few popular models in survival regression: Cox's
|
| 16 |
+
model, accelerated failure time models, and Aalen's additive model. All models attempt to represent the
|
| 17 |
+
hazard rate :math:`h(t | x)` as a function of :math:`t` and some covariates :math:`x`. We explore these models next.
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
The dataset for regression
|
| 21 |
+
===========================
|
| 22 |
+
The dataset required for survival regression must be in the format of a Pandas DataFrame. Each row of the DataFrame represents an observation. There should be a column denoting the durations of the observations. There may (or may not) be a column denoting the event status of each observation (1 if event occurred, 0 if censored). There are also the additional covariates you wish to regress against. Optionally, there could be columns in the DataFrame that are used for stratification, weights, and clusters which will be discussed later in this tutorial.
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
An example dataset we will use is the Rossi recidivism dataset, available in *lifelines* as :meth:`~lifelines.datasets.load_rossi`.
|
| 26 |
+
|
| 27 |
+
.. code:: python
|
| 28 |
+
|
| 29 |
+
from lifelines.datasets import load_rossi
|
| 30 |
+
|
| 31 |
+
rossi = load_rossi()
|
| 32 |
+
|
| 33 |
+
"""
|
| 34 |
+
week arrest fin age race wexp mar paro prio
|
| 35 |
+
0 20 1 0 27 1 0 0 1 3
|
| 36 |
+
1 17 1 0 18 1 0 0 1 8
|
| 37 |
+
2 25 1 0 19 0 1 0 1 13
|
| 38 |
+
3 52 0 1 23 1 1 1 1 1
|
| 39 |
+
"""
|
| 40 |
+
|
| 41 |
+
The DataFrame ``rossi`` contains 432 observations. The ``week`` column is the duration, the ``arrest`` column denotes if the event (a re-arrest) occurred, and the other columns represent variables we wish to regress against.
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
Cox's proportional hazard model
|
| 45 |
+
=================================
|
| 46 |
+
|
| 47 |
+
The idea behind Cox's proportional hazard model is that the log-hazard of an individual is a linear function of their covariates *and* a population-level baseline hazard that changes over time. Mathematically:
|
| 48 |
+
|
| 49 |
+
.. math:: \underbrace{h(t | x)}_{\text{hazard}} = \overbrace{b_0(t)}^{\text{baseline hazard}} \underbrace{\exp \overbrace{\left(\sum_{i=1}^n b_i (x_i - \overline{x_i})\right)}^{\text{log-partial hazard}}}_ {\text{partial hazard}}
|
| 50 |
+
|
| 51 |
+
Note a few behaviors about this model: the only *time* component is in the baseline hazard, :math:`b_0(t)`. In the above equation, the partial hazard is a time-invariant scalar factor that only increases or decreases the baseline hazard. Thus changes in covariates will only inflate or deflate the baseline hazard.
|
| 52 |
+
|
| 53 |
+
.. note:: In other regression models, a column of 1s might be added that represents the intercept or baseline. This is not necessary in the Cox model. In fact, there is no intercept in the Cox model - the baseline hazard represents this. *lifelines* will throw warnings and may experience convergence errors if a column of 1s is present in your dataset or formula.
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
Fitting the regression
|
| 57 |
+
-----------------------
|
| 58 |
+
|
| 59 |
+
The implementation of the Cox model in *lifelines* is under :class:`~lifelines.fitters.coxph_fitter.CoxPHFitter`. We fit the model to the dataset using :meth:`~lifelines.fitters.coxph_fitter.CoxPHFitter.fit`. It has a :meth:`~lifelines.fitters.coxph_fitter.CoxPHFitter.print_summary` function that prints a tabular view of coefficients and related stats.
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
.. code:: python
|
| 63 |
+
|
| 64 |
+
from lifelines import CoxPHFitter
|
| 65 |
+
from lifelines.datasets import load_rossi
|
| 66 |
+
|
| 67 |
+
rossi = load_rossi()
|
| 68 |
+
|
| 69 |
+
cph = CoxPHFitter()
|
| 70 |
+
cph.fit(rossi, duration_col='week', event_col='arrest')
|
| 71 |
+
|
| 72 |
+
cph.print_summary() # access the individual results using cph.summary
|
| 73 |
+
|
| 74 |
+
"""
|
| 75 |
+
<lifelines.CoxPHFitter: fitted with 432 total observations, 318 right-censored observations>
|
| 76 |
+
duration col = 'week'
|
| 77 |
+
event col = 'arrest'
|
| 78 |
+
number of observations = 432
|
| 79 |
+
number of events observed = 114
|
| 80 |
+
partial log-likelihood = -658.75
|
| 81 |
+
time fit was run = 2019-10-05 14:24:44 UTC
|
| 82 |
+
|
| 83 |
+
---
|
| 84 |
+
coef exp(coef) se(coef) coef lower 95% coef upper 95% exp(coef) lower 95% exp(coef) upper 95%
|
| 85 |
+
fin -0.38 0.68 0.19 -0.75 -0.00 0.47 1.00
|
| 86 |
+
age -0.06 0.94 0.02 -0.10 -0.01 0.90 0.99
|
| 87 |
+
race 0.31 1.37 0.31 -0.29 0.92 0.75 2.50
|
| 88 |
+
wexp -0.15 0.86 0.21 -0.57 0.27 0.57 1.30
|
| 89 |
+
mar -0.43 0.65 0.38 -1.18 0.31 0.31 1.37
|
| 90 |
+
paro -0.08 0.92 0.20 -0.47 0.30 0.63 1.35
|
| 91 |
+
prio 0.09 1.10 0.03 0.04 0.15 1.04 1.16
|
| 92 |
+
|
| 93 |
+
z p -log2(p)
|
| 94 |
+
fin -1.98 0.05 4.40
|
| 95 |
+
age -2.61 0.01 6.79
|
| 96 |
+
race 1.02 0.31 1.70
|
| 97 |
+
wexp -0.71 0.48 1.06
|
| 98 |
+
mar -1.14 0.26 1.97
|
| 99 |
+
paro -0.43 0.66 0.59
|
| 100 |
+
prio 3.19 <0.005 9.48
|
| 101 |
+
---
|
| 102 |
+
Concordance = 0.64
|
| 103 |
+
Partial AIC = 1331.50
|
| 104 |
+
log-likelihood ratio test = 33.27 on 7 df
|
| 105 |
+
-log2(p) of ll-ratio test = 15.37
|
| 106 |
+
"""
|
| 107 |
+
|
| 108 |
+
New in v0.25.0, we can also use ✨formulas✨ to handle the right-hand-side of the linear model. For example:
|
| 109 |
+
|
| 110 |
+
.. code:: python
|
| 111 |
+
|
| 112 |
+
cph.fit(rossi, duration_col='week', event_col='arrest', formula="fin + wexp + age * prio")
|
| 113 |
+
|
| 114 |
+
is analogous to the linear model with interaction term:
|
| 115 |
+
|
| 116 |
+
.. math::
|
| 117 |
+
\beta_1\text{fin} + \beta_2\text{wexp} + \beta_3 \text{age} + \beta_4 \text{prio} + \beta_5 \text{age} \cdot \text{prio}
|
| 118 |
+
|
| 119 |
+
.. code:: python
|
| 120 |
+
|
| 121 |
+
cph.fit(rossi, duration_col='week', event_col='arrest', formula="fin + wexp + age * prio")
|
| 122 |
+
cph.print_summary()
|
| 123 |
+
|
| 124 |
+
"""
|
| 125 |
+
<lifelines.CoxPHFitter: fitted with 432 total observations, 318 right-censored observations>
|
| 126 |
+
duration col = 'week'
|
| 127 |
+
event col = 'arrest'
|
| 128 |
+
baseline estimation = breslow
|
| 129 |
+
number of observations = 432
|
| 130 |
+
number of events observed = 114
|
| 131 |
+
partial log-likelihood = -659.39
|
| 132 |
+
time fit was run = 2020-07-13 19:30:33 UTC
|
| 133 |
+
|
| 134 |
+
---
|
| 135 |
+
coef exp(coef) se(coef) coef lower 95% coef upper 95% exp(coef) lower 95% exp(coef) upper 95%
|
| 136 |
+
covariate
|
| 137 |
+
fin -0.33 0.72 0.19 -0.70 0.04 0.49 1.05
|
| 138 |
+
wexp -0.24 0.79 0.21 -0.65 0.17 0.52 1.19
|
| 139 |
+
age -0.03 0.97 0.03 -0.09 0.03 0.92 1.03
|
| 140 |
+
prio 0.31 1.36 0.17 -0.03 0.64 0.97 1.90
|
| 141 |
+
age:prio -0.01 0.99 0.01 -0.02 0.01 0.98 1.01
|
| 142 |
+
|
| 143 |
+
z p -log2(p)
|
| 144 |
+
covariate
|
| 145 |
+
fin -1.73 0.08 3.57
|
| 146 |
+
wexp -1.14 0.26 1.97
|
| 147 |
+
age -0.93 0.35 1.51
|
| 148 |
+
prio 1.80 0.07 3.80
|
| 149 |
+
age:prio -1.28 0.20 2.32
|
| 150 |
+
---
|
| 151 |
+
Concordance = 0.64
|
| 152 |
+
Partial AIC = 1328.77
|
| 153 |
+
log-likelihood ratio test = 31.99 on 5 df
|
| 154 |
+
-log2(p) of ll-ratio test = 17.35
|
| 155 |
+
"""
|
| 156 |
+
|
| 157 |
+
Formulas can be used to create interactions, encode categorical variables, create basis splines, and so on. The formulas used are (almost) the same as what's available in R and statsmodels.
|
| 158 |
+
|
| 159 |
+
|
| 160 |
+
Interpretation
|
| 161 |
+
-----------------------
|
| 162 |
+
|
| 163 |
+
To access the coefficients and the baseline hazard directly, you can use :attr:`~lifelines.fitters.coxph_fitter.CoxPHFitter.params_` and :attr:`~lifelines.fitters.coxph_fitter.CoxPHFitter.baseline_hazard_` respectively. Taking a look at these coefficients for a moment, ``prio`` (the number of prior arrests) has a coefficient of about 0.09. Thus, a one unit increase in ``prio`` means the baseline hazard will increase by a factor of :math:`\exp{(0.09)} = 1.10` - about a 10% increase. Recall, in the Cox proportional hazard model, a higher hazard means more at risk of the event occurring. The value :math:`\exp{(0.09)}` is called the *hazard ratio*, a name that will be clear with another example.
|
| 164 |
+
|
| 165 |
+
Consider the coefficient of ``mar`` (whether the subject is married or not). The values in the column are binary: 0 or 1, representing either unmarried or married. The exponentiated coefficient associated with ``mar``, :math:`\exp{(-.43)}`, is the ratio of *hazards* associated with being married, that is:
|
| 166 |
+
|
| 167 |
+
.. math::
|
| 168 |
+
|
| 169 |
+
\exp(-0.43) = \frac{\text{hazard of married subjects at time $t$}}{\text{hazard of unmarried subjects at time $t$}}
|
| 170 |
+
|
| 171 |
+
|
| 172 |
+
Note that the left-hand side is a constant (specifically, it's independent of time, :math:`t`), but the right-hand side has two factors that may vary with time. The *proportional hazard assumption* is that this relationship is true. That is, hazards can change over time, but their ratio between levels remains a constant. Later we will deal with checking this assumption. However, in reality, it's very common for the hazard ratio to change over the study duration. The hazard ratio then has the interpretation of some sort of weighted average of period-specific hazard ratios. As a result, the hazard ratio may critically depend on the duration of the follow-up.
|
| 173 |
+
|
| 174 |
+
|
| 175 |
+
Convergence
|
| 176 |
+
-----------------------
|
| 177 |
+
|
| 178 |
+
Fitting the Cox model to the data involves using iterative methods. *lifelines* takes extra effort to help with convergence, so please be attentive to any warnings that appear. Fixing any warnings will generally help convergence and decrease the number of iterative steps required. If you wish to see more information during fitting, there is a ``show_progress`` parameter in :meth:`~lifelines.fitters.coxph_fitter.CoxPHFitter.fit` function. For further help, see :ref:`Problems with convergence in the Cox Proportional Hazard Model`.
|
| 179 |
+
|
| 180 |
+
After fitting, the value of the maximum log-likelihood is available using :attr:`~lifelines.fitters.coxph_fitter.CoxPHFitter.log_likelihood_`. The variance matrix of the coefficients is available under :attr:`~lifelines.fitters.coxph_fitter.CoxPHFitter.variance_matrix_`.
|
| 181 |
+
|
| 182 |
+
|
| 183 |
+
Goodness of fit
|
| 184 |
+
-----------------------
|
| 185 |
+
|
| 186 |
+
After fitting, you may want to know how "good" of a fit your model was to the data. A few methods the author has found useful are to
|
| 187 |
+
|
| 188 |
+
- inspect the survival probability calibration plot (see below section on :ref:`Model probability calibration`)
|
| 189 |
+
- look at the concordance-index (see below section on :ref:`Model selection and calibration in survival regression`), available as :attr:`~lifelines.fitters.coxph_fitter.CoxPHFitter.concordance_index_` or in the :meth:`~lifelines.fitters.coxph_fitter.CoxPHFitter.print_summary` as a measure of predictive accuracy.
|
| 190 |
+
- look at the log-likelihood test result in the :meth:`~lifelines.fitters.coxph_fitter.CoxPHFitter.print_summary` or :meth:`~lifelines.fitters.coxph_fitter.CoxPHFitter.log_likelihood_ratio_test`
|
| 191 |
+
- check the proportional hazards assumption with the :meth:`~lifelines.fitters.coxph_fitter.CoxPHFitter.check_assumptions` method. See section later on this page for more details.
|
| 192 |
+
|
| 193 |
+
|
| 194 |
+
Prediction
|
| 195 |
+
-----------------------
|
| 196 |
+
|
| 197 |
+
|
| 198 |
+
After fitting, you can use the suite of prediction methods: :meth:`~lifelines.fitters.coxph_fitter.CoxPHFitter.predict_partial_hazard`, :meth:`~lifelines.fitters.coxph_fitter.CoxPHFitter.predict_survival_function`, and others. See also the section on `Predicting censored subjects below <https://lifelines.readthedocs.io/en/latest/Survival%20Regression.html#prediction-on-censored-subjects>`_
|
| 199 |
+
|
| 200 |
+
.. code:: python
|
| 201 |
+
|
| 202 |
+
X = rossi
|
| 203 |
+
|
| 204 |
+
cph.predict_survival_function(X)
|
| 205 |
+
cph.predict_median(X)
|
| 206 |
+
cph.predict_partial_hazard(X)
|
| 207 |
+
...
|
| 208 |
+
|
| 209 |
+
|
| 210 |
+
|
| 211 |
+
Penalties and sparse regression
|
| 212 |
+
-----------------------------------------------
|
| 213 |
+
|
| 214 |
+
It's possible to add a penalizer term to the Cox regression as well. One can use these to i) stabilize the coefficients, ii) shrink the estimates to 0, iii) encourage a Bayesian viewpoint, and iv) create sparse coefficients. All regression models, including the Cox model, include both an L1 and L2 penalty:
|
| 215 |
+
|
| 216 |
+
.. math:: \frac{1}{2} \text{penalizer} \left((1-\text{l1-ratio}) \cdot ||\beta||_2^2 + \text{l1-ratio} \cdot ||\beta||_1\right)
|
| 217 |
+
|
| 218 |
+
|
| 219 |
+
.. note:: It's not clear from the above, but intercepts (when applicable) are not penalized.
|
| 220 |
+
|
| 221 |
+
|
| 222 |
+
To use this in *lifelines*, both the ``penalizer`` and ``l1_ratio`` can be specified in the class creation:
|
| 223 |
+
|
| 224 |
+
|
| 225 |
+
.. code:: python
|
| 226 |
+
|
| 227 |
+
from lifelines import CoxPHFitter
|
| 228 |
+
from lifelines.datasets import load_rossi
|
| 229 |
+
|
| 230 |
+
rossi = load_rossi()
|
| 231 |
+
|
| 232 |
+
cph = CoxPHFitter(penalizer=0.1, l1_ratio=1.0) # sparse solutions,
|
| 233 |
+
cph.fit(rossi, 'week', 'arrest')
|
| 234 |
+
cph.print_summary()
|
| 235 |
+
|
| 236 |
+
|
| 237 |
+
Instead of a float, an *array* can be provided that is the same size as the number of penalized parameters. The values in the array are specific penalty coefficients for each covariate. This is useful for more complicated covariate structure. Some examples:
|
| 238 |
+
|
| 239 |
+
1. you have lots of confounders you wish to penalize, but not the main treatment(s).
|
| 240 |
+
|
| 241 |
+
.. code:: python
|
| 242 |
+
|
| 243 |
+
from lifelines import CoxPHFitter
|
| 244 |
+
from lifelines.datasets import load_rossi
|
| 245 |
+
|
| 246 |
+
rossi = load_rossi()
|
| 247 |
+
|
| 248 |
+
# variable `fin` is the treatment of interest so don't penalize it at all
|
| 249 |
+
penalty = np.array([0, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5])
|
| 250 |
+
|
| 251 |
+
cph = CoxPHFitter(penalizer=penalty)
|
| 252 |
+
cph.fit(rossi, 'week', 'arrest')
|
| 253 |
+
cph.print_summary()
|
| 254 |
+
|
| 255 |
+
2. you have to `fuse categories together <https://stats.stackexchange.com/questions/146907/principled-way-of-collapsing-categorical-variables-with-many-levels>`_.
|
| 256 |
+
|
| 257 |
+
3. you want to implement a `very sparse solution <https://dataorigami.net/blogs/napkin-folding/an-l1-2-penalty-in-cox-regression>`_.
|
| 258 |
+
|
| 259 |
+
See more about penalties and their implementation on our development blog.
|
| 260 |
+
|
| 261 |
+
- `L₁ Penalty in Cox Regression <https://dataorigami.net/blogs/napkin-folding/l1-penalty-in-cox-regression>`_
|
| 262 |
+
- `An L½ penalty in Cox Regression <https://dataorigami.net/blogs/napkin-folding/an-l1-2-penalty-in-cox-regression>`_
|
| 263 |
+
|
| 264 |
+
Plotting the coefficients
|
| 265 |
+
------------------------------
|
| 266 |
+
|
| 267 |
+
With a fitted model, an alternative way to view the coefficients and their ranges is to use the ``plot`` method.
|
| 268 |
+
|
| 269 |
+
.. code:: python
|
| 270 |
+
|
| 271 |
+
from lifelines.datasets import load_rossi
|
| 272 |
+
from lifelines import CoxPHFitter
|
| 273 |
+
|
| 274 |
+
rossi = load_rossi()
|
| 275 |
+
cph = CoxPHFitter()
|
| 276 |
+
cph.fit(rossi, duration_col='week', event_col='arrest')
|
| 277 |
+
|
| 278 |
+
cph.plot()
|
| 279 |
+
|
| 280 |
+
.. image:: images/coxph_plot.png
|
| 281 |
+
:width: 650px
|
| 282 |
+
:align: center
|
| 283 |
+
|
| 284 |
+
|
| 285 |
+
Plotting the effect of varying a covariate
|
| 286 |
+
-------------------------------------------
|
| 287 |
+
|
| 288 |
+
|
| 289 |
+
|
| 290 |
+
After fitting, we can plot what the survival curves look like as we vary a single covariate while
|
| 291 |
+
holding everything else equal. This is useful to understand the impact of a covariate, *given the model*. To do this, we use the :meth:`~lifelines.fitters.coxph_fitter.CoxPHFitter.plot_partial_effects_on_outcome` method and give it the covariate of interest, and the values to display.
|
| 292 |
+
|
| 293 |
+
.. note::
|
| 294 |
+
Prior to lifelines v0.25.0, this method used to be called ``plot_covariate_groups``. It's been renamed to ``plot_partial_effects_on_outcome`` (a much clearer name, I hope).
|
| 295 |
+
|
| 296 |
+
|
| 297 |
+
.. code:: python
|
| 298 |
+
|
| 299 |
+
from lifelines.datasets import load_rossi
|
| 300 |
+
from lifelines import CoxPHFitter
|
| 301 |
+
|
| 302 |
+
rossi = load_rossi()
|
| 303 |
+
cph = CoxPHFitter()
|
| 304 |
+
cph.fit(rossi, duration_col='week', event_col='arrest')
|
| 305 |
+
|
| 306 |
+
cph.plot_partial_effects_on_outcome(covariates='prio', values=[0, 2, 4, 6, 8, 10], cmap='coolwarm')
|
| 307 |
+
|
| 308 |
+
.. image:: images/coxph_plot_covarite_groups.png
|
| 309 |
+
:width: 600px
|
| 310 |
+
:align: center
|
| 311 |
+
|
| 312 |
+
|
| 313 |
+
Suppose there are derivative features in your dataset; for example, you have included ``prio`` and ``prio**2`` in your dataset. It doesn't make sense to just vary ``prio`` and leave ``prio**2`` fixed. You'll need to specify manually the values the covariates take on in a N-d array or list (where N is the number of covariates being varied.)
|
| 314 |
+
|
| 315 |
+
.. code:: python
|
| 316 |
+
|
| 317 |
+
rossi['prio**2'] = rossi['prio'] ** 2
|
| 318 |
+
|
| 319 |
+
cph.fit(rossi, 'week', 'arrest')
|
| 320 |
+
|
| 321 |
+
cph.plot_partial_effects_on_outcome(
|
| 322 |
+
covariates=['prio', 'prio**2'],
|
| 323 |
+
values=[
|
| 324 |
+
[0, 0],
|
| 325 |
+
[1, 1],
|
| 326 |
+
[2, 4],
|
| 327 |
+
[3, 9],
|
| 328 |
+
[8, 64],
|
| 329 |
+
],
|
| 330 |
+
cmap='coolwarm')
|
| 331 |
+
|
| 332 |
+
|
| 333 |
+
However, if you used the ``formula`` kwarg in fit, all the necessary transformations will be made internally for you.
|
| 334 |
+
|
| 335 |
+
.. code:: python
|
| 336 |
+
|
| 337 |
+
cph.fit(rossi, 'week', 'arrest', formula="prio + I(prio**2)")
|
| 338 |
+
|
| 339 |
+
cph.plot_partial_effects_on_outcome(
|
| 340 |
+
covariates=['prio'],
|
| 341 |
+
values=[0, 1, 2, 3, 8],
|
| 342 |
+
cmap='coolwarm')
|
| 343 |
+
|
| 344 |
+
This feature is also useful for analyzing categorical variables:
|
| 345 |
+
|
| 346 |
+
.. code:: python
|
| 347 |
+
|
| 348 |
+
cph.plot_partial_effects_on_outcome(
|
| 349 |
+
covariates=["a_categorical_variable"],
|
| 350 |
+
values=["A", "B", ...],
|
| 351 |
+
plot_baseline=False)
|
| 352 |
+
|
| 353 |
+
|
| 354 |
+
Checking the proportional hazards assumption
|
| 355 |
+
-----------------------------------------------
|
| 356 |
+
|
| 357 |
+
To make proper inferences, we should ask if our Cox model is appropriate for our dataset. Recall from above that when using the Cox model, we are implicitly applying the proportional hazard assumption. We should ask, does our dataset obey this assumption?
|
| 358 |
+
|
| 359 |
+
|
| 360 |
+
:class:`~lifelines.fitters.coxph_fitter.CoxPHFitter` has a :meth:`~lifelines.fitters.coxph_fitter.CoxPHFitter.check_assumptions` method that will output violations of the proportional hazard assumption. For a tutorial on how to fix violations, see `Testing the Proportional Hazard Assumptions`_. Suggestions are to look for ways to *stratify* a column (see docs below), or use a `time varying model`_.
|
| 361 |
+
|
| 362 |
+
.. note:: Checking assumptions like this is only necessary if your goal is inference or correlation. That is, you wish to understand the influence of a covariate on the survival duration & outcome. If your goal is prediction, checking model assumptions is less important since your goal is to maximize an accuracy metric, and not learn about *how* the model is making that prediction.
|
| 363 |
+
|
| 364 |
+
|
| 365 |
+
Stratification
|
| 366 |
+
-----------------------------------------------
|
| 367 |
+
|
| 368 |
+
Sometimes one or more covariates may not obey the proportional hazard assumption. In this case, we can allow the covariate(s) to still be included in the model without estimating its effect. This is called stratification. At a high level, think of it as splitting the dataset into *m* smaller datasets, partitioned by the unique values of the stratifying covariate(s). Each dataset has its own baseline hazard (the non-parametric part of the model), but they all share the regression parameters (the parametric part of the model). Since covariates are the same within each dataset, there is no regression parameter for the covariates stratified on, hence they will not show up in the output. However there will be *m* baseline hazards under :attr:`~lifelines.fitters.coxph_fitter.CoxPHFitter.baseline_cumulative_hazard_`.
|
| 369 |
+
|
| 370 |
+
To specify variables to be used in stratification, we define them in the call to :meth:`~lifelines.fitters.coxph_fitter.CoxPHFitter.fit`:
|
| 371 |
+
|
| 372 |
+
.. code:: python
|
| 373 |
+
|
| 374 |
+
from lifelines.datasets import load_rossi
|
| 375 |
+
from lifelines import CoxPHFitter
|
| 376 |
+
rossi = load_rossi()
|
| 377 |
+
|
| 378 |
+
cph = CoxPHFitter()
|
| 379 |
+
cph.fit(rossi, 'week', event_col='arrest', strata=['wexp'])
|
| 380 |
+
cph.print_summary()
|
| 381 |
+
|
| 382 |
+
"""
|
| 383 |
+
<lifelines.CoxPHFitter: fitted with 432 total observations, 318 right-censored observations>
|
| 384 |
+
duration col = 'week'
|
| 385 |
+
event col = 'arrest'
|
| 386 |
+
strata = ['wexp']
|
| 387 |
+
baseline estimation = breslow
|
| 388 |
+
number of observations = 432
|
| 389 |
+
number of events observed = 114
|
| 390 |
+
partial log-likelihood = -580.89
|
| 391 |
+
time fit was run = 2020-08-09 21:25:37 UTC
|
| 392 |
+
|
| 393 |
+
---
|
| 394 |
+
coef exp(coef) se(coef) coef lower 95% coef upper 95% exp(coef) lower 95% exp(coef) upper 95%
|
| 395 |
+
covariate
|
| 396 |
+
fin -0.38 0.68 0.19 -0.76 -0.01 0.47 0.99
|
| 397 |
+
age -0.06 0.94 0.02 -0.10 -0.01 0.90 0.99
|
| 398 |
+
race 0.31 1.36 0.31 -0.30 0.91 0.74 2.49
|
| 399 |
+
mar -0.45 0.64 0.38 -1.20 0.29 0.30 1.34
|
| 400 |
+
paro -0.08 0.92 0.20 -0.47 0.30 0.63 1.35
|
| 401 |
+
prio 0.09 1.09 0.03 0.03 0.15 1.04 1.16
|
| 402 |
+
z p -log2(p)
|
| 403 |
+
covariate
|
| 404 |
+
fin -1.99 0.05 4.42
|
| 405 |
+
age -2.64 0.01 6.91
|
| 406 |
+
race 1.00 0.32 1.65
|
| 407 |
+
mar -1.19 0.23 2.09
|
| 408 |
+
paro -0.42 0.67 0.57
|
| 409 |
+
prio 3.16 <0.005 9.33
|
| 410 |
+
---
|
| 411 |
+
Concordance = 0.61
|
| 412 |
+
Partial AIC = 1173.77
|
| 413 |
+
log-likelihood ratio test = 23.77 on 6 df
|
| 414 |
+
-log2(p) of ll-ratio test = 10.77
|
| 415 |
+
|
| 416 |
+
"""
|
| 417 |
+
|
| 418 |
+
cph.baseline_survival_.shape
|
| 419 |
+
# (49, 2)
|
| 420 |
+
cph.baseline_cumulative_hazard_.plot(drawstyle="steps")
|
| 421 |
+
|
| 422 |
+
Weights & robust errors
|
| 423 |
+
-----------------------------------------------
|
| 424 |
+
|
| 425 |
+
Observations can come with weights, as well. These weights may be integer values representing some commonly occurring observation, or they may be float values representing some sampling weights (ex: inverse probability weights). In the :meth:`~lifelines.fitters.coxph_fitter.CoxPHFitter.fit` method, a kwarg is present for specifying which column in the DataFrame should be used as weights, ex: ``CoxPHFitter().fit(df, 'T', 'E', weights_col='weights')``.
|
| 426 |
+
|
| 427 |
+
When using sampling weights, it's correct to also change the standard error calculations. That is done by turning on the ``robust`` flag in :meth:`~lifelines.fitters.coxph_fitter.CoxPHFitter.fit`. Internally, :class:`~lifelines.fitters.coxph_fitter.CoxPHFitter` will use the sandwich estimator to compute the errors.
|
| 428 |
+
|
| 429 |
+
|
| 430 |
+
.. code:: python
|
| 431 |
+
|
| 432 |
+
import pandas as pd
|
| 433 |
+
from lifelines import CoxPHFitter
|
| 434 |
+
|
| 435 |
+
df = pd.DataFrame({
|
| 436 |
+
'T': [5, 3, 9, 8, 7, 4, 4, 3, 2, 5, 6, 7],
|
| 437 |
+
'E': [1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0],
|
| 438 |
+
'weights': [1.1, 0.5, 2.0, 1.6, 1.2, 4.3, 1.4, 4.5, 3.0, 3.2, 0.4, 6.2],
|
| 439 |
+
'month': [10, 3, 9, 8, 7, 4, 4, 3, 2, 5, 6, 7],
|
| 440 |
+
'age': [4, 3, 9, 8, 7, 4, 4, 3, 2, 5, 6, 7],
|
| 441 |
+
})
|
| 442 |
+
|
| 443 |
+
cph = CoxPHFitter()
|
| 444 |
+
cph.fit(df, 'T', 'E', weights_col='weights', robust=True)
|
| 445 |
+
cph.print_summary()
|
| 446 |
+
|
| 447 |
+
See more examples in `Adding weights to observations in a Cox model <https://lifelines.readthedocs.io/en/latest/Examples.html#adding-weights-to-observations-in-a-cox-model>`_.
|
| 448 |
+
|
| 449 |
+
Clusters & correlations
|
| 450 |
+
-----------------------------------------------
|
| 451 |
+
|
| 452 |
+
Another property your dataset may have is groups of related subjects. This could be caused by:
|
| 453 |
+
|
| 454 |
+
- a single individual having multiple occurrences, and hence showing up in the dataset more than once.
|
| 455 |
+
- subjects that share some common property, like members of the same family or being matched on propensity scores.
|
| 456 |
+
|
| 457 |
+
We call these grouped subjects "clusters", and assume they are designated by some column in the DataFrame (example below). When using clusters, the point estimates of the model don't change, but the standard errors will increase. An intuitive argument for this is that 100 observations on 100 individuals provide more information than 100 observations on 10 individuals (or clusters).
|
| 458 |
+
|
| 459 |
+
|
| 460 |
+
.. code:: python
|
| 461 |
+
|
| 462 |
+
from lifelines import CoxPHFitter
|
| 463 |
+
|
| 464 |
+
df = pd.DataFrame({
|
| 465 |
+
'T': [5, 3, 9, 8, 7, 4, 4, 3, 2, 5, 6, 7],
|
| 466 |
+
'E': [1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0],
|
| 467 |
+
'month': [10, 3, 9, 8, 7, 4, 4, 3, 2, 5, 6, 7],
|
| 468 |
+
'age': [4, 3, 9, 8, 7, 4, 4, 3, 2, 5, 6, 7],
|
| 469 |
+
'id': [1, 1, 1, 1, 2, 3, 3, 4, 4, 5, 6, 7]
|
| 470 |
+
})
|
| 471 |
+
|
| 472 |
+
cph = CoxPHFitter()
|
| 473 |
+
cph.fit(df, 'T', 'E', cluster_col='id')
|
| 474 |
+
cph.print_summary()
|
| 475 |
+
|
| 476 |
+
|
| 477 |
+
For more examples, see `Correlations between subjects in a Cox model <https://lifelines.readthedocs.io/en/latest/Examples.html#correlations-between-subjects-in-a-cox-model>`_.
|
| 478 |
+
|
| 479 |
+
Residuals
|
| 480 |
+
-----------------------------------------------
|
| 481 |
+
|
| 482 |
+
After fitting a Cox model, we can look back and compute important model residuals. These residuals can tell us about non-linearities not captured, violations of proportional hazards, and help us answer other useful modeling questions. See `Assessing Cox model fit using residuals`_.
|
| 483 |
+
|
| 484 |
+
|
| 485 |
+
Modeling baseline hazard and survival with parametric models
|
| 486 |
+
---------------------------------------------------------------
|
| 487 |
+
|
| 488 |
+
Normally, the Cox model is *semi-parametric*, which means that its baseline hazard, :math:`h_0(t)`, has no parametric form. This is the default for *lifelines*. However, it is sometimes valuable to produce a parametric baseline instead. A parametric baseline makes survival predictions more efficient, allows for better understanding of baseline behaviour, and allows interpolation/extrapolation.
|
| 489 |
+
|
| 490 |
+
In *lifelines*, there is an option to fit to a parametric baseline with 1) cubic splines, or 2) piecewise constant hazards. Cubic splines are highly flexible and can capture the underlying data almost as well as non-parametric methods, and with much more efficiency.
|
| 491 |
+
|
| 492 |
+
.. code:: python
|
| 493 |
+
|
| 494 |
+
|
| 495 |
+
from lifelines.datasets import load_rossi
|
| 496 |
+
from lifelines import CoxPHFitter
|
| 497 |
+
|
| 498 |
+
rossi = load_rossi()
|
| 499 |
+
|
| 500 |
+
cph_spline = CoxPHFitter(baseline_estimation_method="spline", n_baseline_knots=5)
|
| 501 |
+
cph_spline.fit(rossi, 'week', event_col='arrest')
|
| 502 |
+
|
| 503 |
+
To access the baseline hazard and baseline survival, one can use :attr:`~lifelines.fitters.coxph_fitter.CoxPHFitter.baseline_hazard_` and :attr:`~lifelines.fitters.coxph_fitter.CoxPHFitter.baseline_survival_` respectively. One nice thing about parametric models is we can interpolate baseline survival / hazards too, see :meth:`~lifelines.fitters.coxph_fitter.ParametricSplinePHFitter.baseline_hazard_at_times` and :meth:`~lifelines.fitters.coxph_fitter.ParametricSplinePHFitter.baseline_survival_at_times`.
|
| 504 |
+
|
| 505 |
+
Below we compare the non-parametric and the fully parametric baseline survivals:
|
| 506 |
+
|
| 507 |
+
.. code:: python
|
| 508 |
+
|
| 509 |
+
cph_semi = CoxPHFitter().fit(rossi, 'week', event_col='arrest')
|
| 510 |
+
cph_piecewise = CoxPHFitter(baseline_estimation_method="piecewise", breakpoints=[20, 35]).fit(rossi, 'week', event_col='arrest')
|
| 511 |
+
|
| 512 |
+
bch_key = "baseline cumulative hazard"
|
| 513 |
+
|
| 514 |
+
ax = cph_spline.baseline_cumulative_hazard_[bch_key].plot(label="spline")
|
| 515 |
+
cph_semi.baseline_cumulative_hazard_[bch_key].plot(ax=ax, drawstyle="steps-post", label="semi")
|
| 516 |
+
cph_piecewise.baseline_cumulative_hazard_[bch_key].plot(ax=ax, label="piecewise[20,35]")
|
| 517 |
+
plt.legend()
|
| 518 |
+
|
| 519 |
+
|
| 520 |
+
.. figure:: images/spline_and_semi.png
|
| 521 |
+
:width: 600px
|
| 522 |
+
:align: center
|
| 523 |
+
|
| 524 |
+
Modeling the baseline survival with splines vs non-parametric.
|
| 525 |
+
|
| 526 |
+
*lifelines'* spline Cox model can also use almost all the non-parametric options, including: `strata`, `penalizer`, `timeline`, `formula`, etc.
|
| 527 |
+
|
| 528 |
+
|
| 529 |
+
|
| 530 |
+
Parametric survival models
|
| 531 |
+
==================================
|
| 532 |
+
|
| 533 |
+
We ended the previous section discussing a *fully*-parametric Cox model, but there are many many more parametric models to consider. Below we go over these, starting with the most common: AFT models.
|
| 534 |
+
|
| 535 |
+
Accelerated failure time models
|
| 536 |
+
-----------------------------------------------
|
| 537 |
+
|
| 538 |
+
Suppose we have two populations, A and B, with different survival functions, :math:`S_A(t)` and :math:`S_B(t)`, and they are related by some *accelerated failure rate*, :math:`\lambda`:
|
| 539 |
+
|
| 540 |
+
.. math::
|
| 541 |
+
S_A(t) = S_B\left(\frac{t}{\lambda}\right)
|
| 542 |
+
|
| 543 |
+
This can be interpreted as slowing down or speeding up moving along the survival function. A classic example of this is that dogs age at 7 times the rate of humans, i.e. :math:`\lambda = \frac{1}{7}`. This model has some other nice properties: the average survival time of population A is :math:`{\lambda}` times the average survival time of population B. Likewise with the *median* survival time.
|
| 544 |
+
|
| 545 |
+
More generally, we can model the :math:`\lambda` as a function of covariates available, that is:
|
| 546 |
+
|
| 547 |
+
.. math::
|
| 548 |
+
S_A(t) = S_B\left(\frac{t}{\lambda(x)}\right)\\
|
| 549 |
+
\lambda(x) = \exp\left(b_0 + \sum_{i=1}^n b_i x_i \right)
|
| 550 |
+
|
| 551 |
+
This model can accelerate or decelerate failure times depending on subjects' covariates. Another nice feature of this is the ease of interpretation of the coefficients: a unit increase in :math:`x_i` means the average/median survival time changes by a factor of :math:`\exp(b_i)`.
|
| 552 |
+
|
| 553 |
+
|
| 554 |
+
.. note:: An important note on interpretation: Suppose :math:`b_i` was positive, then the factor :math:`\exp(b_i)` is greater than 1, which will decelerate the event time since we divide time by the factor ⇿ increase mean/median survival. Hence, it will be a *protective effect*. Likewise, a negative :math:`b_i` will hasten the event time ⇿ reduce the mean/median survival time. This interpretation is *opposite* of how the sign influences event times in the Cox model! This is standard survival analysis convention.
|
| 555 |
+
|
| 556 |
+
|
| 557 |
+
Next, we pick a parametric form for the survival function, :math:`S(t)`. The most common is the Weibull form. So if we assume the relationship above and a Weibull form, our cumulative hazard function is quite easy to write down:
|
| 558 |
+
|
| 559 |
+
.. math::
|
| 560 |
+
H(t; x) = \left( \frac{t}{\lambda(x)} \right)^\rho
|
| 561 |
+
|
| 562 |
+
|
| 563 |
+
We call these accelerated failure time models, shortened often to just AFT models. Using *lifelines*, we can fit this model (and the unknown :math:`\rho` parameter too).
|
| 564 |
+
|
| 565 |
+
The Weibull AFT model
|
| 566 |
+
-----------------------------------------------
|
| 567 |
+
|
| 568 |
+
|
| 569 |
+
The Weibull AFT model is implemented under :class:`~lifelines.fitters.weibull_aft_fitter.WeibullAFTFitter`. The API for the class is similar to the other regression models in *lifelines*. After fitting, the coefficients can be accessed using :attr:`~lifelines.fitters.weibull_aft_fitter.WeibullAFTFitter.params_` or :attr:`~lifelines.fitters.weibull_aft_fitter.WeibullAFTFitter.summary`, or alternatively printed using :meth:`~lifelines.fitters.weibull_aft_fitter.WeibullAFTFitter.print_summary`.
|
| 570 |
+
|
| 571 |
+
.. code:: python
|
| 572 |
+
|
| 573 |
+
from lifelines import WeibullAFTFitter
|
| 574 |
+
from lifelines.datasets import load_rossi
|
| 575 |
+
|
| 576 |
+
rossi = load_rossi()
|
| 577 |
+
|
| 578 |
+
aft = WeibullAFTFitter()
|
| 579 |
+
aft.fit(rossi, duration_col='week', event_col='arrest')
|
| 580 |
+
|
| 581 |
+
aft.print_summary(3) # access the results using aft.summary
|
| 582 |
+
|
| 583 |
+
"""
|
| 584 |
+
<lifelines.WeibullAFTFitter: fitted with 432 observations, 318 censored>
|
| 585 |
+
duration col = 'week'
|
| 586 |
+
event col = 'arrest'
|
| 587 |
+
number of subjects = 432
|
| 588 |
+
number of events = 114
|
| 589 |
+
log-likelihood = -679.917
|
| 590 |
+
time fit was run = 2019-02-20 17:47:19 UTC
|
| 591 |
+
|
| 592 |
+
---
|
| 593 |
+
coef exp(coef) se(coef) z p -log2(p) lower 0.95 upper 0.95
|
| 594 |
+
lambda_ fin 0.272 1.313 0.138 1.973 0.049 4.365 0.002 0.543
|
| 595 |
+
age 0.041 1.042 0.016 2.544 0.011 6.512 0.009 0.072
|
| 596 |
+
race -0.225 0.799 0.220 -1.021 0.307 1.703 -0.656 0.207
|
| 597 |
+
wexp 0.107 1.112 0.152 0.703 0.482 1.053 -0.190 0.404
|
| 598 |
+
mar 0.311 1.365 0.273 1.139 0.255 1.973 -0.224 0.847
|
| 599 |
+
paro 0.059 1.061 0.140 0.421 0.674 0.570 -0.215 0.333
|
| 600 |
+
prio -0.066 0.936 0.021 -3.143 0.002 9.224 -0.107 -0.025
|
| 601 |
+
Intercept 3.990 54.062 0.419 9.521 <0.0005 68.979 3.169 4.812
|
| 602 |
+
rho_ Intercept 0.339 1.404 0.089 3.809 <0.0005 12.808 0.165 0.514
|
| 603 |
+
---
|
| 604 |
+
Concordance = 0.640
|
| 605 |
+
AIC = 1377.833
|
| 606 |
+
log-likelihood ratio test = 33.416 on 7 df
|
| 607 |
+
-log2(p) of ll-ratio test = 15.462
|
| 608 |
+
"""
|
| 609 |
+
|
| 610 |
+
From above, we can see that ``prio``, which is the number of previous incarcerations, has a large negative coefficient. This means that each additional incarceration changes a subject's mean/median survival time by a factor of :math:`\exp(-0.066) = 0.936`, approximately a 7% decrease in mean/median survival time. What is the mean/median survival time?
|
| 611 |
+
|
| 612 |
+
|
| 613 |
+
.. code:: python
|
| 614 |
+
|
| 615 |
+
print(aft.median_survival_time_)
|
| 616 |
+
print(aft.mean_survival_time_)
|
| 617 |
+
|
| 618 |
+
# 100.325
|
| 619 |
+
# 118.67
|
| 620 |
+
|
| 621 |
+
|
| 622 |
+
What does the ``rho_ Intercept`` row mean in the above table? Internally, we model the log of the ``rho_`` parameter, so the value of :math:`\rho` is the exponential of the value, so in the case above it's :math:`\hat{\rho} = \exp(0.339) = 1.404`. This brings us to the next point - modelling :math:`\rho` with covariates as well:
|
| 623 |
+
|
| 624 |
+
|
| 625 |
+
Modeling ancillary parameters
|
| 626 |
+
-----------------------------------------------
|
| 627 |
+
|
| 628 |
+
In the above model, we left the parameter :math:`\rho` as a single unknown. We can also choose to model this parameter as well. Why might we want to do this? It can help in survival prediction to allow heterogeneity in the :math:`\rho` parameter. The model is no longer an AFT model, but we can still recover and understand the influence of changing a covariate by looking at its outcome plot (see section below). To model :math:`\rho`, we use the ``ancillary`` keyword argument in the call to :meth:`~lifelines.fitters.weibull_aft_fitter.WeibullAFTFitter.fit`. There are four valid options:
|
| 629 |
+
|
| 630 |
+
1. ``False`` or ``None``: explicitly do not model the ``rho_`` parameter (except for its intercept).
|
| 631 |
+
2. a Pandas DataFrame. This option will use the columns in the Pandas DataFrame as the covariates in the regression for ``rho_``. This DataFrame could be equal to, or a subset of, the original dataset used for modeling ``lambda_``, or it could be a totally different dataset.
|
| 632 |
+
3. ``True``. Passing in ``True`` will internally reuse the dataset that is being used to model ``lambda_``.
|
| 633 |
+
4. An R-like formula.
|
| 634 |
+
|
| 635 |
+
.. code:: python
|
| 636 |
+
|
| 637 |
+
aft = WeibullAFTFitter()
|
| 638 |
+
|
| 639 |
+
aft.fit(rossi, duration_col='week', event_col='arrest', ancillary=False)
|
| 640 |
+
# identical to aft.fit(rossi, duration_col='week', event_col='arrest', ancillary=None)
|
| 641 |
+
|
| 642 |
+
|
| 643 |
+
aft.fit(rossi, duration_col='week', event_col='arrest', ancillary=some_df)
|
| 644 |
+
|
| 645 |
+
|
| 646 |
+
aft.fit(rossi, duration_col='week', event_col='arrest', ancillary=True)
|
| 647 |
+
# identical to aft.fit(rossi, duration_col='week', event_col='arrest', ancillary=rossi)
|
| 648 |
+
# identical to aft.fit(rossi, duration_col='week', event_col='arrest', ancillary="fin + age + race + wexp + mar + paro + prio")
|
| 649 |
+
|
| 650 |
+
aft.print_summary()
|
| 651 |
+
|
| 652 |
+
"""
|
| 653 |
+
<lifelines.WeibullAFTFitter: fitted with 432 observations, 318 censored>
|
| 654 |
+
duration col = 'week'
|
| 655 |
+
event col = 'arrest'
|
| 656 |
+
number of subjects = 432
|
| 657 |
+
number of events = 114
|
| 658 |
+
log-likelihood = -669.40
|
| 659 |
+
time fit was run = 2019-02-20 17:42:55 UTC
|
| 660 |
+
|
| 661 |
+
---
|
| 662 |
+
coef exp(coef) se(coef) z p -log2(p) lower 0.95 upper 0.95
|
| 663 |
+
lambda_ fin 0.24 1.28 0.15 1.60 0.11 3.18 -0.06 0.55
|
| 664 |
+
age 0.10 1.10 0.03 3.43 <0.005 10.69 0.04 0.16
|
| 665 |
+
race 0.07 1.07 0.19 0.36 0.72 0.48 -0.30 0.44
|
| 666 |
+
wexp -0.34 0.71 0.15 -2.22 0.03 5.26 -0.64 -0.04
|
| 667 |
+
mar 0.26 1.30 0.30 0.86 0.39 1.35 -0.33 0.85
|
| 668 |
+
paro 0.09 1.10 0.15 0.61 0.54 0.88 -0.21 0.39
|
| 669 |
+
prio -0.08 0.92 0.02 -4.24 <0.005 15.46 -0.12 -0.04
|
| 670 |
+
Intercept 2.68 14.65 0.60 4.50 <0.005 17.14 1.51 3.85
|
| 671 |
+
rho_ fin -0.01 0.99 0.15 -0.09 0.92 0.11 -0.31 0.29
|
| 672 |
+
age -0.05 0.95 0.02 -3.10 <0.005 9.01 -0.08 -0.02
|
| 673 |
+
race -0.46 0.63 0.25 -1.79 0.07 3.77 -0.95 0.04
|
| 674 |
+
wexp 0.56 1.74 0.17 3.32 <0.005 10.13 0.23 0.88
|
| 675 |
+
mar 0.10 1.10 0.27 0.36 0.72 0.47 -0.44 0.63
|
| 676 |
+
paro 0.02 1.02 0.16 0.12 0.90 0.15 -0.29 0.33
|
| 677 |
+
prio 0.03 1.03 0.02 1.44 0.15 2.73 -0.01 0.08
|
| 678 |
+
Intercept 1.48 4.41 0.41 3.60 <0.005 11.62 0.68 2.29
|
| 679 |
+
---
|
| 680 |
+
Concordance = 0.63
|
| 681 |
+
Log-likelihood ratio test = 54.45 on 14 df, -log2(p)=19.83
|
| 682 |
+
"""
|
| 683 |
+
|
| 684 |
+
|
| 685 |
+
|
| 686 |
+
Plotting
|
| 687 |
+
-----------------------------------------------
|
| 688 |
+
|
| 689 |
+
The plotting API is the same as in :class:`~lifelines.fitters.coxph_fitter.CoxPHFitter`. We can view all covariates in a forest plot:
|
| 690 |
+
|
| 691 |
+
.. code:: python
|
| 692 |
+
|
| 693 |
+
from matplotlib import pyplot as plt
|
| 694 |
+
|
| 695 |
+
wft = WeibullAFTFitter().fit(rossi, 'week', 'arrest', ancillary=True)
|
| 696 |
+
wft.plot()
|
| 697 |
+
|
| 698 |
+
.. image:: images/weibull_aft_forest.png
|
| 699 |
+
:width: 650px
|
| 700 |
+
:align: center
|
| 701 |
+
|
| 702 |
+
|
| 703 |
+
We can observe the influence of a variable in the model by plotting the *outcome* (i.e. survival) of changing the variable. This is done using :meth:`~lifelines.fitters.weibull_aft_fitter.WeibullAFTFitter.plot_partial_effects_on_outcome`, and this is also a nice time to observe the effects of modeling ``rho_`` vs keeping it fixed. Below we fit the Weibull model to the same dataset twice, but in the first model we model ``rho_`` and in the second model we don't. We then vary ``prio`` (which is the number of prior arrests) and observe how the survival changes.
|
| 704 |
+
|
| 705 |
+
|
| 706 |
+
.. note::
|
| 707 |
+
Prior to lifelines v0.25.0, this method used to be called ``plot_covariate_group``. It's been renamed to ``plot_partial_effects_on_outcome`` (a much clearer name, I hope).
|
| 708 |
+
|
| 709 |
+
.. code:: python
|
| 710 |
+
|
| 711 |
+
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(10, 4))
|
| 712 |
+
|
| 713 |
+
times = np.arange(0, 100)
|
| 714 |
+
wft_model_rho = WeibullAFTFitter().fit(rossi, 'week', 'arrest', ancillary=True, timeline=times)
|
| 715 |
+
wft_model_rho.plot_partial_effects_on_outcome('prio', range(0, 16, 3), cmap='coolwarm', ax=ax[0])
|
| 716 |
+
ax[0].set_title("Modelling rho_")
|
| 717 |
+
|
| 718 |
+
wft_not_model_rho = WeibullAFTFitter().fit(rossi, 'week', 'arrest', ancillary=False, timeline=times)
|
| 719 |
+
wft_not_model_rho.plot_partial_effects_on_outcome('prio', range(0, 16, 3), cmap='coolwarm', ax=ax[1])
|
| 720 |
+
ax[1].set_title("Not modelling rho_");
|
| 721 |
+
|
| 722 |
+
.. image:: images/weibull_aft_two_models.png
|
| 723 |
+
|
| 724 |
+
|
| 725 |
+
Comparing a few of these survival functions side by side, we can see that modeling ``rho_`` produces a more flexible (diverse) set of survival functions.
|
| 726 |
+
|
| 727 |
+
.. code:: python
|
| 728 |
+
|
| 729 |
+
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(7, 4))
|
| 730 |
+
|
| 731 |
+
# modeling rho == solid line
|
| 732 |
+
wft_model_rho.plot_partial_effects_on_outcome('prio', range(0, 16, 5), cmap='coolwarm', ax=ax, lw=2, plot_baseline=False)
|
| 733 |
+
|
| 734 |
+
# not modeling rho == dashed line
|
| 735 |
+
wft_not_model_rho.plot_partial_effects_on_outcome('prio', range(0, 16, 5), cmap='coolwarm', ax=ax, ls='--', lw=2, plot_baseline=False)
|
| 736 |
+
|
| 737 |
+
ax.get_legend().remove()
|
| 738 |
+
|
| 739 |
+
.. image:: images/weibull_aft_two_models_side_by_side.png
|
| 740 |
+
:width: 500px
|
| 741 |
+
:align: center
|
| 742 |
+
|
| 743 |
+
You can read more about, and see other examples of, these extensions in the docs for :meth:`~lifelines.fitters.weibull_aft_fitter.WeibullAFTFitter.plot_partial_effects_on_outcome`.
|
| 744 |
+
|
| 745 |
+
|
| 746 |
+
Prediction
|
| 747 |
+
-----------------------------------------------
|
| 748 |
+
|
| 749 |
+
Given a new subject, we'd like to ask questions about their future survival. When are they likely to experience the event? What does their survival function look like? The :class:`~lifelines.fitters.weibull_aft_fitter.WeibullAFTFitter` is able to answer these. If we have modeled the ancillary covariates, we are required to include those as well:
|
| 750 |
+
|
| 751 |
+
.. code:: python
|
| 752 |
+
|
| 753 |
+
X = rossi.loc[:10]
|
| 754 |
+
|
| 755 |
+
aft.predict_cumulative_hazard(X, ancillary=X)
|
| 756 |
+
aft.predict_survival_function(X, ancillary=X)
|
| 757 |
+
aft.predict_median(X, ancillary=X)
|
| 758 |
+
aft.predict_percentile(X, p=0.9, ancillary=X)
|
| 759 |
+
aft.predict_expectation(X, ancillary=X)
|
| 760 |
+
|
| 761 |
+
|
| 762 |
+
There are two hyper-parameters that can be used to achieve a better test score. These are ``penalizer`` and ``l1_ratio`` in the call to :class:`~lifelines.fitters.weibull_aft_fitter.WeibullAFTFitter`. The penalizer is similar to scikit-learn's ``ElasticNet`` model, see their `docs <https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.ElasticNet.html>`_. (However, *lifelines* will also accept an array for custom penalty value per variable, see `Cox docs above <https://lifelines.readthedocs.io/en/latest/Survival%20Regression.html#penalties-and-sparse-regression>`_)
|
| 763 |
+
|
| 764 |
+
.. code:: python
|
| 765 |
+
|
| 766 |
+
|
| 767 |
+
aft_with_elastic_penalty = WeibullAFTFitter(penalizer=1e-4, l1_ratio=1.0)
|
| 768 |
+
aft_with_elastic_penalty.fit(rossi, 'week', 'arrest')
|
| 769 |
+
aft_with_elastic_penalty.predict_median(rossi)
|
| 770 |
+
|
| 771 |
+
aft_with_elastic_penalty.print_summary(columns=['coef', 'exp(coef)'])
|
| 772 |
+
|
| 773 |
+
"""
|
| 774 |
+
<lifelines.WeibullAFTFitter: fitted with 432 total observations, 318 right-censored observations>
|
| 775 |
+
duration col = 'week'
|
| 776 |
+
event col = 'arrest'
|
| 777 |
+
penalizer = 0.0001
|
| 778 |
+
number of observations = 432
|
| 779 |
+
number of events observed = 114
|
| 780 |
+
log-likelihood = -679.97
|
| 781 |
+
time fit was run = 2020-08-09 15:04:35 UTC
|
| 782 |
+
|
| 783 |
+
---
|
| 784 |
+
coef exp(coef)
|
| 785 |
+
param covariate
|
| 786 |
+
lambda_ age 0.04 1.04
|
| 787 |
+
fin 0.27 1.31
|
| 788 |
+
mar 0.31 1.36
|
| 789 |
+
paro 0.06 1.06
|
| 790 |
+
prio -0.07 0.94
|
| 791 |
+
race -0.22 0.80
|
| 792 |
+
wexp 0.11 1.11
|
| 793 |
+
Intercept 3.99 54.11
|
| 794 |
+
rho_ Intercept 0.34 1.40
|
| 795 |
+
---
|
| 796 |
+
Concordance = 0.64
|
| 797 |
+
AIC = 1377.93
|
| 798 |
+
log-likelihood ratio test = 33.31 on 7 df
|
| 799 |
+
-log2(p) of ll-ratio test = 15.40
|
| 800 |
+
|
| 801 |
+
"""
|
| 802 |
+
|
| 803 |
+
|
| 804 |
+
The log-normal and log-logistic AFT models
|
| 805 |
+
-----------------------------------------------
|
| 806 |
+
|
| 807 |
+
There are also the :class:`~lifelines.fitters.log_normal_aft_fitter.LogNormalAFTFitter` and :class:`~lifelines.fitters.log_logistic_aft_fitter.LogLogisticAFTFitter` models, which, instead of assuming that the survival time distribution is Weibull, assume it is Log-Normal or Log-Logistic, respectively. They have identical APIs to the :class:`~lifelines.fitters.weibull_aft_fitter.WeibullAFTFitter`, but the parameter names are different.
|
| 808 |
+
|
| 809 |
+
|
| 810 |
+
.. code:: python
|
| 811 |
+
|
| 812 |
+
from lifelines import LogLogisticAFTFitter
|
| 813 |
+
from lifelines import LogNormalAFTFitter
|
| 814 |
+
|
| 815 |
+
llf = LogLogisticAFTFitter().fit(rossi, 'week', 'arrest')
|
| 816 |
+
lnf = LogNormalAFTFitter().fit(rossi, 'week', 'arrest')
|
| 817 |
+
|
| 818 |
+
More AFT models: CRC model and generalized gamma model
|
| 819 |
+
------------------------------------------------------------
|
| 820 |
+
|
| 821 |
+
For a flexible and *smooth* parametric model, there is the :class:`~lifelines.fitters.generalized_gamma_regression_fitter.GeneralizedGammaRegressionFitter`. This model is actually a generalization of all the AFT models above (that is, specific values of its parameters represent another model) - see docs for specific parameter values. The API is slightly different however, and looks more like how custom regression models are built (see next section on *Custom Regression Models*).
|
| 822 |
+
|
| 823 |
+
.. code:: python
|
| 824 |
+
|
| 825 |
+
from lifelines import GeneralizedGammaRegressionFitter
|
| 826 |
+
from lifelines.datasets import load_rossi
|
| 827 |
+
|
| 828 |
+
df = load_rossi()
|
| 829 |
+
df['Intercept'] = 1.
|
| 830 |
+
|
| 831 |
+
# this will regress df against all 3 parameters
|
| 832 |
+
ggf = GeneralizedGammaRegressionFitter(penalizer=1.).fit(df, 'week', 'arrest')
|
| 833 |
+
ggf.print_summary()
|
| 834 |
+
|
| 835 |
+
# If we want fine control over the parameters <-> covariates.
|
| 836 |
+
# The values in the dict can be formulas, or column names in lists:
|
| 837 |
+
regressors = {
|
| 838 |
+
'mu_': rossi.columns.difference(['arrest', 'week']),
|
| 839 |
+
'sigma_': ["age", "Intercept"],
|
| 840 |
+
'lambda_': 'age + 1',
|
| 841 |
+
}
|
| 842 |
+
|
| 843 |
+
ggf = GeneralizedGammaRegressionFitter(penalizer=0.0001).fit(df, 'week', 'arrest', regressors=regressors)
|
| 844 |
+
ggf.print_summary()
|
| 845 |
+
|
| 846 |
+
Similarly, there is the CRC model that uses splines to model the time. See a blog post about it `here <https://dataorigami.net/blogs/napkin-folding/an-accelerated-lifetime-spline-model>`_.
|
| 847 |
+
|
| 848 |
+
|
| 849 |
+
The piecewise-exponential regression models
|
| 850 |
+
-------------------------------------------------------------------------
|
| 851 |
+
|
| 852 |
+
Another class of parametric models involves more flexible modeling of the hazard function. The :class:`~lifelines.fitters.piecewise_exponential_regression_fitter.PiecewiseExponentialRegressionFitter` can model jumps in the hazard (think: the differences in "survival-of-staying-in-school" between 1st year, 2nd year, 3rd year, and 4th year students), and constant values between jumps. The ability to specify *when* these jumps occur, called breakpoints, offers modelers great flexibility. An example application involving customer churn is available in this `notebook <https://github.com/CamDavidsonPilon/lifelines/blob/master/examples/SaaS%20churn%20and%20piecewise%20regression%20models.ipynb>`_.
|
| 853 |
+
|
| 854 |
+
.. image:: images/piecewise_churn.png
|
| 855 |
+
|
| 856 |
+
|
| 857 |
+
AIC and model selection for parametric models
|
| 858 |
+
-----------------------------------------------
|
| 859 |
+
|
| 860 |
+
Often, you don't know *a priori* which parametric model to use. Each model has some assumptions built-in (not implemented yet in *lifelines*), but a quick and effective method is to compare the `AICs <https://en.wikipedia.org/wiki/Akaike_information_criterion>`_ for each fitted model. (In this case, the number of parameters for each model is the same, so really this is comparing the log-likelihood). The model with the smallest AIC does the best job of fitting to the data with minimal degrees of freedom.
|
| 861 |
+
|
| 862 |
+
.. code:: python
|
| 863 |
+
|
| 864 |
+
from lifelines import LogLogisticAFTFitter, WeibullAFTFitter, LogNormalAFTFitter
|
| 865 |
+
from lifelines.datasets import load_rossi
|
| 866 |
+
|
| 867 |
+
rossi = load_rossi()
|
| 868 |
+
|
| 869 |
+
llf = LogLogisticAFTFitter().fit(rossi, 'week', 'arrest')
|
| 870 |
+
lnf = LogNormalAFTFitter().fit(rossi, 'week', 'arrest')
|
| 871 |
+
wf = WeibullAFTFitter().fit(rossi, 'week', 'arrest')
|
| 872 |
+
|
| 873 |
+
print(llf.AIC_) # 1377.877
|
| 874 |
+
print(lnf.AIC_) # 1384.469
|
| 875 |
+
print(wf.AIC_) # 1377.833, slightly the best model.
|
| 876 |
+
|
| 877 |
+
|
| 878 |
+
# with some heterogeneity in the ancillary parameters
|
| 879 |
+
ancillary = rossi[['prio']]
|
| 880 |
+
llf = LogLogisticAFTFitter().fit(rossi, 'week', 'arrest', ancillary=ancillary)
|
| 881 |
+
lnf = LogNormalAFTFitter().fit(rossi, 'week', 'arrest', ancillary=ancillary)
|
| 882 |
+
wf = WeibullAFTFitter().fit(rossi, 'week', 'arrest', ancillary=ancillary)
|
| 883 |
+
|
| 884 |
+
print(llf.AIC_) # 1377.89, the best model here, but not the overall best.
|
| 885 |
+
print(lnf.AIC_) # 1380.79
|
| 886 |
+
print(wf.AIC_) # 1379.21
|
| 887 |
+
|
| 888 |
+
|
| 889 |
+
Left, right and interval censored data
|
| 890 |
+
-----------------------------------------------
|
| 891 |
+
|
| 892 |
+
The parametric models have APIs that handle left and interval censored data, too. The API for them is different than the API for fitting to right censored data. Here's an example with interval censored data.
|
| 893 |
+
|
| 894 |
+
.. code:: python
|
| 895 |
+
|
| 896 |
+
from lifelines.datasets import load_diabetes
|
| 897 |
+
|
| 898 |
+
df = load_diabetes()
|
| 899 |
+
df['gender'] = df['gender'] == 'male'
|
| 900 |
+
|
| 901 |
+
print(df.head())
|
| 902 |
+
"""
|
| 903 |
+
left right gender
|
| 904 |
+
1 24 27 True
|
| 905 |
+
2 22 22 False
|
| 906 |
+
3 37 39 True
|
| 907 |
+
4 20 20 True
|
| 908 |
+
5 1 16 True
|
| 909 |
+
"""
|
| 910 |
+
|
| 911 |
+
wf = WeibullAFTFitter().fit_interval_censoring(df, lower_bound_col='left', upper_bound_col='right')
|
| 912 |
+
wf.print_summary()
|
| 913 |
+
|
| 914 |
+
"""
|
| 915 |
+
<lifelines.WeibullAFTFitter: fitted with 731 total observations, 136 interval-censored observations>
|
| 916 |
+
lower bound col = 'left'
|
| 917 |
+
upper bound col = 'right'
|
| 918 |
+
event col = 'E_lifelines_added'
|
| 919 |
+
number of observations = 731
|
| 920 |
+
number of events observed = 595
|
| 921 |
+
log-likelihood = -2027.20
|
| 922 |
+
time fit was run = 2020-08-09 15:05:09 UTC
|
| 923 |
+
|
| 924 |
+
---
|
| 925 |
+
coef exp(coef) se(coef) coef lower 95% coef upper 95% exp(coef) lower 95% exp(coef) upper 95%
|
| 926 |
+
param covariate
|
| 927 |
+
lambda_ gender 0.05 1.05 0.03 -0.01 0.10 0.99 1.10
|
| 928 |
+
Intercept 2.91 18.32 0.02 2.86 2.95 17.53 19.14
|
| 929 |
+
rho_ Intercept 1.04 2.83 0.03 0.98 1.09 2.67 2.99
|
| 930 |
+
z p -log2(p)
|
| 931 |
+
param covariate
|
| 932 |
+
lambda_ gender 1.66 0.10 3.38
|
| 933 |
+
Intercept 130.15 <0.005 inf
|
| 934 |
+
rho_ Intercept 36.91 <0.005 988.46
|
| 935 |
+
---
|
| 936 |
+
AIC = 4060.39
|
| 937 |
+
log-likelihood ratio test = 2.74 on 1 df
|
| 938 |
+
-log2(p) of ll-ratio test = 3.35
|
| 939 |
+
"""
|
| 940 |
+
|
| 941 |
+
|
| 942 |
+
Another example of using lifelines for interval censored data is located `here <https://dataorigami.net/blogs/napkin-folding/counting-and-interval-censoring>`_.
|
| 943 |
+
|
| 944 |
+
|
| 945 |
+
Custom parametric regression models
|
| 946 |
+
-------------------------------------
|
| 947 |
+
|
| 948 |
+
*lifelines* has a very general syntax for creating your own parametric regression models. If you are looking to create your own custom models, see docs `Custom Regression Models`_.
|
| 949 |
+
|
| 950 |
+
|
| 951 |
+
|
| 952 |
+
Aalen's additive model
|
| 953 |
+
=============================
|
| 954 |
+
|
| 955 |
+
.. warning:: This implementation is still experimental.
|
| 956 |
+
|
| 957 |
+
Aalen's Additive model is another regression model we can use. Like the Cox model, it defines
|
| 958 |
+
the hazard rate, but instead of the linear model being multiplicative like the Cox model, the Aalen model is
|
| 959 |
+
additive. Specifically:
|
| 960 |
+
|
| 961 |
+
|
| 962 |
+
.. math::
|
| 963 |
+
h(t|x) = b_0(t) + b_1(t) x_1 + ... + b_N(t) x_N
|
| 964 |
+
|
| 965 |
+
|
| 966 |
+
Inference typically does not estimate the individual
|
| 967 |
+
:math:`b_i(t)` but instead estimates :math:`\int_0^t b_i(s) \; ds`
|
| 968 |
+
(similar to the estimate of the hazard rate using ``NelsonAalenFitter``). This is important
|
| 969 |
+
when interpreting plots produced.
|
| 970 |
+
|
| 971 |
+
|
| 972 |
+
For this
|
| 973 |
+
exercise, we will use the regime dataset and include the categorical
|
| 974 |
+
variables ``un_continent_name`` (e.g., Asia, North America, ...), the
|
| 975 |
+
``regime`` type (e.g., monarchy, civilian,...) and the year the regime
|
| 976 |
+
started in, ``start_year``. The estimator to fit unknown coefficients in Aalen's additive model is
|
| 977 |
+
located under :class:`~lifelines.fitters.aalen_additive_fitter.AalenAdditiveFitter`.
|
| 978 |
+
|
| 979 |
+
.. code:: python
|
| 980 |
+
|
| 981 |
+
from lifelines import AalenAdditiveFitter
|
| 982 |
+
from lifelines.datasets import load_dd
|
| 983 |
+
|
| 984 |
+
data = load_dd()
|
| 985 |
+
data.head()
|
| 986 |
+
|
| 987 |
+
|
| 988 |
+
.. table::
|
| 989 |
+
|
| 990 |
+
+-----------+--------+----------+--------------+-----------------+---------------------+---------------------------------------------------------+-------------+-------------+----------+--------+--------+
|
| 991 |
+
| ctryname |cowcode2|politycode|un_region_name|un_continent_name| ehead | leaderspellreg | democracy | regime |start_year|duration|observed|
|
| 992 |
+
+===========+========+==========+==============+=================+=====================+=========================================================+=============+=============+==========+========+========+
|
| 993 |
+
|Afghanistan| 700| 700|Southern Asia |Asia |Mohammad Zahir Shah |Mohammad Zahir Shah.Afghanistan.1946.1952.Monarchy |Non-democracy|Monarchy | 1946| 7| 1|
|
| 994 |
+
+-----------+--------+----------+--------------+-----------------+---------------------+---------------------------------------------------------+-------------+-------------+----------+--------+--------+
|
| 995 |
+
|Afghanistan| 700| 700|Southern Asia |Asia |Sardar Mohammad Daoud|Sardar Mohammad Daoud.Afghanistan.1953.1962.Civilian Dict|Non-democracy|Civilian Dict| 1953| 10| 1|
|
| 996 |
+
+-----------+--------+----------+--------------+-----------------+---------------------+---------------------------------------------------------+-------------+-------------+----------+--------+--------+
|
| 997 |
+
|Afghanistan| 700| 700|Southern Asia |Asia |Mohammad Zahir Shah |Mohammad Zahir Shah.Afghanistan.1963.1972.Monarchy |Non-democracy|Monarchy | 1963| 10| 1|
|
| 998 |
+
+-----------+--------+----------+--------------+-----------------+---------------------+---------------------------------------------------------+-------------+-------------+----------+--------+--------+
|
| 999 |
+
|Afghanistan| 700| 700|Southern Asia |Asia |Sardar Mohammad Daoud|Sardar Mohammad Daoud.Afghanistan.1973.1977.Civilian Dict|Non-democracy|Civilian Dict| 1973| 5| 0|
|
| 1000 |
+
+-----------+--------+----------+--------------+-----------------+---------------------+---------------------------------------------------------+-------------+-------------+----------+--------+--------+
|
| 1001 |
+
|Afghanistan| 700| 700|Southern Asia |Asia |Nur Mohammad Taraki |Nur Mohammad Taraki.Afghanistan.1978.1978.Civilian Dict |Non-democracy|Civilian Dict| 1978| 1| 0|
|
| 1002 |
+
+-----------+--------+----------+--------------+-----------------+---------------------+---------------------------------------------------------+-------------+-------------+----------+--------+--------+
|
| 1003 |
+
|
| 1004 |
+
|
| 1005 |
+
|
| 1006 |
+
We have also included the ``coef_penalizer`` option. During the estimation, a
|
| 1007 |
+
linear regression is computed at each step. Often the regression can be
|
| 1008 |
+
unstable (due to high co-linearity or small sample sizes) -- adding a penalizer term controls the stability. I recommend always starting with a small penalizer term -- if the estimates still appear to be too unstable, try increasing it.
|
| 1009 |
+
|
| 1010 |
+
.. code:: python
|
| 1011 |
+
|
| 1012 |
+
aaf = AalenAdditiveFitter(coef_penalizer=1.0, fit_intercept=False)
|
| 1013 |
+
|
| 1014 |
+
An instance of :class:`~lifelines.fitters.aalen_additive_fitter.AalenAdditiveFitter`
|
| 1015 |
+
includes a :meth:`~lifelines.fitters.aalen_additive_fitter.AalenAdditiveFitter.fit` method that performs the inference on the coefficients. This method accepts a pandas DataFrame: each row is an individual and columns are the covariates and
|
| 1016 |
+
two individual columns: a *duration* column and a boolean *event occurred* column (where event occurred refers to the event of interest - expulsion from government in this case)
|
| 1017 |
+
|
| 1018 |
+
|
| 1019 |
+
.. code:: python
|
| 1020 |
+
|
| 1021 |
+
data['T'] = data['duration']
|
| 1022 |
+
data['E'] = data['observed']
|
| 1023 |
+
|
| 1024 |
+
|
| 1025 |
+
.. code:: python
|
| 1026 |
+
|
| 1027 |
+
aaf.fit(data, 'T', event_col='E', formula='un_continent_name + regime + start_year')
|
| 1028 |
+
|
| 1029 |
+
|
| 1030 |
+
After fitting, the instance exposes a :attr:`~lifelines.fitters.aalen_additive_fitter.AalenAdditiveFitter.cumulative_hazards_` DataFrame
|
| 1031 |
+
containing the estimates of :math:`\int_0^t b_i(s) \; ds`:
|
| 1032 |
+
|
| 1033 |
+
.. code:: python
|
| 1034 |
+
|
| 1035 |
+
aaf.cumulative_hazards_.head()
|
| 1036 |
+
|
| 1037 |
+
|
| 1038 |
+
.. table::
|
| 1039 |
+
|
| 1040 |
+
+--------+-----------------------------+-------------------------+---------------------------+----------------------------+-----------------------+-------------------+------------------+---------------------------+--------------------------+----------+
|
| 1041 |
+
|baseline|un_continent_name[T.Americas]|un_continent_name[T.Asia]|un_continent_name[T.Europe]|un_continent_name[T.Oceania]|regime[T.Military Dict]|regime[T.Mixed Dem]|regime[T.Monarchy]|regime[T.Parliamentary Dem]|regime[T.Presidential Dem]|start_year|
|
| 1042 |
+
+========+=============================+=========================+===========================+============================+=======================+===================+==================+===========================+==========================+==========+
|
| 1043 |
+
|-0.03447| -0.03173| 0.06216| 0.2058| -0.009559| 0.07611| 0.08729| -0.1362| 0.04885| 0.1285| 0.000092|
|
| 1044 |
+
+--------+-----------------------------+-------------------------+---------------------------+----------------------------+-----------------------+-------------------+------------------+---------------------------+--------------------------+----------+
|
| 1045 |
+
| 0.14278| -0.02496| 0.11122| 0.2083| -0.079042| 0.11704| 0.36254| -0.2293| 0.17103| 0.1238| 0.000044|
|
| 1046 |
+
+--------+-----------------------------+-------------------------+---------------------------+----------------------------+-----------------------+-------------------+------------------+---------------------------+--------------------------+----------+
|
| 1047 |
+
| 0.30153| -0.07212| 0.10929| 0.1614| 0.063030| 0.16553| 0.68693| -0.2738| 0.33300| 0.1499| 0.000004|
|
| 1048 |
+
+--------+-----------------------------+-------------------------+---------------------------+----------------------------+-----------------------+-------------------+------------------+---------------------------+--------------------------+----------+
|
| 1049 |
+
| 0.37969| 0.06853| 0.15162| 0.2609| 0.185569| 0.22695| 0.95016| -0.2961| 0.37351| 0.4311| -0.000032|
|
| 1050 |
+
+--------+-----------------------------+-------------------------+---------------------------+----------------------------+-----------------------+-------------------+------------------+---------------------------+--------------------------+----------+
|
| 1051 |
+
| 0.36749| 0.20201| 0.21252| 0.2429| 0.188740| 0.25127| 1.15132| -0.3926| 0.54952| 0.7593| -0.000000|
|
| 1052 |
+
+--------+-----------------------------+-------------------------+---------------------------+----------------------------+-----------------------+-------------------+------------------+---------------------------+--------------------------+----------+
|
| 1053 |
+
|
| 1054 |
+
|
| 1055 |
+
|
| 1056 |
+
:class:`~lifelines.fitters.aalen_additive_fitter.AalenAdditiveFitter` also has built in plotting:
|
| 1057 |
+
|
| 1058 |
+
.. code:: python
|
| 1059 |
+
|
| 1060 |
+
aaf.plot(columns=['regime[T.Presidential Dem]', 'Intercept', 'un_continent_name[T.Europe]'], iloc=slice(1,15))
|
| 1061 |
+
|
| 1062 |
+
|
| 1063 |
+
.. image:: images/survival_regression_aaf.png
|
| 1064 |
+
|
| 1065 |
+
|
| 1066 |
+
Regression is most interesting if we use it on data we have not yet
|
| 1067 |
+
seen, i.e., prediction! We can use what we have learned to predict
|
| 1068 |
+
individual hazard rates, survival functions, and median survival time.
|
| 1069 |
+
The dataset we are using is available up until 2008, so let's use this data to
|
| 1070 |
+
predict the duration of former Canadian
|
| 1071 |
+
Prime Minister Stephen Harper.
|
| 1072 |
+
|
| 1073 |
+
.. code:: python
|
| 1074 |
+
|
| 1075 |
+
ix = (data['ctryname'] == 'Canada') & (data['start_year'] == 2006)
|
| 1076 |
+
harper = data.loc[ix]
|
| 1077 |
+
print("Harper's unique data point:")
|
| 1078 |
+
print(harper)
|
| 1079 |
+
|
| 1080 |
+
.. parsed-literal::
|
| 1081 |
+
|
| 1082 |
+
Harper's unique data point:
|
| 1083 |
+
baseline un_continent_name[T.Americas] un_continent_name[T.Asia] ... start_year T E
|
| 1084 |
+
268 1.0 1.0 0.0 ... 2006.0 3 0
|
| 1085 |
+
|
| 1086 |
+
|
| 1087 |
+
.. code:: python
|
| 1088 |
+
|
| 1089 |
+
ax = plt.subplot(2,1,1)
|
| 1090 |
+
aaf.predict_cumulative_hazard(harper).plot(ax=ax)
|
| 1091 |
+
|
| 1092 |
+
ax = plt.subplot(2,1,2)
|
| 1093 |
+
aaf.predict_survival_function(harper).plot(ax=ax);
|
| 1094 |
+
|
| 1095 |
+
|
| 1096 |
+
.. image:: images/survival_regression_harper.png
|
| 1097 |
+
|
| 1098 |
+
.. note:: Because of the nature of the model, estimated survival functions of individuals can increase. This is an expected artifact of Aalen's additive model.
|
| 1099 |
+
|
| 1100 |
+
|
| 1101 |
+
Model selection and calibration in survival regression
|
| 1102 |
+
==========================================================
|
| 1103 |
+
|
| 1104 |
+
Parametric vs semi-parametric models
|
| 1105 |
+
---------------------------------------
|
| 1106 |
+
Above, we've displayed two *semi-parametric* models (Cox model and Aalen's model), and a family of *parametric* models. Which should you choose? What are the advantages and disadvantages of either? I suggest reading the two following StackExchange answers to get a better idea of what experts think:
|
| 1107 |
+
|
| 1108 |
+
1. `In survival analysis, why do we use semi-parametric models (Cox proportional hazards) instead of fully parametric models? <https://stats.stackexchange.com/q/64739/11867>`__
|
| 1109 |
+
2. `In survival analysis, when should we use fully parametric models over semi-parametric ones? <https://stats.stackexchange.com/q/399544/11867>`__
|
| 1110 |
+
|
| 1111 |
+
|
| 1112 |
+
Model selection based on residuals
|
| 1113 |
+
-----------------------------------------------
|
| 1114 |
+
|
| 1115 |
+
The sections `Testing the Proportional Hazard Assumptions`_ and `Assessing Cox model fit using residuals`_ may be useful for modeling your data better.
|
| 1116 |
+
|
| 1117 |
+
.. note:: Work is being done to extend residual methods to all regression models. Stay tuned.
|
| 1118 |
+
|
| 1119 |
+
|
| 1120 |
+
Model selection based on predictive power and fit
|
| 1121 |
+
---------------------------------------------------
|
| 1122 |
+
|
| 1123 |
+
If censoring is present, it's not appropriate to use a loss function like mean-squared-error or
|
| 1124 |
+
mean-absolute-loss. This is because the difference between a censored value and the predicted value could be due to poor prediction *or* due to censoring. Below we introduce alternative ways to measure prediction performance.
|
| 1125 |
+
|
| 1126 |
+
Log-likelihood
|
| 1127 |
+
****************************
|
| 1128 |
+
|
| 1129 |
+
|
| 1130 |
+
In this author's opinion, the best way to measure predictive performance is evaluating the log-likelihood on out-of-sample data. The log-likelihood correctly handles any type of censoring, and is precisely what we are maximizing in the model training. The in-sample log-likelihood is available under ``log_likelihood_`` of any regression model. For out-of-sample data, the :meth:`~lifelines.fitters.coxph_fitter.CoxPHFitter.score` method (available on all regression models) can be used. This returns the *average evaluation of the out-of-sample log-likelihood*. We want to maximize this.
|
| 1131 |
+
|
| 1132 |
+
.. code:: python
|
| 1133 |
+
|
| 1134 |
+
from lifelines import CoxPHFitter
|
| 1135 |
+
from lifelines.datasets import load_rossi
|
| 1136 |
+
|
| 1137 |
+
rossi = load_rossi().sample(frac=1.0, random_state=25) # ensures the reproducibility of the example
|
| 1138 |
+
train_rossi = rossi.iloc[:400]
|
| 1139 |
+
test_rossi = rossi.iloc[400:]
|
| 1140 |
+
|
| 1141 |
+
cph_l1 = CoxPHFitter(penalizer=0.1, l1_ratio=1.).fit(train_rossi, 'week', 'arrest')
|
| 1142 |
+
cph_l2 = CoxPHFitter(penalizer=0.1, l1_ratio=0.).fit(train_rossi, 'week', 'arrest')
|
| 1143 |
+
|
| 1144 |
+
print(cph_l1.score(test_rossi))
|
| 1145 |
+
print(cph_l2.score(test_rossi)) # higher is better
|
| 1146 |
+
|
| 1147 |
+
Akaike information criterion (AIC)
|
| 1148 |
+
*****************************************
|
| 1149 |
+
|
| 1150 |
+
For within-sample validation, the AIC is a great metric for comparing models as it relies on the log-likelihood. It's available under ``AIC_`` for parametric models, and ``AIC_partial_`` for Cox models (because the Cox model maximizes a *partial* log-likelihood, it can't be reliably compared to a parametric model's AIC).
|
| 1151 |
+
|
| 1152 |
+
|
| 1153 |
+
.. code:: python
|
| 1154 |
+
|
| 1155 |
+
from lifelines import CoxPHFitter
|
| 1156 |
+
from lifelines.datasets import load_rossi
|
| 1157 |
+
|
| 1158 |
+
rossi = load_rossi()
|
| 1159 |
+
|
| 1160 |
+
cph_l2 = CoxPHFitter(penalizer=0.1, l1_ratio=0.).fit(rossi, 'week', 'arrest')
|
| 1161 |
+
cph_l1 = CoxPHFitter(penalizer=0.1, l1_ratio=1.).fit(rossi, 'week', 'arrest')
|
| 1162 |
+
|
| 1163 |
+
print(cph_l2.AIC_partial_) # lower is better
|
| 1164 |
+
print(cph_l1.AIC_partial_)
|
| 1165 |
+
|
| 1166 |
+
Concordance Index
|
| 1167 |
+
*****************************************
|
| 1168 |
+
|
| 1169 |
+
|
| 1170 |
+
Another censoring-sensitive measure is the concordance-index, also known as the c-index. This measure evaluates the accuracy of the *ranking* of predicted time. It is in fact a generalization of AUC, another common loss function, and is interpreted similarly:
|
| 1171 |
+
|
| 1172 |
+
* 0.5 is the expected result from random predictions,
|
| 1173 |
+
* 1.0 is perfect concordance and,
|
| 1174 |
+
* 0.0 is perfect anti-concordance (multiply predictions with -1 to get 1.0)
|
| 1175 |
+
|
| 1176 |
+
`Here <https://stats.stackexchange.com/a/478305/11867>`_ is an excellent introduction & description of the c-index for new users.
|
| 1177 |
+
|
| 1178 |
+
Fitted survival models typically have a concordance index between 0.55 and 0.75 (this may seem bad, but even a perfect model has a lot of noise that can make a high score impossible). In *lifelines*, a fitted model's concordance-index is present in the output of :meth:`~lifelines.fitters.cox_ph_fitter.CoxPHFitter.score`, but also available under the ``concordance_index_`` property. Generally, the measure is implemented in *lifelines* under :meth:`lifelines.utils.concordance_index` and accepts the actual times (along with any censored subjects) and the predicted times.
|
| 1179 |
+
|
| 1180 |
+
.. code:: python
|
| 1181 |
+
|
| 1182 |
+
from lifelines import CoxPHFitter
|
| 1183 |
+
from lifelines.datasets import load_rossi
|
| 1184 |
+
|
| 1185 |
+
rossi = load_rossi()
|
| 1186 |
+
|
| 1187 |
+
cph = CoxPHFitter()
|
| 1188 |
+
cph.fit(rossi, duration_col="week", event_col="arrest")
|
| 1189 |
+
|
| 1190 |
+
# fours ways to view the c-index:
|
| 1191 |
+
# method one
|
| 1192 |
+
cph.print_summary()
|
| 1193 |
+
|
| 1194 |
+
# method two
|
| 1195 |
+
print(cph.concordance_index_)
|
| 1196 |
+
|
| 1197 |
+
# method three
|
| 1198 |
+
print(cph.score(rossi, scoring_method="concordance_index"))
|
| 1199 |
+
|
| 1200 |
+
# method four
|
| 1201 |
+
from lifelines.utils import concordance_index
|
| 1202 |
+
print(concordance_index(rossi['week'], -cph.predict_partial_hazard(rossi), rossi['arrest']))
|
| 1203 |
+
|
| 1204 |
+
.. note:: Remember, the concordance score evaluates the relative rankings of subjects' event times. Thus, it is scale and shift invariant (i.e. you can multiply by a positive constant, or add a constant, and the rankings won't change). A model maximized for concordance-index does not necessarily give good predicted *times*, but will give good predicted *rankings*.
|
| 1205 |
+
|
| 1206 |
+
|
| 1207 |
+
Cross validation
|
| 1208 |
+
****************************
|
| 1209 |
+
|
| 1210 |
+
*lifelines* has an implementation of k-fold cross validation under :func:`lifelines.utils.k_fold_cross_validation`. This function accepts an instance of a regression fitter (either :class:`~lifelines.fitters.coxph_fitter.CoxPHFitter` or :class:`~lifelines.fitters.aalen_additive_fitter.AalenAdditiveFitter`), a dataset, plus ``k`` (the number of folds to perform, default 5). On each fold, it splits the data
|
| 1211 |
+
into a training set and a testing set, fits itself on the training set, and evaluates itself on the testing set (using the concordance measure by default).
|
| 1212 |
+
|
| 1213 |
+
.. code:: python
|
| 1214 |
+
|
| 1215 |
+
from lifelines import CoxPHFitter
|
| 1216 |
+
from lifelines.datasets import load_regression_dataset
|
| 1217 |
+
from lifelines.utils import k_fold_cross_validation
|
| 1218 |
+
|
| 1219 |
+
regression_dataset = load_regression_dataset()
|
| 1220 |
+
cph = CoxPHFitter()
|
| 1221 |
+
scores = k_fold_cross_validation(cph, regression_dataset, 'T', event_col='E', k=3)
|
| 1222 |
+
print(scores)
|
| 1223 |
+
#[-2.9896, -3.08810, -3.02747]
|
| 1224 |
+
|
| 1225 |
+
scores = k_fold_cross_validation(cph, regression_dataset, 'T', event_col='E', k=3, scoring_method="concordance_index")
|
| 1226 |
+
print(scores)
|
| 1227 |
+
# [0.5449, 0.5587, 0.6179]
|
| 1228 |
+
|
| 1229 |
+
Also, lifelines has wrappers for `compatibility with scikit learn`_ for making cross-validation and grid-search even easier.
|
| 1230 |
+
|
| 1231 |
+
|
| 1232 |
+
Model probability calibration
|
| 1233 |
+
---------------------------------------------------
|
| 1234 |
+
|
| 1235 |
+
New in *lifelines* v0.24.11 is the :func:`~lifelines.calibration.survival_probability_calibration` function to measure your fitted survival model against observed frequencies of events. We follow the advice in "Graphical calibration curves and the integrated calibration index (ICI) for survival models" by P. Austin and co., and create a smoothed calibration curve using a flexible spline regression model (this avoids the traditional problem of binning the continuous-valued probability, and handles censored data).
|
| 1236 |
+
|
| 1237 |
+
|
| 1238 |
+
.. code:: python
|
| 1239 |
+
|
| 1240 |
+
from lifelines import CoxPHFitter
|
| 1241 |
+
from lifelines.datasets import load_rossi
|
| 1242 |
+
from lifelines.calibration import survival_probability_calibration
|
| 1243 |
+
|
| 1244 |
+
rossi = load_rossi()
|
| 1245 |
+
cph = CoxPHFitter(baseline_estimation_method="spline", n_baseline_knots=3)
|
| 1246 |
+
cph.fit(rossi, "week", "arrest")
|
| 1247 |
+
|
| 1248 |
+
|
| 1249 |
+
survival_probability_calibration(cph, rossi, t0=25)
|
| 1250 |
+
|
| 1251 |
+
.. image:: images/survival_calibration_probablilty.png
|
| 1252 |
+
:width: 600
|
| 1253 |
+
:align: center
|
| 1254 |
+
|
| 1255 |
+
|
| 1256 |
+
Prediction on censored subjects
|
| 1257 |
+
===================================
|
| 1258 |
+
|
| 1259 |
+
A common use case is to predict the event time of censored subjects. This is easy to do, but we first have to calculate an important conditional probability. Let :math:`T` be the (random) event time for some subject, and :math:`S(t)≔P(T > t)` be their survival function. We are interested in answering the following: *What is a subject's new survival function given I know the subject has lived past time :math:`s`?* Mathematically:
|
| 1260 |
+
|
| 1261 |
+
.. math::
|
| 1262 |
+
|
| 1263 |
+
\begin{align*}
|
| 1264 |
+
P(T > t \;|\; T > s) &= \frac{P(T > t \;\text{and}\; T > s)}{P(T > s)} \\
|
| 1265 |
+
&= \frac{P(T > t)}{P(T > s)} \\
|
| 1266 |
+
&= \frac{S(t)}{S(s)}
|
| 1267 |
+
\end{align*}
|
| 1268 |
+
|
| 1269 |
+
Thus we scale the original survival function by the survival function at time :math:`s` (everything prior to :math:`s` should be mapped to 1.0 as well, since we are working with probabilities and we know that the subject was alive before :math:`s`).
|
| 1270 |
+
|
| 1271 |
+
This is such a common calculation that *lifelines* has all this built in. The ``conditional_after`` kwarg in all prediction methods
|
| 1272 |
+
allows you to specify what :math:`s` is per subject. Below we predict the remaining life of censored subjects:
|
| 1273 |
+
|
| 1274 |
+
.. code:: python
|
| 1275 |
+
|
| 1276 |
+
# all regression models can be used here, WeibullAFTFitter is used for illustration
|
| 1277 |
+
wf = WeibullAFTFitter().fit(rossi, "week", "arrest")
|
| 1278 |
+
|
| 1279 |
+
# filter down to just censored subjects to predict remaining survival
|
| 1280 |
+
censored_subjects = rossi.loc[~rossi['arrest'].astype(bool)]
|
| 1281 |
+
censored_subjects_last_obs = censored_subjects['week']
|
| 1282 |
+
|
| 1283 |
+
# predict new survival function
|
| 1284 |
+
wf.predict_survival_function(censored_subjects, conditional_after=censored_subjects_last_obs)
|
| 1285 |
+
|
| 1286 |
+
# predict median remaining life
|
| 1287 |
+
wf.predict_median(censored_subjects, conditional_after=censored_subjects_last_obs)
|
| 1288 |
+
|
| 1289 |
+
.. note:: It's important to remember that this is now computing a *conditional* probability (or metric), so if the result of ``predict_median`` is 10.5, then the *entire lifetime* is 10.5 + ``conditional_after``.
|
| 1290 |
+
|
| 1291 |
+
.. note:: If using ``conditional_after`` to predict on *uncensored* subjects, then ``conditional_after`` should probably be set to 0, or left blank.
|
| 1292 |
+
|
| 1293 |
+
|
| 1294 |
+
.. _Assessing Cox model fit using residuals: jupyter_notebooks/Cox%20residuals.html
|
| 1295 |
+
.. _Testing the Proportional Hazard Assumptions: jupyter_notebooks/Proportional%20hazard%20assumption.html
|
| 1296 |
+
.. _Custom Regression Models: jupyter_notebooks/Custom%20Regression%20Models.html
|
| 1297 |
+
.. _time varying model: Time%20varying%20survival%20regression.html
|
| 1298 |
+
.. _compatibility with scikit learn: Compatibility%20with%20scikit-learn.html
|
lifelines/source/docs/Survival analysis with lifelines.rst
ADDED
|
@@ -0,0 +1,850 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.. image:: https://i.imgur.com/EOowdSD.png
|
| 2 |
+
|
| 3 |
+
-------------------------------------
|
| 4 |
+
|
| 5 |
+
Estimating univariate models
|
| 6 |
+
=====================================
|
| 7 |
+
|
| 8 |
+
In the previous :doc:`section</Survival Analysis intro>`,
|
| 9 |
+
we introduced the applications of survival analysis and the
|
| 10 |
+
mathematical objects on which it relies. In this article, we will work
|
| 11 |
+
with real data and the *lifelines* library to estimate these objects.
|
| 12 |
+
|
| 13 |
+
Estimating the survival function using Kaplan-Meier
|
| 14 |
+
''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
|
| 15 |
+
|
| 16 |
+
For this example, we will be investigating the lifetimes of political
|
| 17 |
+
leaders around the world. A political leader, in this case, is defined by a single individual's
|
| 18 |
+
time in office who controls the ruling regime. This political leader could be an elected president,
|
| 19 |
+
unelected dictator, monarch, etc. The birth event is the start of the individual's tenure, and the death
|
| 20 |
+
event is the voluntary retirement of the individual. Censoring can occur if they are a) still in office at the time
|
| 21 |
+
of dataset compilation (2008), or b) die while in power (this includes assassinations).
|
| 22 |
+
|
| 23 |
+
For example, the Bush regime began in 2000 and officially ended in 2008
|
| 24 |
+
upon his retirement, thus the regime's lifespan was eight years, and there was a
|
| 25 |
+
"death" event observed. On the other hand, the JFK regime lasted 2
|
| 26 |
+
years, from 1961 to 1963, and the regime's official death event *was
|
| 27 |
+
not* observed -- JFK died before his official retirement.
|
| 28 |
+
|
| 29 |
+
(This is an example that has gladly redefined the birth and death
|
| 30 |
+
events, and in fact completely flips the idea upside down by using deaths
|
| 31 |
+
as the censoring event. This is also an example where the current time
|
| 32 |
+
is not the only cause of censoring; there are the alternative events (e.g., death in office) that can
|
| 33 |
+
be the cause of censoring.)
|
| 34 |
+
|
| 35 |
+
To estimate the survival function, we first will use the `Kaplan-Meier
|
| 36 |
+
Estimate <http://en.wikipedia.org/wiki/Kaplan%E2%80%93Meier_estimator>`__,
|
| 37 |
+
defined:
|
| 38 |
+
|
| 39 |
+
.. math:: \hat{S}(t) = \prod_{t_i \lt t} \frac{n_i - d_i}{n_i}
|
| 40 |
+
|
| 41 |
+
where :math:`d_i` is the number of death events at time :math:`t_i` and
|
| 42 |
+
:math:`n_i` is the number of subjects at risk of death just prior to time
|
| 43 |
+
:math:`t_i`.
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
Let's bring in our dataset.
|
| 47 |
+
|
| 48 |
+
.. code:: python
|
| 49 |
+
|
| 50 |
+
from lifelines.datasets import load_dd
|
| 51 |
+
|
| 52 |
+
data = load_dd()
|
| 53 |
+
data.head()
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
.. table::
|
| 58 |
+
|
| 59 |
+
+-------------+-------------+----------+--------+--------+-----------+--------+----------+--------------+-----------------+---------------------+---------------------------------------------------------+
|
| 60 |
+
| democracy | regime |start_year|duration|observed| ctryname |cowcode2|politycode|un_region_name|un_continent_name| ehead | leaderspellreg |
|
| 61 |
+
+=============+=============+==========+========+========+===========+========+==========+==============+=================+=====================+=========================================================+
|
| 62 |
+
|Non-democracy|Monarchy | 1946| 7| 1|Afghanistan| 700| 700|Southern Asia |Asia |Mohammad Zahir Shah |Mohammad Zahir Shah.Afghanistan.1946.1952.Monarchy |
|
| 63 |
+
+-------------+-------------+----------+--------+--------+-----------+--------+----------+--------------+-----------------+---------------------+---------------------------------------------------------+
|
| 64 |
+
|Non-democracy|Civilian Dict| 1953| 10| 1|Afghanistan| 700| 700|Southern Asia |Asia |Sardar Mohammad Daoud|Sardar Mohammad Daoud.Afghanistan.1953.1962.Civilian Dict|
|
| 65 |
+
+-------------+-------------+----------+--------+--------+-----------+--------+----------+--------------+-----------------+---------------------+---------------------------------------------------------+
|
| 66 |
+
|Non-democracy|Monarchy | 1963| 10| 1|Afghanistan| 700| 700|Southern Asia |Asia |Mohammad Zahir Shah |Mohammad Zahir Shah.Afghanistan.1963.1972.Monarchy |
|
| 67 |
+
+-------------+-------------+----------+--------+--------+-----------+--------+----------+--------------+-----------------+---------------------+---------------------------------------------------------+
|
| 68 |
+
|Non-democracy|Civilian Dict| 1973| 5| 0|Afghanistan| 700| 700|Southern Asia |Asia |Sardar Mohammad Daoud|Sardar Mohammad Daoud.Afghanistan.1973.1977.Civilian Dict|
|
| 69 |
+
+-------------+-------------+----------+--------+--------+-----------+--------+----------+--------------+-----------------+---------------------+---------------------------------------------------------+
|
| 70 |
+
|Non-democracy|Civilian Dict| 1978| 1| 0|Afghanistan| 700| 700|Southern Asia |Asia |Nur Mohammad Taraki |Nur Mohammad Taraki.Afghanistan.1978.1978.Civilian Dict |
|
| 71 |
+
+-------------+-------------+----------+--------+--------+-----------+--------+----------+--------------+-----------------+---------------------+---------------------------------------------------------+
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
From the *lifelines* library, we'll need the
|
| 76 |
+
:class:`~lifelines.fitters.kaplan_meier_fitter.KaplanMeierFitter` for this exercise:
|
| 77 |
+
|
| 78 |
+
.. code:: python
|
| 79 |
+
|
| 80 |
+
from lifelines import KaplanMeierFitter
|
| 81 |
+
kmf = KaplanMeierFitter()
|
| 82 |
+
|
| 83 |
+
.. note:: Other ways to estimate the survival function in *lifelines* are discussed below.
|
| 84 |
+
|
| 85 |
+
For this estimation, we need the duration each leader was/has been in
|
| 86 |
+
office, and whether or not they were observed to have left office
|
| 87 |
+
(leaders who died in office or were in office in 2008, the latest date
|
| 88 |
+
this data was recorded at, do not have observed death events).
|
| 89 |
+
|
| 90 |
+
We next use the :class:`~lifelines.fitters.kaplan_meier_fitter.KaplanMeierFitter` method :meth:`~lifelines.fitters.kaplan_meier_fitter.KaplanMeierFitter.fit` to fit the model to
|
| 91 |
+
the data. (This is similar to, and inspired by, scikit-learn's fit/predict API).
|
| 92 |
+
|
| 93 |
+
Below we fit our data with the :class:`~lifelines.fitters.kaplan_meier_fitter.KaplanMeierFitter`:
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
.. code:: python
|
| 97 |
+
|
| 98 |
+
T = data["duration"]
|
| 99 |
+
E = data["observed"]
|
| 100 |
+
|
| 101 |
+
kmf.fit(T, event_observed=E)
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
After calling the :meth:`~lifelines.fitters.kaplan_meier_fitter.KaplanMeierFitter.fit` method, the :class:`~lifelines.fitters.kaplan_meier_fitter.KaplanMeierFitter` has a property
|
| 105 |
+
called :attr:`~lifelines.fitters.kaplan_meier_fitter.KaplanMeierFitter.survival_function_` (again, we follow the styling of scikit-learn, and append an underscore to all properties that were estimated).
|
| 106 |
+
The property is a Pandas DataFrame, so we can call :meth:`~lifelines.fitters.kaplan_meier_fitter.KaplanMeierFitter.plot` on it:
|
| 107 |
+
|
| 108 |
+
.. code:: python
|
| 109 |
+
|
| 110 |
+
from matplotlib import pyplot as plt
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
kmf.survival_function_.plot()
|
| 114 |
+
plt.title('Survival function of political regimes');
|
| 115 |
+
|
| 116 |
+
.. image:: images/lifelines_intro_kmf_curve.png
|
| 117 |
+
:width: 600px
|
| 118 |
+
:align: center
|
| 119 |
+
|
| 120 |
+
How do we interpret this? The y-axis represents the probability a leader is still
|
| 121 |
+
around after :math:`t` years, where :math:`t` years is on the x-axis. We
|
| 122 |
+
see that very few leaders make it past 20 years in office. Of course, we need to report how uncertain we are about these point estimates, i.e., we need confidence intervals. They are computed in
|
| 123 |
+
the call to :meth:`~lifelines.fitters.kaplan_meier_fitter.KaplanMeierFitter.fit`, and located under the :attr:`~lifelines.fitters.kaplan_meier_fitter.KaplanMeierFitter.confidence_interval_`
|
| 124 |
+
property. (The method uses the exponential Greenwood confidence interval. The mathematics are found in `these notes <https://www.math.wustl.edu/%7Esawyer/handouts/greenwood.pdf>`_.) We can call :meth:`~lifelines.fitters.kaplan_meier_fitter.KaplanMeierFitter.plot` on the :class:`~lifelines.fitters.kaplan_meier_fitter.KaplanMeierFitter` itself to plot both the KM estimate and its confidence intervals:
|
| 125 |
+
|
| 126 |
+
.. code:: python
|
| 127 |
+
|
| 128 |
+
kmf.plot_survival_function()
|
| 129 |
+
|
| 130 |
+
.. image:: images/lifelines_intro_kmf_fitter.png
|
| 131 |
+
:width: 600px
|
| 132 |
+
:align: center
|
| 133 |
+
|
| 134 |
+
The median time in office, which defines the point in time where on
|
| 135 |
+
average 50% of the population has expired, is a property:
|
| 136 |
+
|
| 137 |
+
.. code:: python
|
| 138 |
+
|
| 139 |
+
kmf.median_survival_time_
|
| 140 |
+
# 4.0
|
| 141 |
+
|
| 142 |
+
|
| 143 |
+
Interesting that it is only four years. That means, around the world, elected leaders
|
| 144 |
+
have a 50% chance of cessation in four years or less! To get the confidence interval of the median, you can use:
|
| 145 |
+
|
| 146 |
+
.. code:: python
|
| 147 |
+
|
| 148 |
+
from lifelines.utils import median_survival_times
|
| 149 |
+
median_ci = median_survival_times(kmf.confidence_interval_)
|
| 150 |
+
|
| 151 |
+
|
| 152 |
+
Let's segment on democratic regimes vs non-democratic regimes. Calling
|
| 153 |
+
``plot`` on either the estimate itself or the fitter object will return
|
| 154 |
+
an ``axis`` object, that can be used for plotting further estimates:
|
| 155 |
+
|
| 156 |
+
.. code:: python
|
| 157 |
+
|
| 158 |
+
ax = plt.subplot(111)
|
| 159 |
+
|
| 160 |
+
dem = (data["democracy"] == "Democracy")
|
| 161 |
+
|
| 162 |
+
kmf.fit(T[dem], event_observed=E[dem], label="Democratic Regimes")
|
| 163 |
+
kmf.plot_survival_function(ax=ax)
|
| 164 |
+
|
| 165 |
+
kmf.fit(T[~dem], event_observed=E[~dem], label="Non-democratic Regimes")
|
| 166 |
+
kmf.plot_survival_function(ax=ax)
|
| 167 |
+
|
| 168 |
+
plt.title("Lifespans of different global regimes");
|
| 169 |
+
|
| 170 |
+
|
| 171 |
+
.. image:: images/lifelines_intro_multi_kmf_fitter.png
|
| 172 |
+
:width: 650px
|
| 173 |
+
:align: center
|
| 174 |
+
|
| 175 |
+
We might be interested in estimating the probabilities in between some
|
| 176 |
+
points. We can do that with the ``timeline`` argument. We specify the
|
| 177 |
+
times we are interested in and are returned a DataFrame with the
|
| 178 |
+
probabilities of survival at those points:
|
| 179 |
+
|
| 180 |
+
.. code:: python
|
| 181 |
+
|
| 182 |
+
import numpy as np
|
| 183 |
+
|
| 184 |
+
ax = plt.subplot(111)
|
| 185 |
+
|
| 186 |
+
t = np.linspace(0, 50, 51)
|
| 187 |
+
kmf.fit(T[dem], event_observed=E[dem], timeline=t, label="Democratic Regimes")
|
| 188 |
+
ax = kmf.plot_survival_function(ax=ax)
|
| 189 |
+
|
| 190 |
+
kmf.fit(T[~dem], event_observed=E[~dem], timeline=t, label="Non-democratic Regimes")
|
| 191 |
+
ax = kmf.plot_survival_function(ax=ax)
|
| 192 |
+
|
| 193 |
+
plt.title("Lifespans of different global regimes");
|
| 194 |
+
|
| 195 |
+
.. image:: images/lifelines_intro_multi_kmf_fitter_2.png
|
| 196 |
+
:width: 650px
|
| 197 |
+
:align: center
|
| 198 |
+
|
| 199 |
+
It is incredible how much longer these non-democratic regimes exist for.
|
| 200 |
+
A democratic regime does have a natural bias towards death though: both
|
| 201 |
+
via elections and natural limits (the US imposes a strict eight-year limit).
|
| 202 |
+
The median of a non-democratic regime is only about twice as large as that of a
|
| 203 |
+
democratic regime, but the difference is apparent in the tails:
|
| 204 |
+
if you're a non-democratic leader, and you've made it past the 10 year
|
| 205 |
+
mark, you probably have a long life ahead. Meanwhile, a democratic
|
| 206 |
+
leader rarely makes it past ten years, and then has a very short
|
| 207 |
+
lifetime past that.
|
| 208 |
+
|
| 209 |
+
|
| 210 |
+
Here the difference between survival functions is very obvious, and
|
| 211 |
+
performing a statistical test seems pedantic. If the curves are more
|
| 212 |
+
similar, or we possess less data, we may be interested in performing a
|
| 213 |
+
statistical test. In this case, *lifelines* contains routines in
|
| 214 |
+
:mod:`lifelines.statistics` to compare two survival functions. Below we
|
| 215 |
+
demonstrate this routine. The function :func:`lifelines.statistics.logrank_test` is a common
|
| 216 |
+
statistical test in survival analysis that compares two event series'
|
| 217 |
+
generators. If the value returned exceeds some pre-specified value, then
|
| 218 |
+
we rule that the series have different generators.
|
| 219 |
+
|
| 220 |
+
.. code:: python
|
| 221 |
+
|
| 222 |
+
from lifelines.statistics import logrank_test
|
| 223 |
+
|
| 224 |
+
results = logrank_test(T[dem], T[~dem], E[dem], E[~dem], alpha=.99)
|
| 225 |
+
|
| 226 |
+
results.print_summary()
|
| 227 |
+
|
| 228 |
+
"""
|
| 229 |
+
<lifelines.StatisticalResult>
|
| 230 |
+
t_0 = -1
|
| 231 |
+
null_distribution = chi squared
|
| 232 |
+
degrees_of_freedom = 1
|
| 233 |
+
alpha = 0.99
|
| 234 |
+
|
| 235 |
+
---
|
| 236 |
+
test_statistic p -log2(p)
|
| 237 |
+
260.47 <0.005 192.23
|
| 238 |
+
"""
|
| 239 |
+
|
| 240 |
+
There are alternative (and sometimes better) tests of survival functions, and we explain more here: `Statistically compare two populations <https://github.com/CamDavidsonPilon/lifelines/blob/master/docs/Examples.rst#statistically-compare-two-populations>`_
|
| 241 |
+
|
| 242 |
+
|
| 243 |
+
Let's compare the different *types* of regimes present in the dataset:
|
| 244 |
+
|
| 245 |
+
.. code:: python
|
| 246 |
+
|
| 247 |
+
regime_types = data['regime'].unique()
|
| 248 |
+
|
| 249 |
+
for i, regime_type in enumerate(regime_types):
|
| 250 |
+
ax = plt.subplot(2, 3, i + 1)
|
| 251 |
+
|
| 252 |
+
ix = data['regime'] == regime_type
|
| 253 |
+
kmf.fit(T[ix], E[ix], label=regime_type)
|
| 254 |
+
kmf.plot_survival_function(ax=ax, legend=False)
|
| 255 |
+
|
| 256 |
+
plt.title(regime_type)
|
| 257 |
+
plt.xlim(0, 50)
|
| 258 |
+
|
| 259 |
+
if i==0:
|
| 260 |
+
plt.ylabel('Frac. in power after $n$ years')
|
| 261 |
+
|
| 262 |
+
plt.tight_layout()
|
| 263 |
+
|
| 264 |
+
|
| 265 |
+
.. image:: images/lifelines_intro_all_regimes.png
|
| 266 |
+
:align: center
|
| 267 |
+
:width: 700px
|
| 268 |
+
|
| 269 |
+
Best practices for presenting Kaplan Meier plots
|
| 270 |
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
| 271 |
+
|
| 272 |
+
A recent survey of statisticians, medical professionals, and other stakeholders suggested that the addition
|
| 273 |
+
of two pieces of information, summary tables and confidence intervals, greatly increased the effectiveness of Kaplan Meier plots, see "Morris TP, Jarvis CI, Cragg W, et al. Proposals on Kaplan–Meier plots in medical research and a survey of stakeholder views: KMunicate. BMJ Open 2019;9:e030215. doi:10.1136/bmjopen-2019-030215".
|
| 274 |
+
|
| 275 |
+
In *lifelines*, confidence intervals are automatically added, but there is the ``at_risk_counts`` kwarg to add summary tables as well:
|
| 276 |
+
|
| 277 |
+
.. code:: python
|
| 278 |
+
|
| 279 |
+
kmf = KaplanMeierFitter().fit(T, E, label="all_regimes")
|
| 280 |
+
kmf.plot_survival_function(at_risk_counts=True)
|
| 281 |
+
plt.tight_layout()
|
| 282 |
+
|
| 283 |
+
|
| 284 |
+
|
| 285 |
+
.. image:: images/intro_add_at_risk.png
|
| 286 |
+
:align: center
|
| 287 |
+
:width: 700px
|
| 288 |
+
|
| 289 |
+
For more details, and how to extend this to multiple curves, see `docs here <https://lifelines.readthedocs.io/en/latest/Examples.html#displaying-multiple-at-risk-counts-below-plots>`_.
|
| 290 |
+
|
| 291 |
+
Getting data into the right format
|
| 292 |
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
| 293 |
+
|
| 294 |
+
The *lifelines* data format is consistent across all estimator classes and
|
| 295 |
+
functions: an array of individual durations, and the individuals'
|
| 296 |
+
event observation (if any). These are often denoted ``T`` and ``E``
|
| 297 |
+
respectively. For example:
|
| 298 |
+
|
| 299 |
+
::
|
| 300 |
+
|
| 301 |
+
T = [0, 3, 3, 2, 1, 2]
|
| 302 |
+
E = [1, 1, 0, 0, 1, 1]
|
| 303 |
+
kmf.fit(T, event_observed=E)
|
| 304 |
+
|
| 305 |
+
The raw data is not always available in this format -- *lifelines*
|
| 306 |
+
includes some helper functions to transform data formats to *lifelines*
|
| 307 |
+
format. These are located in the :mod:`lifelines.utils` sub-library. For
|
| 308 |
+
example, the function :func:`~lifelines.utils.datetimes_to_durations` accepts an array or
|
| 309 |
+
Pandas object of start times/dates, and an array or Pandas objects of
|
| 310 |
+
end times/dates (or ``None`` if not observed):
|
| 311 |
+
|
| 312 |
+
.. code:: python
|
| 313 |
+
|
| 314 |
+
from lifelines.utils import datetimes_to_durations
|
| 315 |
+
|
| 316 |
+
start_date = ['2013-10-10 0:00:00', '2013-10-09', '2013-10-10']
|
| 317 |
+
end_date = ['2013-10-13', '2013-10-10', None]
|
| 318 |
+
T, E = datetimes_to_durations(start_date, end_date, fill_date='2013-10-15')
|
| 319 |
+
print('T (durations): ', T)
|
| 320 |
+
print('E (event_observed): ', E)
|
| 321 |
+
|
| 322 |
+
.. parsed-literal::
|
| 323 |
+
|
| 324 |
+
T (durations): [ 3. 1. 5.]
|
| 325 |
+
E (event_observed): [ True True False]
|
| 326 |
+
|
| 327 |
+
|
| 328 |
+
The function :func:`~lifelines.utils.datetimes_to_durations` is very flexible, and has many
|
| 329 |
+
keywords to tinker with.
|
| 330 |
+
|
| 331 |
+
|
| 332 |
+
Estimating hazard rates using Nelson-Aalen
|
| 333 |
+
''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
|
| 334 |
+
|
| 335 |
+
The survival function is a great way to summarize and visualize the
|
| 336 |
+
survival dataset, however it is not the only way. If we are curious about the hazard function :math:`h(t)` of a
|
| 337 |
+
population, we unfortunately cannot transform the Kaplan Meier estimate
|
| 338 |
+
-- statistics doesn't work quite that well. Fortunately, there is a
|
| 339 |
+
proper non-parametric estimator of the *cumulative* hazard function, :math:`H(t)`:
|
| 340 |
+
|
| 341 |
+
.. math:: \text{Let } H(t) = \int_0^t h(z) \;dz
|
| 342 |
+
|
| 343 |
+
|
| 344 |
+
|
| 345 |
+
The estimator for this quantity is called the Nelson Aalen estimator:
|
| 346 |
+
|
| 347 |
+
|
| 348 |
+
|
| 349 |
+
.. math:: \hat{H}(t) = \sum_{t_i \le t} \frac{d_i}{n_i}
|
| 350 |
+
|
| 351 |
+
where :math:`d_i` is the number of deaths at time :math:`t_i` and
|
| 352 |
+
:math:`n_i` is the number of susceptible individuals.
|
| 353 |
+
|
| 354 |
+
In *lifelines*, this estimator is available as the :class:`~lifelines.fitters.nelson_aalen_fitter.NelsonAalenFitter`. Let's use the regime dataset from above:
|
| 355 |
+
|
| 356 |
+
.. code:: python
|
| 357 |
+
|
| 358 |
+
T = data["duration"]
|
| 359 |
+
E = data["observed"]
|
| 360 |
+
|
| 361 |
+
from lifelines import NelsonAalenFitter
|
| 362 |
+
naf = NelsonAalenFitter()
|
| 363 |
+
|
| 364 |
+
naf.fit(T,event_observed=E)
|
| 365 |
+
|
| 366 |
+
|
| 367 |
+
After fitting, the class exposes the property :attr:`~lifelines.fitters.nelson_aalen_fitter.NelsonAalenFitter.cumulative_hazard_` as
|
| 368 |
+
a DataFrame:
|
| 369 |
+
|
| 370 |
+
.. code:: python
|
| 371 |
+
|
| 372 |
+
print(naf.cumulative_hazard_.head())
|
| 373 |
+
naf.plot_cumulative_hazard()
|
| 374 |
+
|
| 375 |
+
.. parsed-literal::
|
| 376 |
+
|
| 377 |
+
NA-estimate
|
| 378 |
+
0 0.000000
|
| 379 |
+
1 0.325912
|
| 380 |
+
2 0.507356
|
| 381 |
+
3 0.671251
|
| 382 |
+
4 0.869867
|
| 383 |
+
|
| 384 |
+
[5 rows x 1 columns]
|
| 385 |
+
|
| 386 |
+
|
| 387 |
+
|
| 388 |
+
.. image:: images/lifelines_intro_naf_fitter.png
|
| 389 |
+
:width: 650px
|
| 390 |
+
:align: center
|
| 391 |
+
|
| 392 |
+
The cumulative hazard is less intuitive to interpret than the survival
|
| 393 |
+
functions, but the hazard function is the basis of more advanced techniques in
|
| 394 |
+
survival analysis. Recall that we are estimating *cumulative hazard
|
| 395 |
+
functions*, :math:`H(t)`. (Why? The sum of estimates is much more
|
| 396 |
+
stable than the point-wise estimates.) Thus we know the *rate of change*
|
| 397 |
+
of this curve is an estimate of the hazard function.
|
| 398 |
+
|
| 399 |
+
Looking at the figure above, it looks like the hazard starts off high and
|
| 400 |
+
gets smaller (as seen by the decreasing rate of change). Let's break the
|
| 401 |
+
regimes down between democratic and non-democratic, during the first 20
|
| 402 |
+
years:
|
| 403 |
+
|
| 404 |
+
.. note:: We are using the ``loc`` argument in the call to ``plot_cumulative_hazard`` here: it accepts a ``slice`` and plots only points within that slice.
|
| 405 |
+
|
| 406 |
+
.. code:: python
|
| 407 |
+
|
| 408 |
+
naf.fit(T[dem], event_observed=E[dem], label="Democratic Regimes")
|
| 409 |
+
ax = naf.plot_cumulative_hazard(loc=slice(0, 20))
|
| 410 |
+
|
| 411 |
+
naf.fit(T[~dem], event_observed=E[~dem], label="Non-democratic Regimes")
|
| 412 |
+
naf.plot_cumulative_hazard(ax=ax, loc=slice(0, 20))
|
| 413 |
+
|
| 414 |
+
plt.title("Cumulative hazard function of different global regimes");
|
| 415 |
+
|
| 416 |
+
|
| 417 |
+
.. image:: images/lifelines_intro_naf_fitter_multi.png
|
| 418 |
+
:width: 600px
|
| 419 |
+
:align: center
|
| 420 |
+
|
| 421 |
+
Looking at the rates of change, I would say that both political
|
| 422 |
+
philosophies have a constant hazard, albeit democratic regimes have a
|
| 423 |
+
much *higher* constant hazard.
|
| 424 |
+
|
| 425 |
+
Smoothing the hazard function
|
| 426 |
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
| 427 |
+
|
| 428 |
+
Interpretation of the cumulative hazard function can be difficult -- it
|
| 429 |
+
is not how we usually interpret functions. On the other hand, most
|
| 430 |
+
survival analysis is done using the cumulative hazard function, so understanding
|
| 431 |
+
it is recommended.
|
| 432 |
+
|
| 433 |
+
Alternatively, we can derive the more interpretable hazard function, but
|
| 434 |
+
there is a catch. The derivation involves a kernel smoother (to smooth
|
| 435 |
+
out the differences of the cumulative hazard function), and this requires
|
| 436 |
+
us to specify a bandwidth parameter that controls the amount of
|
| 437 |
+
smoothing. This functionality is in the :meth:`~lifelines.fitters.nelson_aalen_fitter.NelsonAalenFitter.smoothed_hazard_`
|
| 438 |
+
and :meth:`~lifelines.fitters.nelson_aalen_fitter.NelsonAalenFitter.smoothed_hazard_confidence_intervals_` methods. Why methods?
|
| 439 |
+
They require an argument representing the bandwidth.
|
| 440 |
+
|
| 441 |
+
|
| 442 |
+
There is also a :meth:`~lifelines.fitters.nelson_aalen_fitter.NelsonAalenFitter.plot_hazard` function (that also requires a
|
| 443 |
+
``bandwidth`` keyword) that will plot the estimate plus the confidence
|
| 444 |
+
intervals, similar to the traditional :meth:`~lifelines.fitters.nelson_aalen_fitter.NelsonAalenFitter.plot` functionality.
|
| 445 |
+
|
| 446 |
+
.. code:: python
|
| 447 |
+
|
| 448 |
+
bandwidth = 3.
|
| 449 |
+
|
| 450 |
+
naf.fit(T[dem], event_observed=E[dem], label="Democratic Regimes")
|
| 451 |
+
ax = naf.plot_hazard(bandwidth=bandwidth)
|
| 452 |
+
|
| 453 |
+
naf.fit(T[~dem], event_observed=E[~dem], label="Non-democratic Regimes")
|
| 454 |
+
naf.plot_hazard(ax=ax, bandwidth=bandwidth)
|
| 455 |
+
|
| 456 |
+
plt.title("Hazard function of different global regimes | bandwidth=%.1f" % bandwidth);
|
| 457 |
+
plt.ylim(0, 0.4)
|
| 458 |
+
plt.xlim(0, 25);
|
| 459 |
+
|
| 460 |
+
|
| 461 |
+
.. image:: images/lifelines_intro_naf_smooth_multi.png
|
| 462 |
+
:width: 600px
|
| 463 |
+
:align: center
|
| 464 |
+
|
| 465 |
+
It is more clear here which group has the higher hazard, and Non-democratic regimes appear to have a constant hazard.
|
| 466 |
+
|
| 467 |
+
There is no obvious way to choose a bandwidth, and different
|
| 468 |
+
bandwidths produce different inferences, so it's best to be very careful
|
| 469 |
+
here. My advice: stick with the cumulative hazard function.
|
| 470 |
+
|
| 471 |
+
.. code:: python
|
| 472 |
+
|
| 473 |
+
bandwidth = 8.0
|
| 474 |
+
|
| 475 |
+
naf.fit(T[dem], event_observed=E[dem], label="Democratic Regimes")
|
| 476 |
+
ax = naf.plot_hazard(bandwidth=bandwidth)
|
| 477 |
+
|
| 478 |
+
naf.fit(T[~dem], event_observed=E[~dem], label="Non-democratic Regimes")
|
| 479 |
+
naf.plot_hazard(ax=ax, bandwidth=bandwidth)
|
| 480 |
+
|
| 481 |
+
plt.title("Hazard function of different global regimes | bandwidth=%.1f" % bandwidth);
|
| 482 |
+
|
| 483 |
+
|
| 484 |
+
|
| 485 |
+
.. image:: images/lifelines_intro_naf_smooth_multi_2.png
|
| 486 |
+
:width: 600px
|
| 487 |
+
:align: center
|
| 488 |
+
|
| 489 |
+
Estimating cumulative hazards using parametric models
|
| 490 |
+
''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
|
| 491 |
+
|
| 492 |
+
|
| 493 |
+
Fitting to a Weibull model
|
| 494 |
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
| 495 |
+
|
| 496 |
+
Another very popular model for survival data is the Weibull model. In contrast to the Nelson-Aalen estimator, this model is a *parametric model*, meaning it has a functional form with parameters that we are fitting the data to. (The Nelson-Aalen estimator has no parameters to fit.) The survival function looks like:
|
| 497 |
+
|
| 498 |
+
|
| 499 |
+
.. math:: S(t) = \exp\left(-\left(\frac{t}{\lambda}\right)^\rho\right), \lambda >0, \rho > 0,
|
| 500 |
+
|
| 501 |
+
A priori, we do not know what :math:`\lambda` and :math:`\rho` are, but we use the data on hand to estimate these parameters. We model and estimate the cumulative hazard rate instead of the survival function (this is different than the Kaplan-Meier estimator):
|
| 502 |
+
|
| 503 |
+
.. math:: H(t) = \left(\frac{t}{\lambda}\right)^\rho
|
| 504 |
+
|
| 505 |
+
In lifelines, estimation is available using the :class:`~lifelines.fitters.weibull_fitter.WeibullFitter` class. The :meth:`~lifelines.fitters.weibull_fitter.WeibullFitter.plot` method will plot the cumulative hazard.
|
| 506 |
+
|
| 507 |
+
.. code:: python
|
| 508 |
+
|
| 509 |
+
from lifelines import WeibullFitter
|
| 510 |
+
from lifelines.datasets import load_waltons
|
| 511 |
+
|
| 512 |
+
data = load_waltons()
|
| 513 |
+
|
| 514 |
+
T = data['T']
|
| 515 |
+
E = data['E']
|
| 516 |
+
|
| 517 |
+
wf = WeibullFitter().fit(T, E)
|
| 518 |
+
|
| 519 |
+
wf.print_summary()
|
| 520 |
+
ax = wf.plot_cumulative_hazard()
|
| 521 |
+
ax.set_title("Cumulative hazard of Weibull model; estimated parameters")
|
| 522 |
+
|
| 523 |
+
|
| 524 |
+
"""
|
| 525 |
+
<lifelines.WeibullFitter: fitted with 163 observations, 7 censored>
|
| 526 |
+
number of subjects = 163
|
| 527 |
+
number of events = 156
|
| 528 |
+
log-likelihood = -672.062
|
| 529 |
+
hypothesis = lambda != 1, rho != 1
|
| 530 |
+
|
| 531 |
+
---
|
| 532 |
+
coef se(coef) lower 0.95 upper 0.95 p -log2(p)
|
| 533 |
+
lambda_ 0.02 0.00 0.02 0.02 <0.005 inf
|
| 534 |
+
rho_ 3.45 0.24 2.97 3.93 <0.005 76.83
|
| 535 |
+
"""
|
| 536 |
+
|
| 537 |
+
.. image:: images/survival_weibull.png
|
| 538 |
+
:width: 550px
|
| 539 |
+
:align: center
|
| 540 |
+
|
| 541 |
+
|
| 542 |
+
Other parametric models: Exponential, Log-Logistic, Log-Normal and Splines
|
| 543 |
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
| 544 |
+
|
| 545 |
+
Similarly, there are other parametric models in *lifelines*. Generally, which parametric model to choose is determined by either knowledge of the distribution of durations, or some sort of model goodness-of-fit. Below are the built-in parametric models, and the Nelson-Aalen non-parametric model, of the same data.
|
| 546 |
+
|
| 547 |
+
.. code:: python
|
| 548 |
+
|
| 549 |
+
from lifelines import (WeibullFitter, ExponentialFitter,
|
| 550 |
+
LogNormalFitter, LogLogisticFitter, NelsonAalenFitter,
|
| 551 |
+
PiecewiseExponentialFitter, GeneralizedGammaFitter, SplineFitter)
|
| 552 |
+
|
| 553 |
+
from lifelines.datasets import load_waltons
|
| 554 |
+
data = load_waltons()
|
| 555 |
+
|
| 556 |
+
fig, axes = plt.subplots(3, 3, figsize=(10, 7.5))
|
| 557 |
+
|
| 558 |
+
T = data['T']
|
| 559 |
+
E = data['E']
|
| 560 |
+
|
| 561 |
+
wbf = WeibullFitter().fit(T, E, label='WeibullFitter')
|
| 562 |
+
exf = ExponentialFitter().fit(T, E, label='ExponentialFitter')
|
| 563 |
+
lnf = LogNormalFitter().fit(T, E, label='LogNormalFitter')
|
| 564 |
+
naf = NelsonAalenFitter().fit(T, E, label='NelsonAalenFitter')
|
| 565 |
+
llf = LogLogisticFitter().fit(T, E, label='LogLogisticFitter')
|
| 566 |
+
pwf = PiecewiseExponentialFitter([40, 60]).fit(T, E, label='PiecewiseExponentialFitter')
|
| 567 |
+
gg = GeneralizedGammaFitter().fit(T, E, label='GeneralizedGammaFitter')
|
| 568 |
+
spf = SplineFitter([6, 20, 40, 75]).fit(T, E, label='SplineFitter')
|
| 569 |
+
|
| 570 |
+
wbf.plot_cumulative_hazard(ax=axes[0][0])
|
| 571 |
+
exf.plot_cumulative_hazard(ax=axes[0][1])
|
| 572 |
+
lnf.plot_cumulative_hazard(ax=axes[0][2])
|
| 573 |
+
naf.plot_cumulative_hazard(ax=axes[1][0])
|
| 574 |
+
llf.plot_cumulative_hazard(ax=axes[1][1])
|
| 575 |
+
pwf.plot_cumulative_hazard(ax=axes[1][2])
|
| 576 |
+
gg.plot_cumulative_hazard(ax=axes[2][0])
|
| 577 |
+
spf.plot_cumulative_hazard(ax=axes[2][1])
|
| 578 |
+
|
| 579 |
+
|
| 580 |
+
.. image:: images/waltons_cumulative_hazard.png
|
| 581 |
+
|
| 582 |
+
*lifelines* can also be used to define your own parametric model. There is a tutorial on this available, see `Piecewise Exponential Models and Creating Custom Models`_.
|
| 583 |
+
|
| 584 |
+
Parametric models can also be used to create and plot the survival function, too. Below we compare the parametric models versus the non-parametric Kaplan-Meier estimate:
|
| 585 |
+
|
| 586 |
+
.. code:: python
|
| 587 |
+
|
| 588 |
+
from lifelines import KaplanMeierFitter
|
| 589 |
+
|
| 590 |
+
fig, axes = plt.subplots(3, 3, figsize=(10, 7.5))
|
| 591 |
+
|
| 592 |
+
T = data['T']
|
| 593 |
+
E = data['E']
|
| 594 |
+
|
| 595 |
+
kmf = KaplanMeierFitter().fit(T, E, label='KaplanMeierFitter')
|
| 596 |
+
wbf = WeibullFitter().fit(T, E, label='WeibullFitter')
|
| 597 |
+
exf = ExponentialFitter().fit(T, E, label='ExponentialFitter')
|
| 598 |
+
lnf = LogNormalFitter().fit(T, E, label='LogNormalFitter')
|
| 599 |
+
llf = LogLogisticFitter().fit(T, E, label='LogLogisticFitter')
|
| 600 |
+
pwf = PiecewiseExponentialFitter([40, 60]).fit(T, E, label='PiecewiseExponentialFitter')
|
| 601 |
+
gg = GeneralizedGammaFitter().fit(T, E, label='GeneralizedGammaFitter')
|
| 602 |
+
spf = SplineFitter([6, 20, 40, 75]).fit(T, E, label='SplineFitter')
|
| 603 |
+
|
| 604 |
+
wbf.plot_survival_function(ax=axes[0][0])
|
| 605 |
+
exf.plot_survival_function(ax=axes[0][1])
|
| 606 |
+
lnf.plot_survival_function(ax=axes[0][2])
|
| 607 |
+
kmf.plot_survival_function(ax=axes[1][0])
|
| 608 |
+
llf.plot_survival_function(ax=axes[1][1])
|
| 609 |
+
pwf.plot_survival_function(ax=axes[1][2])
|
| 610 |
+
gg.plot_survival_function(ax=axes[2][0])
|
| 611 |
+
spf.plot_survival_function(ax=axes[2][1])
|
| 612 |
+
|
| 613 |
+
.. image:: images/waltons_survival_function.png
|
| 614 |
+
|
| 615 |
+
With parametric models, we have a functional form that allows us to extend the survival function (or hazard or cumulative hazard) past our maximum observed duration. This is called extrapolation. We can do this in a few ways.
|
| 616 |
+
|
| 617 |
+
.. code:: python
|
| 618 |
+
|
| 619 |
+
timeline = np.linspace(0, 100, 200)
|
| 620 |
+
|
| 621 |
+
# directly compute the survival function, these return a pandas Series
|
| 622 |
+
wbf = WeibullFitter().fit(T, E)
|
| 623 |
+
wbf.survival_function_at_times(timeline)
|
| 624 |
+
wbf.hazard_at_times(timeline)
|
| 625 |
+
wbf.cumulative_hazard_at_times(timeline)
|
| 626 |
+
|
| 627 |
+
# use the `timeline` kwarg in `fit`
|
| 628 |
+
# by default, all functions and properties will use
|
| 629 |
+
# these values provided
|
| 630 |
+
wbf = WeibullFitter().fit(T, E, timeline=timeline)
|
| 631 |
+
|
| 632 |
+
ax = wbf.plot_survival_function()
|
| 633 |
+
ax.set_title("Survival function of Weibull model; estimated parameters")
|
| 634 |
+
|
| 635 |
+
.. image:: images/weibull_extrapolation.png
|
| 636 |
+
:width: 600px
|
| 637 |
+
:align: center
|
| 638 |
+
|
| 639 |
+
Model Selection
|
| 640 |
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
| 641 |
+
|
| 642 |
+
When the underlying data generation distribution is unknown, we resort to measures of fit to tell us which model is most appropriate. *lifelines* has provided qq-plots, `Selecting a parametric model using QQ plots`_, and also tools to compare AIC and other measures: `Selecting a parametric model using AIC`_.
|
| 643 |
+
|
| 644 |
+
|
| 645 |
+
Other types of censoring
|
| 646 |
+
''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
|
| 647 |
+
|
| 648 |
+
Left censored data and non-detection
|
| 649 |
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
| 650 |
+
|
| 651 |
+
We've mainly been focusing on *right-censoring*, which describes cases where we do not observe the death event.
|
| 652 |
+
This situation is the most common one. Alternatively, there are situations where we do not observe the *birth* event
|
| 653 |
+
occurring. Consider the case where a doctor sees a delayed onset of symptoms of an underlying disease. The doctor
|
| 654 |
+
is unsure *when* the disease was contracted (birth), but knows it was before the discovery.
|
| 655 |
+
|
| 656 |
+
Another situation where we have left-censored data is when measurements have only an upper bound, that is, the measurement
|
| 657 |
+
instruments could only detect the measurement was *less* than some upper bound. This bound is often called the limit of detection (LOD). In practice, there could be more than one LOD. One very important statistical lesson: don't "fill-in" this value naively. It's tempting to use something like one-half the LOD, but this will cause *lots* of bias in downstream analysis. An example dataset is below:
|
| 658 |
+
|
| 659 |
+
.. note:: The recommended API for modeling left-censored data using parametric models changed in version 0.21.0. Below is the recommended API.
|
| 660 |
+
|
| 661 |
+
.. code:: python
|
| 662 |
+
|
| 663 |
+
from lifelines.datasets import load_nh4
|
| 664 |
+
df = load_nh4()[['NH4.Orig.mg.per.L', 'NH4.mg.per.L', 'Censored']]
|
| 665 |
+
print(df.head())
|
| 666 |
+
|
| 667 |
+
"""
|
| 668 |
+
NH4.Orig.mg.per.L NH4.mg.per.L Censored
|
| 669 |
+
1 <0.006 0.006 True
|
| 670 |
+
2 <0.006 0.006 True
|
| 671 |
+
3 0.006 0.006 False
|
| 672 |
+
4 0.016 0.016 False
|
| 673 |
+
5 <0.006 0.006 True
|
| 674 |
+
"""
|
| 675 |
+
|
| 676 |
+
|
| 677 |
+
*lifelines* has support for left-censored datasets in most univariate models, including the :class:`~lifelines.fitters.kaplan_meier_fitter.KaplanMeierFitter` class, by using the :meth:`~lifelines.fitters.kaplan_meier_fitter.KaplanMeierFitter.fit_left_censoring` method.
|
| 678 |
+
|
| 679 |
+
.. code:: python
|
| 680 |
+
|
| 681 |
+
|
| 682 |
+
T, E = df['NH4.mg.per.L'], ~df['Censored']
|
| 683 |
+
|
| 684 |
+
kmf = KaplanMeierFitter()
|
| 685 |
+
kmf.fit_left_censoring(T, E)
|
| 686 |
+
|
| 687 |
+
Instead of producing a survival function, left-censored data analysis is more interested in the cumulative density function. This is available as the :attr:`~lifelines.fitters.kaplan_meier_fitter.KaplanMeierFitter.cumulative_density_` property after fitting the data.
|
| 688 |
+
|
| 689 |
+
.. code:: python
|
| 690 |
+
|
| 691 |
+
print(kmf.cumulative_density_.head())
|
| 692 |
+
|
| 693 |
+
kmf.plot_cumulative_density() #will plot the CDF
|
| 694 |
+
plt.xlabel("Concentration of NH_4")
|
| 695 |
+
|
| 696 |
+
"""
|
| 697 |
+
KM_estimate
|
| 698 |
+
timeline
|
| 699 |
+
0.000 0.379897
|
| 700 |
+
0.006 0.401002
|
| 701 |
+
0.007 0.464319
|
| 702 |
+
0.008 0.478828
|
| 703 |
+
0.009 0.536868
|
| 704 |
+
"""
|
| 705 |
+
|
| 706 |
+
|
| 707 |
+
.. image:: images/lifelines_intro_lcd.png
|
| 708 |
+
:width: 600px
|
| 709 |
+
:align: center
|
| 710 |
+
|
| 711 |
+
Alternatively, you can use a parametric model to model the data. This allows for you to "peer" below the LOD, however using a parametric model means you need to correctly specify the distribution. You can use plots like qq-plots to help invalidate some distributions, see `Selecting a parametric model using QQ plots`_ and `Selecting a parametric model using AIC`_.
|
| 712 |
+
|
| 713 |
+
|
| 714 |
+
.. code:: python
|
| 715 |
+
|
| 716 |
+
from lifelines import *
|
| 717 |
+
from lifelines.plotting import qq_plot
|
| 718 |
+
|
| 719 |
+
fig, axes = plt.subplots(3, 2, figsize=(9, 9))
|
| 720 |
+
timeline = np.linspace(0, 0.25, 100)
|
| 721 |
+
|
| 722 |
+
wf = WeibullFitter().fit_left_censoring(T, E, label="Weibull", timeline=timeline)
|
| 723 |
+
lnf = LogNormalFitter().fit_left_censoring(T, E, label="Log Normal", timeline=timeline)
|
| 724 |
+
lgf = LogLogisticFitter().fit_left_censoring(T, E, label="Log Logistic", timeline=timeline)
|
| 725 |
+
|
| 726 |
+
# plot what we just fit, along with the KMF estimate
|
| 727 |
+
kmf.plot_cumulative_density(ax=axes[0][0], ci_show=False)
|
| 728 |
+
wf.plot_cumulative_density(ax=axes[0][0], ci_show=False)
|
| 729 |
+
qq_plot(wf, ax=axes[0][1])
|
| 730 |
+
|
| 731 |
+
kmf.plot_cumulative_density(ax=axes[1][0], ci_show=False)
|
| 732 |
+
lnf.plot_cumulative_density(ax=axes[1][0], ci_show=False)
|
| 733 |
+
qq_plot(lnf, ax=axes[1][1])
|
| 734 |
+
|
| 735 |
+
kmf.plot_cumulative_density(ax=axes[2][0], ci_show=False)
|
| 736 |
+
lgf.plot_cumulative_density(ax=axes[2][0], ci_show=False)
|
| 737 |
+
qq_plot(lgf, ax=axes[2][1])
|
| 738 |
+
|
| 739 |
+
.. image:: images/lcd_parametric.png
|
| 740 |
+
|
| 741 |
+
|
| 742 |
+
Based on the above, the log-normal distribution seems to fit well, and the Weibull not very well at all.
|
| 743 |
+
|
| 744 |
+
|
| 745 |
+
Interval censored data
|
| 746 |
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
| 747 |
+
|
| 748 |
+
Data can also be *interval* censored. An example of this is periodically recording a population of organisms. Their deaths are interval censored because you know a subject died between two observation periods.
|
| 749 |
+
|
| 750 |
+
|
| 751 |
+
.. code:: python
|
| 752 |
+
|
| 753 |
+
|
| 754 |
+
from lifelines.datasets import load_diabetes
|
| 755 |
+
from lifelines.plotting import plot_interval_censored_lifetimes
|
| 756 |
+
|
| 757 |
+
df = load_diabetes()
|
| 758 |
+
plot_interval_censored_lifetimes(df['left'], df['right'])
|
| 759 |
+
|
| 760 |
+
.. image:: images/interval_censored_lifetimes.png
|
| 761 |
+
:width: 670px
|
| 762 |
+
:align: center
|
| 763 |
+
|
| 764 |
+
|
| 765 |
+
Above, we can see that some subjects' deaths were exactly observed (denoted by a red ●), and some subjects' deaths are bounded between two times (denoted by the interval between the red ▶︎ ◀︎). We can perform inference on the data using any of our models. Note the use of ``fit_interval_censoring`` instead of ``fit``.
|
| 766 |
+
|
| 767 |
+
.. note:: The API for ``fit_interval_censoring`` is different than right and left censored data.
|
| 768 |
+
|
| 769 |
+
.. code:: python
|
| 770 |
+
|
| 771 |
+
wf = WeibullFitter()
|
| 772 |
+
wf.fit_interval_censoring(lower_bound=df['left'], upper_bound=df['right'])
|
| 773 |
+
|
| 774 |
+
# or, a non-parametric estimator:
|
| 775 |
+
# for now, this assumes closed observation intervals, ex: [4,5], not (4, 5) or (4, 5]
|
| 776 |
+
kmf = KaplanMeierFitter()
|
| 777 |
+
kmf.fit_interval_censoring(df['left'], df['right'])
|
| 778 |
+
|
| 779 |
+
ax = kmf.plot_survival_function()
|
| 780 |
+
wf.plot_survival_function(ax=ax)
|
| 781 |
+
|
| 782 |
+
|
| 783 |
+
.. image:: images/interval_censored_inference.png
|
| 784 |
+
:width: 670px
|
| 785 |
+
:align: center
|
| 786 |
+
|
| 787 |
+
|
| 788 |
+
|
| 789 |
+
Another example of using lifelines for interval censored data is located `here <https://dataorigami.net/blogs/napkin-folding/counting-and-interval-censoring>`_.
|
| 790 |
+
|
| 791 |
+
|
| 792 |
+
|
| 793 |
+
Left truncated (late entry) data
|
| 794 |
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
| 795 |
+
|
| 796 |
+
Another form of bias that is introduced into a dataset is called left-truncation (or late entry). Left-truncation can occur in many situations. One situation is when individuals may have the opportunity to die before entering into the study. For example, if you are measuring time to death of prisoners in prison, the prisoners will enter the study at different ages. So it's possible there are some counter-factual individuals who *would* have entered into your study (that is, went to prison), but instead died early.
|
| 797 |
+
|
| 798 |
+
All fitters, like :class:`~lifelines.fitters.kaplan_meier_fitter.KaplanMeierFitter` and any parametric models, have an optional argument for ``entry``, which is an array of equal size to the duration array. It describes the time between actual "birth" (or "exposure") and entering the study.
|
| 799 |
+
|
| 800 |
+
.. note:: Nothing changes in the duration array: it still measures time from "birth" to time exited study (either by death or censoring). That is, durations refer to the absolute death time rather than a duration relative to the study entry.
|
| 801 |
+
|
| 802 |
+
Another situation with left-truncation occurs when subjects are exposed before entry into study. For example, a study of time to all-cause mortality of AIDS patients that recruited individuals previously diagnosed with AIDS, possibly years before. In our example below we will use a dataset like this, called the Multicenter Aids Cohort Study. In the figure below, we plot the lifetimes of subjects. A solid line is when the subject was under our observation, and a dashed line represents the unobserved period between diagnosis and study entry. A solid dot at the end of the line represents death.
|
| 803 |
+
|
| 804 |
+
.. code:: python
|
| 805 |
+
|
| 806 |
+
from lifelines.datasets import load_multicenter_aids_cohort_study
|
| 807 |
+
from lifelines.plotting import plot_lifetimes
|
| 808 |
+
|
| 809 |
+
df = load_multicenter_aids_cohort_study()
|
| 810 |
+
|
| 811 |
+
plot_lifetimes(
|
| 812 |
+
df["T"],
|
| 813 |
+
event_observed=df["D"],
|
| 814 |
+
entry=df["W"],
|
| 815 |
+
event_observed_color="#383838",
|
| 816 |
+
event_censored_color="#383838",
|
| 817 |
+
left_truncated=True,
|
| 818 |
+
)
|
| 819 |
+
plt.ylabel("Patient Number")
|
| 820 |
+
plt.xlabel("Years from AIDS diagnosis")
|
| 821 |
+
|
| 822 |
+
|
| 823 |
+
.. image:: images/lifetimes_mcas.png
|
| 824 |
+
:width: 670px
|
| 825 |
+
:align: center
|
| 826 |
+
|
| 827 |
+
So subject #77, the subject at the top, was diagnosed with AIDS 7.5 years ago, but wasn't in our study for the first 4.5 years. From this point-of-view, why can't we "fill in" the dashed lines and say, for example, "subject #77 lived for 7.5 years"? If we did this, we would severely underestimate the chance of dying early on after diagnosis. Why? It's possible that there were individuals who were diagnosed and then died shortly after, and never had a chance to enter our study. If we did manage to observe them however, they would have depressed the survival function early on. Thus, "filling in" the dashed lines makes us overconfident about what occurs in the early period after diagnosis. We can see this below when we model the survival function with and without taking into account late entries.
|
| 828 |
+
|
| 829 |
+
|
| 830 |
+
.. code:: python
|
| 831 |
+
|
| 832 |
+
from lifelines import KaplanMeierFitter
|
| 833 |
+
|
| 834 |
+
kmf = KaplanMeierFitter()
|
| 835 |
+
kmf.fit(df["T"], event_observed=df["D"], entry=df["W"], label='modeling late entries')
|
| 836 |
+
ax = kmf.plot_survival_function()
|
| 837 |
+
|
| 838 |
+
kmf.fit(df["T"], event_observed=df["D"], label='ignoring late entries')
|
| 839 |
+
kmf.plot_survival_function(ax=ax)
|
| 840 |
+
|
| 841 |
+
|
| 842 |
+
.. image:: images/kmf_mcas.png
|
| 843 |
+
:width: 650px
|
| 844 |
+
:align: center
|
| 845 |
+
|
| 846 |
+
|
| 847 |
+
.. _Piecewise Exponential Models and Creating Custom Models: jupyter_notebooks/Piecewise%20Exponential%20Models%20and%20Creating%20Custom%20Models.html
|
| 848 |
+
.. _Statistically compare two populations: Examples.html#statistically-compare-two-populations
|
| 849 |
+
.. _Selecting a parametric model using QQ plots: Examples.html#selecting-a-parametric-model-using-qq-plots
|
| 850 |
+
.. _Selecting a parametric model using AIC: Examples.html#selecting-a-parametric-model-using-aic
|
lifelines/source/docs/Time varying survival regression.rst
ADDED
|
@@ -0,0 +1,262 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
Time varying survival regression
|
| 3 |
+
=====================================
|
| 4 |
+
|
| 5 |
+
Cox's time varying proportional hazard model
|
| 6 |
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
| 7 |
+
|
| 8 |
+
Often an individual will have a covariate change over time. An example of this is hospital patients who enter the study and, at some future time, may receive a heart transplant. We would like to know the effect of the transplant, but we must be careful if we condition on whether they received the transplant. Consider that if patients needed to wait at least 1 year before getting a transplant, then everyone who dies before that year is considered as a non-transplant patient, and hence this would overestimate the hazard of not receiving a transplant.
|
| 9 |
+
|
| 10 |
+
We can incorporate changes over time into our survival analysis by using a modification of the Cox model. The general mathematical description is:
|
| 11 |
+
|
| 12 |
+
.. math:: h(t | x) = \overbrace{b_0(t)}^{\text{baseline}}\underbrace{\exp \overbrace{\left(\sum_{i=1}^n \beta_i (x_i(t) - \overline{x_i}) \right)}^{\text{log-partial hazard}}}_ {\text{partial hazard}}
|
| 13 |
+
|
| 14 |
+
Note the time-varying :math:`x_i(t)` to denote that covariates can change over time. This model is implemented in *lifelines* as :class:`~lifelines.fitters.cox_time_varying_fitter.CoxTimeVaryingFitter`. The dataset schema required is different than previous models, so we will spend some time describing it.
|
| 15 |
+
|
| 16 |
+
Dataset creation for time-varying regression
|
| 17 |
+
#############################################
|
| 18 |
+
|
| 19 |
+
*lifelines* requires that the dataset be in what is called the *long* format. This looks like one row per state change, including an ID, the left (exclusive) time point, and right (inclusive) time point. For example, the following dataset tracks three unique subjects.
|
| 20 |
+
|
| 21 |
+
.. table::
|
| 22 |
+
|
| 23 |
+
+--+-----+----+-----+-+-----+
|
| 24 |
+
|id|start|stop|group|z|event|
|
| 25 |
+
+==+=====+====+=====+=+=====+
|
| 26 |
+
| 1| 0| 8| 1|0|False|
|
| 27 |
+
+--+-----+----+-----+-+-----+
|
| 28 |
+
| 2| 0| 5| 0|0|False|
|
| 29 |
+
+--+-----+----+-----+-+-----+
|
| 30 |
+
| 2| 5| 8| 0|1|True |
|
| 31 |
+
+--+-----+----+-----+-+-----+
|
| 32 |
+
| 3| 0| 3| 1|0|False|
|
| 33 |
+
+--+-----+----+-----+-+-----+
|
| 34 |
+
| 3| 3| 12| 1|1|True |
|
| 35 |
+
+--+-----+----+-----+-+-----+
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
In the above dataset, ``start`` and ``stop`` denote the boundaries, ``id`` is the unique identifier per subject, and ``event`` denotes if the subject died at the end of that period. For example, subject ID 2 had variable ``z=0`` up to and including the end of time period 5 (we can think that measurements happen at end of the time period), after which it was set to 1. Since ``event`` is ``True`` in the subject's final row, we conclude that the subject died at time 8.
|
| 39 |
+
|
| 40 |
+
This desired dataset can be built up from smaller datasets. To do this we can use some helper functions provided in *lifelines*. Typically, data will be in a format that looks like it comes out of a relational database. You may have a "base" table with ids, durations alive, and a censored flag, and possibly static covariates. Ex:
|
| 41 |
+
|
| 42 |
+
.. table::
|
| 43 |
+
|
| 44 |
+
+--+--------+-----+----+
|
| 45 |
+
|id|duration|event|var1|
|
| 46 |
+
+==+========+=====+====+
|
| 47 |
+
| 1| 10|True | 0.1|
|
| 48 |
+
+--+--------+-----+----+
|
| 49 |
+
| 2| 12|False| 0.5|
|
| 50 |
+
+--+--------+-----+----+
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
We will perform a light transform to this dataset to modify it into the "long" format.
|
| 54 |
+
|
| 55 |
+
.. code:: python
|
| 56 |
+
|
| 57 |
+
import pandas as pd
|
| 58 |
+
from lifelines.utils import to_long_format
|
| 59 |
+
|
| 60 |
+
base_df = pd.DataFrame([
|
| 61 |
+
{'id': 1, 'duration': 10, 'event': True, 'var1': 0.1},
|
| 62 |
+
{'id': 2, 'duration': 12, 'event': False, 'var1': 0.5}
|
| 63 |
+
])
|
| 64 |
+
|
| 65 |
+
base_df = to_long_format(base_df, duration_col="duration")
|
| 66 |
+
|
| 67 |
+
The new dataset looks like:
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
.. table::
|
| 71 |
+
|
| 72 |
+
+--+-----+----+----+-----+
|
| 73 |
+
|id|start|stop|var1|event|
|
| 74 |
+
+==+=====+====+====+=====+
|
| 75 |
+
| 1| 0| 10| 0.1|True |
|
| 76 |
+
+--+-----+----+----+-----+
|
| 77 |
+
| 2| 0| 12| 0.5|False|
|
| 78 |
+
+--+-----+----+----+-----+
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
You'll also have a secondary dataset that references future measurements. This could come in two "types". The first is when you have a variable that changes over time (ex: administering varying medication over time, or taking a temperature over time). The second type is an event-based dataset: an event happens at some time in the future (ex: an organ transplant occurs, or an intervention). We will address this second type later. The first type of dataset may look something like:
|
| 82 |
+
|
| 83 |
+
Example:
|
| 84 |
+
|
| 85 |
+
.. table::
|
| 86 |
+
|
| 87 |
+
+--+----+----+
|
| 88 |
+
|id|time|var2|
|
| 89 |
+
+==+====+====+
|
| 90 |
+
| 1| 0| 1.4|
|
| 91 |
+
+--+----+----+
|
| 92 |
+
| 1| 4| 1.2|
|
| 93 |
+
+--+----+----+
|
| 94 |
+
| 1| 8| 1.5|
|
| 95 |
+
+--+----+----+
|
| 96 |
+
| 2| 0| 1.6|
|
| 97 |
+
+--+----+----+
|
| 98 |
+
|
| 99 |
+
where ``time`` is the duration from the entry event. Here we see subject 1 had a change in their ``var2`` covariate at the end of time 4 and at the end of time 8. We can use :func:`lifelines.utils.add_covariate_to_timeline` to fold the covariate dataset into the original dataset.
|
| 100 |
+
|
| 101 |
+
|
| 102 |
+
.. code:: python
|
| 103 |
+
|
| 104 |
+
from lifelines.utils import add_covariate_to_timeline
|
| 105 |
+
|
| 106 |
+
cv = pd.DataFrame([
|
| 107 |
+
{'id': 1, 'time': 0, 'var2': 1.4},
|
| 108 |
+
{'id': 1, 'time': 4, 'var2': 1.2},
|
| 109 |
+
{'id': 1, 'time': 8, 'var2': 1.5},
|
| 110 |
+
{'id': 2, 'time': 0, 'var2': 1.6},
|
| 111 |
+
|
| 112 |
+
])
|
| 113 |
+
|
| 114 |
+
df = add_covariate_to_timeline(base_df, cv, duration_col="time", id_col="id", event_col="event")
|
| 115 |
+
|
| 116 |
+
|
| 117 |
+
.. table::
|
| 118 |
+
|
| 119 |
+
+--+-----+----+----+----+-----+
|
| 120 |
+
|id|start|stop|var1|var2|event|
|
| 121 |
+
+==+=====+====+====+====+=====+
|
| 122 |
+
| 1| 0| 4| 0.1| 1.4|False|
|
| 123 |
+
+--+-----+----+----+----+-----+
|
| 124 |
+
| 1| 4| 8| 0.1| 1.2|False|
|
| 125 |
+
+--+-----+----+----+----+-----+
|
| 126 |
+
| 1| 8| 10| 0.1| 1.5|True |
|
| 127 |
+
+--+-----+----+----+----+-----+
|
| 128 |
+
| 2| 0| 12| 0.5| 1.6|False|
|
| 129 |
+
+--+-----+----+----+----+-----+
|
| 130 |
+
|
| 131 |
+
From the above output, we can see that subject 1 changed state twice over the observation period, finally expiring at the end of time 10. Subject 2 was a censored case, and we lost track of them after time 12.
|
| 132 |
+
|
| 133 |
+
You may have multiple covariates you wish to add, so the above could be streamlined like so:
|
| 134 |
+
|
| 135 |
+
.. code:: python
|
| 136 |
+
|
| 137 |
+
from lifelines.utils import add_covariate_to_timeline
|
| 138 |
+
|
| 139 |
+
df = base_df.pipe(add_covariate_to_timeline, cv1, duration_col="time", id_col="id", event_col="event")\
|
| 140 |
+
.pipe(add_covariate_to_timeline, cv2, duration_col="time", id_col="id", event_col="event")\
|
| 141 |
+
.pipe(add_covariate_to_timeline, cv3, duration_col="time", id_col="id", event_col="event")
|
| 142 |
+
|
| 143 |
+
|
| 144 |
+
If your dataset is of the second type, that is, event-based, your dataset may look something like the following, where values in the matrix denote times since the subject's birth, and ``None`` or ``NaN`` represent the event not happening (subjects can be excluded if the event never occurred as well):
|
| 145 |
+
|
| 146 |
+
.. code-block:: python
|
| 147 |
+
|
| 148 |
+
event_df = pd.DataFrame([
|
| 149 |
+
{'id': 1, 'E1': 1.0},
|
| 150 |
+
{'id': 2, 'E1': None},
|
| 151 |
+
{'id': 3, 'E1': 3.0},
|
| 152 |
+
])
|
| 153 |
+
|
| 154 |
+
print(event_df)
|
| 155 |
+
|
| 156 |
+
"""
|
| 157 |
+
id E1
|
| 158 |
+
0 1 1.0
|
| 159 |
+
1 2 NaN
|
| 160 |
+
2 3 3.0
|
| 161 |
+
"""
|
| 162 |
+
...
|
| 163 |
+
|
| 164 |
+
Initially, this can't be added to our baseline DataFrame. However, using :func:`lifelines.utils.covariates_from_event_matrix` we can convert a DataFrame like this into one that can be easily added.
|
| 165 |
+
|
| 166 |
+
|
| 167 |
+
.. code-block:: python
|
| 168 |
+
|
| 169 |
+
from lifelines.utils import covariates_from_event_matrix
|
| 170 |
+
|
| 171 |
+
cv = covariates_from_event_matrix(event_df, id_col="id")
|
| 172 |
+
print(cv)
|
| 173 |
+
|
| 174 |
+
"""
|
| 175 |
+
id duration E1
|
| 176 |
+
0 1 1.0 1
|
| 177 |
+
1 2 inf 1
|
| 178 |
+
2 3 3.0 1
|
| 179 |
+
"""
|
| 180 |
+
|
| 181 |
+
base_df = pd.DataFrame([
|
| 182 |
+
{'id': 1, 'duration': 10, 'event': True, 'var1': 0.1},
|
| 183 |
+
{'id': 2, 'duration': 12, 'event': True, 'var1': 0.5}
|
| 184 |
+
])
|
| 185 |
+
base_df = to_long_format(base_df, duration_col="duration")
|
| 186 |
+
|
| 187 |
+
|
| 188 |
+
base_df = add_covariate_to_timeline(base_df, cv, duration_col="duration", id_col="id", event_col="event")
|
| 189 |
+
"""
|
| 190 |
+
start E1 var1 stop id event
|
| 191 |
+
0 0.0 NaN 0.1 1.0 1 False
|
| 192 |
+
1 1.0 1.0 0.1 10.0 1 True
|
| 193 |
+
2 0.0 NaN 0.5 12.0 2 True
|
| 194 |
+
"""
|
| 195 |
+
|
| 196 |
+
For an example of pulling datasets like this from a SQL-store, and other helper functions, see :ref:`Example SQL queries and transformations to get time varying data`.
|
| 197 |
+
|
| 198 |
+
Cumulative sums
|
| 199 |
+
#############################################
|
| 200 |
+
|
| 201 |
+
One additional flag on :func:`~lifelines.utils.add_covariate_to_timeline` that is of interest is the ``cumulative_sum`` flag. By default it is False, but turning it to True will perform a cumulative sum on the covariate before joining. This is useful if the covariates describe an incremental change, instead of a state update. For example, we may have measurements of drugs administered to a patient, and we want the covariate to reflect how much we have administered since the start. It also makes sense to cumulatively sum event columns. In contrast, a covariate to measure the temperature of the patient is a state update, and should not be summed. See :ref:`Example cumulative sums over time-varying covariates` to see an example of this.
|
| 202 |
+
|
| 203 |
+
Delaying time-varying covariates
|
| 204 |
+
#############################################
|
| 205 |
+
|
| 206 |
+
:func:`~lifelines.utils.add_covariate_to_timeline` also has an option for delaying, or shifting, a covariate so it changes later than originally observed. One may ask, why should one delay a time-varying covariate? Here's an example. Consider investigating the impact of smoking on mortality and available to us are time-varying observations of how many cigarettes are consumed each month. Unbeknownst to us, when a subject reaches critical illness levels, they are admitted to the hospital and their cigarette consumption drops to zero. Some expire while in hospital. If we used this dataset naively, we would see that *not* smoking leads to sudden death, and conversely, smoking helps your health! This is a case of reverse causation: the upcoming death event actually influences the covariates.
|
| 207 |
+
|
| 208 |
+
To handle this, you can delay the observations by a number of time periods. This has the possible effect of dropping rows outside the observation window.
|
| 209 |
+
|
| 210 |
+
.. code-block:: python
|
| 211 |
+
|
| 212 |
+
from lifelines.utils import add_covariate_to_timeline
|
| 213 |
+
|
| 214 |
+
cv = pd.DataFrame([
|
| 215 |
+
{'id': 1, 'time': 0, 'var2': 1.4},
|
| 216 |
+
{'id': 1, 'time': 4, 'var2': 1.2},
|
| 217 |
+
{'id': 1, 'time': 8, 'var2': 1.5},
|
| 218 |
+
{'id': 2, 'time': 0, 'var2': 1.6},
|
| 219 |
+
])
|
| 220 |
+
|
| 221 |
+
base_df = pd.DataFrame([
|
| 222 |
+
{'id': 1, 'duration': 10, 'event': True, 'var1': 0.1},
|
| 223 |
+
{'id': 2, 'duration': 12, 'event': True, 'var1': 0.5}
|
| 224 |
+
])
|
| 225 |
+
base_df = to_long_format(base_df, duration_col="duration")
|
| 226 |
+
|
| 227 |
+
base_df = add_covariate_to_timeline(base_df, cv, duration_col="time", id_col="id", event_col="event", delay=5)\
|
| 228 |
+
.fillna(0)
|
| 229 |
+
|
| 230 |
+
print(base_df)
|
| 231 |
+
"""
|
| 232 |
+
start var1 var2 stop id event
|
| 233 |
+
0 0 0.1 NaN 5.0 1 False
|
| 234 |
+
1 5 0.1 1.4 9.0 1 False
|
| 235 |
+
2 9 0.1 1.2 10.0 1 True
|
| 236 |
+
3 0 0.5 NaN 5.0 2 False
|
| 237 |
+
4 5 0.5 1.6 12.0 2 True
|
| 238 |
+
"""
|
| 239 |
+
|
| 240 |
+
|
| 241 |
+
Fitting the model
|
| 242 |
+
################################################
|
| 243 |
+
|
| 244 |
+
Once your dataset is in the correct orientation, we can use :class:`~lifelines.fitters.cox_time_varying_fitter.CoxTimeVaryingFitter` to fit the model to your data. The method is similar to :class:`~lifelines.fitters.coxph_fitter.CoxPHFitter`, except we need to tell the :meth:`~lifelines.fitters.cox_time_varying_fitter.CoxTimeVaryingFitter.fit` method about the additional time columns.
|
| 245 |
+
|
| 246 |
+
Fitting the Cox model to the data involves an iterative gradient descent. *lifelines* takes extra effort to help with convergence, so please be attentive to any warnings that appear. Fixing any warnings will generally help convergence. For further help, see :ref:`Problems with convergence in the Cox Proportional Hazard Model`.
|
| 247 |
+
|
| 248 |
+
|
| 249 |
+
.. code:: python
|
| 250 |
+
|
| 251 |
+
from lifelines import CoxTimeVaryingFitter
|
| 252 |
+
|
| 253 |
+
ctv = CoxTimeVaryingFitter(penalizer=0.1)
|
| 254 |
+
ctv.fit(base_df, id_col="id", event_col="event", start_col="start", stop_col="stop", show_progress=True)
|
| 255 |
+
ctv.print_summary()
|
| 256 |
+
ctv.plot()
|
| 257 |
+
|
| 258 |
+
|
| 259 |
+
Short note on prediction
|
| 260 |
+
################################################
|
| 261 |
+
|
| 262 |
+
Unlike the other regression models, prediction in a time-varying setting is not trivial. To predict, we would need to know the covariate values beyond the observed times, but if we knew that, we would also know if the subject was still alive or not! However, it is still possible to compute the hazard values of subjects at known observations, the baseline cumulative hazard rate, and baseline survival function. So while :class:`~lifelines.fitters.cox_time_varying_fitter.CoxTimeVaryingFitter` exposes prediction methods, there are logical limitations to what these predictions mean.
|
lifelines/source/docs/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
lifelines/source/docs/_static/custom.css
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.wy-nav-content {
|
| 2 |
+
max-width: 900px !important;
|
| 3 |
+
}
|
lifelines/source/docs/_templates/layout.html
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{% extends "!layout.html" %}
|
| 2 |
+
|
| 3 |
+
{%- block extrahead %}
|
| 4 |
+
<meta name="google-site-verification" content="9qrYvv6zs27wDrtk-LuEXmo-pKnAz2_w5g_hnHB9Ly8" />
|
| 5 |
+
|
| 6 |
+
{% endblock %}
|
lifelines/source/docs/conf.py
ADDED
|
@@ -0,0 +1,297 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
# -*- coding: utf-8 -*-
|
| 3 |
+
#
|
| 4 |
+
# lifelines documentation build configuration file, created by
|
| 5 |
+
# sphinx-quickstart on Sun Feb 2 17:10:21 2014.
|
| 6 |
+
#
|
| 7 |
+
# This file is execfile()d with the current directory set to its
|
| 8 |
+
# containing dir.
|
| 9 |
+
#
|
| 10 |
+
# Note that not all possible configuration values are present in this
|
| 11 |
+
# autogenerated file.
|
| 12 |
+
#
|
| 13 |
+
# All configuration values have a default; values that are commented out
|
| 14 |
+
# serve to show the default.
|
| 15 |
+
from datetime import date
|
| 16 |
+
import sys
|
| 17 |
+
import os
|
| 18 |
+
import lifelines
|
| 19 |
+
|
| 20 |
+
# If extensions (or modules to document with autodoc) are in another directory,
|
| 21 |
+
# add these directories to sys.path here. If the directory is relative to the
|
| 22 |
+
# documentation root, use os.path.abspath to make it absolute, like shown here.
|
| 23 |
+
sys.path.insert(0, os.path.abspath("."))
|
| 24 |
+
|
| 25 |
+
# -- General configuration ------------------------------------------------
|
| 26 |
+
|
| 27 |
+
# If your documentation needs a minimal Sphinx version, state it here.
|
| 28 |
+
# needs_sphinx = '1.0'
|
| 29 |
+
|
| 30 |
+
# Add any Sphinx extension module names here, as strings. They can be
|
| 31 |
+
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
|
| 32 |
+
# ones.
|
| 33 |
+
extensions = [
|
| 34 |
+
"sphinx.ext.coverage",
|
| 35 |
+
"sphinx.ext.mathjax",
|
| 36 |
+
"sphinx.ext.autodoc",
|
| 37 |
+
"sphinx.ext.autosectionlabel",
|
| 38 |
+
"sphinx.ext.napoleon",
|
| 39 |
+
"nbsphinx",
|
| 40 |
+
"sphinxcontrib.jquery",
|
| 41 |
+
]
|
| 42 |
+
|
| 43 |
+
exclude_patterns = ["_build", "jupyter_notebooks/.ipynb_checkpoints/*.ipynb"]
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
# Add any paths that contain templates here, relative to this directory.
|
| 47 |
+
templates_path = ["_templates"]
|
| 48 |
+
|
| 49 |
+
# The suffix of source filenames.
|
| 50 |
+
source_suffix = ".rst"
|
| 51 |
+
|
| 52 |
+
# The encoding of source files.
|
| 53 |
+
# source_encoding = 'utf-8-sig'
|
| 54 |
+
|
| 55 |
+
# The master toctree document.
|
| 56 |
+
master_doc = "index"
|
| 57 |
+
|
| 58 |
+
# General information about the project.
|
| 59 |
+
project = "lifelines"
|
| 60 |
+
copyright = "2014-{}, Cam Davidson-Pilon".format(date.today().year)
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
# The version info for the project you're documenting, acts as replacement for
|
| 64 |
+
# |version| and |release|, also used in various other places throughout the
|
| 65 |
+
# built documents.
|
| 66 |
+
#
|
| 67 |
+
# The short X.Y version.
|
| 68 |
+
|
| 69 |
+
# The short X.Y version.
|
| 70 |
+
version = lifelines.__version__
|
| 71 |
+
# The full version, including alpha/beta/rc tags.
|
| 72 |
+
release = version
|
| 73 |
+
|
| 74 |
+
# The language for content autogenerated by Sphinx. Refer to documentation
|
| 75 |
+
# for a list of supported languages.
|
| 76 |
+
# language = None
|
| 77 |
+
|
| 78 |
+
autoclass_content = "both" # include both class docstring and __init__
|
| 79 |
+
autodoc_default_flags = [
|
| 80 |
+
# Make sure that any autodoc declarations show the right members
|
| 81 |
+
"members",
|
| 82 |
+
"inherited-members",
|
| 83 |
+
"show-inheritance",
|
| 84 |
+
]
|
| 85 |
+
autosummary_generate = True # Make _autosummary files and include them
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
# There are two options for replacing |today|: either, you set today to some
|
| 89 |
+
# non-false value, then it is used:
|
| 90 |
+
# today = ''
|
| 91 |
+
# Else, today_fmt is used as the format for a strftime call.
|
| 92 |
+
# today_fmt = '%B %d, %Y'
|
| 93 |
+
|
| 94 |
+
# List of patterns, relative to source directory, that match files and
|
| 95 |
+
# directories to ignore when looking for source files.
|
| 96 |
+
# Keep the notebook-checkpoint pattern that this file assigns to
# ``exclude_patterns`` earlier on; a plain ``["_build"]`` reassignment here
# would silently discard it.
exclude_patterns = ["_build", "jupyter_notebooks/.ipynb_checkpoints/*.ipynb"]
|
| 97 |
+
|
| 98 |
+
# The reST default role (used for this markup: `text`) to use for all
|
| 99 |
+
# documents.
|
| 100 |
+
# default_role = None
|
| 101 |
+
|
| 102 |
+
# If true, '()' will be appended to :func: etc. cross-reference text.
|
| 103 |
+
# add_function_parentheses = True
|
| 104 |
+
|
| 105 |
+
# If true, the current module name will be prepended to all description
|
| 106 |
+
# unit titles (such as .. function::).
|
| 107 |
+
# add_module_names = True
|
| 108 |
+
|
| 109 |
+
# If true, sectionauthor and moduleauthor directives will be shown in the
|
| 110 |
+
# output. They are ignored by default.
|
| 111 |
+
# show_authors = False
|
| 112 |
+
|
| 113 |
+
# The name of the Pygments (syntax highlighting) style to use.
|
| 114 |
+
pygments_style = "sphinx"
|
| 115 |
+
|
| 116 |
+
# A list of ignored prefixes for module index sorting.
|
| 117 |
+
# modindex_common_prefix = []
|
| 118 |
+
|
| 119 |
+
# If true, keep warnings as "system message" paragraphs in the built documents.
|
| 120 |
+
# keep_warnings = False
|
| 121 |
+
|
| 122 |
+
|
| 123 |
+
# -- Options for HTML output ----------------------------------------------
|
| 124 |
+
|
| 125 |
+
# The theme to use for HTML and HTML Help pages. See the documentation for
|
| 126 |
+
# a list of builtin themes.
|
| 127 |
+
html_theme = "default"
|
| 128 |
+
# Theme options are theme-specific and customize the look and feel of a theme
|
| 129 |
+
# further. For a list of options available for each theme, see the
|
| 130 |
+
# documentation.
|
| 131 |
+
# html_theme_options = {}
|
| 132 |
+
|
| 133 |
+
# Add any paths that contain custom themes here, relative to this directory.
|
| 134 |
+
# html_theme_path = []
|
| 135 |
+
|
| 136 |
+
# The name for this set of Sphinx documents. If None, it defaults to
|
| 137 |
+
# "<project> v<release> documentation".
|
| 138 |
+
# html_title = None
|
| 139 |
+
|
| 140 |
+
# A shorter title for the navigation bar. Default is the same as html_title.
|
| 141 |
+
# html_short_title = None
|
| 142 |
+
|
| 143 |
+
# The name of an image file (relative to this directory) to place at the top
|
| 144 |
+
# of the sidebar.
|
| 145 |
+
# html_logo = None
|
| 146 |
+
|
| 147 |
+
# The name of an image file (within the static path) to use as favicon of the
|
| 148 |
+
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
|
| 149 |
+
# pixels large.
|
| 150 |
+
# html_favicon = None
|
| 151 |
+
|
| 152 |
+
# Add any paths that contain custom static files (such as style sheets) here,
|
| 153 |
+
# relative to this directory. They are copied after the builtin static files,
|
| 154 |
+
# so a file named "default.css" will overwrite the builtin "default.css".
|
| 155 |
+
html_static_path = ["_static"]
|
| 156 |
+
|
| 157 |
+
# Add any extra paths that contain custom files (such as robots.txt or
|
| 158 |
+
# .htaccess) here, relative to this directory. These files are copied
|
| 159 |
+
# directly to the root of the documentation.
|
| 160 |
+
# html_extra_path = []
|
| 161 |
+
|
| 162 |
+
# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
|
| 163 |
+
# using the given strftime format.
|
| 164 |
+
# html_last_updated_fmt = '%b %d, %Y'
|
| 165 |
+
|
| 166 |
+
# If true, SmartyPants will be used to convert quotes and dashes to
|
| 167 |
+
# typographically correct entities.
|
| 168 |
+
# html_use_smartypants = True
|
| 169 |
+
|
| 170 |
+
# Custom sidebar templates, maps document names to template names.
|
| 171 |
+
# html_sidebars = {}
|
| 172 |
+
|
| 173 |
+
# Additional templates that should be rendered to pages, maps page names to
|
| 174 |
+
# template names.
|
| 175 |
+
# html_additional_pages = {}
|
| 176 |
+
|
| 177 |
+
# If false, no module index is generated.
|
| 178 |
+
# html_domain_indices = True
|
| 179 |
+
|
| 180 |
+
# If false, no index is generated.
|
| 181 |
+
# html_use_index = True
|
| 182 |
+
|
| 183 |
+
# If true, the index is split into individual pages for each letter.
|
| 184 |
+
# html_split_index = False
|
| 185 |
+
|
| 186 |
+
# If true, links to the reST sources are added to the pages.
|
| 187 |
+
# html_show_sourcelink = True
|
| 188 |
+
|
| 189 |
+
# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
|
| 190 |
+
# html_show_sphinx = True
|
| 191 |
+
|
| 192 |
+
# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
|
| 193 |
+
# html_show_copyright = True
|
| 194 |
+
|
| 195 |
+
# If true, an OpenSearch description file will be output, and all pages will
|
| 196 |
+
# contain a <link> tag referring to it. The value of this option must be the
|
| 197 |
+
# base URL from which the finished HTML is served.
|
| 198 |
+
# html_use_opensearch = ''
|
| 199 |
+
|
| 200 |
+
# This is the file name suffix for HTML files (e.g. ".xhtml").
|
| 201 |
+
# html_file_suffix = None
|
| 202 |
+
|
| 203 |
+
# Output file base name for HTML help builder.
|
| 204 |
+
htmlhelp_basename = "lifelinesdoc"
|
| 205 |
+
|
| 206 |
+
# treat ``x, y : type`` as vars x and y instead of default ``y(x,) : type``
|
| 207 |
+
napoleon_use_param = False
|
| 208 |
+
|
| 209 |
+
|
| 210 |
+
# -- Options for LaTeX output ---------------------------------------------
|
| 211 |
+
|
| 212 |
+
latex_elements = {
|
| 213 |
+
# The paper size ('letterpaper' or 'a4paper').
|
| 214 |
+
#'papersize': 'letterpaper',
|
| 215 |
+
# The font size ('10pt', '11pt' or '12pt').
|
| 216 |
+
#'pointsize': '10pt',
|
| 217 |
+
# Additional stuff for the LaTeX preamble.
|
| 218 |
+
#'preamble': '',
|
| 219 |
+
}
|
| 220 |
+
|
| 221 |
+
# Grouping the document tree into LaTeX files. List of tuples
|
| 222 |
+
# (source start file, target name, title,
|
| 223 |
+
# author, documentclass [howto, manual, or own class]).
|
| 224 |
+
latex_documents = [("index", "lifelines.tex", "lifelines Documentation", "Cam Davidson-Pilon", "manual")]
|
| 225 |
+
|
| 226 |
+
# The name of an image file (relative to this directory) to place at the top of
|
| 227 |
+
# the title page.
|
| 228 |
+
# latex_logo = None
|
| 229 |
+
|
| 230 |
+
# For "manual" documents, if this is true, then toplevel headings are parts,
|
| 231 |
+
# not chapters.
|
| 232 |
+
# latex_use_parts = False
|
| 233 |
+
|
| 234 |
+
# If true, show page references after internal links.
|
| 235 |
+
# latex_show_pagerefs = False
|
| 236 |
+
|
| 237 |
+
# If true, show URL addresses after external links.
|
| 238 |
+
# latex_show_urls = False
|
| 239 |
+
|
| 240 |
+
# Documents to append as an appendix to all manuals.
|
| 241 |
+
# latex_appendices = []
|
| 242 |
+
|
| 243 |
+
# If false, no module index is generated.
|
| 244 |
+
# latex_domain_indices = True
|
| 245 |
+
|
| 246 |
+
|
| 247 |
+
# -- Options for manual page output ---------------------------------------
|
| 248 |
+
|
| 249 |
+
# One entry per manual page. List of tuples
|
| 250 |
+
# (source start file, name, description, authors, manual section).
|
| 251 |
+
man_pages = [("index", "lifelines", "lifelines Documentation", ["Cam Davidson-Pilon"], 1)]
|
| 252 |
+
|
| 253 |
+
# If true, show URL addresses after external links.
|
| 254 |
+
# man_show_urls = False
|
| 255 |
+
|
| 256 |
+
|
| 257 |
+
# nbsphinx
|
| 258 |
+
nbsphinx_prolog = r"""
|
| 259 |
+
.. image:: http://i.imgur.com/EOowdSD.png
|
| 260 |
+
|
| 261 |
+
-------------------------------------
|
| 262 |
+
|
| 263 |
+
|
| 264 |
+
"""
|
| 265 |
+
|
| 266 |
+
|
| 267 |
+
# -- Options for Texinfo output -------------------------------------------
|
| 268 |
+
|
| 269 |
+
# Grouping the document tree into Texinfo files. List of tuples
|
| 270 |
+
# (source start file, target name, title, author,
|
| 271 |
+
# dir menu entry, description, category)
|
| 272 |
+
texinfo_documents = [
|
| 273 |
+
("index", "lifelines", "lifelines Documentation", "Cam Davidson-Pilon", "lifelines", "Survival analysis in Python.")
|
| 274 |
+
]
|
| 275 |
+
|
| 276 |
+
# Documents to append as an appendix to all manuals.
|
| 277 |
+
# texinfo_appendices = []
|
| 278 |
+
|
| 279 |
+
# If false, no module index is generated.
|
| 280 |
+
# texinfo_domain_indices = True
|
| 281 |
+
|
| 282 |
+
# How to display URL addresses: 'footnote', 'no', or 'inline'.
|
| 283 |
+
# texinfo_show_urls = 'footnote'
|
| 284 |
+
|
| 285 |
+
# If true, do not generate a @detailmenu in the "Top" node's menu.
|
| 286 |
+
# texinfo_no_detailmenu = False
|
| 287 |
+
|
| 288 |
+
# use RTFD theme locally
|
| 289 |
+
# on_rtd is whether we are on readthedocs.org, this line of code grabbed from docs.readthedocs.org
|
| 290 |
+
import sphinx_rtd_theme
|
| 291 |
+
|
| 292 |
+
html_theme = "sphinx_rtd_theme"
|
| 293 |
+
html_theme_path = [sphinx_rtd_theme.get_html_theme_path(), "."]
|
| 294 |
+
|
| 295 |
+
|
| 296 |
+
def setup(app):
|
| 297 |
+
app.add_css_file("custom.css")
|
lifelines/source/docs/conftest.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
from os import chdir, getcwd
|
| 3 |
+
from shutil import rmtree
|
| 4 |
+
from tempfile import mkdtemp
|
| 5 |
+
import pytest
|
| 6 |
+
from sybil import Sybil
|
| 7 |
+
from sybil.parsers.codeblock import CodeBlockParser
|
| 8 |
+
from sybil.parsers.doctest import DocTestParser
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
@pytest.fixture(scope="module")
def tempdir():
    """Yield a fresh temporary directory and run from inside it.

    The working directory is switched to the new directory while the fixture
    is active. Afterwards the previous working directory is restored and the
    temporary directory is deleted.
    """
    # there are better ways to do temp directories, but it's a simple example:
    path = mkdtemp()
    cwd = getcwd()
    try:
        chdir(path)
        yield path
    finally:
        # Leave the directory before deleting it.
        chdir(cwd)
        rmtree(path)
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
# uncomment to use locally.
|
| 25 |
+
# run `py.test` in the docs folder
|
| 26 |
+
"""
|
| 27 |
+
pytest_collect_file = Sybil(
|
| 28 |
+
parsers=[DocTestParser(), CodeBlockParser(future_imports=["print_function"])], pattern="*.rst", fixtures=["tempdir"]
|
| 29 |
+
).pytest()
|
| 30 |
+
"""
|
lifelines/source/docs/docs_requirements.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
-r ../reqs/docs-requirements.txt
|
lifelines/source/docs/fitters/regression/AalenAdditiveFitter.rst
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
AalenAdditiveFitter
|
| 3 |
+
------------------------------------------------
|
| 4 |
+
|
| 5 |
+
.. automodule:: lifelines.fitters.aalen_additive_fitter
|
| 6 |
+
:members:
|
| 7 |
+
:undoc-members:
|
lifelines/source/docs/fitters/regression/CRCSplineFitter.rst
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
CRCSplineFitter
|
| 2 |
+
------------------------------------------
|
| 3 |
+
|
| 4 |
+
.. automodule:: lifelines.fitters.crc_spline_fitter
|
| 5 |
+
:members:
|
| 6 |
+
:undoc-members:
|
lifelines/source/docs/fitters/regression/CoxPHFitter.rst
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
CoxPHFitter
|
| 2 |
+
--------------------------------------
|
| 3 |
+
|
| 4 |
+
.. autoclass:: lifelines.fitters.coxph_fitter.CoxPHFitter
|
| 5 |
+
:members:
|
| 6 |
+
:undoc-members:
|
| 7 |
+
|
| 8 |
+
.. method:: plot_covariate_groups()
|
| 9 |
+
|
| 10 |
+
see :meth:`~lifelines.fitters.coxph_fitter.SemiParametricPHFitter.plot_covariate_groups`
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
.. method:: plot_partial_effects_on_outcome()
|
| 14 |
+
|
| 15 |
+
see :meth:`~lifelines.fitters.coxph_fitter.SemiParametricPHFitter.plot_partial_effects_on_outcome`
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
.. method:: plot()
|
| 19 |
+
|
| 20 |
+
see :meth:`~lifelines.fitters.coxph_fitter.SemiParametricPHFitter.plot`
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
.. method:: predict_median()
|
| 24 |
+
|
| 25 |
+
see :meth:`~lifelines.fitters.coxph_fitter.SemiParametricPHFitter.predict_median`
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
.. method:: predict_expectation()
|
| 29 |
+
|
| 30 |
+
see :meth:`~lifelines.fitters.coxph_fitter.SemiParametricPHFitter.predict_expectation`
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
.. method:: predict_percentile()
|
| 34 |
+
|
| 35 |
+
see :meth:`~lifelines.fitters.coxph_fitter.SemiParametricPHFitter.predict_percentile`
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
.. method:: predict_survival_function()
|
| 39 |
+
|
| 40 |
+
see :meth:`~lifelines.fitters.coxph_fitter.SemiParametricPHFitter.predict_survival_function`
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
.. method:: predict_partial_hazard()
|
| 44 |
+
|
| 45 |
+
see :meth:`~lifelines.fitters.coxph_fitter.SemiParametricPHFitter.predict_partial_hazard`
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
.. method:: predict_log_partial_hazard()
|
| 49 |
+
|
| 50 |
+
see :meth:`~lifelines.fitters.coxph_fitter.SemiParametricPHFitter.predict_log_partial_hazard`
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
.. method:: predict_cumulative_hazard()
|
| 54 |
+
|
| 55 |
+
see :meth:`~lifelines.fitters.coxph_fitter.SemiParametricPHFitter.predict_cumulative_hazard`
|
| 56 |
+
|
| 57 |
+
.. method:: score()
|
| 58 |
+
|
| 59 |
+
see :meth:`~lifelines.fitters.coxph_fitter.SemiParametricPHFitter.score`
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
.. method:: log_likelihood_ratio_test()
|
| 63 |
+
|
| 64 |
+
see :meth:`~lifelines.fitters.coxph_fitter.SemiParametricPHFitter.log_likelihood_ratio_test`
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
.. autoclass:: lifelines.fitters.coxph_fitter.SemiParametricPHFitter
|
| 68 |
+
:members:
|
| 69 |
+
|
| 70 |
+
.. autoclass:: lifelines.fitters.coxph_fitter.ParametricSplinePHFitter
|
| 71 |
+
:members:
|
lifelines/source/docs/fitters/regression/CoxTimeVaryingFitter.rst
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
CoxTimeVaryingFitter
|
| 2 |
+
---------------------------------------------------
|
| 3 |
+
|
| 4 |
+
.. automodule:: lifelines.fitters.cox_time_varying_fitter
|
| 5 |
+
:members:
|
| 6 |
+
:undoc-members:
|
lifelines/source/docs/fitters/regression/GeneralizedGammaRegressionFitter.rst
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
GeneralizedGammaRegressionFitter
|
| 2 |
+
------------------------------------------
|
| 3 |
+
|
| 4 |
+
.. automodule:: lifelines.fitters.generalized_gamma_regression_fitter
|
| 5 |
+
:members:
|
| 6 |
+
:undoc-members:
|
lifelines/source/docs/fitters/regression/LogLogisticAFTFitter.rst
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
LogLogisticAFTFitter
|
| 3 |
+
-----------------------------------------------------
|
| 4 |
+
|
| 5 |
+
.. automodule:: lifelines.fitters.log_logistic_aft_fitter
|
| 6 |
+
:members:
|
| 7 |
+
:undoc-members:
|