Commit ·
28a6ef1
0
Parent(s):
adding the hugging face deployment pipeline
Browse files- .gitattributes +1 -0
- .github/workflows/deployement.yml +26 -0
- .gitignore +2 -0
- Makefile +31 -0
- app.py +110 -0
- models/aging_score_autoencoder.keras +3 -0
- notebooks/aging_score_autoencoder.ipynb +0 -0
- readme.md +32 -0
- requirements.txt +9 -0
.gitattributes
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
*.keras filter=lfs diff=lfs merge=lfs -text
|
.github/workflows/deployement.yml
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: CI

# Controls when the workflow will run
on:
  push:
    branches: [main]

  # to run this workflow manually from the Actions tab
  workflow_dispatch:

jobs:
  sync-to-hub:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          # Full history is needed because the whole branch is pushed to the Space.
          fetch-depth: 0
          # *.keras files are tracked with Git LFS (see .gitattributes); fetch the
          # real objects so the push below can upload the model itself.
          lfs: true
      # The push targets the Space URL directly, so no named remote is required.
      - name: Push to hub
        env:
          HF: ${{ secrets.HG }}
        run: |
          git push --force https://Ghaithhmz:${HF}@huggingface.co/spaces/Ghaithhmz/aging_score_autoencoder_agent main
|
.gitignore
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
venv/
|
| 2 |
+
|
Makefile
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Variables
VENV = venv
# The virtual environment keeps its executables in a different folder on Windows.
ifeq ($(OS),Windows_NT)
    BIN = $(VENV)/Scripts
else
    BIN = $(VENV)/bin
endif

PYTHON = $(BIN)/python
PIP = $(BIN)/pip

# $(VENV) expands to the real directory "venv"; it is deliberately NOT listed
# here, otherwise make would treat it as always out of date and re-create the
# environment on every "make install".
.PHONY: install run clean help

help:
	@echo "Available commands:"
	@echo " install : Create virtual environment and install dependencies"
	@echo " run : Run the Gradio application via virtual environment"
	@echo " clean : Remove temporary files and virtual environment"

# Create the virtual environment only when the directory does not exist yet.
$(VENV):
	python -m venv $(VENV)

install: $(VENV)
	$(PIP) install -r requirements.txt

run:
	$(PYTHON) app.py

clean:
	rm -rf __pycache__ .pytest_cache
	rm -rf $(VENV)
|
app.py
ADDED
|
@@ -0,0 +1,110 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import tensorflow as tf
|
| 3 |
+
import numpy as np
|
| 4 |
+
import pandas as pd
|
| 5 |
+
import os
|
| 6 |
+
import matplotlib.pyplot as plt
|
| 7 |
+
|
| 8 |
+
# Location of the trained model, resolved against this file's directory so the
# app finds it regardless of the working directory it is launched from.
MODEL_PATH = os.path.join(
    os.path.dirname(os.path.abspath(__file__)),
    "models",
    "aging_score_autoencoder.keras",
)

# Global handles filled in by load_resources(); each stays None when the
# corresponding loading step fails.
model = None
encoder_model = None


def load_resources():
    """Load the Keras model and derive an encoder that outputs the latent layer.

    Sets the module-level ``model`` and ``encoder_model``. Failures are
    reported on stdout instead of being raised, so the application can still
    start: ``model`` stays ``None`` when the file is missing or cannot be
    loaded, and ``encoder_model`` stays ``None`` when the sub-model ending at
    the layer named ``"latent"`` cannot be built.
    """
    global model, encoder_model

    if not os.path.exists(MODEL_PATH):
        print(f"Error loading model: file not found at {MODEL_PATH}")
        return

    try:
        # Load the full multi-output model
        model = tf.keras.models.load_model(MODEL_PATH)
    except Exception as e:
        print(f"Error loading model: {e}")
        return

    # Create a sub-model to extract the latent space (Aging Score).
    # The layer is looked up by the name 'latent'.
    try:
        latent_layer = model.get_layer("latent")
        encoder_model = tf.keras.Model(inputs=model.input, outputs=latent_layer.output)
    except Exception as e:
        # Best effort: age prediction still works without the encoder. Catching
        # Exception (not a bare except) lets KeyboardInterrupt/SystemExit through
        # and the message now reports the actual cause.
        print(
            f"Warning: could not build the 'latent' encoder ({e}). "
            "Only age prediction will be available."
        )
    else:
        print("Model and Encoder extracted successfully.")


# Initial load
load_resources()
|
| 34 |
+
|
| 35 |
+
def predict_aging(input_file, chron_age):
    """Estimate biological age and a latent aging score from an uploaded table.

    Args:
        input_file: Uploaded file, either an object exposing its path as
            ``.name`` or a plain path string. Paths ending in ``.csv`` are read
            as CSV, anything else as Parquet.
        chron_age: Chronological age in years, compared with the prediction.

    Returns:
        A ``(markdown_text, figure)`` pair. On any failure the text carries the
        error message and the figure is ``None``.
    """
    # The click handler is wired to exactly two outputs, so every return path
    # must yield exactly two values.
    if model is None:
        return "Error: Model not found.", None
    if input_file is None:
        return "Erreur : aucun fichier fourni.", None
    if chron_age is None:
        return "Erreur : âge chronologique manquant.", None

    try:
        # Load data (accept both file-like objects with .name and plain paths)
        path = str(getattr(input_file, "name", input_file))
        if path.lower().endswith(".csv"):
            df = pd.read_csv(path)
        else:
            df = pd.read_parquet(path)

        if df.empty:
            return "Erreur : le fichier ne contient aucun échantillon.", None

        # Feature selection (genes only): every non-metadata column is a feature.
        META_COLS = ["sample_id", "subject_id", "tissue", "sex", "age", "death_time", "estimated_age"]
        gene_cols = [c for c in df.columns if c not in META_COLS]
        X = df[gene_cols].values

        # Preprocessing: log1p + standard normalization.
        # NOTE(review): mean/std are computed over the whole uploaded matrix,
        # not taken from training statistics - confirm this matches the notebook.
        X_scaled = np.log1p(X)
        X_scaled = (X_scaled - np.mean(X_scaled)) / (np.std(X_scaled) + 1e-8)

        # Inference: model.predict returns [reconstruction, age_prediction].
        # Only the first row of the file is reported.
        _, age_pred = model.predict(X_scaled)
        biological_age = float(age_pred[0][0])

        # Latent aging score: mean of the first sample's latent vector, used as
        # a proxy for the score intensity. Formatted here so the "N/A" fallback
        # never reaches a float format specifier.
        score_text = "N/A"
        if encoder_model is not None:
            latent_vector = encoder_model.predict(X_scaled)
            score_text = f"{float(np.mean(latent_vector[0])):.4f}"

        # Interpretation: a gap of more than 2 years either way is flagged.
        rhythm = biological_age - chron_age
        if rhythm > 2:
            status = "Vieillissement Accéléré ⚠️"
        elif rhythm < -2:
            status = "Vieillissement Ralenti ✅"
        else:
            status = "Vieillissement Normal 🆗"

        # Summary
        res_text = (
            "\n"
            "### Résultats d'Analyse\n"
            f"- **Âge Chronologique :** {chron_age} ans\n"
            f"- **Âge Biologique (Estimé) :** {biological_age:.2f} ans\n"
            f"- **Score de Vieillissement (Latent) :** {score_text}\n"
            f"- **Statut :** {status}\n"
        )

        # Plot
        fig, ax = plt.subplots(figsize=(6, 2))
        ax.barh(['Rythme'], [rhythm], color='#3498db')
        ax.axvline(0, color='black', linestyle='--')
        ax.set_title("Différentiel de Vieillissement (Bio - Chrono)")
        ax.set_xlim(-15, 15)
        # Unregister the figure from pyplot so repeated requests do not pile up
        # open figures; the Figure object itself is still returned for rendering.
        plt.close(fig)

        return res_text, fig

    except Exception as e:
        return f"Erreur : {str(e)}", None
|
| 92 |
+
|
| 93 |
+
# Gradio Interface: inputs in the left column, results in the right column.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🧠 Aging Score Bio-Predictor")
    gr.Markdown("Analyse du rythme de vieillissement biologique via Autoencoder supervisé.")

    with gr.Row():
        with gr.Column():
            input_file = gr.File(label="Données Transcriptomiques (18k gènes)")
            chron_age = gr.Number(label="Âge Chronologique Réel", value=40)
            btn = gr.Button("Calculer l'Aging Score", variant="primary")

        with gr.Column():
            output_text = gr.Markdown()
            output_plot = gr.Plot()

    # predict_aging must return one value per component listed in `outputs`.
    btn.click(fn=predict_aging, inputs=[input_file, chron_age], outputs=[output_text, output_plot])

# Start the server only when this file is executed as a script, so importing
# the module (e.g. to reuse `demo` or `predict_aging`) does not launch it.
if __name__ == "__main__":
    demo.launch()
|
models/aging_score_autoencoder.keras
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f07d03ec82d2cf747469caf948784e874143230edca4e201009872134f0e68e0
|
| 3 |
+
size 226125378
|
notebooks/aging_score_autoencoder.ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
readme.md
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: Aging Score Prediction
|
| 3 |
+
emoji: 🧠
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: green
|
| 6 |
+
sdk: gradio
|
| 7 |
+
app_file: app.py
|
| 8 |
+
pinned: false
|
| 9 |
+
---
|
| 10 |
+
|
| 11 |
+
# Aging Score Biologique via Autoencoder Supervisé
|
| 12 |
+
|
| 13 |
+
## 🎯 Formulation Officielle
|
| 14 |
+
L’objectif de ce projet est de développer un modèle de deep learning basé sur un autoencoder supervisé afin d’apprendre une représentation latente de l’expression génique humaine et de définir un score de vieillissement biologique (**“Aging Score”**), permettant de caractériser le rythme de vieillissement d’un individu à partir de données transcriptomiques multi-tissus.
|
| 15 |
+
|
| 16 |
+
## 🧬 Concept Central
|
| 17 |
+
Au lieu de prédire directement la durée de vie d’une personne, on apprend un score de vieillissement biologique à partir de l’expression des gènes. L'expression génique est l'empreinte moléculaire de l'état biologique d'un individu.
|
| 18 |
+
|
| 19 |
+
## 🏗️ Architecture du Modèle
|
| 20 |
+
- **Encoder** : Compresse les 18 000 gènes vers un espace latent.
|
| 21 |
+
- **Latent Space** : Représentation réduite de l'information biologique (32 dimensions).
|
| 22 |
+
- **Decoder** : Assure que l'espace latent contient assez d'informations pour reconstruire les gènes originaux.
|
| 23 |
+
- **Tête de Régression** : Aligne l'espace latent avec l'âge chronologique.
|
| 24 |
+
|
| 25 |
+
## 📊 Interprétation
|
| 26 |
+
- **Âge Biologique < Âge Chronologique** : Vieillissement ralenti (profil protecteur).
|
| 27 |
+
- **Âge Biologique > Âge Chronologique** : Vieillissement accéléré (facteurs de risque potentiels).
|
| 28 |
+
|
| 29 |
+
## 🛠️ Installation locale
|
| 30 |
+
```bash
|
| 31 |
+
make install
|
| 32 |
+
make run
```
|
requirements.txt
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio
|
| 2 |
+
tensorflow
|
| 3 |
+
pandas
|
| 4 |
+
numpy
|
| 5 |
+
scikit-learn
|
| 6 |
+
joblib
|
| 7 |
+
matplotlib
|
| 8 |
+
huggingface_hub
|
| 9 |
+
pyarrow
|