Spaces:

SreekarB
/

AphasiaPred

Sleeping

App Files Community

SreekarB committed on Mar 12, 2025

Commit

b507484

verified ·

1 Parent(s): 7dd36eb

Upload 11 files

Browse files

Files changed (10) hide show

app.py +14 -5
demo_fc_visualization.py +3 -0
direct_fc_visualization.py +3 -0
fc_visualization.py +3 -0
huggingface_fc_visualization.py +3 -0
main.py +3 -0
rcf_prediction.py +30 -1
utils.py +6 -4
visualization.py +3 -0
visualize_fc.py +3 -0

app.py CHANGED Viewed

@@ -2,6 +2,9 @@ import gradio as gr
 from main import run_analysis
 from rcf_prediction import AphasiaTreatmentPredictor
 import numpy as np
 import matplotlib.pyplot as plt
 from data_preprocessing import preprocess_fmri_to_fc, process_single_fmri
 from visualization import plot_fc_matrices, plot_learning_curves
@@ -2091,13 +2094,14 @@ def create_interface():
         def handle_fc_visualization():
             """Generate FC visualization using stored data or synthetic data"""
             try:
                 # Check if we have trained VAE and data
                 if app_state.get('vae_trained', False) and app_state.get('vae') is not None:
                     logger.info("Visualizing FC matrices from trained VAE")
-                    # Get visualization data
-                    from visualization import plot_fc_matrices
                     # If we have stored original and reconstructed matrices, use them
                     if app_state.get('original_fc') is not None and app_state.get('reconstructed_fc') is not None:
                         original = app_state['original_fc']
@@ -2208,6 +2212,10 @@ def create_interface():
         # Tab 2: Random Forest Training Handler
         def handle_rf_training(prediction_type, outcome_variable, rf_n_estimators, rf_max_depth, rf_cv_folds):
             """Train the Random Forest model using the VAE latent representations"""
             # Check if VAE has been trained or if we can use synthetic data
             if not app_state.get('vae_trained', False) or app_state.get('latents') is None:
                 # Instead of error, create synthetic data for demonstration
@@ -2273,8 +2281,6 @@ def create_interface():
                 # Train Random Forest predictor
                 from rcf_prediction import AphasiaTreatmentPredictor
-                import pandas as pd
-                import numpy as np
                 # Get treatment outcomes data
                 # Check if we already created synthetic data
@@ -2429,6 +2435,9 @@ def create_interface():
         def handle_treatment_prediction(fmri_file, age, sex, months, wab):
             """Predict treatment outcome for a new patient"""
             try:
                 # First, check if we have saved models we can use
                 rf_model_path = "results/treatment_predictor.joblib"
                 rf_available = os.path.exists(rf_model_path)

 from main import run_analysis
 from rcf_prediction import AphasiaTreatmentPredictor
 import numpy as np
+# Configure matplotlib for headless environment
+import matplotlib
+matplotlib.use('Agg')  # Use non-interactive backend
 import matplotlib.pyplot as plt
 from data_preprocessing import preprocess_fmri_to_fc, process_single_fmri
 from visualization import plot_fc_matrices, plot_learning_curves
         def handle_fc_visualization():
             """Generate FC visualization using stored data or synthetic data"""
             try:
+                # Import necessary packages
+                import numpy as np
+                from visualization import plot_fc_matrices
                 # Check if we have trained VAE and data
                 if app_state.get('vae_trained', False) and app_state.get('vae') is not None:
                     logger.info("Visualizing FC matrices from trained VAE")
                     # If we have stored original and reconstructed matrices, use them
                     if app_state.get('original_fc') is not None and app_state.get('reconstructed_fc') is not None:
                         original = app_state['original_fc']
         # Tab 2: Random Forest Training Handler
         def handle_rf_training(prediction_type, outcome_variable, rf_n_estimators, rf_max_depth, rf_cv_folds):
             """Train the Random Forest model using the VAE latent representations"""
+            # Import necessary packages
+            import numpy as np
+            import pandas as pd
             # Check if VAE has been trained or if we can use synthetic data
             if not app_state.get('vae_trained', False) or app_state.get('latents') is None:
                 # Instead of error, create synthetic data for demonstration
                 # Train Random Forest predictor
                 from rcf_prediction import AphasiaTreatmentPredictor
                 # Get treatment outcomes data
                 # Check if we already created synthetic data
         def handle_treatment_prediction(fmri_file, age, sex, months, wab):
             """Predict treatment outcome for a new patient"""
             try:
+                # Import necessary packages
+                import numpy as np
                 # First, check if we have saved models we can use
                 rf_model_path = "results/treatment_predictor.joblib"
                 rf_available = os.path.exists(rf_model_path)

demo_fc_visualization.py CHANGED Viewed

@@ -3,6 +3,9 @@ Demo script to visualize FC matrices from real fMRI data using nilearn's built-i
 """
 import numpy as np
 import matplotlib.pyplot as plt
 from nilearn import datasets
 from nilearn import input_data, connectome

 """
 import numpy as np
+# Configure matplotlib for headless environment
+import matplotlib
+matplotlib.use('Agg')  # Use non-interactive backend
 import matplotlib.pyplot as plt
 from nilearn import datasets
 from nilearn import input_data, connectome

direct_fc_visualization.py CHANGED Viewed

@@ -7,6 +7,9 @@ This script creates and visualizes FC matrices directly, without relying on fMRI
 import os
 import numpy as np
 import matplotlib.pyplot as plt
 from visualization import vector_to_matrix

 import os
 import numpy as np
+# Configure matplotlib for headless environment
+import matplotlib
+matplotlib.use('Agg')  # Use non-interactive backend
 import matplotlib.pyplot as plt
 from visualization import vector_to_matrix

fc_visualization.py CHANGED Viewed

@@ -6,6 +6,9 @@ independently from the prediction pipeline.
 """
 import numpy as np
 import matplotlib.pyplot as plt
 from pathlib import Path
 import argparse

 """
 import numpy as np
+# Configure matplotlib for headless environment
+import matplotlib
+matplotlib.use('Agg')  # Use non-interactive backend
 import matplotlib.pyplot as plt
 from pathlib import Path
 import argparse

huggingface_fc_visualization.py CHANGED Viewed

@@ -4,6 +4,9 @@ Script to visualize FC matrices from HuggingFace dataset, comparing original FC
 import os
 import numpy as np
 import matplotlib.pyplot as plt
 from datasets import load_dataset
 from fc_visualization import FCVisualizer

 import os
 import numpy as np
+# Configure matplotlib for headless environment
+import matplotlib
+matplotlib.use('Agg')  # Use non-interactive backend
 import matplotlib.pyplot as plt
 from datasets import load_dataset
 from fc_visualization import FCVisualizer

main.py CHANGED Viewed

@@ -8,6 +8,9 @@ from vae_model import DemoVAE
 from rcf_prediction import AphasiaTreatmentPredictor
 from visualization import plot_fc_matrices, plot_learning_curves
 from config import MODEL_CONFIG, PREDICTION_CONFIG
 import matplotlib.pyplot as plt
 def run_analysis(data_dir="data",

 from rcf_prediction import AphasiaTreatmentPredictor
 from visualization import plot_fc_matrices, plot_learning_curves
 from config import MODEL_CONFIG, PREDICTION_CONFIG
+# Configure matplotlib for headless environment
+import matplotlib
+matplotlib.use('Agg')  # Use non-interactive backend
 import matplotlib.pyplot as plt
 def run_analysis(data_dir="data",

rcf_prediction.py CHANGED Viewed

@@ -3,6 +3,9 @@ from sklearn.ensemble import RandomForestRegressor
 from sklearn.model_selection import cross_val_score, KFold
 import pandas as pd
 from sklearn.metrics import mean_squared_error, r2_score
 import matplotlib.pyplot as plt
 import os
 import joblib
@@ -82,6 +85,11 @@ class AphasiaTreatmentPredictor:
         self.feature_names = feature_names
         logger.info(f"Training {self.prediction_type} model with {X.shape[0]} samples and {X.shape[1]} features")
         self.model.fit(X, treatment_outcomes)
         # Calculate feature importance
@@ -90,6 +98,7 @@ class AphasiaTreatmentPredictor:
             'importance': self.model.feature_importances_
         }).sort_values('importance', ascending=False)
         return self
     def predict(self, latents, demographics):
@@ -137,12 +146,14 @@ class AphasiaTreatmentPredictor:
             n_splits = adjusted_n_splits
         logger.info(f"Running {n_splits}-fold cross-validation on {sample_count} samples")
         # Use stratified KFold for regression to ensure balanced folds
         # or LeaveOneOut for very small datasets
         if sample_count <= 5:
             from sklearn.model_selection import LeaveOneOut
             logger.warning(f"Using Leave-One-Out CV for small dataset with {sample_count} samples")
             kf = LeaveOneOut()
             cv_iterator = kf.split(X)
         else:
@@ -158,11 +169,14 @@ class AphasiaTreatmentPredictor:
             X_train, X_test = X[train_idx], X[test_idx]
             y_train, y_test = treatment_outcomes[train_idx], treatment_outcomes[test_idx]
             # Clone the model for this fold
             fold_model = RandomForestRegressor(
                 n_estimators=self.n_estimators,
                 max_depth=self.max_depth,
-                random_state=self.random_state
             )
             # Train the model
@@ -183,6 +197,7 @@ class AphasiaTreatmentPredictor:
             else:
                 r2 = np.nan
                 logger.warning(f"Fold {fold+1}: R² not calculated (insufficient samples or variance)")
             # MSE can always be calculated
             mse = rmse**2
@@ -213,6 +228,12 @@ class AphasiaTreatmentPredictor:
             fold_metrics.append(metrics)
             logger.info(f"Fold {fold+1} metrics: {metrics}")
         # Calculate average metrics
         avg_metrics = {}
         for key in fold_metrics[0].keys():
@@ -225,7 +246,15 @@ class AphasiaTreatmentPredictor:
         logger.info(f"Average CV metrics: {avg_metrics}")
         # Train final model on all data
         self.model.fit(X, treatment_outcomes)
         # Calculate feature importance

 from sklearn.model_selection import cross_val_score, KFold
 import pandas as pd
 from sklearn.metrics import mean_squared_error, r2_score
+# Configure matplotlib for headless environment
+import matplotlib
+matplotlib.use('Agg')  # Use non-interactive backend
 import matplotlib.pyplot as plt
 import os
 import joblib
         self.feature_names = feature_names
         logger.info(f"Training {self.prediction_type} model with {X.shape[0]} samples and {X.shape[1]} features")
+        print(f"Random Forest: Building {self.n_estimators} trees...")
+        # Track progress during fit with verbose
+        # Set verbose to 2 for detailed per-tree progress
+        self.model.verbose = 1
         self.model.fit(X, treatment_outcomes)
         # Calculate feature importance
             'importance': self.model.feature_importances_
         }).sort_values('importance', ascending=False)
+        print(f"Random Forest: Training complete. Top features: {', '.join(self.feature_importance['feature'].head(3).tolist())}")
         return self
     def predict(self, latents, demographics):
             n_splits = adjusted_n_splits
         logger.info(f"Running {n_splits}-fold cross-validation on {sample_count} samples")
+        print(f"Random Forest: Starting {n_splits}-fold cross-validation with {sample_count} samples")
         # Use stratified KFold for regression to ensure balanced folds
         # or LeaveOneOut for very small datasets
         if sample_count <= 5:
             from sklearn.model_selection import LeaveOneOut
             logger.warning(f"Using Leave-One-Out CV for small dataset with {sample_count} samples")
+            print(f"Random Forest: Using Leave-One-Out cross-validation due to small sample size ({sample_count})")
             kf = LeaveOneOut()
             cv_iterator = kf.split(X)
         else:
             X_train, X_test = X[train_idx], X[test_idx]
             y_train, y_test = treatment_outcomes[train_idx], treatment_outcomes[test_idx]
+            print(f"Random Forest: Training fold {fold+1}/{n_splits} - {len(X_train)} training samples, {len(X_test)} test samples")
             # Clone the model for this fold
             fold_model = RandomForestRegressor(
                 n_estimators=self.n_estimators,
                 max_depth=self.max_depth,
+                random_state=self.random_state,
+                verbose=1  # Add verbosity
             )
             # Train the model
             else:
                 r2 = np.nan
                 logger.warning(f"Fold {fold+1}: R² not calculated (insufficient samples or variance)")
+                print(f"Random Forest: Fold {fold+1} - R² not calculated (insufficient samples or variance)")
             # MSE can always be calculated
             mse = rmse**2
             fold_metrics.append(metrics)
             logger.info(f"Fold {fold+1} metrics: {metrics}")
+            # Print a more user-friendly version of the fold results
+            r2_val = metrics.get('r2', np.nan)
+            rmse_val = metrics.get('rmse', np.nan)
+            r2_text = f"R² = {r2_val:.4f}" if not np.isnan(r2_val) else "R² = N/A"
+            print(f"Random Forest: Fold {fold+1} results - {r2_text}, RMSE = {rmse_val:.4f}")
         # Calculate average metrics
         avg_metrics = {}
         for key in fold_metrics[0].keys():
         logger.info(f"Average CV metrics: {avg_metrics}")
+        # Print a summary of cross-validation performance
+        r2_avg = avg_metrics.get('r2', np.nan)
+        rmse_avg = avg_metrics.get('rmse', np.nan)
+        r2_text = f"R² = {r2_avg:.4f}" if not np.isnan(r2_avg) else "R² = N/A"
+        print(f"Random Forest: Cross-validation complete - Average {r2_text}, RMSE = {rmse_avg:.4f}")
         # Train final model on all data
+        print(f"Random Forest: Training final model on all {len(X)} samples...")
+        self.model.verbose = 1
         self.model.fit(X, treatment_outcomes)
         # Calculate feature importance

utils.py CHANGED Viewed

@@ -156,7 +156,11 @@ def train_vae(vae, x, demo, demo_types, nepochs, pperiod, bsize,
             epoch_losses.append(total_loss.item())
         # Record training loss
-        train_losses.append(np.mean(epoch_losses))
         # Validation step
         if e % pperiod == 0:
@@ -167,8 +171,6 @@ def train_vae(vae, x, demo, demo_types, nepochs, pperiod, bsize,
                 val_loss = rmse(x, y).item()
                 val_losses.append(val_loss)
-                print(f'Epoch {e}/{nepochs} - '
-                      f'Train Loss: {train_losses[-1]:.4f} - '
-                      f'Val Loss: {val_loss:.4f}')
     return train_losses, val_losses

             epoch_losses.append(total_loss.item())
         # Record training loss
+        epoch_loss = np.mean(epoch_losses)
+        train_losses.append(epoch_loss)
+        # Print progress for every epoch
+        print(f'Epoch {e+1}/{nepochs} - Train Loss: {epoch_loss:.4f}')
         # Validation step
         if e % pperiod == 0:
                 val_loss = rmse(x, y).item()
                 val_losses.append(val_loss)
+                print(f'  Validation - Val Loss: {val_loss:.4f}')
     return train_losses, val_losses

visualization.py CHANGED Viewed

@@ -1,3 +1,6 @@
 import matplotlib.pyplot as plt
 import numpy as np

+# Configure matplotlib for headless environment
+import matplotlib
+matplotlib.use('Agg')  # Use non-interactive backend
 import matplotlib.pyplot as plt
 import numpy as np

visualize_fc.py CHANGED Viewed

@@ -6,6 +6,9 @@ Standalone script to visualize FC matrices using the VAE.
 import os
 import sys
 import numpy as np
 import matplotlib.pyplot as plt
 from main import run_fc_analysis
 from config import PREDICTION_CONFIG

 import os
 import sys
 import numpy as np
+# Configure matplotlib for headless environment
+import matplotlib
+matplotlib.use('Agg')  # Use non-interactive backend
 import matplotlib.pyplot as plt
 from main import run_fc_analysis
 from config import PREDICTION_CONFIG