Spaces:

SreekarB
/

AphasiaPred

Sleeping

App Files Community

SreekarB committed on Mar 12, 2025

Commit

d526dee

verified ·

1 Parent(s): a4c8f0c

Upload 5 files

Browse files

Files changed (3) hide show

app.py +29 -15
main.py +108 -35
vae_model.py +23 -3

app.py CHANGED Viewed

@@ -504,23 +504,37 @@ class AphasiaPredictionApp:
                 # Prepare prediction visualization if available
                 if self.predictor and predictor_cv_results:
-                    # Get the outcome variable data
-                    if outcome_variable == 'wab_aq':
-                        outcomes = demographics['wab_aq']
-                    elif outcome_variable == 'age':
-                        outcomes = demographics['age']
-                    elif outcome_variable == 'mpo' or outcome_variable == 'months_post_onset':
-                        outcomes = demographics['mpo']
-                    else:
-                        # Try to find the outcome in demographics data
                         outcomes = None
-                        for key in demographics:
-                            if outcome_variable.lower() in key.lower():
-                                outcomes = demographics[key]
-                                break
-                    # Create plots
-                    if 'prediction_stds' in predictor_cv_results and 'predictions' in predictor_cv_results:
                         # Create prediction plots
                         prediction_fig = self.create_prediction_plots(
                             latents,

                 # Prepare prediction visualization if available
                 if self.predictor and predictor_cv_results:
+                    try:
+                        # Get the outcome variable data
+                        outcomes = None
+                        if demographics:
+                            if outcome_variable == 'wab_aq' and 'wab_aq' in demographics:
+                                outcomes = demographics['wab_aq']
+                            elif outcome_variable == 'age' and 'age' in demographics:
+                                outcomes = demographics['age']
+                            elif (outcome_variable == 'mpo' or outcome_variable == 'months_post_onset') and 'mpo' in demographics:
+                                outcomes = demographics['mpo']
+                            else:
+                                # Try to find the outcome in demographics data
+                                for key in demographics:
+                                    if outcome_variable.lower() in key.lower():
+                                        outcomes = demographics[key]
+                                        logger.info(f"Found matching outcome variable: {key}")
+                                        break
+                        if outcomes is None:
+                            logger.warning(f"Could not find outcome variable '{outcome_variable}' in demographics")
+                            # Create a dummy array to prevent errors
+                            if 'predictions' in predictor_cv_results:
+                                outcomes = np.zeros_like(predictor_cv_results['predictions'])
+                            else:
+                                logger.warning("Cannot create prediction plots without outcome data")
+                    except Exception as e:
+                        logger.error(f"Error getting outcome variable: {e}")
                         outcomes = None
+                    # Create plots if we have the necessary data
+                    if outcomes is not None and 'prediction_stds' in predictor_cv_results and 'predictions' in predictor_cv_results:
                         # Create prediction plots
                         prediction_fig = self.create_prediction_plots(
                             latents,

main.py CHANGED Viewed

@@ -84,7 +84,13 @@ def run_analysis(data_dir="data",
     # Initialize and train VAE
     print("Training VAE...")
     vae = DemoVAE(**MODEL_CONFIG)
-    train_losses, val_losses = vae.fit(X, demo_data, demo_types)
     # Get latent representations
     print("Extracting latent representations...")
@@ -116,18 +122,28 @@ def run_analysis(data_dir="data",
     )
     # Extract results from CV
-    mean_metrics = cv_results["mean_metrics"]
-    fold_metrics = cv_results["fold_metrics"]
-    predictions = cv_results["predictions"]
-    prediction_stds = cv_results["prediction_stds"]
     # For regression, get R2 metrics, otherwise use accuracy
-    if predictor.prediction_type == "regression":
-        cv_mean = mean_metrics["r2"]
-        cv_std = np.std([fold["r2"] for fold in fold_metrics])
-    else:
-        cv_mean = mean_metrics["accuracy"]
-        cv_std = np.std([fold["accuracy"] for fold in fold_metrics])
     # Fit final predictor model
     predictor.fit(latents, demographics, treatment_outcomes)
@@ -145,31 +161,84 @@ def run_analysis(data_dir="data",
     print("Generating visualizations...")
     # FC matrix visualization
-    reconstructed = vae.transform(X, demo_data, demo_types)
-    generated = vae.transform(1,
-                            [d[:1] for d in demo_data],
-                            demo_types)
-    fc_fig = plot_fc_matrices(X[0], reconstructed[0], generated[0])
     # Learning curves
-    learning_fig = plot_learning_curves(train_losses, val_losses)
     # Feature importance
-    importance_fig = predictor.plot_feature_importance()
     # Prediction performance
     performance_fig = plt.figure(figsize=(8, 6))
-    plt.scatter(treatment_outcomes, predictions)
-    plt.plot([min(treatment_outcomes), max(treatment_outcomes)],
-             [min(treatment_outcomes), max(treatment_outcomes)],
-             'r--')
-    plt.fill_between(treatment_outcomes,
-                     predictions - 2*prediction_stds,
-                     predictions + 2*prediction_stds,
-                     alpha=0.2, color='gray')
-    plt.xlabel('Actual Outcome')
-    plt.ylabel('Predicted Outcome')
-    plt.title(f'Treatment Outcome Prediction\nR² = {cv_mean:.3f} ± {cv_std:.3f}')
     plt.tight_layout()
     # Save results
@@ -178,6 +247,15 @@ def run_analysis(data_dir="data",
     np.save('results/predictions.npy', predictions)
     np.save('results/prediction_stds.npy', prediction_stds)
     results = {
         'vae': vae,
         'predictor': predictor,
@@ -186,12 +264,7 @@ def run_analysis(data_dir="data",
         'cv_scores': (cv_mean, cv_std),
         'predictions': predictions,
         'prediction_stds': prediction_stds,
-        'predictor_cv_results': {
-            'mean_metrics': mean_metrics,
-            'fold_metrics': fold_metrics,
-            'predictions': predictions,
-            'prediction_stds': prediction_stds
-        },
         'figures': {
             'vae': fc_fig,  # Changed to match app.py expectations
             'fc_analysis': fc_fig,

     # Initialize and train VAE
     print("Training VAE...")
     vae = DemoVAE(**MODEL_CONFIG)
+    try:
+        train_losses, val_losses = vae.fit(X, demo_data, demo_types)
+        print(f"VAE training complete. Final train loss: {train_losses[-1]:.4f}, final validation loss: {val_losses[-1]:.4f}")
+    except Exception as e:
+        print(f"Error during VAE training: {e}")
+        print("Using empty lists for losses as fallback")
+        train_losses, val_losses = [], []
     # Get latent representations
     print("Extracting latent representations...")
     )
     # Extract results from CV
+    mean_metrics = cv_results.get("mean_metrics", {})
+    fold_metrics = cv_results.get("fold_metrics", [])
+    predictions = cv_results.get("predictions", np.zeros_like(treatment_outcomes))
+    prediction_stds = cv_results.get("prediction_stds", np.zeros_like(treatment_outcomes))
     # For regression, get R2 metrics, otherwise use accuracy
+    try:
+        if predictor.prediction_type == "regression":
+            cv_mean = mean_metrics.get("r2", 0.0)
+            if fold_metrics and "r2" in fold_metrics[0]:
+                cv_std = np.std([fold.get("r2", 0.0) for fold in fold_metrics])
+            else:
+                cv_std = 0.0
+        else:
+            cv_mean = mean_metrics.get("accuracy", 0.0)
+            if fold_metrics and "accuracy" in fold_metrics[0]:
+                cv_std = np.std([fold.get("accuracy", 0.0) for fold in fold_metrics])
+            else:
+                cv_std = 0.0
+    except Exception as e:
+        print(f"Error calculating CV metrics: {e}")
+        cv_mean, cv_std = 0.0, 0.0
     # Fit final predictor model
     predictor.fit(latents, demographics, treatment_outcomes)
     print("Generating visualizations...")
     # FC matrix visualization
+    try:
+        reconstructed = vae.transform(X, demo_data, demo_types)
+        generated = vae.transform(1,
+                                [d[:1] for d in demo_data],
+                                demo_types)
+        fc_fig = plot_fc_matrices(X[0], reconstructed[0], generated[0])
+    except Exception as e:
+        print(f"Error creating FC visualization: {e}")
+        fc_fig = plt.figure(figsize=(15, 5))
+        plt.text(0.5, 0.5, "FC visualization unavailable",
+                ha='center', va='center', transform=plt.gca().transAxes)
+        plt.tight_layout()
     # Learning curves
+    try:
+        if train_losses and val_losses:
+            learning_fig = plot_learning_curves(train_losses, val_losses)
+        else:
+            print("No training history available for learning curves")
+            learning_fig = plt.figure(figsize=(10, 6))
+            plt.text(0.5, 0.5, "Learning curve data unavailable",
+                    ha='center', va='center', transform=plt.gca().transAxes)
+            plt.tight_layout()
+    except Exception as e:
+        print(f"Error creating learning curve plot: {e}")
+        learning_fig = plt.figure(figsize=(10, 6))
+        plt.text(0.5, 0.5, "Error creating learning curves",
+                ha='center', va='center', transform=plt.gca().transAxes)
+        plt.tight_layout()
     # Feature importance
+    try:
+        importance_fig = predictor.plot_feature_importance()
+    except Exception as e:
+        print(f"Error creating feature importance plot: {e}")
+        importance_fig = plt.figure(figsize=(8, 6))
+        plt.text(0.5, 0.5, "Feature importance unavailable",
+                ha='center', va='center', transform=plt.gca().transAxes)
+        plt.tight_layout()
     # Prediction performance
     performance_fig = plt.figure(figsize=(8, 6))
+    # Check if we have valid predictions
+    if len(treatment_outcomes) > 0 and len(predictions) == len(treatment_outcomes):
+        try:
+            # Only create scatter plot if we have matching data
+            plt.scatter(treatment_outcomes, predictions)
+            # Reference line
+            min_val = min(np.min(treatment_outcomes), np.min(predictions))
+            max_val = max(np.max(treatment_outcomes), np.max(predictions))
+            plt.plot([min_val, max_val], [min_val, max_val], 'r--')
+            # Confidence band
+            plt.fill_between(treatment_outcomes,
+                            predictions - 2*prediction_stds,
+                            predictions + 2*prediction_stds,
+                            alpha=0.2, color='gray')
+            # Labels
+            plt.xlabel('Actual Outcome')
+            plt.ylabel('Predicted Outcome')
+            # Title with metrics
+            if predictor.prediction_type == "regression":
+                plt.title(f'Treatment Outcome Prediction\nR² = {cv_mean:.3f} ± {cv_std:.3f}')
+            else:
+                plt.title(f'Treatment Outcome Prediction\nAccuracy = {cv_mean:.3f} ± {cv_std:.3f}')
+        except Exception as e:
+            print(f"Error creating performance plot: {e}")
+            plt.text(0.5, 0.5, "Error creating plot",
+                    ha='center', va='center', transform=plt.gca().transAxes)
+    else:
+        # Handle case with no data
+        plt.text(0.5, 0.5, "No prediction data available",
+                ha='center', va='center', transform=plt.gca().transAxes)
     plt.tight_layout()
     # Save results
     np.save('results/predictions.npy', predictions)
     np.save('results/prediction_stds.npy', prediction_stds)
+    # Prepare predictor_cv_results with appropriate default values if missing
+    predictor_cv_results = {
+        'mean_metrics': mean_metrics if mean_metrics else {},
+        'fold_metrics': fold_metrics if fold_metrics else [],
+        'predictions': predictions if len(predictions) > 0 else np.zeros(0),
+        'prediction_stds': prediction_stds if len(prediction_stds) > 0 else np.zeros(0)
+    }
+    # Construct the final results dictionary
     results = {
         'vae': vae,
         'predictor': predictor,
         'cv_scores': (cv_mean, cv_std),
         'predictions': predictions,
         'prediction_stds': prediction_stds,
+        'predictor_cv_results': predictor_cv_results,
         'figures': {
             'vae': fc_fig,  # Changed to match app.py expectations
             'fc_analysis': fc_fig,

vae_model.py CHANGED Viewed

@@ -89,7 +89,7 @@ class DemoVAE(BaseEstimator):
         self.vae = VAE(self.input_dim, self.latent_dim, demo_dim, self.use_cuda)
         # Train VAE
-        train_vae(
             self.vae, x, demo, demo_types,
             self.nepochs, self.pperiod, self.bsize,
             self.loss_C_mult, self.loss_mu_mult, self.loss_rec_mult,
@@ -97,7 +97,13 @@ class DemoVAE(BaseEstimator):
             self.lr, self.weight_decay, self.alpha, self.LR_C,
             self
         )
-        return self
     def transform(self, x, demo, demo_types):
         if isinstance(x, int):
@@ -113,13 +119,19 @@ class DemoVAE(BaseEstimator):
         return to_numpy(z)
     def save(self, path):
         torch.save({
             'model_state_dict': self.vae.state_dict(),
             'params': self.get_params(),
             'pred_stats': self.pred_stats,
             'input_dim': self.input_dim,
-            'demo_dim': self.demo_dim
         }, path)
     def load(self, path):
         checkpoint = torch.load(path)
@@ -129,3 +141,11 @@ class DemoVAE(BaseEstimator):
         self.demo_dim = checkpoint['demo_dim']
         self.vae = VAE(self.input_dim, self.latent_dim, self.demo_dim, self.use_cuda)
         self.vae.load_state_dict(checkpoint['model_state_dict'])

         self.vae = VAE(self.input_dim, self.latent_dim, demo_dim, self.use_cuda)
         # Train VAE
+        train_losses, val_losses = train_vae(
             self.vae, x, demo, demo_types,
             self.nepochs, self.pperiod, self.bsize,
             self.loss_C_mult, self.loss_mu_mult, self.loss_rec_mult,
             self.lr, self.weight_decay, self.alpha, self.LR_C,
             self
         )
+        # Store the losses for later visualization
+        self.train_losses = train_losses
+        self.val_losses = val_losses
+        # Return the losses for immediate use
+        return train_losses, val_losses
     def transform(self, x, demo, demo_types):
         if isinstance(x, int):
         return to_numpy(z)
     def save(self, path):
+        train_losses = getattr(self, 'train_losses', [])
+        val_losses = getattr(self, 'val_losses', [])
         torch.save({
             'model_state_dict': self.vae.state_dict(),
             'params': self.get_params(),
             'pred_stats': self.pred_stats,
             'input_dim': self.input_dim,
+            'demo_dim': self.demo_dim,
+            'train_losses': train_losses,
+            'val_losses': val_losses
         }, path)
+        print(f"Saved VAE model to {path}")
     def load(self, path):
         checkpoint = torch.load(path)
         self.demo_dim = checkpoint['demo_dim']
         self.vae = VAE(self.input_dim, self.latent_dim, self.demo_dim, self.use_cuda)
         self.vae.load_state_dict(checkpoint['model_state_dict'])
+        # Load training history if available
+        if 'train_losses' in checkpoint:
+            self.train_losses = checkpoint['train_losses']
+        if 'val_losses' in checkpoint:
+            self.val_losses = checkpoint['val_losses']
+        print(f"Loaded VAE model from {path}")