Spaces:

SreekarB
/

AphasiaPred

Sleeping

App Files Files Community

SreekarB committed on Mar 13, 2025

Commit

e81f968

verified ·

1 Parent(s): 0d38954

Upload 6 files

Browse files

Files changed (6) hide show

README.md +3 -3
app.py +170 -35
config.py +1 -1
huggingface_fc_visualization.py +1 -1
test_hf_download.py +2 -2
visualize_fc.py +1 -1

README.md CHANGED Viewed

@@ -4,7 +4,7 @@ emoji: 🧠
 colorFrom: blue
 colorTo: pink
 sdk: gradio
-sdk_version: 5.20.1
 app_file: app.py
 pinned: false
 ---
@@ -23,7 +23,7 @@ This application implements a VAE model that:
 ## Dataset
-This demo uses the [SreekarB/OSFData](https://huggingface.co/datasets/SreekarB/OSFData) dataset from HuggingFace, which contains:
 - NIfTI files in P01_rs.nii format containing fMRI data
 - Demographic information directly in the dataset:
@@ -40,7 +40,7 @@ The application processes the NIfTI files using the Power 264 atlas to create fu
 ## How to Use
 1. **Configure Parameters**:
-   - **Data Source**: By default, it uses the SreekarB/OSFData HuggingFace dataset
    - **Latent Dimensions**: Controls the size of the latent space (default: 32)
    - **Number of Epochs**: Training iterations (default: 200 for demo)
    - **Batch Size**: Training batch size (default: 16)

 colorFrom: blue
 colorTo: pink
 sdk: gradio
+sdk_version: 3.36.1
 app_file: app.py
 pinned: false
 ---
 ## Dataset
+This demo uses the [SreekarB/OSFData1](https://huggingface.co/datasets/SreekarB/OSFData1) dataset from HuggingFace, which contains:
 - NIfTI files in P01_rs.nii format containing fMRI data
 - Demographic information directly in the dataset:
 ## How to Use
 1. **Configure Parameters**:
+   - **Data Source**: By default, it uses the SreekarB/OSFData1 HuggingFace dataset
    - **Latent Dimensions**: Controls the size of the latent space (default: 32)
    - **Number of Epochs**: Training iterations (default: 200 for demo)
    - **Batch Size**: Training batch size (default: 16)

app.py CHANGED Viewed

@@ -49,8 +49,8 @@ class AphasiaPredictionApp:
             # Run the full analysis pipeline
             # For HuggingFace dataset, we don't need the demographic file physically
             # as we'll extract demographics directly from the dataset
-            if data_dir == "SreekarB/OSFData":
-                logger.info("Using SreekarB/OSFData dataset, loading demographic data directly from the dataset API")
                 try:
                     # Import HF dataset libraries
@@ -298,8 +298,8 @@ class AphasiaPredictionApp:
             # Set default treatment file path to our fallback file
             treatment_file = fallback_file
-            # For SreekarB/OSFData dataset, optionally look for real treatment data
-            if data_dir == "SreekarB/OSFData":
                 # Check if the user wants to skip behavioral data processing
                 skip_behavioral = PREDICTION_CONFIG.get('skip_behavioral_data', False)
@@ -354,7 +354,7 @@ class AphasiaPredictionApp:
                     except Exception as e:
                         logger.warning(f"Error during treatment data lookup: {e}, using standard outcomes")
                         # Keep using the fallback file
-            # Only check for treatment_file if we're not using the SreekarB/OSFData dataset
             elif not os.path.exists(treatment_file):
                 # Try app directory as fallback
                 app_dir_treatment = os.path.join(os.path.dirname(os.path.abspath(__file__)), "treatment_outcomes.csv")
@@ -369,7 +369,7 @@ class AphasiaPredictionApp:
             logger.info(f"Using treatment file: {treatment_file}")
             # Special handling for HuggingFace dataset
-            if data_dir == "SreekarB/OSFData":
                 # For NIfTI files, we need to search the API or download regardless of demographic source
                 logger.info("Searching for NIfTI files in the dataset...")
@@ -1883,7 +1883,7 @@ def create_interface():
                     with gr.Column(scale=1):
                         data_dir = gr.Textbox(
                             label="Data Directory or HuggingFace Dataset ID",
-                            value="SreekarB/OSFData"
                         )
                         local_nii_dir = gr.Textbox(
                             label="Local NIfTI Files Directory (Optional)",
@@ -1972,7 +1972,7 @@ def create_interface():
                     with gr.Column(scale=1):
                         prediction_type = gr.Radio(
                             label="Prediction Type",
-                            choices=["regression", "classification"],
                             value="regression"
                         )
                         outcome_variable = gr.Dropdown(
@@ -2230,7 +2230,7 @@ def create_interface():
                 logger.info("Looking for data in directory and preprocessing...")
                 # This part is similar to app.train_models but only focuses on VAE
-                if data_dir == "SreekarB/OSFData":
                     # Use dataset, similar to existing code in app.train_models
                     # For brevity, we'll call the full train_models function but only
                     # extract the VAE-related results
@@ -2298,17 +2298,68 @@ def create_interface():
         # Tab 2: Random Forest Training Handler
         def handle_rf_training(prediction_type, outcome_variable, rf_n_estimators, rf_max_depth, rf_cv_folds):
             """Train the Random Forest model using the VAE latent representations"""
-            # Check if VAE has been trained
-            if not app_state['vae_trained'] or app_state['latents'] is None:
-                error_fig = plt.figure(figsize=(10, 6))
-                message = "Error: You must train the VAE model in Tab 1 first!"
-                plt.text(0.5, 0.5, message,
-                        horizontalalignment='center', verticalalignment='center',
-                        fontsize=14, color='red')
-                plt.axis('off')
-                # Return error for both outputs
-                return [error_fig, error_fig, "Error: VAE not trained. Go to Tab 1 and train the VAE first."]
             try:
                 # Update RF configuration
@@ -2317,7 +2368,7 @@ def create_interface():
                 PREDICTION_CONFIG['max_depth'] = rf_max_depth if rf_max_depth > 0 else None
                 PREDICTION_CONFIG['cv_folds'] = rf_cv_folds
-                # Note: prediction_type parameter is ignored as we only support regression
                 logger.info(f"Training Random Forest Regression model: outcome={outcome_variable}")
                 logger.info(f"RF parameters: n_estimators={rf_n_estimators}, max_depth={rf_max_depth}, cv_folds={rf_cv_folds}")
@@ -2455,22 +2506,106 @@ def create_interface():
         # Tab 3: Treatment Prediction Handler
         def handle_treatment_prediction(fmri_file, age, sex, months, wab):
             """Predict treatment outcome for a new patient"""
-            # Check if models have been trained
-            if not app_state['vae_trained'] or not app_state['rf_trained']:
-                error_message = "Error: You must train both the VAE (Tab 1) and Random Forest (Tab 2) models first!"
-                error_fig = plt.figure(figsize=(10, 6))
-                plt.text(0.5, 0.5, error_message,
-                        horizontalalignment='center', verticalalignment='center',
-                        fontsize=14, color='red')
-                plt.axis('off')
-                return [error_message, error_fig]
             # Use the trained models from app_state for prediction
             try:
                 # Set up prediction
-                if app_state['vae'] is None or app_state['predictor'] is None:
-                    return ["Error: Models not properly trained", None]
                 # Create a temporary prediction app with our trained models
                 temp_app = AphasiaPredictionApp()
@@ -2501,8 +2636,8 @@ def create_interface():
         # Add examples
         gr.Examples(
             examples=[
-                ["SreekarB/OSFData", "", 32, 200, 16, True, "regression", "wab_aq", True, False, False],  # Standard training without synthetic data
-                ["SreekarB/OSFData", "", 16, 100, 8, True, "classification", "wab_aq", True, False, False]  # Faster training with classification
             ],
             inputs=[data_dir, local_nii_dir, latent_dim, nepochs, bsize, use_hf_dataset,
                    prediction_type, outcome_variable, skip_behavioral,

             # Run the full analysis pipeline
             # For HuggingFace dataset, we don't need the demographic file physically
             # as we'll extract demographics directly from the dataset
+            if data_dir == "SreekarB/OSFData1":
+                logger.info("Using SreekarB/OSFData1 dataset, loading demographic data directly from the dataset API")
                 try:
                     # Import HF dataset libraries
             # Set default treatment file path to our fallback file
             treatment_file = fallback_file
+            # For SreekarB/OSFData1 dataset, optionally look for real treatment data
+            if data_dir == "SreekarB/OSFData1":
                 # Check if the user wants to skip behavioral data processing
                 skip_behavioral = PREDICTION_CONFIG.get('skip_behavioral_data', False)
                     except Exception as e:
                         logger.warning(f"Error during treatment data lookup: {e}, using standard outcomes")
                         # Keep using the fallback file
+            # Only check for treatment_file if we're not using the SreekarB/OSFData1 dataset
             elif not os.path.exists(treatment_file):
                 # Try app directory as fallback
                 app_dir_treatment = os.path.join(os.path.dirname(os.path.abspath(__file__)), "treatment_outcomes.csv")
             logger.info(f"Using treatment file: {treatment_file}")
             # Special handling for HuggingFace dataset
+            if data_dir == "SreekarB/OSFData1":
                 # For NIfTI files, we need to search the API or download regardless of demographic source
                 logger.info("Searching for NIfTI files in the dataset...")
                     with gr.Column(scale=1):
                         data_dir = gr.Textbox(
                             label="Data Directory or HuggingFace Dataset ID",
+                            value="SreekarB/OSFData1"
                         )
                         local_nii_dir = gr.Textbox(
                             label="Local NIfTI Files Directory (Optional)",
                     with gr.Column(scale=1):
                         prediction_type = gr.Radio(
                             label="Prediction Type",
+                            choices=["regression"],
                             value="regression"
                         )
                         outcome_variable = gr.Dropdown(
                 logger.info("Looking for data in directory and preprocessing...")
                 # This part is similar to app.train_models but only focuses on VAE
+                if data_dir == "SreekarB/OSFData1":
                     # Use dataset, similar to existing code in app.train_models
                     # For brevity, we'll call the full train_models function but only
                     # extract the VAE-related results
         # Tab 2: Random Forest Training Handler
         def handle_rf_training(prediction_type, outcome_variable, rf_n_estimators, rf_max_depth, rf_cv_folds):
             """Train the Random Forest model using the VAE latent representations"""
+            # Try to load the VAE model if it's not already trained
+            if not app_state.get('vae_trained', False) or app_state.get('latents') is None:
+                try:
+                    # Try to load the VAE model from disk
+                    from vae_model import DemoVAE
+                    vae_path = os.path.join('models', 'vae_model.pt')
+                    if os.path.exists(vae_path):
+                        logger.info("Loading saved VAE model...")
+                        vae = DemoVAE()
+                        vae.load(vae_path)
+                        app_state['vae'] = vae
+                        # We also need latent representations for RF training
+                        # Use synthetic data if no real data is available
+                        from data_preprocessing import generate_synthetic_fc_matrices
+                        synthetic_fc, synthetic_demo = generate_synthetic_fc_matrices(30)
+                        logger.info("Generating latent representations from synthetic data...")
+                        latents = vae.encode(synthetic_fc, synthetic_demo)
+                        app_state['latents'] = latents
+                        app_state['demographics'] = synthetic_demo
+                        app_state['vae_trained'] = True
+                        logger.info("Loaded VAE model and generated synthetic latents")
+                    else:
+                        # Train a simple VAE with synthetic data
+                        from vae_model import DemoVAE
+                        from data_preprocessing import generate_synthetic_fc_matrices
+                        logger.info("VAE model not found. Training a simple model with synthetic data...")
+                        # Generate synthetic data
+                        synthetic_fc, synthetic_demo = generate_synthetic_fc_matrices(30)
+                        # Train a simple VAE
+                        vae = DemoVAE(latent_dim=10)
+                        vae.train(synthetic_fc, synthetic_demo, nepochs=10, bsize=8)
+                        # Get latent representations
+                        latents = vae.encode(synthetic_fc, synthetic_demo)
+                        # Save in app_state
+                        app_state['vae'] = vae
+                        app_state['latents'] = latents
+                        app_state['demographics'] = synthetic_demo
+                        app_state['vae_trained'] = True
+                        # Save the model for future use
+                        if not os.path.exists('models'):
+                            os.makedirs('models')
+                        vae.save('models/vae_model.pt')
+                        logger.info("Trained and saved a simple VAE model with synthetic data")
+                except Exception as e:
+                    error_fig = plt.figure(figsize=(10, 6))
+                    message = f"Error: Unable to load or train VAE model: {str(e)}"
+                    plt.text(0.5, 0.5, message,
+                            horizontalalignment='center', verticalalignment='center',
+                            fontsize=14, color='red')
+                    plt.axis('off')
+                    # Return error for both outputs
+                    return [error_fig, error_fig, f"Error: Unable to load or train VAE model: {str(e)}"]
             try:
                 # Update RF configuration
                 PREDICTION_CONFIG['max_depth'] = rf_max_depth if rf_max_depth > 0 else None
                 PREDICTION_CONFIG['cv_folds'] = rf_cv_folds
+                # We only use regression for prediction
                 logger.info(f"Training Random Forest Regression model: outcome={outcome_variable}")
                 logger.info(f"RF parameters: n_estimators={rf_n_estimators}, max_depth={rf_max_depth}, cv_folds={rf_cv_folds}")
         # Tab 3: Treatment Prediction Handler
         def handle_treatment_prediction(fmri_file, age, sex, months, wab):
             """Predict treatment outcome for a new patient"""
+            # Try to load models if they are not already trained
+            if not app_state.get('vae_trained', False) or not app_state.get('rf_trained', False):
+                try:
+                    # First check for VAE model
+                    from vae_model import DemoVAE
+                    vae_path = os.path.join('models', 'vae_model.pt')
+                    rf_path = os.path.join('models', 'predictor_model.pt')
+                    vae_loaded = False
+                    rf_loaded = False
+                    # Try to load the VAE model
+                    if not app_state.get('vae_trained', False) and os.path.exists(vae_path):
+                        logger.info("Loading saved VAE model...")
+                        vae = DemoVAE()
+                        vae.load(vae_path)
+                        app_state['vae'] = vae
+                        app_state['vae_trained'] = True
+                        vae_loaded = True
+                    # Try to load the RF model
+                    if not app_state.get('rf_trained', False) and os.path.exists(rf_path):
+                        logger.info("Loading saved RF predictor model...")
+                        from main import RandomForestPredictor
+                        # Load the model
+                        loaded_data = torch.load(rf_path)
+                        predictor = RandomForestPredictor()
+                        predictor.model = loaded_data['predictor_state']
+                        predictor.feature_importance = loaded_data.get('feature_importance', {})
+                        app_state['predictor'] = predictor
+                        app_state['rf_trained'] = True
+                        rf_loaded = True
+                    # If we couldn't load both models, train quick synthetic models
+                    if not (vae_loaded and rf_loaded):
+                        logger.info("Training synthetic models for demo purposes...")
+                        # Generate synthetic data
+                        from data_preprocessing import generate_synthetic_fc_matrices
+                        synthetic_fc, synthetic_demo = generate_synthetic_fc_matrices(30)
+                        # Train VAE if needed
+                        if not vae_loaded:
+                            vae = DemoVAE(latent_dim=10)
+                            vae.train(synthetic_fc, synthetic_demo, nepochs=10, bsize=8)
+                            app_state['vae'] = vae
+                            app_state['vae_trained'] = True
+                            # Save for future use
+                            if not os.path.exists('models'):
+                                os.makedirs('models')
+                            vae.save('models/vae_model.pt')
+                        else:
+                            vae = app_state['vae']
+                        # Get latent representations for RF training
+                        latents = vae.encode(synthetic_fc, synthetic_demo)
+                        # Train RF if needed
+                        if not rf_loaded:
+                            from main import RandomForestPredictor
+                            # Create synthetic outcome data
+                            import numpy as np
+                            outcomes = np.random.normal(50, 10, size=len(synthetic_demo))
+                            # Train the RF model
+                            predictor = RandomForestPredictor()
+                            predictor.train(latents, outcomes)
+                            app_state['predictor'] = predictor
+                            app_state['rf_trained'] = True
+                            # Save for future use
+                            if not os.path.exists('models'):
+                                os.makedirs('models')
+                            torch.save({
+                                'predictor_state': predictor.model,
+                                'feature_importance': predictor.feature_importance
+                            }, 'models/predictor_model.pt')
+                        logger.info("Successfully trained synthetic models for demo")
+                except Exception as e:
+                    error_message = f"Error: Unable to load or train required models: {str(e)}"
+                    error_fig = plt.figure(figsize=(10, 6))
+                    plt.text(0.5, 0.5, error_message,
+                            horizontalalignment='center', verticalalignment='center',
+                            fontsize=14, color='red')
+                    plt.axis('off')
+                    return [error_message, error_fig]
             # Use the trained models from app_state for prediction
             try:
                 # Set up prediction
+                if app_state.get('vae') is None or app_state.get('predictor') is None:
+                    error_message = "Error: Models not properly available"
+                    return [error_message, None]
                 # Create a temporary prediction app with our trained models
                 temp_app = AphasiaPredictionApp()
         # Add examples
         gr.Examples(
             examples=[
+                ["SreekarB/OSFData1", "", 32, 200, 16, True, "regression", "wab_aq", True, False, False],  # Standard training without synthetic data
+                ["SreekarB/OSFData1", "", 16, 100, 8, True, "regression", "wab_aq", True, False, False]  # Faster training with smaller parameters
             ],
             inputs=[data_dir, local_nii_dir, latent_dim, nepochs, bsize, use_hf_dataset,
                    prediction_type, outcome_variable, skip_behavioral,

config.py CHANGED Viewed

@@ -18,7 +18,7 @@ PREPROCESS_CONFIG = {
 # Dataset configuration
 DATASET_CONFIG = {
-    'name': 'SreekarB/OSFData',
     'split': 'train'
 }

 # Dataset configuration
 DATASET_CONFIG = {
+    'name': 'SreekarB/OSFData1',
     'split': 'train'
 }

huggingface_fc_visualization.py CHANGED Viewed

@@ -373,7 +373,7 @@ def generate_comparison():
     print("Loading dataset from HuggingFace...")
     # Load the HuggingFace dataset using config
-    dataset_name = DATASET_CONFIG.get('name', 'SreekarB/OSFData')
     dataset_split = DATASET_CONFIG.get('split', 'train')
     dataset = load_dataset(dataset_name, split=dataset_split)

     print("Loading dataset from HuggingFace...")
     # Load the HuggingFace dataset using config
+    dataset_name = DATASET_CONFIG.get('name', 'SreekarB/OSFData1')
     dataset_split = DATASET_CONFIG.get('split', 'train')
     dataset = load_dataset(dataset_name, split=dataset_split)

test_hf_download.py CHANGED Viewed

@@ -6,7 +6,7 @@ from datasets import load_dataset
 import numpy as np
 import pandas as pd
-def test_huggingface_download(dataset_name="SreekarB/OSFData", revision=None, auth_token=None):
     """
     Test script to verify downloading NIfTI files from HuggingFace Datasets
     """
@@ -227,7 +227,7 @@ if __name__ == "__main__":
     # Process command line arguments
     import argparse
     parser = argparse.ArgumentParser(description='Test HuggingFace dataset downloading')
-    parser.add_argument('--dataset', type=str, default="SreekarB/OSFData", help='HuggingFace dataset name')
     parser.add_argument('--revision', type=str, default=None, help='Dataset revision/branch')
     parser.add_argument('--token', type=str, default=None, help='HuggingFace authentication token')

 import numpy as np
 import pandas as pd
+def test_huggingface_download(dataset_name="SreekarB/OSFData1", revision=None, auth_token=None):
     """
     Test script to verify downloading NIfTI files from HuggingFace Datasets
     """
     # Process command line arguments
     import argparse
     parser = argparse.ArgumentParser(description='Test HuggingFace dataset downloading')
+    parser.add_argument('--dataset', type=str, default="SreekarB/OSFData1", help='HuggingFace dataset name')
     parser.add_argument('--revision', type=str, default=None, help='Dataset revision/branch')
     parser.add_argument('--token', type=str, default=None, help='HuggingFace authentication token')

visualize_fc.py CHANGED Viewed

@@ -15,7 +15,7 @@ from config import PREDICTION_CONFIG
 def main():
     # Configuration
-    data_dir = "SreekarB/OSFData"  # HuggingFace dataset
     latent_dim = 16
     nepochs = 50
     batch_size = 4

 def main():
     # Configuration
+    data_dir = "SreekarB/OSFData1"  # HuggingFace dataset
     latent_dim = 16
     nepochs = 50
     batch_size = 4