diff --git "a/app.py" "b/app.py"
--- "a/app.py"
+++ "b/app.py"
@@ -1,2923 +1,1163 @@
 import gradio as gr
-from main import run_analysis
-from rcf_prediction import AphasiaTreatmentPredictor
-import numpy as np
-# Configure matplotlib for headless environment
-import matplotlib
-matplotlib.use('Agg')  # Use non-interactive backend
-import matplotlib.pyplot as plt
-from data_preprocessing import preprocess_fmri_to_fc, process_single_fmri
-from visualization import plot_fc_matrices, plot_learning_curves
+import torch
+import numpy as np  # Make sure numpy is imported at the top level
+import nibabel as nib
+import tempfile
 import os
-import glob
-from sklearn.metrics import mean_squared_error, r2_score
-import json
-import pickle
-import pandas as pd
-import seaborn as sns
+import sys
 import logging
-from config import MODEL_CONFIG, PREDICTION_CONFIG
+import traceback
+import datetime
+import threading
+import io
+import time
+import atexit
+import shutil
+from pathlib import Path
+from datasets import load_dataset
+import pandas as pd
+from sklearn.preprocessing import StandardScaler
+# Import from the package instead of direct paths
+from latent_diffusion import LatentDiffusionFMRI, AphasiaFMRIPreprocessor
 
-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
-logger = logging.getLogger(__name__)
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s'
+)
+logger = logging.getLogger('app')
 
-class AphasiaPredictionApp:
-    def __init__(self):
-        self.vae = None
-        self.predictor = None
-        self.trained = False
-        self.latent_dim = MODEL_CONFIG['latent_dim']
-        self.last_treatment_file = None  # Track the last treatment file used
-        
-    def train_models(self, data_dir, latent_dim, nepochs, bsize):
+# Check if we're running on Hugging Face Spaces and disable wandb
+if os.environ.get('SPACE_ID') or os.environ.get('SYSTEM') == 'spaces':
+    logger.info("Detected Hugging Face Spaces environment, disabling wandb")
+    os.environ['WANDB_DISABLED'] = 'true'
+    os.environ['WANDB_MODE'] = 'disabled'
+
+# Track temporary files for cleanup
+temp_files = set()
+
+# Function to clean up temporary files on exit
+def cleanup_temp_files():
+    """Clean up any temporary files created by the application"""
+    global temp_files
+    for file_path in temp_files:
+        try:
+            if os.path.exists(file_path):
+                os.remove(file_path)
+                logger.debug(f"Cleaned up temporary file: {file_path}")
+        except Exception as e:
+            logger.error(f"Failed to clean up temporary file {file_path}: {e}")
+
+# Register the cleanup handler to run on exit
+atexit.register(cleanup_temp_files)
+
+# We already imported numpy as np at the top level, no need for redundant assignment
+
+class AphasiaFMRIGenerator:
+    def __init__(self, csv_path=None):
         """
-        Train VAE and Random Forest models
+        Initialize the fMRI generator
+        
+        Args:
+            csv_path: Optional path to the FC_graph_covariate_data.csv file
+                      If provided, will load demographics directly from this file
         """
-        # Train VAE and Random Forest
-        logger.info(f"Training models with data from {data_dir}")
-        logger.info(f"VAE params: latent_dim={latent_dim}, epochs={nepochs}, batch_size={bsize}")
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        print(f"Using device: {self.device}")
+        self.csv_path = csv_path
+        self.preprocessor = AphasiaFMRIPreprocessor()
+        self.demo_scaler = self.initialize_demo_scaler()
+        self.model = self.load_model()
+        
+        # Training state
+        self.is_training = False
+        self.current_epoch = 0
+        self.training_logs = []
+
+    def load_model(self):
+        # Ensure models directory exists
+        os.makedirs("models", exist_ok=True)
         
-        # Default prediction parameters from our config
-        outcome_variable = PREDICTION_CONFIG.get('default_outcome', 'wab_aq')
-        logger.info(f"Prediction: type=regression, outcome={outcome_variable}")
+        # Default fMRI shape for Hugging Face Spaces compatibility 
+        fmri_shape = (200, 32, 32, 32)  # Smaller for memory constraints
         
-        figures = {}
+        model = LatentDiffusionFMRI(fmri_shape=fmri_shape).to(self.device)
         
-        try:
-            # Run the full analysis pipeline
-            # For HuggingFace dataset, we don't need the demographic file physically
-            # as we'll extract demographics directly from the dataset
-            if data_dir == "SreekarB/OSFData":
-                logger.info("Using SreekarB/OSFData dataset, loading demographic data directly from the dataset API")
+        # Load pretrained weights if available
+        model_path = Path("models/fmri_diffusion.pt")
+        if model_path.exists():
+            print(f"Loading pretrained model from {model_path}")
+            model.load_state_dict(torch.load(model_path, map_location=self.device))
+        else:
+            print("Using initialized model (no pretrained weights found)")
+            
+        model.eval()
+        return model
+
+    def initialize_demo_scaler(self):
+        """Initialize demographics scaler from CSV file or HuggingFace dataset"""
+        # Import os here to avoid UnboundLocalError
+        import os
+        
+        # If CSV path was provided during initialization, use it directly
+        if self.csv_path and os.path.exists(self.csv_path):
+            try:
+                print(f"=== USING PROVIDED CSV FILE: {self.csv_path} ===")
+                logger.info(f"Processing demographics from file: {self.csv_path}")
                 
+                # Check file content type
                 try:
-                    # Import HF dataset libraries
-                    from datasets import load_dataset
-                    import pandas as pd
-                    import os
-                    import tempfile
-                    from huggingface_hub import hf_hub_download
-                    
-                    try:
-                        # First try direct download of demographic file
-                        logger.info(f"Attempting to directly download FC_graph_covariate_data.csv from {data_dir}")
-                        
-                        temp_dir = tempfile.mkdtemp(prefix="hf_demo_")
-                        
-                        try:
-                            # Try to download the demographic CSV directly
-                            csv_path = hf_hub_download(
-                                repo_id=data_dir,
-                                filename="FC_graph_covariate_data.csv",
-                                repo_type="dataset",
-                                cache_dir=temp_dir
-                            )
-                            logger.info(f"✓ Successfully downloaded FC_graph_covariate_data.csv directly")
-                            
-                            # Read the demographic file manually
-                            try:
-                                # Try UTF-8 first
-                                demo_df = pd.read_csv(csv_path)
-                            except UnicodeDecodeError:
-                                # Try alternative encodings
-                                for encoding in ['latin1', 'cp1252', 'iso-8859-1']:
-                                    try:
-                                        logger.info(f"Trying {encoding} encoding...")
-                                        demo_df = pd.read_csv(csv_path, encoding=encoding)
-                                        logger.info(f"Successfully loaded with {encoding} encoding")
-                                        break
-                                    except Exception:
-                                        continue
-                                else:
-                                    # Try as Excel if all encodings fail
-                                    try:
-                                        logger.info("Trying to read as Excel file...")
-                                        demo_df = pd.read_excel(csv_path)
-                                    except Exception:
-                                        raise ValueError("Could not read demographic file with any encoding")
-                            
-                            # Create a mock dataset with the demographic data
-                            dataset = {
-                                'train': demo_df
-                            }
-                            logger.info(f"Created dataset from manual file reading with {len(demo_df)} rows")
-                            
-                            # Extract the column names to validate
-                            columns = demo_df.columns.tolist()
-                            logger.info(f"Dataset columns: {columns}")
-                            
-                        except Exception as direct_err:
-                            logger.warning(f"Could not download demographic file directly: {direct_err}")
-                            
-                            # Fall back to dataset loading
-                            logger.info("Falling back to standard dataset loading")
-                            # Try to load the dataset with encoding parameters
-                            dataset = load_dataset(data_dir)
-                            logger.info(f"Successfully loaded dataset: {data_dir}")
-                            
-                            # Check if we have the 'train' split
-                            if 'train' not in dataset:
-                                logger.error(f"Dataset {data_dir} does not have a 'train' split")
-                                raise ValueError(f"Dataset {data_dir} is missing the required 'train' split")
-                            
-                            # Extract the column names to validate
-                            columns = dataset['train'].column_names
-                            logger.info(f"Dataset columns: {columns}")
-                    
-                    except UnicodeDecodeError as ude:
-                        logger.error(f"Unicode decode error when loading dataset: {ude}")
-                        
-                        # Try to download the files directly without using datasets API
-                        logger.warning("Trying direct file download instead of using datasets API")
-                        
-                        # Create a temporary directory for downloads
-                        temp_dir = tempfile.mkdtemp(prefix="direct_download_")
-                        
-                        # Try different filenames and extensions for demographic data
-                        demo_files = [
-                            "FC_graph_covariate_data.csv", 
-                            "FC_graph_covariate_data.xlsx",
-                            "demographics.csv", 
-                            "demographics.xlsx", 
-                            "subjects.csv",
-                            "patients.csv"
-                        ]
-                        
-                        # Try each file
-                        for demo_file in demo_files:
-                            try:
-                                file_path = hf_hub_download(
-                                    repo_id=data_dir,
-                                    filename=demo_file,
-                                    repo_type="dataset",
-                                    cache_dir=temp_dir
-                                )
-                                
-                                logger.info(f"Found demographic file: {demo_file}")
-                                
-                                # Try to read with different encodings
-                                if demo_file.endswith('.csv'):
-                                    for encoding in ['utf-8', 'latin1', 'cp1252', 'iso-8859-1']:
-                                        try:
-                                            demo_df = pd.read_csv(file_path, encoding=encoding)
-                                            logger.info(f"Successfully read {demo_file} with {encoding} encoding")
-                                            break
-                                        except UnicodeDecodeError:
-                                            continue
-                                    else:
-                                        logger.warning(f"Could not read {demo_file} with any encoding")
-                                        continue
-                                elif demo_file.endswith('.xlsx'):
-                                    try:
-                                        demo_df = pd.read_excel(file_path)
-                                        logger.info(f"Successfully read Excel file {demo_file}")
-                                    except Exception as excel_err:
-                                        logger.warning(f"Could not read Excel file {demo_file}: {excel_err}")
-                                        continue
-                                
-                                # If we got here, we successfully read a file
-                                dataset = {
-                                    'train': demo_df
-                                }
-                                columns = demo_df.columns.tolist()
-                                logger.info(f"Loaded demographic data with {len(demo_df)} rows")
-                                logger.info(f"Columns: {columns}")
-                                break
-                            
-                            except Exception as file_err:
-                                logger.warning(f"Could not download or read {demo_file}: {file_err}")
+                    # First, check file size to make sure it's not empty
+                    file_size = os.path.getsize(self.csv_path)
+                    if file_size == 0:
+                        logger.error("CSV file is empty (0 bytes)")
+                        raise ValueError("CSV file is empty (0 bytes)")
                         
-                        else:
-                            # If all files failed, raise an error
-                            raise ValueError("Could not find or read any demographic data file from the dataset. Please provide a valid FC_graph_covariate_data.csv file.")
-                    
-                    # Check for required demographic columns
-                    required_columns = ['ID', 'wab_aq', 'age', 'mpo', 'education', 'gender']
-                    missing_columns = [col for col in required_columns if col not in columns]
-                    
-                    if missing_columns:
-                        # Try alternative column names
-                        column_mapping = {
-                            'ID': ['ID', 'id', 'subject_id', 'Subject', 'PatientID'],
-                            'wab_aq': ['wab_aq', 'wab', 'WAB', 'WAB_AQ', 'aphasia_score'],
-                            'age': ['age', 'Age', 'age_at_stroke', 'patient_age'],
-                            'mpo': ['mpo', 'MPO', 'months_post_onset', 'months_post_stroke'],
-                            'education': ['education', 'educate', 'edu', 'years_education', 'educ'],
-                            'gender': ['gender', 'sex', 'Gender', 'Sex']
-                        }
-                        
-                        for missing_col in missing_columns:
-                            for alt_col in column_mapping[missing_col]:
-                                if alt_col in columns:
-                                    # Found an alternative
-                                    logger.info(f"Mapped column {alt_col} to {missing_col}")
-                                    if isinstance(dataset['train'], pd.DataFrame):
-                                        # If using DataFrame
-                                        dataset['train'][missing_col] = dataset['train'][alt_col]
-                                    else:
-                                        # If using HF dataset
-                                        dataset['train'] = dataset['train'].add_column(
-                                            missing_col, dataset['train'][alt_col]
-                                        )
-                                    missing_columns.remove(missing_col)
-                                    break
+                    # Try to read the first few lines to check content
+                    with open(self.csv_path, 'r') as f:
+                        header = f.readline().strip()
+                        first_line = f.readline().strip() if f.readline() else ""
+                    
+                    logger.info(f"CSV header: {header}")
+                    logger.info(f"First data line: {first_line}")
+                    
+                    # Try reading the file directly
+                    df = pd.read_csv(self.csv_path)
+                    logger.info(f"Successfully read CSV with shape: {df.shape}")
+                    
+                    # Check required columns
+                    required_cols = ['ID', 'wab_aq', 'age', 'mpo', 'education', 'gender', 'handedness']
+                    missing_cols = [col for col in required_cols if col not in df.columns]
+                    
+                    if missing_cols:
+                        logger.warning(f"CSV file missing columns: {missing_cols}")
+                        available_cols = ', '.join(df.columns)
+                        logger.info(f"Available columns: {available_cols}")
                         
-                        # If we still have missing columns after mapping, raise an error
-                        if missing_columns:
-                            logger.error(f"Missing required columns after mapping: {missing_columns}")
-                            raise ValueError(f"The demographic data is missing required columns: {missing_columns}. Please ensure your FC_graph_covariate_data.csv file contains these columns or equivalent alternatives.")
-                    
-                    # First, check if FC_graph_covariate_data.csv exists in the dataset
+                        if len(missing_cols) > len(required_cols) // 2:
+                            # Too many missing columns, might be the wrong format
+                            logger.error(f"Too many missing columns ({len(missing_cols)}), might be wrong CSV format")
+                            raise ValueError(f"CSV format incorrect - missing required columns: {missing_cols}")
+                    
+                    # Now process with the preprocessor
+                    _, scaler = self.preprocessor.preprocess_demographics(df)
+                    print("Successfully initialized demographics scaler from CSV file!")
+                    return scaler
+                    
+                except pd.errors.EmptyDataError:
+                    logger.error("CSV file is empty according to pandas")
+                    raise ValueError("CSV file is empty or malformed")
+                    
+                except pd.errors.ParserError as pe:
+                    logger.error(f"CSV parsing error: {pe}")
+                    # Try to diagnose the issue
+                    logger.info("Attempting to diagnose CSV parsing error")
                     try:
-                        from huggingface_hub import hf_hub_download
-                        import tempfile
-                        
-                        temp_dir = tempfile.mkdtemp(prefix="hf_csv_")
-                        logger.info(f"Looking for FC_graph_covariate_data.csv in dataset {data_dir}")
+                        with open(self.csv_path, 'r') as f:
+                            first_few_lines = [f.readline() for _ in range(5)]
+                        logger.info(f"First few lines of file:\n{''.join(first_few_lines)}")
+                    except Exception as e:
+                        logger.error(f"Error reading file for diagnosis: {e}")
                         
-                        try:
-                            csv_path = hf_hub_download(
-                                repo_id=data_dir,
-                                filename="FC_graph_covariate_data.csv",
-                                repo_type="dataset",
-                                cache_dir=temp_dir
-                            )
-                            logger.info(f"✓ Successfully found FC_graph_covariate_data.csv in the dataset!")
-                            
-                            # Use the file directly instead of the API
-                            demographic_file = csv_path
-                            logger.info(f"Using FC_graph_covariate_data.csv from dataset: {demographic_file}")
-                        except Exception as e:
-                            logger.info(f"FC_graph_covariate_data.csv not found in dataset (this is okay): {e}")
-                            # Fall back to API
-                            demographic_file = "FROM_DATASET_API"
+                    # Continue with original approach as last resort
+                    try:
+                        _, scaler = self.preprocessor.preprocess_demographics(self.csv_path)
+                        print("Successfully initialized demographics scaler from CSV file!")
+                        return scaler
                     except Exception as e:
-                        logger.warning(f"Error checking for FC_graph_covariate_data.csv: {e}")
-                        demographic_file = "FROM_DATASET_API"
-                    
-                    treatment_file = "FROM_DATASET_API"  # We'll generate this
-                    
-                except Exception as e:
-                    logger.error(f"Error loading HuggingFace dataset: {e}", exc_info=True)
-                    raise
-            else:
-                # For local directories, look for the actual files
-                demographic_file = os.path.join(data_dir, "FC_graph_covariate_data.csv")
-                treatment_file = os.path.join(data_dir, "treatment_outcomes.csv")
-                
-                # Check data directory for files
-                if not os.path.exists(demographic_file):
-                    # Try app directory as fallback
-                    app_dir_demographic = os.path.join(os.path.dirname(os.path.abspath(__file__)), "FC_graph_covariate_data.csv")
-                    if os.path.exists(app_dir_demographic):
-                        demographic_file = app_dir_demographic
-                        logger.info(f"Using FC_graph_covariate_data.csv from app directory: {demographic_file}")
-                    else:
-                        logger.error(f"FC_graph_covariate_data.csv not found in data directory or app directory")
-                        raise FileNotFoundError(f"Demographic file not found. Please ensure FC_graph_covariate_data.csv exists in {data_dir} or the application directory.")
+                        logger.error(f"Failed fallback to direct file path: {e}")
+                        raise
+                        
+            except Exception as csv_error:
+                logger.error(f"Error processing CSV file: {csv_error}", exc_info=True)
+                print(f"Error processing CSV file: {csv_error}")
+                print("Falling back to HuggingFace dataset...")
+        
+        # If no CSV path or failed to process, try HuggingFace dataset
+        try:
+            print("=== ATTEMPTING TO LOAD DATASET FROM HUGGING FACE HUB ===")
+            print("Loading dataset: SreekarB/OSFData...")
             
-            # Create a simple fallback treatment outcomes file that will be used if no actual data is found
-            fallback_file = os.path.join('results', 'treatment_outcomes.csv')
+            # Try different loading methods
+            dataset = None
+            
+            # Method 1: Standard loading
             try:
-                # Create a simple fallback treatment outcomes file
-                os.makedirs('results', exist_ok=True)
-                mock_outcomes = pd.DataFrame([
-                    {'subject_id': 'P001', 'treatment_type': 'Standard', 'outcome_score': 5.2},
-                    {'subject_id': 'P002', 'treatment_type': 'Intensive', 'outcome_score': 7.8},
-                    {'subject_id': 'P003', 'treatment_type': 'Standard', 'outcome_score': 3.1},
-                    {'subject_id': 'P004', 'treatment_type': 'Intensive', 'outcome_score': 9.4},
-                    {'subject_id': 'P005', 'treatment_type': 'Control', 'outcome_score': 1.2}
-                ])
-                mock_outcomes.to_csv(fallback_file, index=False)
-                logger.info(f"Created standard treatment outcomes file with 5 subjects")
-            except Exception as e:
-                logger.error(f"Failed to create standard outcomes file: {e}")
+                dataset = load_dataset("SreekarB/OSFData", split="train")
+                print(f"Successfully loaded dataset with {len(dataset)} samples")
+            except Exception as e1:
+                print(f"Error in standard loading: {e1}")
+            
+            # Method 2: Try with token
+            if dataset is None:
+                try:
+                    import os
+                    hf_token = os.environ.get("HF_TOKEN", None)
+                    if hf_token:
+                        print("Trying with Hugging Face token...")
+                        dataset = load_dataset("SreekarB/OSFData", split="train", use_auth_token=hf_token)
+                    else:
+                        print("No HF_TOKEN found, trying without split...")
+                        dataset = load_dataset("SreekarB/OSFData")
+                        # If successful, get the train split
+                        dataset = dataset['train'] if 'train' in dataset else dataset
+                    print(f"Alternate loading successful, loaded {len(dataset)} samples")
+                except Exception as e2:
+                    print(f"Error in alternate loading: {e2}")
             
-            # Set default treatment file path to our fallback file
-            treatment_file = fallback_file
+            # Method 3: Try using huggingface_hub directly
+            if dataset is None:
+                try:
+                    from huggingface_hub import hf_hub_download
+                    print("Trying direct file download...")
+                    temp_csv = hf_hub_download(
+                        repo_id="SreekarB/OSFData",
+                        filename="FC_graph_covariate_data.csv",
+                        repo_type="dataset"
+                    )
+                    print(f"Successfully downloaded CSV to {temp_csv}")
+                    _, scaler = self.preprocessor.preprocess_demographics(temp_csv)
+                    print("Successfully initialized scaler from downloaded CSV!")
+                    return scaler
+                except Exception as e3:
+                    print(f"Error in direct download: {e3}")
             
-            # For SreekarB/OSFData dataset, optionally look for real treatment data
-            if data_dir == "SreekarB/OSFData":
-                # Check if the user wants to skip behavioral data processing
-                skip_behavioral = PREDICTION_CONFIG.get('skip_behavioral_data', False)
-                
-                if skip_behavioral:
-                    # Skip behavioral data processing entirely
-                    logger.info("Skipping behavioral data processing as requested in config")
-                else:
-                    # Try to find behavioral_data.csv in the dataset
-                    try:
-                        from huggingface_hub import hf_hub_download
-                        import tempfile
-                        
-                        temp_dir = tempfile.mkdtemp(prefix="hf_behavioral_")
-                        logger.info(f"Looking for behavioral_data.csv in dataset {data_dir}")
-                        
-                        try:
-                            csv_path = hf_hub_download(
-                                repo_id=data_dir,
-                                filename="behavioral_data.csv",
-                                repo_type="dataset",
-                                cache_dir=temp_dir
-                            )
-                            logger.info(f"✓ Successfully found behavioral_data.csv in the dataset!")
-                            
-                            # Process behavioral data to extract treatment outcomes
-                            try:
-                                real_treatment_file = process_behavioral_data_to_outcomes(csv_path)
-                                treatment_file = real_treatment_file  # Use the real treatment file if processing succeeded
-                                # Store the treatment file path for later use
-                                self.last_treatment_file = treatment_file
-                                logger.info(f"Using processed behavioral data for treatment outcomes")
-                            except Exception as proc_err:
-                                logger.warning(f"Couldn't process behavioral data: {proc_err}, using standard outcomes")
-                                # Keep using the fallback file
-                        except Exception as e:
-                            logger.warning(f"behavioral_data.csv not found or couldn't be processed: {e}")
-                            
-                            # Try to find any treatment outcomes file
-                            try:
-                                # Use our treatment outcomes file finder
-                                real_treatment_file = find_treatment_outcomes_file(data_dir)
-                                logger.info(f"Found treatment outcomes file: {real_treatment_file}")
-                                
-                                # Use the found file
-                                treatment_file = real_treatment_file
-                                # Store the treatment file path for later use
-                                self.last_treatment_file = treatment_file
-                                logger.info(f"Using real treatment outcomes file")
-                            except Exception as find_err:
-                                logger.warning(f"Couldn't find treatment outcomes file: {find_err}, using standard outcomes")
-                                # Keep using the fallback file
-                    except Exception as e:
-                        logger.warning(f"Error during treatment data lookup: {e}, using standard outcomes")
-                        # Keep using the fallback file
-            # Only check for treatment_file if we're not using the SreekarB/OSFData dataset
-            elif not os.path.exists(treatment_file):
-                # Try app directory as fallback
-                app_dir_treatment = os.path.join(os.path.dirname(os.path.abspath(__file__)), "treatment_outcomes.csv")
-                if os.path.exists(app_dir_treatment):
-                    treatment_file = app_dir_treatment
-                    logger.info(f"Using treatment outcomes file from app directory: {treatment_file}")
+            # If we have a dataset from any method, process it
+            if dataset is not None:
+                # List all keys in the first sample
+                sample = dataset[0]
+                print(f"Sample keys: {list(sample.keys())}")
+                
+                # Look specifically for FC_graph_covariate_data.csv
+                csv_keys = [k for k in sample.keys() if 'csv' in k.lower() or 'covariate' in k.lower()]
+                print(f"Potential CSV keys: {csv_keys}")
+                
+                # Try to get demographics data
+                if 'FC_graph_covariate_data.csv' in sample:
+                    print("Found exact match: FC_graph_covariate_data.csv")
+                    demo_data = sample['FC_graph_covariate_data.csv']
+                elif csv_keys:
+                    key = csv_keys[0]
+                    print(f"Using alternative key: {key}")
+                    demo_data = sample[key]
                 else:
-                    logger.error(f"Treatment outcomes file not found in data directory or app directory")
-                    raise FileNotFoundError(f"Treatment outcomes file not found. Please ensure treatment_outcomes.csv exists in {data_dir} or the application directory.")
+                    print("No demographics CSV found in dataset, checking for other usable columns")
+                    # Check if we have direct demographic columns
+                    demo_cols = ['age', 'gender', 'education', 'wab_aq']
+                    if all(col in sample for col in demo_cols):
+                        print("Found direct demographic columns, using those")
+                        # Create a DataFrame from the sample
+                        demo_data = pd.DataFrame({k: [sample[k]] for k in demo_cols})
+                    else:
+                        print("No usable demographic data found, using fallback")
+                        raise KeyError("Demographics data not found in dataset")
+                
+                # Create and return scaler for demographic features
+                print("Processing demographics data...")
+                _, scaler = self.preprocessor.preprocess_demographics(demo_data)
+                print("Demographics scaler successfully initialized!")
+                return scaler
             
-            logger.info(f"Using demographic file: {demographic_file}")
-            logger.info(f"Using treatment file: {treatment_file}")
+            # If all methods failed
+            raise Exception("All dataset loading methods failed")
             
-            # Special handling for HuggingFace dataset
-            if data_dir == "SreekarB/OSFData":
-                # For NIfTI files, we need to search the API or download regardless of demographic source
-                logger.info("Searching for NIfTI files in the dataset...")
+        except Exception as e:
+            print(f"===== ERROR LOADING DEMOGRAPHICS FROM DATASET =====")
+            print(f"Error: {str(e)}")
+            print("Using fallback normalization for demographics")
+            
+            # Create a manual scaler for all 6 demographic features
+            # [age, gender, education, wab_aq, mpo, handedness]
+            scaler = StandardScaler()
+            # Create a simple dataset with typical ranges to fit the scaler
+            sample_data = np.array([
+                # age, gender, education, wab_aq, mpo, handedness
+                [65, 0, 16, 75, 12, 0],
+                [45, 1, 12, 60, 6, 0],
+                [70, 0, 14, 80, 18, 1],
+                [50, 1, 10, 65, 9, 0]
+            ])
+            scaler.fit(sample_data)
+            print("Created fallback scaler with typical demographic ranges for all 6 features")
+            return None  # Return None to use simple normalization instead
+
+    def generate_fmri(self, age, gender, education, wab_aq, mpo, handedness):
+        """
+        Generate fMRI data based on demographics from FC_graph_covariate_data.csv format
+        
+        Parameters match the columns in FC_graph_covariate_data.csv:
+        - age: Patient's age
+        - gender: "M" or "F"
+        - education: Years of education
+        - wab_aq: Western Aphasia Battery Aphasia Quotient (severity)
+        - mpo: Months post onset
+        - handedness: "R" or "L"
+        """
+        # Validate input parameters
+        try:
+            # Validate numeric inputs are within reasonable ranges
+            age = float(age)
+            if not (18 <= age <= 100):
+                logger.warning(f"Age value {age} is outside normal range (18-100)")
+                age = max(18, min(age, 100))  # Clamp to valid range
+                
+            education = float(education)
+            if not (0 <= education <= 30):
+                logger.warning(f"Education value {education} is outside normal range (0-30)")
+                education = max(0, min(education, 30))
+                
+            wab_aq = float(wab_aq)
+            if not (0 <= wab_aq <= 100):
+                logger.warning(f"WAB-AQ value {wab_aq} is outside normal range (0-100)")
+                wab_aq = max(0, min(wab_aq, 100))
+                
+            mpo = float(mpo)
+            if not (0 <= mpo <= 600):  # 50 years max
+                logger.warning(f"Months post onset value {mpo} is outside normal range (0-600)")
+                mpo = max(0, min(mpo, 600))
+        except ValueError as e:
+            logger.error(f"Invalid numeric input: {e}")
+            raise ValueError(f"Invalid input values: {e}")
+            
+        # Validate categorical inputs
+        if gender not in ["M", "F"]:
+            logger.warning(f"Invalid gender '{gender}', defaulting to 'M'")
+            gender = "M"
+            
+        if handedness not in ["R", "L"]:
+            logger.warning(f"Invalid handedness '{handedness}', defaulting to 'R'")
+            handedness = "R"
+            
+        # Convert categorical inputs to numeric values (0/1)
+        gender_val = 1.0 if gender == "F" else 0.0
+        handedness_val = 1.0 if handedness == "L" else 0.0
+        
+        # Print the input demographics
+        print(f"Generating fMRI with demographics:")
+        print(f"  Age: {age}")
+        print(f"  Gender: {gender} ({gender_val})")
+        print(f"  Education: {education} years")
+        print(f"  WAB-AQ: {wab_aq}")
+        print(f"  MPO: {mpo} months")
+        print(f"  Handedness: {handedness} ({handedness_val})")
+        
+        # Create raw demographics array - use the same order as in the dataset
+        demographics = np.array([[
+            age,            # Age
+            gender_val,     # Gender (0=M, 1=F)
+            education,      # Education years
+            wab_aq,         # WAB-AQ (aphasia severity)
+            mpo,            # Months post onset
+            handedness_val  # Handedness (0=R, 1=L)
+        ]], dtype=np.float32)
+        
+        # Use simple normalization since we had issues with the scaler
+        # This ensures consistent behavior regardless of the dataset structure
+        demographics_scaled = np.array([[
+            age / 100.0,               # Normalize age to 0-1 range
+            gender_val,                # Already 0 or 1
+            education / 20.0,          # Normalize education years
+            wab_aq / 100.0,            # WAB-AQ is on a 0-100 scale
+            mpo / 60.0,                # Normalize months post onset
+            handedness_val             # Already 0 or 1
+        ]], dtype=np.float32)
+        
+        print("Using simple normalization for demographics")
+        
+        # Convert to tensor
+        demographics_tensor = torch.FloatTensor(demographics_scaled).to(self.device)
+        
+        try:
+            # Update the status text
+            print("Generating fMRI (simplified for demo)...")
+            
+            # Use a very small number of steps for demo purposes
+            # For research quality, you would use 500-1000 steps
+            num_steps = 10  # Extremely reduced for speed in demo
+            
+            # Create dummy data for immediate preview
+            dummy_preview = True
+            
+            if dummy_preview:
+                # For demo purposes, show a simple pattern immediately
+                print("Creating quick preview image for demo...")
                 
-                # First check if NIfTI files exist in a local directory
-                local_nii_files = []
+                # Create a simple pattern based on demographics
+                time_dim = 50  # Reduced time dimension
+                spatial_dim = 32  # Small spatial dimension
                 
-                # Check different possible local paths, starting with user-specified directory
-                possible_paths = []
+                # Check memory availability
+                try:
+                    # Calculate approximate memory requirements for the arrays
+                    total_elements = time_dim * spatial_dim**3  # Total elements in final array
+                    # Each element needs 4 bytes (float32)
+                    estimated_memory = total_elements * 4 * 1.5  # 1.5x for intermediate arrays
+                    
+                    # Get available memory (fallback to 8GB if can't determine)
+                    try:
+                        import psutil
+                        available_memory = psutil.virtual_memory().available
+                    except ImportError:
+                        # Fallback to assuming 8GB total with 25% available
+                        available_memory = 8 * 1024**3 * 0.25
+                    
+                    # If estimated memory usage is > 80% of available memory, reduce dimensions
+                    if estimated_memory > 0.8 * available_memory:
+                        logger.warning(f"Reducing dimensions to avoid memory overflow")
+                        spatial_dim = min(spatial_dim, 24)  # Reduce spatial dimensions
+                        time_dim = min(time_dim, 30)       # Reduce time dimension
+                except Exception as e:
+                    logger.warning(f"Error checking memory availability: {e}")
                 
-                # Add user-specified directory from config if available
-                if PREDICTION_CONFIG.get('local_nii_dir'):
-                    user_dir = PREDICTION_CONFIG.get('local_nii_dir')
-                    if os.path.exists(user_dir):
-                        possible_paths.append(user_dir)
-                        logger.info(f"Checking user-specified NIfTI directory: {user_dir}")
+                # Create a simple brain-like pattern with demographic influence
+                intensity = wab_aq / 100.0  # Use WAB-AQ for intensity
+                frequency = age / 50.0      # Use age for frequency
                 
-                # Add other standard paths to check
-                possible_paths.extend([
-                    os.path.join(os.path.dirname(os.path.abspath(__file__)), "nii_files"),
-                    os.path.join(os.path.dirname(os.path.abspath(__file__)), "nifti"),
-                    os.path.join(os.path.dirname(os.path.abspath(__file__)), "fmri"),
-                    os.path.join(os.path.dirname(os.path.abspath(__file__)), "data", "nii_files"),
-                    os.path.join(os.path.dirname(os.path.abspath(__file__)), "data", "nifti"),
-                    "/tmp/nii_files"  # In case files were manually placed here
-                ])
+                # Create coordinate grids
+                x = np.linspace(-1, 1, spatial_dim)
+                y = np.linspace(-1, 1, spatial_dim)
+                z = np.linspace(-1, 1, spatial_dim)
+                xx, yy, zz = np.meshgrid(x, y, z, indexing='ij')
                 
-                for path in possible_paths:
-                    if os.path.exists(path):
-                        # Check for .nii or .nii.gz files
-                        nii_files_here = []
-                        nii_files_here.extend(glob.glob(os.path.join(path, "*.nii")))
-                        nii_files_here.extend(glob.glob(os.path.join(path, "*.nii.gz")))
-                        
-                        if nii_files_here:
-                            local_nii_files.extend(nii_files_here)
-                            logger.info(f"Found {len(nii_files_here)} local NIfTI files in {path}")
+                # Create a sphere
+                sphere = (xx**2 + yy**2 + zz**2) < 0.7**2
                 
-                if local_nii_files:
-                    logger.info(f"Using {len(local_nii_files)} local NIfTI files instead of searching HuggingFace dataset")
-                    
-                    # Log filenames to help with debugging
-                    for i, nii_file in enumerate(local_nii_files[:5]):  # Log first 5 files
-                        logger.info(f"Local NIfTI file {i+1}: {os.path.basename(nii_file)}")
-                    
-                    if len(local_nii_files) > 5:
-                        logger.info(f"... and {len(local_nii_files) - 5} more files")
-                    
-                    nii_files = local_nii_files
-                else:
-                    # If no local files found, find NIfTI files using our comprehensive search function
-                    logger.info("No local NIfTI files found. Searching in the HuggingFace dataset...")
-                    nii_files = find_nifti_files_in_hf_dataset(data_dir, dataset)
-                    
-                    # Log what was found
-                    if nii_files:
-                        logger.info(f"Found {len(nii_files)} NIfTI files in the dataset")
-                        
-                        # Log filenames to help with debugging
-                        for i, nii_file in enumerate(nii_files[:5]):  # Log first 5 files
-                            logger.info(f"NIfTI file {i+1}: {os.path.basename(nii_file)}")
-                        
-                        if len(nii_files) > 5:
-                            logger.info(f"... and {len(nii_files) - 5} more files")
-                    else:
-                        logger.warning("No NIfTI files found in the dataset. This will likely cause an error later.")
+                # Create patterns inside the sphere
+                pattern = np.sin(xx * frequency) * np.cos(yy * frequency) * np.sin(zz * frequency) * intensity
+                pattern = pattern * sphere  # Apply the sphere mask
                 
-                if demographic_file == "FROM_DATASET_API":
-                    logger.info("Using dataset API for demographics rather than files")
-                    
-                    # Extract demographics and prepare dataset for run_analysis
-                    # Get demographics directly from the dataset
-                    demo_data = [
-                        dataset['train']['age'],
-                        dataset['train']['gender'],
-                        dataset['train']['mpo'],
-                        dataset['train']['wab_aq']
-                    ]
-                    
-                    # Create demo_types
-                    demo_types = ['continuous', 'categorical', 'continuous', 'continuous']
-                    
-                    # Run with API data
-                    results = run_analysis(
-                        data_dir=data_dir,
-                        demographic_file=None,  # No physical file, data from API
-                        treatment_file=treatment_file,  # Using the treatment file (real or synthetic)
-                        latent_dim=latent_dim,
-                        nepochs=nepochs,
-                        bsize=bsize,
-                        save_model=True,
-                        use_hf_dataset=True,
-                        hf_dataset=dataset,
-                        hf_nii_files=nii_files,
-                        hf_demo_data=demo_data,
-                        hf_demo_types=demo_types
-                    )
-                else:
-                    logger.info(f"Using FC_graph_covariate_data.csv from dataset: {demographic_file}")
-                    
-                    # Run with CSV file but still pass the NIfTI files
-                    results = run_analysis(
-                        data_dir=data_dir,
-                        demographic_file=demographic_file,  # Using the CSV file from dataset
-                        treatment_file=treatment_file,     # Now using the treatment file (real or synthetic)
-                        latent_dim=latent_dim,
-                        nepochs=nepochs,
-                        bsize=bsize,
-                        save_model=True,
-                        use_hf_dataset=True,              # Still using HF for NIfTI
-                        hf_nii_files=nii_files            # Pass the found NIfTI files
-                    )
-            else:
-                # Standard call for local files
-                results = run_analysis(
-                    data_dir=data_dir,
-                    demographic_file=demographic_file,
-                    treatment_file=treatment_file,
-                    latent_dim=latent_dim,
-                    nepochs=nepochs,
-                    bsize=bsize,
-                    save_model=True
-                )
-            
-            # Get the VAE figure from results
-            vae_fig = results.get('figures', {}).get('vae')
-            
-            figures['vae'] = vae_fig
-            
-            if results:
-                self.vae = results.get('vae')
-                self.predictor = results.get('predictor')
-                latents = results.get('latents')
-                demographics = results.get('demographics')
-                predictor_cv_results = results.get('predictor_cv_results')
+                # Add some random variation
+                np.random.seed(int(education + mpo))  # Use demographics as seed
+                noise = np.random.normal(0, 0.1, (spatial_dim, spatial_dim, spatial_dim))
+                pattern += noise * sphere
                 
-                # Store the latent dimension
-                self.latent_dim = latent_dim
+                # Normalize to 0-1 range
+                pattern = (pattern - pattern.min()) / (pattern.max() - pattern.min() + 1e-8)
                 
-                # Mark models as trained
-                self.trained = True
+                # Create a time series by adding slight variations
+                fmri_data = np.zeros((time_dim, spatial_dim, spatial_dim, spatial_dim))
+                for t in range(time_dim):
+                    t_factor = np.sin(t / time_dim * 2 * np.pi * 5) * 0.1
+                    fmri_data[t] = pattern + t_factor * sphere
                 
-                # Prepare prediction visualization if available
-                if self.predictor and predictor_cv_results:
-                    try:
-                        # Get the outcome variable data
-                        outcomes = None
-                        if demographics:
-                            if outcome_variable == 'wab_aq' and 'wab_aq' in demographics:
-                                outcomes = demographics['wab_aq']
-                            elif outcome_variable == 'age' and 'age' in demographics:
-                                outcomes = demographics['age']
-                            elif (outcome_variable == 'mpo' or outcome_variable == 'months_post_onset') and 'mpo' in demographics:
-                                outcomes = demographics['mpo']
-                            else:
-                                # Try to find the outcome in demographics data
-                                for key in demographics:
-                                    if outcome_variable.lower() in key.lower():
-                                        outcomes = demographics[key]
-                                        logger.info(f"Found matching outcome variable: {key}")
-                                        break
-                        
-                        if outcomes is None:
-                            logger.warning(f"Could not find outcome variable '{outcome_variable}' in demographics")
-                            # Create a dummy array to prevent errors
-                            if 'predictions' in predictor_cv_results:
-                                outcomes = np.zeros_like(predictor_cv_results['predictions'])
-                            else:
-                                logger.warning("Cannot create prediction plots without outcome data")
-                    except Exception as e:
-                        logger.error(f"Error getting outcome variable: {e}")
-                        outcomes = None
-                    
-                    # Create plots if we have the necessary data
-                    if outcomes is not None and 'prediction_stds' in predictor_cv_results and 'predictions' in predictor_cv_results:
-                        # Create prediction plots
-                        prediction_fig = self.create_prediction_plots(
-                            latents, 
-                            demographics,
-                            outcomes, 
-                            predictor_cv_results['predictions'],
-                            predictor_cv_results['prediction_stds']
-                        )
-                        figures['prediction'] = prediction_fig
-                        
-                        # Create feature importance plot if available
-                        try:
-                            feature_importance = self.predictor.get_feature_importance()
-                            if feature_importance is not None:
-                                importance_fig = self.create_importance_plot(feature_importance)
-                                figures['importance'] = importance_fig
-                        except Exception as e:
-                            logger.warning(f"Could not create feature importance plot: {e}")
-                        
-                logger.info("Training completed successfully")
+                print("Preview image created!")
+                generated_fmri = [torch.from_numpy(fmri_data).float()]
                 
-                # Create learning curve plots if available
-                if 'fold_metrics' in predictor_cv_results:
-                    learning_fig = self.create_learning_curve_plot(
-                        predictor_cv_results['fold_metrics']
+            else:
+                # Actually run the diffusion model (slow)
+                with torch.no_grad():
+                    generated_fmri = self.model.sample(
+                        demographics_tensor, 
+                        device=self.device,
+                        num_steps=num_steps
                     )
-                    figures['learning'] = learning_fig
+                    
+        except Exception as e:
+            print(f"Error during generation: {e}")
+            # Create a fallback simple pattern if generation fails
+            simple_shape = (50, 32, 32, 32)
+            fmri_data = np.zeros(simple_shape)
+            # Add a simple sphere
+            center = np.array([16, 16, 16])
+            radius = 10
+            for i in range(simple_shape[1]):
+                for j in range(simple_shape[2]):
+                    for k in range(simple_shape[3]):
+                        if np.sum((np.array([i, j, k]) - center)**2) < radius**2:
+                            fmri_data[:, i, j, k] = 0.8
+            # Add time variation
+            for t in range(simple_shape[0]):
+                fmri_data[t] *= 0.5 + 0.5 * np.sin(t / simple_shape[0] * 2 * np.pi)
+            
+            generated_fmri = [torch.from_numpy(fmri_data).float()]
+            print("Using fallback pattern due to generation error")
         
+        # Convert to numpy and create NIfTI
+        fmri_data = generated_fmri[0].cpu().numpy()
+        
+        # Create axial, sagittal and coronal views for display
+        print(f"Creating visualization slices from fMRI data of shape {fmri_data.shape}")
+        
+        try:
+            # Ensure we have a 4D volume (time, x, y, z)
+            if len(fmri_data.shape) != 4:
+                raise ValueError(f"Expected 4D data, got shape {fmri_data.shape}")
+                
+            mid_time = fmri_data.shape[0] // 2
+            mid_x = fmri_data.shape[1] // 2
+            mid_y = fmri_data.shape[2] // 2
+            mid_z = fmri_data.shape[3] // 2
+            
+            # Get slices from the middle of each dimension
+            axial_slice = fmri_data[mid_time, :, :, mid_z]
+            sagittal_slice = fmri_data[mid_time, mid_x, :, :]
+            coronal_slice = fmri_data[mid_time, :, mid_y, :]
+            
+            # Define helper function for normalization to avoid code duplication
+            def normalize_slice(slice_data):
+                """Normalize slice data to 0-1 range and convert to uint8"""
+                # Avoid division by zero with epsilon
+                eps = 1e-8
+                min_val = slice_data.min()
+                max_val = slice_data.max()
+                
+                # Normalize to 0-1 range
+                normalized = (slice_data - min_val) / (max_val - min_val + eps)
+                
+                # Convert to uint8 for display (0-255)
+                return (normalized * 255).astype(np.uint8)
+            
+            # Normalize all slices using the helper function
+            axial_slice = normalize_slice(axial_slice)
+            sagittal_slice = normalize_slice(sagittal_slice)
+            coronal_slice = normalize_slice(coronal_slice)
+            
+            # Apply colormap for better visualization
+            # Use a simpler approach instead of matplotlib colormaps
+            # Create RGB images with a heat map (blue to red)
+            def simple_colormap(img):
+                # Create an RGB image
+                rgb_img = np.zeros((*img.shape, 3), dtype=np.uint8)
+                # Blue channel (low values)
+                rgb_img[:, :, 0] = np.clip(255 - img, 0, 255)
+                # Red channel (high values)
+                rgb_img[:, :, 2] = np.clip(img, 0, 255)
+                # Green channel (mid values)
+                rgb_img[:, :, 1] = np.clip(255 - np.abs(img - 128), 0, 255)
+                return rgb_img
+            
+            # Apply the colormap to all slices
+            slices = [axial_slice, sagittal_slice, coronal_slice]
+            colored_slices = [simple_colormap(slice_img) for slice_img in slices]
+            axial_slice, sagittal_slice, coronal_slice = colored_slices
+            
+            print("Successfully created visualization slices")
+            
         except Exception as e:
-            logger.error(f"Error in training: {str(e)}", exc_info=True)
-            error_fig = plt.figure(figsize=(10, 6))
+            print(f"Error creating visualization slices: {e}")
+            # Create simple fallback images if visualization fails
+            simple_shape = (32, 32)
+            
+            # Create simple circle patterns as fallbacks
+            fallback_img = np.zeros((*simple_shape, 3), dtype=np.uint8)
+            center = np.array([simple_shape[0]//2, simple_shape[1]//2])
+            radius = min(simple_shape) // 3
+            
+            for i in range(simple_shape[0]):
+                for j in range(simple_shape[1]):
+                    if np.sum((np.array([i, j]) - center)**2) < radius**2:
+                        # Different colors for each view
+                        if np.sum((np.array([i, j]) - center)**2) < (radius*0.7)**2:
+                            intensity = 0.8
+                        else:
+                            intensity = 0.5
+                        
+                        # Use different colors for each view
+                        axial_color = np.array([0, 0, 255])  # Blue
+                        sagittal_color = np.array([0, 255, 0])  # Green
+                        coronal_color = np.array([255, 0, 0])  # Red
+                        
+                        fallback_img[i, j] = axial_color * intensity
             
-            # Provide more helpful error message for common issues
-            error_message = str(e)
-            if "demographic" in error_message.lower() and "not found" in error_message.lower():
-                error_message = f"Demographic file not found. Please ensure FC_graph_covariate_data.csv exists in your data directory or application directory."
-            elif "treatment" in error_message.lower() and "not found" in error_message.lower():
-                error_message = f"Treatment outcomes file not found. Please ensure treatment_outcomes.csv exists in your data directory or application directory."
-            elif "cuda" in error_message.lower() or "gpu" in error_message.lower():
-                error_message = f"GPU/CUDA error detected. Try running with CPU only."
+            axial_slice = fallback_img.copy()
+            # Modify slightly for other views
+            sagittal_slice = fallback_img.copy()
+            sagittal_slice[:, :, [0, 1, 2]] = fallback_img[:, :, [1, 2, 0]]  # Shift colors
+            coronal_slice = fallback_img.copy()
+            coronal_slice[:, :, [0, 1, 2]] = fallback_img[:, :, [2, 0, 1]]  # Shift colors again
+        
+        # Create NIfTI file
+        img = nib.Nifti1Image(fmri_data, np.eye(4))
+        
+        # Save to temporary file with better resource management
+        try:
+            with tempfile.NamedTemporaryFile(suffix='.nii.gz', delete=False) as tmp:
+                output_path = tmp.name
+                nib.save(img, output_path)
+                
+            # Register this file for cleanup on program exit
+            global temp_files
+            temp_files.add(output_path)
+            logger.debug(f"Created temporary file: {output_path}")
             
-            plt.text(0.5, 0.5, f"Error: {error_message}", 
-                    horizontalalignment='center', verticalalignment='center',
-                    fontsize=12, color='red', wrap=True)
-            plt.axis('off')
-            figures['error'] = error_fig
+            return axial_slice, sagittal_slice, coronal_slice, output_path
+        except Exception as e:
+            logger.error(f"Error saving NIfTI file: {e}")
+            # Return results without file on error
+            return axial_slice, sagittal_slice, coronal_slice, None
         
-        return figures
-    
-    def predict_treatment(self, fmri_file=None, age=50, sex="M", 
-                         months_post_stroke=12, wab_score=50, fc_matrix=None):
+    def start_training(self, num_epochs=10, batch_size=4, learning_rate=1e-4):
         """
-        Predict treatment outcome for a patient
+        Start the training process with the specified parameters
+        Using real data from HuggingFace Datasets "SreekarB/OSFData"
         
         Args:
-            fmri_file: Path to patient's fMRI file
-            age: Patient's age at stroke
-            sex: Patient's sex (M/F)
-            months_post_stroke: Months since stroke
-            wab_score: Current WAB score
-            fc_matrix: Pre-processed FC matrix (if fMRI file not provided)
+            num_epochs: Number of epochs to train for
+            batch_size: Batch size for training
+            learning_rate: Learning rate for the optimizer
             
         Returns:
-            Prediction results and visualization
+            Training log and status message
         """
-        if not self.trained:
-            return "Please train the models first!", None
-        
+        # Import required training modules
         try:
-            # Process fMRI to FC matrix if provided
-            if fmri_file and not fc_matrix:
-                logger.info(f"Processing fMRI file: {fmri_file}")
-                try:
-                    # First try without synthetic data
-                    fc_matrix = process_single_fmri(fmri_file, allow_synthetic=False)
-                    logger.info(f"Successfully processed fMRI file to FC matrix")
-                except Exception as proc_err:
-                    logger.warning(f"Error processing fMRI with standard methods: {proc_err}")
-                    # If that fails, try with synthetic data allowed
-                    logger.info("Attempting with synthetic data generation")
-                    try:
-                        fc_matrix = process_single_fmri(fmri_file, allow_synthetic=True)
-                        logger.info("Successfully created synthetic FC data as fallback")
-                    except Exception as synth_err:
-                        logger.error(f"Failed even with synthetic data: {synth_err}")
-                        raise ValueError(f"Could not process fMRI file. Standard processing error: {str(proc_err)}. Synthetic data creation also failed.")
+            # Set up Python path for imports
+            import sys
+            import os
+            
+            # Add the latent_diffusion directory to the Python path
+            latent_diffusion_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "latent_diffusion")
+            if latent_diffusion_path not in sys.path:
+                sys.path.insert(0, latent_diffusion_path)
             
-            if fc_matrix is None:
-                return "Please provide either an fMRI file or an FC matrix", None
+            # Add the model_utils and data_utils directories to the Python path
+            model_utils_path = os.path.join(latent_diffusion_path, "model_utils")
+            data_utils_path = os.path.join(latent_diffusion_path, "data_utils")
+            if model_utils_path not in sys.path:
+                sys.path.insert(0, model_utils_path)
+            if data_utils_path not in sys.path:
+                sys.path.insert(0, data_utils_path)
+                
+            print(f"Python path set up: {sys.path[:5]}")
             
-            # Ensure FC matrix is properly shaped
-            if isinstance(fc_matrix, list):
-                fc_matrix = np.array(fc_matrix)
+            # Now import the training module
+            from latent_diffusion.train import main as train_main
+            import threading
+            import io
             
-            # Get latent representation
-            logger.info("Extracting latent representation from FC matrix")
+            # Mark as training 
+            self.is_training = True
+            self.current_epoch = 0
+            self.training_logs = []
+            
+            # Simple stdout capture for logs
+            original_stdout = sys.stdout
+            log_capture = io.StringIO()
+            sys.stdout = log_capture
+            
+            def training_thread():
+                try:
+                    # datetime and time are already imported at the top level
+                    
+                    # Current timestamp for log entries
+                    start_time = datetime.datetime.now()
+                    
+                    # Print startup banner
+                    print("\n" + "="*80)
+                    print(f"🚀 STARTING FULL TRAINING WITH ACTUAL DATA FROM HUGGING FACE")
+                    print(f"⏱️ Starting at: {start_time.strftime('%Y-%m-%d %H:%M:%S')}")
+                    print(f"📊 Dataset: SreekarB/OSFData")
+                    print(f"⚙️ Parameters: epochs={num_epochs}, batch_size={batch_size}, lr={learning_rate}")
+                    print(f"💡 This is using REAL data, NOT demo/sample data")
+                    print("="*80 + "\n")
+                    
+                    # Ensure we're using the full dataset from Hugging Face
+                    os.environ['USE_FULL_DATA'] = '1'
+                    os.environ['DISABLE_DEMO_MODE'] = '1'
+                    
+                    # Disable wandb when running on Hugging Face Spaces
+                    # This prevents "I/O operation on closed file" errors
+                    os.environ['WANDB_DISABLED'] = 'true'
+                    os.environ['WANDB_MODE'] = 'disabled'
+                    
+                    # Function to update training logs for the UI
+                    def log_progress_callback(epoch, total_epochs, loss, duration):
+                        """Callback function to update training progress in the UI"""
+                        progress_pct = int((epoch + 1) / total_epochs * 100)
+                        timestamp = datetime.datetime.now()
+                        elapsed = (timestamp - start_time).total_seconds() / 60.0  # minutes
+                        
+                        log_entry = f"[{timestamp.strftime('%H:%M:%S')}] Epoch {epoch+1}/{total_epochs} completed in {duration:.2f}s. Loss: {loss:.6f}"
+                        self.training_logs.append(log_entry)
+                        
+                        # Calculate and log ETA
+                        if epoch > 0:
+                            eta_minutes = elapsed / (epoch + 1) * (total_epochs - epoch - 1)
+                            eta_str = f"{eta_minutes:.1f} minutes"
+                            eta_entry = f"[{timestamp.strftime('%H:%M:%S')}] Progress: {progress_pct}%. Elapsed: {elapsed:.1f} min. ETA: {eta_str}"
+                            self.training_logs.append(eta_entry)
+                        
+                        # Keep only the last 100 log entries to avoid memory issues
+                        if len(self.training_logs) > 100:
+                            self.training_logs = self.training_logs[-100:]
+                        
+                        # Print to console for debugging
+                        print(f"🔄 Epoch {epoch+1}/{total_epochs} completed. Loss: {loss:.6f}")
+                        if epoch % 5 == 0 or epoch == total_epochs - 1:
+                            print(f"📊 Training progress: {progress_pct}% complete")
+                    
+                    # Call the training function from train.py
+                    print("📡 Initializing training pipeline...")
+                    print("📚 Loading dataset from Hugging Face...")
+                    
+                    # The modified train.py will load the full dataset
+                    train_main(
+                        num_epochs=num_epochs,
+                        batch_size=batch_size,
+                        learning_rate=learning_rate,
+                        use_real_data=True,  # Explicitly request real data
+                        progress_callback=log_progress_callback
+                    )
+                    
+                    # No need to import datetime/time here - we already imported them at the top of the function
+                    
+                    # Calculate total duration
+                    end_time = datetime.datetime.now()
+                    duration = (end_time - start_time).total_seconds() / 60.0  # minutes
+                    
+                    self.is_training = False
+                    print("\n" + "="*80)
+                    print(f"✅ TRAINING COMPLETED SUCCESSFULLY")
+                    print(f"⏱️ Total duration: {duration:.2f} minutes")
+                    print(f"📅 Finished at: {end_time.strftime('%Y-%m-%d %H:%M:%S')}")
+                    print(f"💾 Model saved to: models/fmri_diffusion.pt")
+                    print("="*80 + "\n")
+                    
+                    # Add final completion log
+                    self.training_logs.append(f"[{end_time.strftime('%H:%M:%S')}] ✅ Training completed in {duration:.2f} minutes")
+                    self.training_logs.append(f"[{end_time.strftime('%H:%M:%S')}] 💾 Model saved to: models/fmri_diffusion.pt")
+                    
+                except Exception as e:
+                    # datetime is already imported at the top level
+                    self.is_training = False
+                    error_msg = f"❌ [ERROR] Training failed: {str(e)}"
+                    print(error_msg)
+                    print(f"Stack trace: {traceback.format_exc()}")
+                    
+                    # Add error to logs
+                    timestamp = datetime.datetime.now().strftime('%H:%M:%S')
+                    self.training_logs.append(f"[{timestamp}] {error_msg}")
+                    self.training_logs.append(f"[{timestamp}] See console for full stack trace")
+                    
+                finally:
+                    # Restore stdout and properly close the log capture
+                    sys.stdout = original_stdout
+                    try:
+                        log_capture.close()
+                    except Exception as e:
+                        logger.warning(f"Error closing log capture: {e}")
             
-            # Check if VAE is properly initialized
-            if self.vae is None:
-                raise ValueError("VAE model is not available for latent extraction. Please train the VAE model first.")
+            # Create a message
+            initial_message = f"""
+            ## Starting Training with Real Data from Hugging Face
             
-            # Check for get_latents method
-            if not hasattr(self.vae, 'get_latents'):
-                raise ValueError("VAE model does not have get_latents method. This is likely due to an incompatible VAE model type.")
+            **Dataset**: SreekarB/OSFData
             
-            # Log FC matrix info for debugging
-            logger.info(f"FC matrix shape: {fc_matrix.shape}, type: {type(fc_matrix)}")
+            Training will run for {num_epochs} epochs with batch size {batch_size}.
+            This will use the REAL dataset from Hugging Face, not sample or demo data.
             
-            try:
-                if len(fc_matrix.shape) == 2:  # If matrix is 2D (e.g., 264x264)
-                    # Convert to flattened upper triangular form
-                    n = fc_matrix.shape[0]
-                    indices = np.triu_indices(n, k=1)
-                    fc_flattened = fc_matrix[indices]
-                    fc_flattened = fc_flattened.reshape(1, -1)
-                    logger.info(f"Flattened FC matrix shape: {fc_flattened.shape}")
-                    latent = self.vae.get_latents(fc_flattened)
-                else:
-                    # Assume already flattened
-                    fc_reshaped = fc_matrix.reshape(1, -1)
-                    logger.info(f"Reshaped FC matrix shape: {fc_reshaped.shape}")
-                    latent = self.vae.get_latents(fc_reshaped)
-                
-                # Log info about resulting latent
-                logger.info(f"Successfully extracted latent representation, shape: {latent.shape}")
-            except Exception as latent_err:
-                logger.error(f"Error extracting latent representation: {latent_err}")
-                # Try a more robust approach
-                logger.info("Attempting alternative latent extraction approach...")
-                
-                # Ensure we have a properly shaped input regardless of original shape
-                fc_flat = np.array(fc_matrix).flatten()
-                
-                # If it's too short for a 264x264 matrix, pad it
-                expected_length = 34716  # For 264x264 matrix
-                if len(fc_flat) < expected_length:
-                    logger.warning(f"FC vector too short ({len(fc_flat)}), padding to {expected_length}")
-                    fc_flat = np.pad(fc_flat, (0, expected_length - len(fc_flat)), mode='constant')
-                elif len(fc_flat) > expected_length:
-                    logger.warning(f"FC vector too long ({len(fc_flat)}), truncating to {expected_length}")
-                    fc_flat = fc_flat[:expected_length]
-                
-                # Reshape and try again
-                fc_flat = fc_flat.reshape(1, -1)
-                latent = self.vae.get_latents(fc_flat)
+            This process will take time and will run in the background.
+            You can continue using the app while training proceeds.
             
-            # Prepare demographics
-            demographics = {
-                'age': np.array([float(age)]),
-                'gender': np.array([sex]),
-                'mpo': np.array([float(months_post_stroke)]),
-                'wab_aq': np.array([float(wab_score)]),
-                'education': np.array([16.0])  # Default value for education
-            }
+            Training logs will appear below as they become available.
+            """
             
-            logger.info("Making prediction")
-            # Make prediction
-            if self.predictor is None:
-                return "Predictor model not trained", None
-                
-            # Make prediction using the model's predict method
-            prediction, prediction_std = self.predictor.predict(latent, demographics)
+            # Set up the training thread
+            training_thread = threading.Thread(target=training_thread)
+            training_thread.daemon = True  # Allow the thread to be terminated when the main program exits
             
-            # Create visualization
-            fig = self.plot_treatment_trajectory(
-                current_score=wab_score,
-                predicted_score=prediction[0],
-                months_post_stroke=months_post_stroke,
-                prediction_std=prediction_std[0]
-            )
+            # Set up a thread to update the UI with training logs
+            def update_ui_thread():
+                """Thread to update the UI with training logs"""
+                try:
+                    # Periodically update the UI with training logs
+                    # time is already imported at the top level
+                    
+                    # Initialize progress
+                    progress = 0
+                    loss_value = 0.0
+                    
+                    while self.is_training:
+                        # Sleep to avoid hammering the UI
+                        time.sleep(1)
+                        
+                        # Try to capture logs but handle closed file errors
+                        try:
+                            logs = log_capture.getvalue()
+                        except (ValueError, IOError) as e:
+                            # Handle "I/O operation on closed file" error
+                            logger.debug(f"Could not read log_capture: {e}")
+                            logs = ""
+                        
+                        # Get copy of training logs for processing
+                        logs_copy = self.training_logs.copy() if self.training_logs else []
+                            
+                        # Now work with the logs
+                        if logs_copy:
+                            # Extract progress percentage if available
+                            progress_lines = [line for line in logs_copy if "Progress:" in line]
+                            if progress_lines:
+                                progress_line = progress_lines[-1]
+                                try:
+                                    progress = int(progress_line.split("Progress:")[1].split("%")[0].strip())
+                                except Exception as e:
+                                    logger.warning(f"Error parsing progress: {e}")
+                            
+                            # Extract loss value if available
+                            loss_lines = [line for line in logs_copy if "Loss:" in line]
+                            if loss_lines:
+                                loss_line = loss_lines[-1]
+                                try:
+                                    loss_value = float(loss_line.split("Loss:")[1].strip().split()[0])
+                                except Exception as e:
+                                    logger.warning(f"Error parsing loss: {e}")
+                    
+                    # Final update after training completes
+                    if progress >= 99:
+                        progress = 100
+                    
+                except Exception as e:
+                    logger.error(f"Error in UI update thread: {e}")
+                finally:
+                    # Clean up resources when the thread exits
+                    logger.debug("UI update thread terminated")
             
-            result_text = f"Predicted treatment outcome: {prediction[0]:.2f} ± {2*prediction_std[0]:.2f}"
-            logger.info(result_text)
+            # Start both threads
+            training_thread.start()
+            ui_thread = threading.Thread(target=update_ui_thread)
+            ui_thread.daemon = True
+            ui_thread.start()
             
-            return result_text, fig
+            # Return initial message
+            return initial_message
             
         except Exception as e:
-            error_msg = f"Error in prediction: {str(e)}"
-            logger.error(error_msg, exc_info=True)
-            error_fig = plt.figure(figsize=(10, 6))
+            error_message = f"""
+            ## Error Starting Training
             
-            # Provide more helpful error message for common issues
-            error_str = str(e).lower()
-            if "fmri_file" in error_str or "file not found" in error_str:
-                error_msg = "Error: fMRI file not found or invalid. Please provide a valid NIfTI file."
-            elif "fc_matrix" in error_str:
-                error_msg = "Error: Invalid FC matrix format. Please ensure the matrix is properly formatted."
-            elif "predictor" in error_str and "none" in error_str:
-                error_msg = "Error: Prediction model not trained. Please train the model first."
-            elif "cuda" in error_str or "gpu" in error_str:
-                error_msg = "Error: GPU/CUDA error. Try running with CPU only."
-            elif "radius option" in error_str:
-                error_msg = ("Error: Could not process the fMRI file with any radius option. "
-                            "The file may be corrupted or in an incompatible format. "
-                            "Please try a different NIfTI file.")
-            elif "insufficient time points" in error_str:
-                error_msg = "Error: The fMRI file has insufficient time points for analysis. A minimum of 20 time points is required."
-            elif "latent" in error_str and ("extract" in error_str or "representation" in error_str):
-                error_msg = ("Error: Could not extract latent representation from the FC matrix. "
-                            "This may be due to an incompatible format or the VAE model not being properly trained. "
-                            "Please ensure the VAE model is trained first.")
-            elif "vae" in error_str and "not available" in error_str:
-                error_msg = "Error: VAE model is not available. Please train the VAE model in Tab 1 before making predictions."
-            elif "vae model does not have get_latents" in error_str:
-                error_msg = "Error: The VAE model is incompatible with the prediction pipeline. Please retrain the VAE model."
-            else:
-                error_msg = f"Error in prediction: {str(e)}"
-                
-            plt.text(0.5, 0.5, error_msg,
-                    horizontalalignment='center', verticalalignment='center',
-                    fontsize=12, color='red', wrap=True)
-            plt.axis('off')
-            return error_msg, error_fig
-    
-    def plot_treatment_trajectory(self, current_score, predicted_score, 
-                                months_post_stroke, prediction_std, 
-                                treatment_duration=6):
-        """
-        Create a visualization of predicted treatment trajectory
-        
-        Args:
-            current_score: Current WAB score
-            predicted_score: Predicted WAB score after treatment
-            months_post_stroke: Current months post stroke
-            prediction_std: Standard deviation of prediction
-            treatment_duration: Duration of treatment in months
+            Failed to start training: {str(e)}
             
-        Returns:
-            matplotlib figure
-        """
-        fig = plt.figure(figsize=(10, 6))
-        
-        # X-axis: months
-        x = np.array([months_post_stroke, months_post_stroke + treatment_duration])
-        
-        # Y-axis: WAB scores
-        y = np.array([current_score, predicted_score])
+            Please check your environment setup and try again.
+            """
+            print(f"Error starting training: {e}")
+            print(traceback.format_exc())
+            return error_message
+
+def download_demographic_csv():
+    """
+    Directly download the FC_graph_covariate_data.csv file from Hugging Face Hub
+    using the Hugging Face Hub API
+    """
+    try:
+        print("Attempting to download demographics CSV directly from Hugging Face Hub...")
+        from huggingface_hub import hf_hub_download
         
-        # Plot the trajectory
-        plt.plot(x, y, 'bo-', linewidth=2, label='Predicted Trajectory')
+        # Check if we can access Hugging Face Hub
+        test_huggingface_connection()
         
-        # Add confidence interval
-        plt.fill_between(
-            x, 
-            [y[0], y[1] - 2*prediction_std],
-            [y[0], y[1] + 2*prediction_std],
-            alpha=0.2, color='blue', label='95% Confidence Interval'
+        # Try to download the file directly
+        csv_path = hf_hub_download(
+            repo_id="SreekarB/OSFData",
+            filename="FC_graph_covariate_data.csv",
+            repo_type="dataset"
         )
         
-        # Add reference lines
-        if current_score < predicted_score:
-            improvement = predicted_score - current_score
-            plt.axhline(y=current_score, color='r', linestyle='--', alpha=0.5, 
-                      label=f'Current WAB = {current_score:.1f}')
-            plt.axhline(y=predicted_score, color='g', linestyle='--', alpha=0.5,
-                      label=f'Predicted WAB = {predicted_score:.1f} (+{improvement:.1f})')
-        else:
-            decline = current_score - predicted_score
-            plt.axhline(y=current_score, color='r', linestyle='--', alpha=0.5, 
-                      label=f'Current WAB = {current_score:.1f}')
-            plt.axhline(y=predicted_score, color='orange', linestyle='--', alpha=0.5,
-                      label=f'Predicted WAB = {predicted_score:.1f} (-{decline:.1f})')
-        
-        # Add labels and title
-        plt.xlabel('Months Post Stroke')
-        plt.ylabel('WAB Score')
-        plt.title('Predicted Treatment Trajectory')
-        plt.legend(loc='best')
-        
-        # Set y-axis limits
-        plt.ylim([0, 100])
-        
-        plt.tight_layout()
-        return fig
-    
-    def create_prediction_plots(self, latents, demographics, y_true, y_pred, y_std):
-        """Create prediction performance plots"""
-        fig = plt.figure(figsize=(12, 8))
-        
-        # Create a 2x2 grid for plots
-        gs = plt.GridSpec(2, 2, figure=fig)
-        
-        # Plot predicted vs actual values
-        ax1 = fig.add_subplot(gs[0, 0])
-        
-        # Regression plots
-        # Scatter plot
-        ax1.scatter(y_true, y_pred, alpha=0.7)
-        
-        # Add perfect prediction line
-        min_val = min(np.min(y_true), np.min(y_pred))
-        max_val = max(np.max(y_true), np.max(y_pred))
-        ax1.plot([min_val, max_val], [min_val, max_val], 'r--')
-        
-        ax1.set_xlabel('Actual Values')
-        ax1.set_ylabel('Predicted Values')
-        ax1.set_title('Predicted vs. Actual Values')
-        
-        # Add R² to the plot
-        r2 = r2_score(y_true, y_pred)
-        ax1.text(0.05, 0.95, f'R² = {r2:.4f}', transform=ax1.transAxes,
-               bbox=dict(facecolor='white', alpha=0.5))
-        
-        # Plot residuals
-        ax2 = fig.add_subplot(gs[0, 1])
-        residuals = y_true - y_pred
-        ax2.scatter(y_pred, residuals, alpha=0.7)
-        ax2.axhline(y=0, color='r', linestyle='--')
-        ax2.set_xlabel('Predicted Values')
-        ax2.set_ylabel('Residuals')
-        ax2.set_title('Residual Plot')
-        
-        # Plot prediction errors
-        ax3 = fig.add_subplot(gs[1, 0])
-        ax3.errorbar(range(len(y_pred)), y_pred, yerr=2*y_std, fmt='o', alpha=0.7,
-                   label='Predicted ± 2σ')
-        ax3.plot(range(len(y_true)), y_true, 'rx', alpha=0.7, label='Actual')
-        ax3.set_xlabel('Sample Index')
-        ax3.set_ylabel('Value')
-        ax3.set_title('Prediction with Error Bars')
-        ax3.legend()
-        
-        # Plot error distribution
-        ax4 = fig.add_subplot(gs[1, 1])
-        ax4.hist(residuals, bins=20, alpha=0.7)
-        ax4.axvline(x=0, color='r', linestyle='--')
-        ax4.set_xlabel('Prediction Error')
-        ax4.set_ylabel('Frequency')
-        ax4.set_title('Error Distribution')
-        
-        plt.tight_layout()
-        return fig
-    
-    def create_importance_plot(self, feature_importance, top_n=15):
-        """Create feature importance plot"""
-        # If feature_importance is a DataFrame, use it directly
-        if isinstance(feature_importance, pd.DataFrame):
-            importance_df = feature_importance
-        else:
-            # Create DataFrame
-            importance_df = pd.DataFrame({
-                'feature': [f'Feature {i}' for i in range(len(feature_importance))],
-                'importance': feature_importance
-            })
-        
-        # Get top N features
-        top_features = importance_df.sort_values('importance', ascending=False).head(top_n)
-        
-        # Create plot
-        fig = plt.figure(figsize=(10, 6))
-        plt.barh(range(len(top_features)), top_features['importance'], align='center')
-        plt.yticks(range(len(top_features)), top_features['feature'])
-        plt.xlabel('Importance')
-        plt.ylabel('Features')
-        plt.title(f'Top {top_n} Features by Importance')
-        plt.tight_layout()
-        
-        return fig
-    
-    def create_learning_curve_plot(self, fold_metrics):
-        """Create learning curve plots from cross-validation results"""
-        fig = plt.figure(figsize=(12, 6))
-        
-        # For regression, show R² and RMSE
-        ax1 = plt.subplot(1, 2, 1)
-        ax2 = plt.subplot(1, 2, 2)
-        
-        # Plot R² for each fold
-        for i, metrics in enumerate(fold_metrics):
-            ax1.plot(i+1, metrics['r2'], 'bo')
-        
-        # Plot average R²
-        avg_r2 = np.mean([m['r2'] for m in fold_metrics])
-        ax1.axhline(y=avg_r2, color='r', linestyle='--', 
-                   label=f'Average R² = {avg_r2:.4f}')
-        
-        ax1.set_xlabel('Fold')
-        ax1.set_ylabel('R²')
-        ax1.set_title('R² by Fold')
-        ax1.set_xticks(range(1, len(fold_metrics)+1))
-        ax1.legend()
-        
-        # Plot RMSE for each fold
-        for i, metrics in enumerate(fold_metrics):
-            ax2.plot(i+1, metrics['rmse'], 'go')
-        
-        # Plot average RMSE
-        avg_rmse = np.mean([m['rmse'] for m in fold_metrics])
-        ax2.axhline(y=avg_rmse, color='r', linestyle='--', 
-                   label=f'Average RMSE = {avg_rmse:.4f}')
+        print(f"Successfully downloaded demographics CSV to: {csv_path}")
+        return csv_path
+    except Exception as e:
+        print(f"Error downloading demographics CSV: {e}")
+        return None
+
+def test_huggingface_connection():
+    """Test connection to Hugging Face Hub"""
+    print("Testing connection to Hugging Face Hub...")
+    try:
+        import requests
+        from huggingface_hub import HfApi
         
-        ax2.set_xlabel('Fold')
-        ax2.set_ylabel('RMSE')
-        ax2.set_title('RMSE by Fold')
-        ax2.set_xticks(range(1, len(fold_metrics)+1))
-        ax2.legend()
+        # Try accessing the Hugging Face API
+        api = HfApi()
         
-        plt.tight_layout()
-        return fig
+        # Simple test - just get the model info for a popular model
+        try:
+            # Test 1: Basic API connection
+            response = requests.get("https://huggingface.co/api/models/bert-base-uncased")
+            response.raise_for_status()  # Raise exception for 4XX/5XX responses
+            print("✅ Test 1: Connection to Hugging Face API successful")
+            
+            # Test 2: List models in the specific dataset
+            models = api.list_models(filter="SreekarB/OSFData")
+            print(f"✅ Test 2: Found dataset - {len(list(models))} related models")
+            
+            # Test 3: Try listing files in the dataset
+            try:
+                files = api.list_repo_files("SreekarB/OSFData", repo_type="dataset")
+                print(f"✅ Test 3: Successfully listed files in dataset: {len(files)} files found")
+                print(f"   First few files: {files[:3]}")
+            except Exception as e:
+                print(f"❌ Test 3: Error listing files: {e}")
+            
+            return True
+        except Exception as e:
+            print(f"❌ Error testing Hugging Face Hub connection: {e}")
+            return False
+    except ImportError:
+        print("❌ Required packages for Hugging Face Hub not installed")
+        return False
 
-def calculate_fc_accuracy(original_fc, reconstructed_fc):
-    """
-    Calculate accuracy metrics between original and reconstructed FC matrices
-    """
-    # Mean Squared Error (lower is better)
-    mse = mean_squared_error(original_fc, reconstructed_fc)
-    
-    # Root Mean Squared Error (lower is better)
-    rmse = np.sqrt(mse)
-    
-    # R² Score (higher is better, 1 is perfect)
-    r2 = r2_score(original_fc, reconstructed_fc)
-    
-    # Correlation between matrices (higher is better)
-    corr = np.corrcoef(original_fc.flatten(), reconstructed_fc.flatten())[0, 1]
-    
-    # Custom similarity score based on normalized dot product (higher is better)
-    norm_dot = np.dot(original_fc.flatten(), reconstructed_fc.flatten()) / (
-        np.linalg.norm(original_fc.flatten()) * np.linalg.norm(reconstructed_fc.flatten()))
+def create_interface():
+    """Create the Gradio interface for the app"""
+    print("Initializing Aphasia fMRI Generator...")
     
-    return {
-        "MSE": float(mse),
-        "RMSE": float(rmse),
-        "R²": float(r2),
-        "Correlation": float(corr),
-        "Cosine Similarity": float(norm_dot)
-    }
-
-def save_latents(latents, demographics, subjects=None, file_path='latents.pkl'):
-    """
-    Save latent representations and associated demographics to file
-    """
-    os.makedirs('results', exist_ok=True)
+    # Try to download demographics CSV directly
+    csv_path = download_demographic_csv()
     
-    # Create a dictionary with latents and demographics
-    data = {
-        'latents': latents,
-        'demographics': demographics
-    }
+    # Initialize generator with the CSV path if available
+    if csv_path:
+        print(f"Initializing generator with downloaded CSV: {csv_path}")
+        generator = AphasiaFMRIGenerator(csv_path)
+    else:
+        print("Initializing generator without CSV path")
+        generator = AphasiaFMRIGenerator()
     
-    if subjects is not None:
-        data['subjects'] = subjects
-    
-    # Save as pickle for easy loading in Python
-    with open(os.path.join('results', file_path), 'wb') as f:
-        pickle.dump(data, f)
-    
-    # Also save as JSON for more universal access
-    json_data = {
-        'latents': latents.tolist() if isinstance(latents, np.ndarray) else latents,
-        'demographics': {k: v.tolist() if isinstance(v, np.ndarray) else v 
-                         for k, v in demographics.items()}
-    }
-    
-    if subjects is not None:
-        json_data['subjects'] = subjects
-    
-    with open(os.path.join('results', file_path.replace('.pkl', '.json')), 'w') as f:
-        json.dump(json_data, f)
-    
-    return os.path.join('results', file_path)
-
-# Function to process behavioral data into treatment outcomes
-def process_behavioral_data_to_outcomes(behavioral_file):
-    """
-    Process behavioral_data.csv to create a treatment outcomes file
-    
-    The behavioral data contains:
-    - patient_id: Patient identifier
-    - Session: Session number
-    - Session Type: Baseline (B), Treatment, Post Treatment
-    - sess_acc: Session accuracy
-    
-    We'll convert this to treatment outcomes by:
-    1. Finding baseline and post-treatment sessions for each patient
-    2. Calculating improvement (post - baseline)
-    3. Creating a treatment_outcomes.csv file with subject_id, treatment_type, outcome_score
-    
-    Args:
-        behavioral_file: Path to behavioral_data.csv
-        
-    Returns:
-        Path to generated treatment_outcomes.csv file
-    """
-    # Create a simple mock outcomes file as a fallback
-    os.makedirs('results', exist_ok=True)
-    fallback_file = os.path.join('results', 'fallback_treatment_outcomes.csv')
-    
-    # Create a simple outcomes file with dummy data (useful as last resort)
-    try:
-        mock_outcomes = pd.DataFrame([
-            {'subject_id': 'P001', 'treatment_type': 'Standard', 'outcome_score': 5.2},
-            {'subject_id': 'P002', 'treatment_type': 'Intensive', 'outcome_score': 7.8},
-            {'subject_id': 'P003', 'treatment_type': 'Standard', 'outcome_score': 3.1},
-            {'subject_id': 'P004', 'treatment_type': 'Intensive', 'outcome_score': 9.4},
-            {'subject_id': 'P005', 'treatment_type': 'Control', 'outcome_score': 1.2}
-        ])
-        mock_outcomes.to_csv(fallback_file, index=False)
-        logger.info(f"Created fallback treatment outcomes file with 5 subjects")
-    except Exception as e:
-        logger.error(f"Failed to create fallback file: {e}")
-    
-    logger.info(f"Processing behavioral data from {behavioral_file}")
-    
-    # Create output file path
-    os.makedirs('results', exist_ok=True)
-    outcomes_file = os.path.join('results', 'behavioral_treatment_outcomes.csv')
-    
-    try:
-        # Read the behavioral data with error handling for different formats
-        import pandas as pd
-        import numpy as np
-        
-        try:
-            # Try regular CSV format first
-            behavioral_df = pd.read_csv(behavioral_file)
-            logger.info(f"Loaded behavioral data with {len(behavioral_df)} rows")
-        except UnicodeDecodeError as e:
-            logger.warning(f"Unicode decode error with behavioral file: {e}")
-            
-            # Try with different encodings
-            for encoding in ['latin1', 'cp1252', 'iso-8859-1']:
-                try:
-                    logger.info(f"Trying {encoding} encoding...")
-                    behavioral_df = pd.read_csv(behavioral_file, encoding=encoding)
-                    logger.info(f"Successfully loaded with {encoding} encoding")
-                    break
-                except Exception as enc_error:
-                    logger.warning(f"Failed with {encoding} encoding: {enc_error}")
-            else:
-                # If all encodings fail, try Excel format
-                try:
-                    logger.info("Trying to read as Excel file...")
-                    behavioral_df = pd.read_excel(behavioral_file)
-                    logger.info(f"Successfully loaded as Excel file with {len(behavioral_df)} rows")
-                except Exception as xl_error:
-                    logger.error(f"Failed to read as Excel: {xl_error}")
-                    raise ValueError(f"Could not read behavioral data file in any format")
-        
-        # Print column names for debugging
-        logger.info(f"Behavioral data columns: {behavioral_df.columns.tolist()}")
-        
-        # Try alternative column names for required fields
-        column_mapping = {
-            'ID': ['ID', 'patient_id', 'subject_id', 'Subject', 'PatientID', 'id', 'patient', 'subj', 'sub'],
-            'Session': ['Session', 'session', 'Session_Number', 'SessionNum', 'sess_num', 'session_num', 'time', 'timepoint'],
-            'Session Type': ['Session Type', 'SessionType', 'Type', 'session_type', 'sess_type', 'phase', 'treatment_phase', 'study_phase', 'condition'],
-            'sess_acc': ['sess_acc', 'Accuracy', 'accuracy', 'acc', 'session_accuracy', 'score', 'performance', 'wab', 'wab_score', 'value']
-        }
-        
-        # Attempt to map columns
-        mapped_columns = {}
-        for target_col, alt_cols in column_mapping.items():
-            if target_col in behavioral_df.columns:
-                mapped_columns[target_col] = target_col
-            else:
-                for alt_col in alt_cols:
-                    if alt_col in behavioral_df.columns:
-                        mapped_columns[target_col] = alt_col
-                        logger.info(f"Mapped column {alt_col} to {target_col}")
-                        break
-        
-        # Check what columns we found
-        logger.info(f"Mapped columns: {mapped_columns}")
-        
-        # Determine how to proceed based on what we found
-        if 'ID' not in mapped_columns:
-            # Try to create patient IDs if not found
-            if 'ID' not in behavioral_df.columns:
-                logger.warning("No patient ID column found, creating synthetic IDs")
-                # Look for any identifier-like columns
-                for col in behavioral_df.columns:
-                    if any(id_term in col.lower() for id_term in ['id', 'subject', 'patient', 'participant']):
-                        behavioral_df['ID'] = behavioral_df[col]
-                        mapped_columns['ID'] = col
-                        logger.info(f"Using {col} as patient ID")
-                        break
-                else:
-                    # Create sequential IDs if no identifier found
-                    behavioral_df['ID'] = [f"P{i+1:03d}" for i in range(len(behavioral_df))]
-                    mapped_columns['ID'] = 'ID'
-                    logger.warning("Created sequential patient IDs")
-        
-        # Handle session identification
-        if 'Session' not in mapped_columns:
-            # Try to create session numbers if not found
-            if 'Session' not in behavioral_df.columns:
-                logger.warning("No session number column found, creating sequential session numbers")
-                # Check if we have any time-related columns
-                time_columns = [col for col in behavioral_df.columns if any(time_term in col.lower() for time_term in ['time', 'session', 'visit', 'week'])]
-                if time_columns:
-                    behavioral_df['Session'] = behavioral_df[time_columns[0]]
-                    mapped_columns['Session'] = time_columns[0]
-                    logger.info(f"Using {time_columns[0]} as session number")
-                else:
-                    # Create sequential session numbers for each patient
-                    if 'ID' in mapped_columns:
-                        behavioral_df['Session'] = behavioral_df.groupby(mapped_columns['ID']).cumcount() + 1
-                    else:
-                        behavioral_df['Session'] = range(1, len(behavioral_df) + 1)
-                    mapped_columns['Session'] = 'Session'
-                    logger.warning("Created sequential session numbers")
-        
-        # Handle session type
-        if 'Session Type' not in mapped_columns:
-            # Try to create session types if not found
-            if 'Session Type' not in behavioral_df.columns:
-                logger.warning("No session type column found, inferring from session sequence")
-                # Create simple session type based on sequence: first=Baseline, last=Post, middle=Treatment
-                behavioral_df['Session Type'] = 'Treatment'
-                
-                # Group by patient ID if available
-                if 'ID' in mapped_columns:
-                    # Get min and max session for each patient
-                    session_col = mapped_columns.get('Session', 'Session')
-                    id_col = mapped_columns.get('ID', 'ID')
-                    
-                    # Get first and last session for each patient
-                    for patient in behavioral_df[id_col].unique():
-                        patient_sessions = behavioral_df[behavioral_df[id_col] == patient][session_col].sort_values()
-                        if len(patient_sessions) > 0:
-                            first_session = patient_sessions.iloc[0]
-                            last_session = patient_sessions.iloc[-1]
-                            
-                            # Mark first as Baseline, last as Post
-                            behavioral_df.loc[(behavioral_df[id_col] == patient) & 
-                                            (behavioral_df[session_col] == first_session), 
-                                            'Session Type'] = 'Baseline'
-                            
-                            behavioral_df.loc[(behavioral_df[id_col] == patient) & 
-                                            (behavioral_df[session_col] == last_session), 
-                                            'Session Type'] = 'Post Treatment'
-                else:
-                    # Just use the first and last rows
-                    if len(behavioral_df) > 0:
-                        behavioral_df.loc[0, 'Session Type'] = 'Baseline'
-                        if len(behavioral_df) > 1:
-                            behavioral_df.loc[len(behavioral_df)-1, 'Session Type'] = 'Post Treatment'
-                
-                mapped_columns['Session Type'] = 'Session Type'
-                logger.warning("Created session types based on sequence")
-        
-        # Handle accuracy/score
-        if 'sess_acc' not in mapped_columns:
-            # Find any numeric columns that might contain scores
-            numeric_cols = behavioral_df.select_dtypes(include=['number']).columns.tolist()
-            score_candidates = [col for col in numeric_cols if any(score_term in col.lower() for score_term in 
-                                ['score', 'acc', 'wab', 'value', 'measure', 'perf', 'test'])]
-            
-            if score_candidates:
-                behavioral_df['sess_acc'] = behavioral_df[score_candidates[0]]
-                mapped_columns['sess_acc'] = score_candidates[0]
-                logger.info(f"Using {score_candidates[0]} as accuracy score")
-            elif numeric_cols:
-                # Just use the first numeric column
-                behavioral_df['sess_acc'] = behavioral_df[numeric_cols[0]]
-                mapped_columns['sess_acc'] = numeric_cols[0]
-                logger.warning(f"Using first numeric column {numeric_cols[0]} as accuracy score")
-            else:
-                # No suitable column found
-                raise ValueError("No suitable accuracy/score column found in behavioral data")
-        
-        # Now work with the mapped columns
-        id_col = mapped_columns.get('ID', 'ID')
-        session_col = mapped_columns.get('Session', 'Session')
-        type_col = mapped_columns.get('Session Type', 'Session Type')
-        acc_col = mapped_columns.get('sess_acc', 'sess_acc')
-        
-        # Extract baseline and post-treatment sessions
-        outcome_data = []
-        
-        # Get unique patient IDs
-        patient_ids = behavioral_df[id_col].unique()
-        logger.info(f"Found {len(patient_ids)} unique patients")
-        
-        for patient_id in patient_ids:
-            patient_data = behavioral_df[behavioral_df[id_col] == patient_id]
-            logger.info(f"Processing patient {patient_id} with {len(patient_data)} sessions")
-            
-            # Try to identify baseline and post sessions by string matching if possible
-            try:
-                # Look for Baseline sessions (may be labeled as 'B', 'Baseline', etc.)
-                baseline_mask = (
-                    patient_data[type_col].str.contains('B', case=False) | 
-                    patient_data[type_col].str.contains('base', case=False) |
-                    patient_data[type_col].str.contains('pre', case=False)
-                )
-                baseline_sessions = patient_data[baseline_mask]
+    with gr.Blocks(title="Aphasia fMRI Generator") as interface:
+        gr.Markdown("# Aphasia fMRI Generator")
+        gr.Markdown("Generate synthetic fMRI data or train a model using the SreekarB/OSFData dataset from Hugging Face Hub.")
+        
+        # Create tabs for generation and training
+        with gr.Tabs() as tabs:
+            # Tab 1: fMRI Generation
+            with gr.TabItem("Generate fMRI") as generate_tab:
+                # Add warning about demo version
+                gr.Markdown("""
+                ⚠️ **DEMO VERSION**: This interface uses significantly reduced diffusion steps (20 instead of 1000)
+                and lower resolution for faster generation. For research-quality outputs, the full model would run
+                for hours on specialized hardware.
+                """, elem_classes=["warning-message"])
                 
-                # Look for Post Treatment sessions
-                post_mask = (
-                    patient_data[type_col].str.contains('Post', case=False) |
-                    patient_data[type_col].str.contains('final', case=False) |
-                    ((patient_data[type_col].str.contains('Treatment', case=False)) & 
-                     (~patient_data[type_col].str.contains('Pre', case=False)))
+                with gr.Row():
+                    with gr.Column():
+                        gr.Markdown("## Demographics")
+                        age = gr.Slider(18, 100, value=65, label="Age")
+                        gender = gr.Radio(["M", "F"], label="Gender", value="M")
+                        education = gr.Slider(0, 25, value=16, label="Education (years)")
+                        wab_aq = gr.Slider(0, 100, value=75.5, label="WAB-AQ Score (aphasia severity)")
+                        mpo = gr.Slider(0, 120, value=12, label="Months Post Onset")
+                        handedness = gr.Radio(["R", "L"], label="Handedness", value="R")
+                        
+                        # Add progress indicator
+                        with gr.Row():
+                            generate_btn = gr.Button("Generate fMRI", variant="primary")
+                            status = gr.Textbox(label="Status", value="Ready")
+                    
+                    with gr.Column():
+                        gr.Markdown("## Generated fMRI")
+                        # Use fewer display elements for simplicity
+                        axial_view = gr.Image(label="Axial View (Demo)", show_download_button=True)
+                        sagittal_view = gr.Image(label="Sagittal View (Demo)", show_download_button=True)
+                        coronal_view = gr.Image(label="Coronal View (Demo)", show_download_button=True)
+                        nifti_file = gr.File(label="Download Full 4D fMRI (.nii.gz)")
+                
+                # Set up example inputs for fMRI generation
+                examples = [
+                    [65, "M", 16, 75.5, 12, "R"],
+                    [45, "F", 14, 60.2, 6, "R"],
+                    [55, "M", 12, 85.3, 24, "L"]
+                ]
+                
+                gr.Examples(
+                    examples=examples,
+                    inputs=[age, gender, education, wab_aq, mpo, handedness],
+                    fn=generator.generate_fmri,
+                    outputs=[axial_view, sagittal_view, coronal_view, nifti_file],
+                    cache_examples=True
                 )
-                post_sessions = patient_data[post_mask]
-            except AttributeError:
-                # In case the column doesn't support string operations
-                logger.warning(f"Column {type_col} doesn't support string operations, using first/last approach")
-                baseline_sessions = pd.DataFrame()
-                post_sessions = pd.DataFrame()
-            
-            # If we can't find labeled sessions, use first and last session
-            if len(baseline_sessions) == 0 or len(post_sessions) == 0:
-                # Sort by session number if possible
-                try:
-                    patient_data = patient_data.sort_values(session_col)
-                except:
-                    logger.warning(f"Could not sort by {session_col}, using data as-is")
-                    
-                baseline_sessions = patient_data.iloc[[0]]  # First session
-                post_sessions = patient_data.iloc[[-1]]     # Last session
-                logger.info(f"Using first/last approach for patient {patient_id}")
-            
-            # If we have both baseline and post sessions, calculate improvement
-            if len(baseline_sessions) > 0 and len(post_sessions) > 0:
-                # Use the average if multiple sessions
-                try:
-                    baseline_acc = baseline_sessions[acc_col].mean()
-                    post_acc = post_sessions[acc_col].mean()
-                    
-                    # Calculate improvement
-                    improvement = post_acc - baseline_acc
-                    
-                    # Determine treatment type
-                    if type_col in patient_data.columns:
-                        try:
-                            # Get middle sessions (between baseline and post)
-                            all_sessions = patient_data.sort_values(session_col)
-                            first_session = all_sessions[session_col].iloc[0]
-                            last_session = all_sessions[session_col].iloc[-1]
-                            
-                            middle_mask = (
-                                (all_sessions[session_col] > first_session) & 
-                                (all_sessions[session_col] < last_session)
-                            )
-                            middle_sessions = all_sessions[middle_mask]
-                            
-                            if len(middle_sessions) > 0 and type_col in middle_sessions.columns:
-                                # Use most common treatment type
-                                treatment_type = middle_sessions[type_col].mode()[0]
-                            else:
-                                # Default treatment type
-                                treatment_type = "Standard"
-                        except:
-                            treatment_type = "Standard"
-                    else:
-                        treatment_type = "Standard"
-                    
-                    # Append to outcomes
-                    outcome_data.append({
-                        'subject_id': patient_id,
-                        'treatment_type': treatment_type,
-                        'outcome_score': improvement
-                    })
-                    logger.info(f"Patient {patient_id}: Baseline={baseline_acc:.2f}, Post={post_acc:.2f}, Improvement={improvement:.2f}")
-                except Exception as e:
-                    logger.warning(f"Could not calculate improvement for patient {patient_id}: {e}")
-        
-        # Create DataFrame and save
-        if outcome_data:
-            outcomes_df = pd.DataFrame(outcome_data)
-            outcomes_df.to_csv(outcomes_file, index=False)
-            logger.info(f"Created treatment outcomes file with {len(outcomes_df)} patients")
-            # Store the treatment file path for later use
-            self.last_treatment_file = outcomes_file
-            return outcomes_file
-        else:
-            # If we couldn't extract outcomes per patient, try a simpler approach
-            logger.warning("Could not extract patient-level outcomes, trying simpler approach")
-            
-            try:
-                # Calculate overall pre/post changes
-                behavioral_df = behavioral_df.sort_values(session_col)
-                first_half = behavioral_df.iloc[:len(behavioral_df)//2]
-                second_half = behavioral_df.iloc[len(behavioral_df)//2:]
-                
-                pre_score = first_half[acc_col].mean()
-                post_score = second_half[acc_col].mean()
-                improvement = post_score - pre_score
-                
-                # Create a simple outcomes file
-                outcomes_df = pd.DataFrame([
-                    {
-                        'subject_id': 'GROUP',
-                        'treatment_type': 'Standard',
-                        'outcome_score': improvement
-                    }
-                ])
-                outcomes_df.to_csv(outcomes_file, index=False)
-                logger.warning(f"Created simplified treatment outcomes with group improvement: {improvement:.2f}")
-                # Store the treatment file path for later use
-                self.last_treatment_file = outcomes_file
-                return outcomes_file
-            except Exception as e:
-                logger.error(f"Could not create even simplified outcomes: {e}")
-                logger.warning("Falling back to predefined treatment outcomes")
-                return fallback_file
-            
-    except Exception as e:
-        logger.error(f"Error processing behavioral data: {e}", exc_info=True)
-        logger.warning("Using fallback treatment outcomes file due to error")
-        # Return the fallback file instead of raising an error
-        return fallback_file
-
-# Function to look for treatment outcome files in the dataset
-def find_treatment_outcomes_file(data_dir):
-    """
-    Look for treatment outcomes file in the dataset
-    
-    Args:
-        data_dir: Dataset directory or HuggingFace dataset ID
-        
-    Returns:
-        Path to treatment outcomes file or None if not found
-    """
-    logger.info(f"Looking for treatment outcomes file in {data_dir}")
-    
-    # Create a temporary directory for downloads
-    import tempfile
-    from huggingface_hub import hf_hub_download
-    
-    temp_dir = tempfile.mkdtemp(prefix="hf_treatment_")
-    
-    # Try different filenames for treatment outcomes
-    outcome_files = [
-        "treatment_outcomes.csv",
-        "outcomes.csv",
-        "treatment_outcomes.xlsx",
-        "outcomes.xlsx",
-        "behavioral_data.csv",
-        "behavioral_data.xlsx",
-        "behavioral.csv",
-        "behavioral.xlsx",
-        "treatment_results.csv"
-    ]
-    
-    # Try each file
-    for outcome_file in outcome_files:
-        try:
-            file_path = hf_hub_download(
-                repo_id=data_dir,
-                filename=outcome_file,
-                repo_type="dataset",
-                cache_dir=temp_dir
-            )
-            
-            logger.info(f"Found treatment outcomes file: {outcome_file}")
-            return file_path
-        
-        except Exception as e:
-            logger.debug(f"Could not find {outcome_file}: {e}")
-    
-    # If we get here, no files were found
-    logger.error("No treatment outcomes file found in the dataset")
-    
-    # Create a fallback file
-    fallback_file = os.path.join('results', 'fallback_treatment_outcomes.csv')
-    try:
-        # Create a simple fallback treatment outcomes file
-        os.makedirs('results', exist_ok=True)
-        mock_outcomes = pd.DataFrame([
-            {'subject_id': 'P001', 'treatment_type': 'Standard', 'outcome_score': 5.2},
-            {'subject_id': 'P002', 'treatment_type': 'Intensive', 'outcome_score': 7.8},
-            {'subject_id': 'P003', 'treatment_type': 'Standard', 'outcome_score': 3.1},
-            {'subject_id': 'P004', 'treatment_type': 'Intensive', 'outcome_score': 9.4},
-            {'subject_id': 'P005', 'treatment_type': 'Control', 'outcome_score': 1.2}
-        ])
-        mock_outcomes.to_csv(fallback_file, index=False)
-        logger.warning("Created and using fallback treatment outcomes file")
-        return fallback_file
-    except Exception as e:
-        logger.error(f"Failed to create fallback file: {e}")
-        raise FileNotFoundError(f"No treatment outcomes file found in {data_dir} and could not create fallback. Please provide a treatment_outcomes.csv file with columns: subject_id, treatment_type, outcome_score.")
-
-# Function to search and download NIfTI files from HuggingFace datasets
-def find_nifti_files_in_hf_dataset(dataset_name, dataset=None):
-    """
-    Find and download NIfTI files from a HuggingFace dataset
-    
-    Args:
-        dataset_name: Name of the HuggingFace dataset
-        dataset: Optional pre-loaded dataset object
-        
-    Returns:
-        List of downloaded NIfTI file paths
-    """
-    logger.info(f"Searching for NIfTI files in dataset: {dataset_name}")
-    
-    # If dataset is not provided, load it
-    if dataset is None:
-        from datasets import load_dataset
-        try:
-            dataset = load_dataset(dataset_name)
-            logger.info(f"Loaded dataset: {dataset_name}")
-        except Exception as e:
-            logger.error(f"Error loading dataset: {e}")
-            return []
-    
-    # Load from HuggingFace Datasets
-    nii_files = []
-    
-    # Create a temp directory for downloads
-    import tempfile
-    from huggingface_hub import hf_hub_download
-    import shutil
-    import json
-    
-    temp_dir = tempfile.mkdtemp(prefix="hf_nifti_")
-    logger.info(f"Created temporary directory for NIfTI files: {temp_dir}")
-    
-    # Log dataset information for debugging
-    logger.info(f"Dataset info: type={type(dataset)}")
-    if dataset is not None:
-        if isinstance(dataset, dict):
-            logger.info(f"Dataset is a dictionary with keys: {list(dataset.keys())}")
-            if 'train' in dataset:
-                train_type = type(dataset['train'])
-                logger.info(f"Train split type: {train_type}")
-                if hasattr(dataset['train'], 'shape'):
-                    logger.info(f"Train split shape: {dataset['train'].shape}")
-                elif hasattr(dataset['train'], '__len__'):
-                    logger.info(f"Train split length: {len(dataset['train'])}")
                 
-                # Log first few rows for pandas DataFrames
-                if isinstance(dataset['train'], pd.DataFrame):
+                # Function to handle generation with status updates
+                def generate_with_status(age, gender, education, wab_aq, mpo, handedness):
                     try:
-                        logger.info(f"DataFrame columns: {dataset['train'].columns.tolist()}")
-                        logger.info(f"DataFrame preview: \n{dataset['train'].head(2).to_string()}")
+                        # Call the generate function
+                        results = generator.generate_fmri(age, gender, education, wab_aq, mpo, handedness)
+                        
+                        # Update status on completion
+                        return [
+                            results[0],  # axial
+                            results[1],  # sagittal
+                            results[2],  # coronal
+                            results[3],  # nifti
+                            "Generation complete! (demo preview)"  # status text
+                        ]
                     except Exception as e:
-                        logger.error(f"Error logging DataFrame info: {e}")
-        elif isinstance(dataset, pd.DataFrame):
-            logger.info(f"Dataset is a pandas DataFrame with shape: {dataset.shape}")
-            try:
-                logger.info(f"DataFrame columns: {dataset.columns.tolist()}")
-                logger.info(f"DataFrame preview: \n{dataset.head(2).to_string()}")
-            except Exception as e:
-                logger.error(f"Error logging DataFrame info: {e}")
-    
-    try:
-        # First approach: Check if there are any columns containing file paths
-        nii_columns = []
-        
-        # Handle both HuggingFace dataset and pandas DataFrame
-        if isinstance(dataset, dict) and 'train' in dataset:
-            # It's a HuggingFace dataset object
-            try:
-                if hasattr(dataset['train'], 'column_names'):
-                    # Standard HuggingFace dataset
-                    columns = dataset['train'].column_names
-                else:
-                    # It might be a pandas DataFrame
-                    columns = dataset['train'].columns.tolist()
-                
-                for col in columns:
-                    # Check if column name suggests NIfTI files
-                    if 'nii' in col.lower() or 'nifti' in col.lower() or 'fmri' in col.lower():
-                        nii_columns.append(col)
-                    # Or check if column contains file paths
-                    elif len(dataset['train']) > 0:
-                        # Try to get first value, handling both Dataset and DataFrame
-                        try:
-                            if hasattr(dataset['train'], '__getitem__'):
-                                first_val = dataset['train'][0][col]
-                            else:
-                                first_val = dataset['train'][col].iloc[0]
-                                
-                            if isinstance(first_val, str) and (first_val.endswith('.nii') or first_val.endswith('.nii.gz')):
-                                nii_columns.append(col)
-                        except Exception as e:
-                            logger.debug(f"Error checking first value of column {col}: {e}")
-            except Exception as e:
-                logger.error(f"Error inspecting dataset columns: {e}")
-        elif isinstance(dataset, pd.DataFrame):
-            # It's just a pandas DataFrame directly
-            try:
-                columns = dataset.columns.tolist()
+                        print(f"Error in generate_with_status: {e}")
+                        # Return empty images and error status
+                        return [
+                            np.zeros((32, 32, 3), dtype=np.uint8),  # empty axial
+                            np.zeros((32, 32, 3), dtype=np.uint8),  # empty sagittal
+                            np.zeros((32, 32, 3), dtype=np.uint8),  # empty coronal
+                            None,  # no file
+                            f"Error: {str(e)}"  # error message
+                        ]
                 
-                for col in columns:
-                    # Check if column name suggests NIfTI files
-                    if 'nii' in col.lower() or 'nifti' in col.lower() or 'fmri' in col.lower():
-                        nii_columns.append(col)
-                    # Or check if column contains file paths
-                    elif len(dataset) > 0:
-                        try:
-                            first_val = dataset[col].iloc[0]
-                            if isinstance(first_val, str) and (first_val.endswith('.nii') or first_val.endswith('.nii.gz')):
-                                nii_columns.append(col)
-                        except Exception as e:
-                            logger.debug(f"Error checking first value of column {col}: {e}")
-            except Exception as e:
-                logger.error(f"Error inspecting DataFrame columns: {e}")
-        else:
-            logger.error(f"Unexpected dataset type: {type(dataset)}")
-        
-        if nii_columns:
-            logger.info(f"Found columns that may contain NIfTI files: {nii_columns}")
+                # Connect the generation button
+                generate_btn.click(
+                    fn=generate_with_status,
+                    inputs=[age, gender, education, wab_aq, mpo, handedness],
+                    outputs=[axial_view, sagittal_view, coronal_view, nifti_file, status]
+                )
             
-            for col in nii_columns:
-                logger.info(f"Processing column '{col}'...")
+            # Tab 2: Model Training
+            with gr.TabItem("Train Model"):
+                gr.Markdown("""
+                ## Model Training
                 
-                # Handle different dataset types
-                try:
-                    # Get the column data
-                    if isinstance(dataset, dict) and 'train' in dataset:
-                        if hasattr(dataset['train'], 'column_names'):
-                            # It's a standard HuggingFace dataset
-                            col_data = dataset['train'][col]
-                        else:
-                            # It's a DataFrame wrapped in dict
-                            col_data = dataset['train'][col].values
-                    elif isinstance(dataset, pd.DataFrame):
-                        # It's a DataFrame directly
-                        col_data = dataset[col].values
-                    else:
-                        logger.error(f"Unexpected dataset type: {type(dataset)}")
-                        continue
-                    
-                    # Process the column data
-                    for i, item in enumerate(col_data):
-                        if not isinstance(item, str):
-                            logger.info(f"Item {i} in column {col} is not a string but {type(item)}")
-                            continue
-                            
-                        if not (item.endswith('.nii') or item.endswith('.nii.gz')):
-                            logger.info(f"Item {i} in column {col} is not a NIfTI file: {item}")
-                            continue
-                            
-                        logger.info(f"Downloading {item} from dataset {dataset_name}...")
-                except Exception as e:
-                    logger.error(f"Error processing column {col}: {e}")
-                    
-                    try:
-                        # Attempt to download with explicit filename
-                        file_path = hf_hub_download(
-                            repo_id=dataset_name,
-                            filename=item,
-                            repo_type="dataset",
-                            cache_dir=temp_dir
-                        )
-                        nii_files.append(file_path)
-                        logger.info(f"✓ Successfully downloaded {item}")
-                    except Exception as e1:
-                        logger.error(f"Error downloading with explicit filename: {e1}")
-                        
-                        # Second attempt: try with the item's basename
-                        try:
-                            basename = os.path.basename(item)
-                            logger.info(f"Trying with basename: {basename}")
-                            file_path = hf_hub_download(
-                                repo_id=dataset_name,
-                                filename=basename,
-                                repo_type="dataset",
-                                cache_dir=temp_dir
-                            )
-                            nii_files.append(file_path)
-                            logger.info(f"✓ Successfully downloaded {basename}")
-                        except Exception as e2:
-                            logger.error(f"Error downloading with basename: {e2}")
-                            
-                            # Third attempt: check if it's a binary blob in the dataset
-                            try:
-                                # Handle different dataset types for binary data
-                                binary_data = None
-                                
-                                if isinstance(dataset, dict) and 'train' in dataset:
-                                    if hasattr(dataset['train'], '__getitem__') and hasattr(dataset['train'][i], 'keys') and 'bytes' in dataset['train'][i]:
-                                        # Standard HuggingFace dataset with binary data
-                                        binary_data = dataset['train'][i]['bytes']
-                                    elif hasattr(dataset['train'], 'iloc') and 'bytes' in dataset['train'].columns:
-                                        # DataFrame with bytes column
-                                        binary_data = dataset['train'].iloc[i]['bytes']
-                                elif isinstance(dataset, pd.DataFrame) and 'bytes' in dataset.columns:
-                                    # Direct DataFrame with bytes column
-                                    binary_data = dataset.iloc[i]['bytes']
-                                
-                                if binary_data is not None:
-                                    logger.info("Found binary data in dataset, saving to temporary file...")
-                                    temp_file = os.path.join(temp_dir, basename)
-                                    with open(temp_file, 'wb') as f:
-                                        f.write(binary_data)
-                                    nii_files.append(temp_file)
-                                    logger.info(f"✓ Saved binary data to {temp_file}")
-                            except Exception as e3:
-                                logger.error(f"Error handling binary data: {e3}")
-                                
-                                # Last resort: look for the file locally
-                                local_path = os.path.join(os.getcwd(), item)
-                                if os.path.exists(local_path):
-                                    nii_files.append(local_path)
-                                    logger.info(f"✓ Found {item} locally")
-                                else:
-                                    logger.warning(f"❌ Warning: Could not find {item} anywhere")
-        
-        # Second approach: Try to find NIfTI files in dataset repository directly
-        if not nii_files:
-            logger.info("No NIfTI files found in dataset columns. Trying direct repository search...")
-            
-            try:
-                from huggingface_hub import list_repo_files, hf_hub_download
+                Train a latent diffusion model on the SreekarB/OSFData dataset. This will download the dataset from 
+                Hugging Face Hub and train a model with the specified parameters.
                 
-                # Try to list all files in the repository
-                try:
-                    logger.info("Listing all repository files...")
-                    all_repo_files = list_repo_files(dataset_name, repo_type="dataset")
-                    logger.info(f"Found {len(all_repo_files)} files in repository")
-                    
-                    # First prioritize P*_rs.nii files
-                    p_rs_files = [f for f in all_repo_files if f.endswith('_rs.nii') and f.startswith('P')]
-                    
-                    # Then include all other NIfTI files
-                    other_nii_files = [f for f in all_repo_files if (f.endswith('.nii') or f.endswith('.nii.gz')) and f not in p_rs_files]
-                    
-                    # Combine, with P*_rs.nii files first
-                    nii_repo_files = p_rs_files + other_nii_files
-                    
-                    if nii_repo_files:
-                        preview = nii_repo_files[:5] if len(nii_repo_files) > 5 else nii_repo_files
-                        logger.info(f"Found {len(nii_repo_files)} NIfTI files in repository: {preview}...")
-                        
-                        # Download each file
-                        for nii_file in nii_repo_files:
-                            try:
-                                file_path = hf_hub_download(
-                                    repo_id=dataset_name,
-                                    filename=nii_file,
-                                    repo_type="dataset",
-                                    cache_dir=temp_dir
-                                )
-                                nii_files.append(file_path)
-                                logger.info(f"✓ Downloaded {nii_file}")
-                            except Exception as e:
-                                logger.error(f"Error downloading {nii_file}: {e}")
-                except Exception as e:
-                    logger.error(f"Error listing repository files: {e}")
-                    logger.info("Will try alternative approaches...")
+                ⚠️ **WARNING**: Training can take several hours depending on your hardware. The process will run in 
+                the background, and you can continue using the app while it trains.
+                """)
                 
-                # If repo listing fails, try with common NIfTI file patterns directly
-                if not nii_files:
-                    logger.info("Trying common NIfTI file patterns...")
-                    
-                    # Focus specifically on P*_rs.nii pattern
-                    patterns = []
+                with gr.Row():
+                    with gr.Column():
+                        # Training parameters
+                        training_epochs = gr.Slider(1, 100, value=10, step=1, label="Number of Epochs")
+                        batch_size = gr.Slider(1, 32, value=4, step=1, label="Batch Size")
+                        learning_rate = gr.Number(value=0.0001, label="Learning Rate")
+                        
+                        # Start training button
+                        train_btn = gr.Button("Start Training", variant="primary")
                     
-                    # Generate P01_rs.nii through P30_rs.nii
-                    for i in range(1, 31):  # Try subjects 1-30
-                        patterns.append(f"P{i:02d}_rs.nii")
+                    with gr.Column():
+                        # Training status and logs
+                        training_status = gr.Markdown("Training status will appear here")
+                        training_logs = gr.Textbox(label="Training Logs", lines=15)
                         
-                    # Also try with .nii.gz extension
-                    for i in range(1, 31):
-                        patterns.append(f"P{i:02d}_rs.nii.gz")
+                        # Training progress
+                        epoch_progress = gr.Slider(0, 100, value=0, label="Training Progress (%)", interactive=False)
                         
-                    # Include a few other common patterns as fallbacks
-                    patterns.extend([
-                        "sub-01_task-rest_bold.nii.gz",  # BIDS format
-                        "fmri.nii.gz", "bold.nii.gz", 
-                        "rest.nii.gz"
-                    ])
-                    
-                    for pattern in patterns:
+                        # Current loss
+                        current_loss = gr.Number(value=0.0, label="Current Loss", precision=6)
+                
+                # Function to refresh training logs
+                def refresh_training_logs():
+                    """Function to refresh the training logs display"""
+                    if not generator.is_training and not generator.training_logs:
+                        return "Training not started yet.", 0, 0.0
+                    
+                    # Join the training logs with newlines
+                    logs = "\n".join(generator.training_logs)
+                    
+                    # Extract progress information
+                    progress = 0
+                    current_loss = 0.0
+                    
+                    # Extract progress percentage if available
+                    progress_lines = [line for line in generator.training_logs if "Progress:" in line]
+                    if progress_lines:
+                        progress_line = progress_lines[-1]
                         try:
-                            logger.info(f"Trying to download {pattern}...")
-                            file_path = hf_hub_download(
-                                repo_id=dataset_name,
-                                filename=pattern,
-                                repo_type="dataset",
-                                cache_dir=temp_dir
-                            )
-                            nii_files.append(file_path)
-                            logger.info(f"✓ Successfully downloaded {pattern}")
+                            progress = int(progress_line.split("Progress:")[1].split("%")[0].strip())
                         except Exception as e:
-                            logger.debug(f"Failed to download {pattern}")
-                            
-                # If we still couldn't find any files, check if data files are nested
-                if not nii_files:
-                    logger.info("Checking for nested data files...")
-                    nested_paths = ["data/", "raw/", "nii/", "derivatives/", "fmri/", "nifti/"]
+                            print(f"Error parsing progress: {e}")
+                            pass
                     
-                    for path in nested_paths:
-                        for pattern in patterns:
-                            nested_file = f"{path}{pattern}"
-                            try:
-                                logger.info(f"Trying to download {nested_file}...")
-                                file_path = hf_hub_download(
-                                    repo_id=dataset_name,
-                                    filename=nested_file,
-                                    repo_type="dataset",
-                                    cache_dir=temp_dir
-                                )
-                                nii_files.append(file_path)
-                                logger.info(f"✓ Successfully downloaded {nested_file}")
-                                # If we found one file in this directory, try to find all files in it
-                                try:
-                                    all_files_in_dir = [f for f in all_repo_files if f.startswith(path)]
-                                    nii_files_in_dir = [f for f in all_files_in_dir if f.endswith('.nii') or f.endswith('.nii.gz')]
-                                    logger.info(f"Found {len(nii_files_in_dir)} additional NIfTI files in {path}")
-                                    
-                                    for nii_file in nii_files_in_dir:
-                                        if nii_file != nested_file:  # Skip the one we already downloaded
-                                            try:
-                                                file_path = hf_hub_download(
-                                                    repo_id=dataset_name,
-                                                    filename=nii_file,
-                                                    repo_type="dataset",
-                                                    cache_dir=temp_dir
-                                                )
-                                                nii_files.append(file_path)
-                                                logger.info(f"✓ Downloaded {nii_file}")
-                                            except Exception as e:
-                                                logger.error(f"Error downloading {nii_file}: {e}")
-                                except Exception as e:
-                                    logger.error(f"Error finding additional files in {path}: {e}")
-                            except Exception as e:
-                                pass
-                            
-            except Exception as e:
-                logger.error(f"Error during repository exploration: {e}")
-        
-        # If we still don't have any files, try to search for P*_rs.nii pattern specifically
-        if not nii_files:
-            logger.info("Trying to find files matching P*_rs.nii pattern specifically...")
-            
-            try:
-                # List all files in the repository (if we haven't already)
-                if 'all_repo_files' not in locals():
-                    from huggingface_hub import list_repo_files
-                    try:
-                        all_repo_files = list_repo_files(dataset_name, repo_type="dataset")
-                    except Exception as e:
-                        logger.error(f"Error listing repo files: {e}")
-                        all_repo_files = []
-                
-                # Look for files matching the pattern exactly (P*_rs.nii)
-                pattern_files = [f for f in all_repo_files if '_rs.nii' in f and f.startswith('P')]
-                
-                # If we don't find any exact matches, try a more relaxed pattern
-                if not pattern_files:
-                    pattern_files = [f for f in all_repo_files if 'rs.nii' in f.lower()]
-                
-                if pattern_files:
-                    logger.info(f"Found {len(pattern_files)} files matching rs.nii pattern")
-                    
-                    # Download each file
-                    for pattern_file in pattern_files:
+                    # Extract loss value if available
+                    loss_lines = [line for line in generator.training_logs if "Loss:" in line]
+                    if loss_lines:
+                        loss_line = loss_lines[-1]
                         try:
-                            file_path = hf_hub_download(
-                                repo_id=dataset_name,
-                                filename=pattern_file,
-                                repo_type="dataset",
-                                cache_dir=temp_dir
-                            )
-                            nii_files.append(file_path)
-                            logger.info(f"✓ Downloaded {pattern_file}")
+                            current_loss = float(loss_line.split("Loss:")[1].strip().split()[0])
                         except Exception as e:
-                            logger.error(f"Error downloading {pattern_file}: {e}")
-            except Exception as e:
-                logger.error(f"Error searching for pattern files: {e}")
-            
-        logger.info(f"Found total of {len(nii_files)} NIfTI files")
-    except Exception as e:
-        logger.error(f"Unexpected error during NIfTI file search: {e}")
-    
-    return nii_files
-
-# Make sure directories exist for saving results and models
-os.makedirs('results', exist_ok=True)
-os.makedirs('models', exist_ok=True)
-
-def create_interface():
-    """Create the Gradio interface"""
-    app = AphasiaPredictionApp()
-    
-    with gr.Blocks(title="Aphasia Treatment Trajectory Prediction") as interface:
-        gr.Markdown("# Aphasia Treatment Trajectory Prediction")
-        
-        with gr.Tabs():
-            # Tab 1: VAE Training
-            with gr.Tab("1. VAE Training"):
-                with gr.Row():
-                    with gr.Column(scale=1):
-                        data_dir = gr.Textbox(
-                            label="Data Directory or HuggingFace Dataset ID", 
-                            value="SreekarB/OSFData"
-                        )
-                        local_nii_dir = gr.Textbox(
-                            label="Local NIfTI Files Directory (Optional)", 
-                            value="",
-                            placeholder="/path/to/nii_files",
-                            info="If provided, NIfTI files from this directory will be used instead of searching the dataset"
-                        )
-                        latent_dim = gr.Slider(
-                            minimum=8, maximum=64, step=8, 
-                            label="Latent Dimensions", value=32
-                        )
-                        nepochs = gr.Slider(
-                            minimum=100, maximum=5000, step=100, 
-                            label="Number of Epochs", value=200  # Reduced for faster demos
-                        )
+                            print(f"Error parsing loss: {e}")
+                            pass
                     
-                    with gr.Column(scale=1):
-                        bsize = gr.Slider(
-                            minimum=8, maximum=64, step=8, 
-                            label="Batch Size", value=16
-                        )
-                        use_hf_dataset = gr.Checkbox(
-                            label="Use HuggingFace Dataset", value=True
-                        )
-                        
-                        with gr.Accordion("Advanced Data Options", open=False):
-                            skip_behavioral = gr.Checkbox(
-                                label="Skip Behavioral Data Processing",
-                                value=PREDICTION_CONFIG.get('skip_behavioral_data', True),
-                                info="Use pre-defined treatment outcomes instead of processing behavioral data"
-                            )
-                            use_synthetic_nifti = gr.Checkbox(
-                                label="Use Synthetic NIfTI Data",
-                                value=PREDICTION_CONFIG.get('use_synthetic_nifti', False),
-                                info="Generate synthetic NIfTI files if real ones aren't found"
-                            )
-                            use_synthetic_fc = gr.Checkbox(
-                                label="Use Synthetic FC Matrices",
-                                value=PREDICTION_CONFIG.get('use_synthetic_fc', False),
-                                info="Generate synthetic FC matrices if processing fails"
-                            )
-                
-                # Split the training and visualization into separate buttons
-                gr.Markdown("### Step 1: Train the VAE model first, then visualize the FC matrices")
-                with gr.Row():
-                    train_vae_btn = gr.Button("TRAIN VAE MODEL", variant="primary", size="lg")
-                    visualize_fc_btn = gr.Button("VISUALIZE FC MATRICES", variant="secondary", size="lg")
-                
-                gr.Markdown("### VAE Training Results")
-                
-                with gr.Row():
-                    with gr.Column(scale=2):
-                        fc_plot = gr.Plot(label="FC Matrices (Original/Reconstructed/Generated)")
-                    with gr.Column(scale=1):
-                        fc_info = gr.TextArea(label="FC Matrix Information", interactive=False)
-                
-                with gr.Row():
-                    learning_plot = gr.Plot(label="VAE Learning Curves")
-                
-                gr.Markdown("After VAE training completes, proceed to the 'Random Forest Prediction' tab →")
-                
-            # Tab 2: Random Forest Prediction
-            with gr.Tab("2. Random Forest Prediction"):
-                gr.Markdown("### Random Forest Model Training")
-                gr.Markdown("First complete the VAE training in tab 1, then configure and train the Random Forest model below:")
-                
-                with gr.Row():
-                    with gr.Column(scale=1):
-                        prediction_type = gr.Radio(
-                            label="Prediction Type",
-                            choices=["regression", "classification"],
-                            value="regression"
-                        )
-                        outcome_variable = gr.Dropdown(
-                            label="Outcome Variable",
-                            choices=["wab_aq", "age", "mpo", "education"],
-                            value="wab_aq"
-                        )
-                    
-                    with gr.Column(scale=1):
-                        rf_n_estimators = gr.Slider(
-                            minimum=10, maximum=500, step=10, 
-                            label="Number of Trees", value=100
-                        )
-                        rf_max_depth = gr.Slider(
-                            minimum=3, maximum=50, step=1, 
-                            label="Max Tree Depth", value=10,
-                            info="Set to 0 for unlimited depth"
-                        )
-                        rf_cv_folds = gr.Slider(
-                            minimum=2, maximum=10, step=1, 
-                            label="Cross-validation Folds", value=5
-                        )
-                
-                train_rf_btn = gr.Button("Train Random Forest Model", variant="primary")
-                
-                gr.Markdown("### Random Forest Results")
-                
-                with gr.Row():
-                    with gr.Column(scale=1):
-                        importance_plot = gr.Plot(label="Feature Importance")
-                    with gr.Column(scale=1):
-                        prediction_plot = gr.Plot(label="Prediction Performance")
-                
-                with gr.Row():
-                    rf_metrics = gr.Textbox(label="Model Performance Metrics")
-                
-                gr.Markdown("After Random Forest training completes, proceed to the 'Treatment Prediction' tab →")
-            
-            # Tab 3: Predict Treatment
-            with gr.Tab("3. Treatment Prediction"):
-                gr.Markdown("### Predict Individual Treatment Outcomes")
-                gr.Markdown("After completing VAE and Random Forest training, you can predict treatment outcomes for individual patients:")
+                    return logs, progress, current_loss
                 
+                # Add refresh button for logs and progress
                 with gr.Row():
-                    with gr.Column(scale=1):
-                        fmri_file = gr.File(label="Patient fMRI Data (NIfTI file)")
-                    with gr.Column(scale=1):
-                        with gr.Group():
-                            gr.Markdown("### Patient Demographics")
-                            age = gr.Number(label="Age at Stroke", value=60)
-                            sex = gr.Dropdown(choices=["M", "F"], label="Sex", value="M")
-                            months = gr.Number(label="Months Post Stroke", value=12)
-                            wab = gr.Number(label="Current WAB Score", value=50)
-                
-                predict_btn = gr.Button("Predict Treatment Outcome", variant="primary")
-                
-                with gr.Row():
-                    prediction_text = gr.Textbox(label="Prediction Result")
-                
-                with gr.Row():
-                    trajectory_plot = gr.Plot(label="Predicted Treatment Trajectory")
-        
-        # Define various handler functions for the different tabs
-        
-        # Store shared state between tabs
-        app_state = {
-            'vae': None,
-            'latents': None,
-            'demographics': None,
-            'predictor': None,
-            'vae_trained': False,
-            'rf_trained': False
-        }
-        
-        # Tab 1: VAE Training Handler
-        def handle_vae_training(data_dir, local_nii_dir, latent_dim, nepochs, bsize, use_hf_dataset,
-                              skip_behavioral, use_synthetic_nifti, use_synthetic_fc):
-            """Train the VAE model and display FC visualization and learning curves"""
-            # Store config values
-            PREDICTION_CONFIG['skip_behavioral_data'] = skip_behavioral
-            PREDICTION_CONFIG['use_synthetic_nifti'] = use_synthetic_nifti
-            PREDICTION_CONFIG['use_synthetic_fc'] = use_synthetic_fc
-            
-            # Store the local NIfTI directory if provided
-            if local_nii_dir and os.path.exists(local_nii_dir):
-                PREDICTION_CONFIG['local_nii_dir'] = local_nii_dir
-                logger.info(f"Using local NIfTI directory: {local_nii_dir}")
-            else:
-                PREDICTION_CONFIG['local_nii_dir'] = None
-            
-            # Log info
-            logger.info(f"Training VAE model with data from: {data_dir}")
-            logger.info(f"VAE parameters: latent_dim={latent_dim}, epochs={nepochs}, batch_size={bsize}")
-            
-            # Create a subset of app.train_models functionality that just trains the VAE
-            try:
-                # Start by setting up data for the VAE
-                from vae_model import DemoVAE
-                from data_preprocessing import load_and_preprocess_data
-                from main import run_analysis
-                import numpy as np
-                import os
-                
-                # Prepare VAE training parameters
-                MODEL_CONFIG.update({
-                    'latent_dim': latent_dim,
-                    'nepochs': nepochs,
-                    'bsize': bsize
-                })
-                
-                # First, find and preprocess data
-                logger.info("Looking for data in directory and preprocessing...")
-                
-                # This part is similar to app.train_models but only focuses on VAE
-                if data_dir == "SreekarB/OSFData":
-                    # Use dataset, similar to existing code in app.train_models
-                    # For brevity, we'll call the full train_models function but only 
-                    # extract the VAE-related results
-                    results = app.train_models(
-                        data_dir=data_dir,
-                        latent_dim=latent_dim,
-                        nepochs=nepochs,
-                        bsize=bsize
-                    )
-                    
-                    # Store results in app_state for the next tabs
-                    app_state['vae'] = results.get('vae', None)
-                    app_state['latents'] = results.get('latents', None)
-                    app_state['demographics'] = results.get('demographics', None)
-                    
-                    # Track VAE training state and ensure it's consistent
-                    if app_state['vae'] is not None:
-                        app_state['vae_trained'] = True
-                        logger.info(f"VAE model stored in app_state. Latent dim: {app_state['vae'].latent_dim if hasattr(app_state['vae'], 'latent_dim') else 'unknown'}")
-                        
-                        # Check if we have latents
-                        if app_state['latents'] is not None:
-                            logger.info(f"Latent representations stored in app_state. Shape: {app_state['latents'].shape if hasattr(app_state['latents'], 'shape') else 'unknown'}")
-                        else:
-                            logger.warning("VAE model stored but no latent representations available")
-                    else:
-                        logger.warning("VAE training didn't result in a valid model")
-                    
-                    # Store FC matrices for visualization
-                    if 'X' in results:
-                        # Store original FC matrices (could be vectors or matrices)
-                        app_state['original_fc'] = results.get('X', None)
-                        
-                        # Store reconstructed FC if available
-                        if app_state['vae'] is not None and app_state['latents'] is not None:
-                            # Reconstruct from latents
-                            reconstructed = app_state['vae'].decode(app_state['latents'])
-                            app_state['reconstructed_fc'] = reconstructed[0] if len(reconstructed) > 0 else None
-                    
-                    app_state['vae_trained'] = True
-                    
-                    # Generate FC info text
-                    if app_state['demographics'] is not None:
-                        demo_info = format_demographics_info(app_state['demographics'])
-                    else:
-                        demo_info = "No demographic information available"
-                    
-                    # Return visualizations and info
-                    return [
-                        results.get('figures', {}).get('vae'),  # FC matrix visualization
-                        demo_info,                              # Demographic info
-                        results.get('figures', {}).get('learning')  # VAE learning curves
-                    ]
-                else:
-                    # Local directory case
-                    results = app.train_models(
-                        data_dir=data_dir,
-                        latent_dim=latent_dim,
-                        nepochs=nepochs,
-                        bsize=bsize
-                    )
-                    
-                    # Store results in app_state
-                    app_state['vae'] = results.get('vae', None)
-                    app_state['latents'] = results.get('latents', None)
-                    app_state['demographics'] = results.get('demographics', None)
-                    
-                    # Store FC matrices for visualization
-                    if 'X' in results:
-                        # Store original FC matrices (could be vectors or matrices)
-                        app_state['original_fc'] = results.get('X', None)
-                        
-                        # Store reconstructed FC if available
-                        if app_state['vae'] is not None and app_state['latents'] is not None:
-                            # Reconstruct from latents
-                            reconstructed = app_state['vae'].decode(app_state['latents'])
-                            app_state['reconstructed_fc'] = reconstructed[0] if len(reconstructed) > 0 else None
-                    
-                    app_state['vae_trained'] = True
-                    
-                    # Generate FC info text
-                    if app_state['demographics'] is not None:
-                        demo_info = format_demographics_info(app_state['demographics'])
-                    else:
-                        demo_info = "No demographic information available"
-                    
-                    # Return visualizations and info
-                    return [
-                        results.get('figures', {}).get('vae'),  # FC matrix visualization
-                        demo_info,                              # Demographic info
-                        results.get('figures', {}).get('learning')  # VAE learning curves
-                    ]
-            except Exception as e:
-                logger.error(f"Error in VAE training: {str(e)}", exc_info=True)
-                error_fig = plt.figure(figsize=(10, 6))
-                plt.text(0.5, 0.5, f"Error: {str(e)}", 
-                        horizontalalignment='center', verticalalignment='center',
-                        fontsize=12, color='red', wrap=True)
-                plt.axis('off')
-                
-                # Return error figures and text for all outputs
-                return [error_fig, f"Error in VAE training: {str(e)}", error_fig]
-                
-        # Helper function to format demographics info
-        def format_demographics_info(demographics):
-            """Format demographics info for display"""
-            if demographics is None:
-                return "No demographic information available"
-                
-            try:
-                # Extract numeric summaries
-                if isinstance(demographics, pd.DataFrame):
-                    info = "FC Matrix Demographics Summary:\n\n"
-                    
-                    # Age stats
-                    if 'age' in demographics.columns:
-                        avg_age = demographics['age'].mean()
-                        min_age = demographics['age'].min()
-                        max_age = demographics['age'].max()
-                        info += f"Age: {avg_age:.1f} years (range: {min_age:.0f}-{max_age:.0f})\n"
-                    
-                    # Gender stats
-                    if 'gender' in demographics.columns:
-                        male_count = (demographics['gender'] == 'M').sum()
-                        female_count = (demographics['gender'] == 'F').sum()
-                        info += f"Gender: {male_count} males, {female_count} females\n"
-                    
-                    # MPO stats
-                    if 'mpo' in demographics.columns:
-                        avg_mpo = demographics['mpo'].mean()
-                        min_mpo = demographics['mpo'].min()
-                        max_mpo = demographics['mpo'].max()
-                        info += f"Months post onset: {avg_mpo:.1f} (range: {min_mpo:.0f}-{max_mpo:.0f})\n"
-                    
-                    # WAB stats
-                    if 'wab_aq' in demographics.columns:
-                        avg_wab = demographics['wab_aq'].mean()
-                        min_wab = demographics['wab_aq'].min()
-                        max_wab = demographics['wab_aq'].max()
-                        info += f"WAB scores: {avg_wab:.1f} (range: {min_wab:.1f}-{max_wab:.1f})\n"
-                    
-                    # Education stats
-                    if 'education' in demographics.columns:
-                        avg_edu = demographics['education'].mean()
-                        min_edu = demographics['education'].min()
-                        max_edu = demographics['education'].max()
-                        info += f"Education: {avg_edu:.1f} years (range: {min_edu:.0f}-{max_edu:.0f})\n"
-                    
-                    # Sample size
-                    info += f"\nTotal subjects: {len(demographics)}"
-                    
-                    return info
-                else:
-                    return "Demographics available but in unsupported format"
-            except Exception as e:
-                logger.error(f"Error formatting demographics: {e}")
-                return f"Error formatting demographics: {e}"
-                
-        # Function to visualize FC matrices independently
-        def handle_fc_visualization():
-            """Generate FC visualization using stored data or synthetic data"""
-            try:
-                # Import necessary packages
-                import numpy as np
-                import pandas as pd
-                from visualization import plot_fc_matrices
-                
-                # Check if we have trained VAE and data
-                if app_state.get('vae_trained', False) and app_state.get('vae') is not None:
-                    logger.info("Visualizing FC matrices from trained VAE")
-                    
-                    # If we have stored original and reconstructed matrices, use them
-                    if app_state.get('original_fc') is not None and app_state.get('reconstructed_fc') is not None:
-                        original = app_state['original_fc']
-                        reconstructed = app_state['reconstructed_fc']
-                    else:
-                        # Otherwise, generate them from latents if available
-                        if app_state.get('latents') is not None:
-                            # Use the first sample
-                            latent = app_state['latents'][0].reshape(1, -1)
-                            
-                            # Generate reconstructed FC using transform
-                            try:
-                                # Get demographics for transformation
-                                if app_state.get('demographics') is not None:
-                                    demo = app_state['demographics']
-                                    demo_types = ['continuous', 'categorical', 'continuous', 'continuous']
-                                else:
-                                    # Create minimal synthetic demographics
-                                    demo = [[60], ['M'], [12], [50]]
-                                    demo_types = ['continuous', 'categorical', 'continuous', 'continuous']
-                                
-                                # Use transform for reconstruction
-                                reconstructed = app_state['vae'].transform(latent, demo, demo_types)[0]
-                                
-                                # Use synthetic original (not ideal but a fallback)
-                                original = reconstructed * 0.9 + np.random.randn(*reconstructed.shape) * 0.1
-                            except Exception as rec_error:
-                                logger.error(f"Error reconstructing FC: {rec_error}")
-                                # Create fallback matrices
-                                n = 264
-                                reconstructed = np.random.rand(n, n) * 2 - 1
-                                reconstructed = (reconstructed + reconstructed.T) / 2
-                                np.fill_diagonal(reconstructed, 1.0)
-                                original = reconstructed * 0.9 + np.random.randn(n, n) * 0.1
-                                original = (original + original.T) / 2
-                                np.fill_diagonal(original, 1.0)
-                        else:
-                            # Complete fallback - create synthetic data
-                            original = np.random.rand(264, 264) * 2 - 1
-                            original = (original + original.T) / 2  # Make symmetric
-                            np.fill_diagonal(original, 1.0)  # Set diagonal to 1
-                            reconstructed = original * 0.8 + np.random.randn(264, 264) * 0.1
-                            reconstructed = (reconstructed + reconstructed.T) / 2  # Make symmetric
-                            np.fill_diagonal(reconstructed, 1.0)  # Set diagonal to 1
-                    
-                    # Generate a new FC matrix
-                    if app_state.get('vae') is not None:
-                        # Sample from prior
-                        try:
-                            latent_dim = getattr(app_state['vae'], 'latent_dim', 32)  # Default to 32 if not found
-                            z = np.random.randn(1, latent_dim)
-                            
-                            # Get synthetic demographic data for generation
-                            if app_state.get('demographics') is not None:
-                                demo = app_state['demographics']
-                                demo_types = ['continuous', 'categorical', 'continuous', 'continuous']
-                            else:
-                                # Create minimal synthetic demographics
-                                demo = [[60], ['M'], [12], [50]]
-                                demo_types = ['continuous', 'categorical', 'continuous', 'continuous']
-                            
-                            # Use transform instead of direct decode
-                            generated = app_state['vae'].transform(z, demo, demo_types)[0]
-                        except Exception as gen_error:
-                            logger.error(f"Error generating new FC: {gen_error}")
-                            # Fallback synthetic data
-                            generated = np.random.rand(264, 264) * 2 - 1
-                            generated = (generated + generated.T) / 2  # Make symmetric
-                            np.fill_diagonal(generated, 1.0)  # Set diagonal to 1
-                    else:
-                        # Synthetic fallback
-                        generated = np.random.rand(264, 264) * 2 - 1
-                        generated = (generated + generated.T) / 2  # Make symmetric
-                        np.fill_diagonal(generated, 1.0)  # Set diagonal to 1
-                    
-                    # Create visualization with explicit save and close
-                    try:
-                        # First clear any existing figures to prevent interference
-                        plt.close('all')
-                        
-                        # Create the figure
-                        fig = plot_fc_matrices(original, reconstructed, generated)
-                        
-                        # Force rendering by drawing the canvas
-                        fig.canvas.draw()
-                        
-                        # Make sure the figure is complete before returning
-                        plt.tight_layout()
-                        
-                        logger.info("Successfully created FC matrix visualization figure")
-                    except Exception as fig_err:
-                        logger.error(f"Error creating visualization figure: {fig_err}")
-                        # Create a simple fallback figure
-                        fig = plt.figure(figsize=(15, 5))
-                        plt.text(0.5, 0.5, "Error creating visualization. See logs for details.", 
-                                ha='center', va='center', fontsize=14, color='red')
-                        plt.axis('off')
-                    
-                    # Generate info text
-                    demo_info = "FC MATRIX VISUALIZATION\n"
-                    demo_info += "=====================\n\n"
-                    
-                    # 1. Demographics information
-                    demo_info += "DEMOGRAPHICS INFORMATION:\n"
-                    if app_state.get('demographics') is not None:
-                        # Format and add overall demographics
-                        demo_info += format_demographics_info(app_state['demographics'])
-                        # Store for reference
-                        recon_demo = app_state['demographics']
-                        gen_demo = app_state['demographics']
-                    else:
-                        demo_info += "No demographic information available in dataset\n"
-                        # These should match what we used in the transform calls
-                        recon_demo = [[60], ['M'], [12], [50]]
-                        gen_demo = [[60], ['M'], [12], [50]]
-                    
-                    # Add reconstruction demographics
-                    demo_info += "\nRECONSTRUCTION DEMOGRAPHICS:\n"
-                    if isinstance(recon_demo, list) and len(recon_demo) >= 4:
-                        demo_info += f"Age: {recon_demo[0][0]}\n"
-                        demo_info += f"Sex: {recon_demo[1][0]}\n"
-                        demo_info += f"Months post stroke: {recon_demo[2][0]}\n"
-                        demo_info += f"WAB score: {recon_demo[3][0]}\n"
-                    elif isinstance(recon_demo, pd.DataFrame):
-                        # Handle DataFrame format
-                        demo_info += f"Used first row of demographics DataFrame\n"
-                    else:
-                        demo_info += "Used synthetic demographics for reconstruction\n"
-                    
-                    # Add generation demographics
-                    demo_info += "\nGENERATION DEMOGRAPHICS:\n"
-                    if isinstance(gen_demo, list) and len(gen_demo) >= 4:
-                        demo_info += f"Age: {gen_demo[0][0]}\n"
-                        demo_info += f"Sex: {gen_demo[1][0]}\n"
-                        demo_info += f"Months post stroke: {gen_demo[2][0]}\n"
-                        demo_info += f"WAB score: {gen_demo[3][0]}\n"
-                    elif isinstance(gen_demo, pd.DataFrame):
-                        # Handle DataFrame format
-                        demo_info += f"Used first row of demographics DataFrame\n"
-                    else:
-                        demo_info += "Used synthetic demographics for generation\n"
-                    
-                    # 2. Add FC matrix stats
-                    demo_info += f"\nFC MATRIX INFORMATION:\n"
-                    demo_info += f"Matrix shape: {original.shape}\n"
-                    demo_info += f"Original FC range: [{np.min(original):.3f}, {np.max(original):.3f}]\n"
-                    demo_info += f"Reconstructed FC range: [{np.min(reconstructed):.3f}, {np.max(reconstructed):.3f}]\n"
-                    demo_info += f"Generated FC range: [{np.min(generated):.3f}, {np.max(generated):.3f}]\n"
-                    
-                    # 3. Calculate metrics between original and reconstructed
-                    from sklearn.metrics import mean_squared_error, r2_score
-                    mse = mean_squared_error(original.flatten(), reconstructed.flatten())
-                    r2 = r2_score(original.flatten(), reconstructed.flatten())
-                    demo_info += f"\nRECONSTRUCTION METRICS:\n"
-                    demo_info += f"MSE: {mse:.4f}\n"
-                    demo_info += f"R²: {r2:.4f}\n"
-                    
-                    return [fig, demo_info]
-                else:
-                    # Create synthetic data visualization
-                    logger.info("Creating synthetic FC visualization")
-                    
-                    # Create synthetic FC matrices
-                    from visualization import plot_fc_matrices
-                    import numpy as np
-                    
-                    # Create symmetric matrices with values between -1 and 1
-                    n = 264  # Standard size for brain connectivity
-                    
-                    # Original FC (symmetric with diagonal=1)
-                    original = np.random.rand(n, n) * 2 - 1
-                    original = (original + original.T) / 2  # Make symmetric
-                    np.fill_diagonal(original, 1.0)  # Set diagonal to 1
-                    
-                    # Reconstructed FC (similar to original but with some noise)
-                    reconstructed = original * 0.8 + np.random.randn(n, n) * 0.1
-                    reconstructed = (reconstructed + reconstructed.T) / 2  # Make symmetric
-                    np.fill_diagonal(reconstructed, 1.0)  # Set diagonal to 1
-                    
-                    # Generated FC (new random matrix)
-                    generated = np.random.rand(n, n) * 2 - 1
-                    generated = (generated + generated.T) / 2  # Make symmetric
-                    np.fill_diagonal(generated, 1.0)  # Set diagonal to 1
-                    
-                    # Create visualization with explicit save and close
-                    try:
-                        # First clear any existing figures to prevent interference
-                        plt.close('all')
-                        
-                        # Create the figure
-                        fig = plot_fc_matrices(original, reconstructed, generated)
-                        
-                        # Force rendering by drawing the canvas
-                        fig.canvas.draw()
-                        
-                        # Make sure the figure is complete before returning
-                        plt.tight_layout()
-                        
-                        logger.info("Successfully created synthetic FC matrix visualization figure")
-                    except Exception as fig_err:
-                        logger.error(f"Error creating synthetic visualization figure: {fig_err}")
-                        # Create a simple fallback figure
-                        fig = plt.figure(figsize=(15, 5))
-                        plt.text(0.5, 0.5, "Error creating visualization. Using synthetic data.", 
-                                ha='center', va='center', fontsize=14, color='red')
-                        plt.axis('off')
-                    
-                    # Generate info text for synthetic data
-                    demo_info = "FC MATRIX VISUALIZATION (SYNTHETIC DATA)\n"
-                    demo_info += "=====================================\n\n"
-                    demo_info += "Using synthetic FC data for demonstration.\n"
-                    demo_info += "Train the VAE model to see real FC matrices.\n\n"
-                    
-                    # Create synthetic demographics to display
-                    demo_info += "DEMOGRAPHICS INFORMATION (SYNTHETIC):\n"
-                    age = 60
-                    sex = "M"
-                    months = 12
-                    wab = 50
-                    
-                    demo_info += f"Age: {age}\n"
-                    demo_info += f"Sex: {sex}\n"
-                    demo_info += f"Months post stroke: {months}\n"
-                    demo_info += f"WAB score: {wab}\n\n"
-                    
-                    # Split out reconstruction and generation demographics
-                    demo_info += "RECONSTRUCTION DEMOGRAPHICS:\n"
-                    demo_info += "Same as above (synthetic)\n\n"
-                    
-                    demo_info += "GENERATION DEMOGRAPHICS:\n"
-                    demo_info += "Same as above (synthetic)\n\n"
-                    
-                    # Matrix information
-                    demo_info += "SYNTHETIC FC MATRIX INFORMATION:\n"
-                    demo_info += f"Matrix shape: {original.shape}\n"
-                    demo_info += f"Value range: [{-1:.1f}, {1:.1f}]\n"
-                    demo_info += "Symmetric matrices with diagonal=1\n\n"
-                    
-                    # Add correlation between original and reconstructed
-                    from sklearn.metrics import mean_squared_error, r2_score
-                    mse = mean_squared_error(original.flatten(), reconstructed.flatten())
-                    r2 = r2_score(original.flatten(), reconstructed.flatten())
-                    demo_info += "METRICS (SYNTHETIC DATA):\n"
-                    demo_info += f"MSE: {mse:.4f}\n"
-                    demo_info += f"R²: {r2:.4f}\n"
-                    
-                    return [fig, demo_info]
-            except Exception as e:
-                logger.error(f"Error in FC visualization: {str(e)}", exc_info=True)
-                error_fig = plt.figure(figsize=(10, 6))
-                plt.text(0.5, 0.5, f"Error: {str(e)}", 
-                        horizontalalignment='center', verticalalignment='center',
-                        fontsize=12, color='red', wrap=True)
-                plt.axis('off')
+                    refresh_btn = gr.Button("Refresh Training Status", variant="secondary")
                 
-                return [error_fig, f"Error in FC visualization: {str(e)}"]
-        
-        # Tab 2: Random Forest Training Handler
-        def handle_rf_training(prediction_type, outcome_variable, rf_n_estimators, rf_max_depth, rf_cv_folds):
-            """Train the Random Forest model using the VAE latent representations"""
-            # Import necessary packages
-            import numpy as np
-            import pandas as pd
-            
-            # Check if VAE has been trained or if we can use synthetic data
-            if not app_state.get('vae_trained', False) or app_state.get('latents') is None:
-                # Instead of error, create synthetic data for demonstration
-                logger.info("No VAE latents available - using synthetic data for RF training")
-                
-                # Number of synthetic samples
-                n_samples = 30
-                
-                # Create synthetic latent features (10 dimensions)
-                np.random.seed(42)  # For reproducibility
-                latents = np.random.randn(n_samples, 10)
-                
-                # Create synthetic demographics
-                demographics = pd.DataFrame({
-                    'age': np.random.randint(40, 80, n_samples),
-                    'gender': np.random.choice(['M', 'F'], n_samples),
-                    'mpo': np.random.randint(1, 24, n_samples),
-                    'education': np.random.randint(8, 20, n_samples),
-                    'wab_aq': np.random.uniform(20, 80, n_samples)
-                })
-                
-                # Create synthetic treatment outcomes with correlation to features
-                # Higher age -> worse outcomes, higher education -> better outcomes
-                treatment_outcomes = (
-                    -0.3 * demographics['age'] +
-                    0.4 * demographics['education'] +
-                    0.6 * demographics['wab_aq'] +
-                    2.0 * latents[:, 0] -
-                    1.5 * latents[:, 1] +
-                    np.random.randn(n_samples) * 5
-                )
-                
-                # Scale to realistic range (0-100)
-                treatment_outcomes = (treatment_outcomes - treatment_outcomes.min()) / (treatment_outcomes.max() - treatment_outcomes.min()) * 80 + 10
-                
-                # Store in app_state
-                app_state['latents'] = latents
-                app_state['demographics'] = demographics
-                app_state['synthetic_data'] = True
-                
-                # Inform the user we're using synthetic data
-                logger.info("Created synthetic data for RF training demonstration")
-                info_msg = "Using synthetic data for demonstration. For real analysis, train the VAE in Tab 1 first."
-            else:
-                # Normal case - using real VAE latents
-                app_state['synthetic_data'] = False
-                info_msg = "Using VAE latents for Random Forest training."
-            
-            try:
-                # Update RF configuration
-                PREDICTION_CONFIG['default_outcome'] = outcome_variable
-                PREDICTION_CONFIG['n_estimators'] = rf_n_estimators
-                PREDICTION_CONFIG['max_depth'] = rf_max_depth if rf_max_depth > 0 else None
-                PREDICTION_CONFIG['cv_folds'] = rf_cv_folds
-                
-                # Note: prediction_type parameter is ignored as we only support regression
-                logger.info(f"Training Random Forest Regression model: outcome={outcome_variable}")
-                logger.info(f"RF parameters: n_estimators={rf_n_estimators}, max_depth={rf_max_depth}, cv_folds={rf_cv_folds}")
-                
-                # Get data from app_state
-                latents = app_state['latents']
-                demographics = app_state['demographics']
-                
-                # Train Random Forest predictor
-                from rcf_prediction import AphasiaTreatmentPredictor
-                
-                # Get treatment outcomes data
-                # Check if we already created synthetic data
-                if app_state.get('synthetic_data', False):
-                    # Use the synthetic treatment outcomes we created above
-                    # (available in this scope from the if block above)
-                    logger.info("Using synthetic treatment outcomes")
-                    # treatment_outcomes is already defined above
-                # Or try to find real treatment file
-                elif hasattr(app, 'last_treatment_file') and os.path.exists(app.last_treatment_file):
-                    treatment_file = app.last_treatment_file
-                    treatment_df = pd.read_csv(treatment_file)
-                    treatment_outcomes = treatment_df['outcome_score'].values
-                    logger.info(f"Using treatment outcomes from {treatment_file}")
-                else:
-                    # Check if we should show an error or use mock data
-                    if not app_state.get('synthetic_data', False):
-                        # Show error for missing treatment file
-                        logger.error("No treatment file available and not using synthetic data")
-                        error_fig = plt.figure(figsize=(10, 6))
-                        message = "Error: Treatment outcomes file not found. Please retrain the VAE in Tab 1."
-                        plt.text(0.5, 0.5, message, 
-                                horizontalalignment='center', verticalalignment='center',
-                                fontsize=14, color='red')
-                        plt.axis('off')
-                        
-                        return [error_fig, error_fig, "Error: Treatment outcomes file not found."]
-                    
-                    # Create a fallback set of treatment outcomes for synthetic data
-                    logger.info("No treatment outcomes found - creating mock data")
-                    n_samples = len(app_state['latents'])
-                    
-                    # Create simple mock outcomes based on demographics (if available)
-                    if app_state.get('demographics') is not None and 'wab_aq' in app_state['demographics']:
-                        # Base it on improvement from current scores
-                        base_scores = app_state['demographics']['wab_aq'].values
-                        # Add 10-30 points of improvement
-                        improvements = np.random.uniform(10, 30, n_samples)
-                        treatment_outcomes = np.minimum(base_scores + improvements, 100)
-                    else:
-                        # Complete fallback - just random scores
-                        treatment_outcomes = np.random.uniform(30, 90, n_samples)
-                    
-                    logger.info(f"Created {n_samples} mock treatment outcomes")
-                
-                # Initialize predictor
-                predictor = AphasiaTreatmentPredictor(
-                    n_estimators=rf_n_estimators,
-                    max_depth=rf_max_depth if rf_max_depth > 0 else None
-                )
-                
-                # Cross-validate
-                cv_results = predictor.cross_validate(
-                    latents=latents,
-                    demographics=demographics,
-                    treatment_outcomes=treatment_outcomes,
-                    n_splits=rf_cv_folds
+                # Connect manual refresh button
+                refresh_btn.click(
+                    fn=refresh_training_logs,
+                    inputs=[],
+                    outputs=[training_logs, epoch_progress, current_loss]
                 )
                 
-                # Fit final model
-                predictor.fit(latents, demographics, treatment_outcomes)
-                
-                # Store in app_state
-                app_state['predictor'] = predictor
-                app_state['rf_trained'] = True
-                
-                # Create feature importance plot
-                importance_fig = predictor.plot_feature_importance()
-                
-                # Create prediction performance plot
-                predictions = cv_results['predictions']
-                prediction_stds = cv_results['prediction_stds']
-                
-                performance_fig = plt.figure(figsize=(8, 6))
-                
-                # Check if we have valid predictions
-                if len(treatment_outcomes) > 0 and len(predictions) == len(treatment_outcomes):
-                    # Only create scatter plot if we have matching data
-                    plt.scatter(treatment_outcomes, predictions)
-                    
-                    # Reference line
-                    min_val = min(np.min(treatment_outcomes), np.min(predictions))
-                    max_val = max(np.max(treatment_outcomes), np.max(predictions))
-                    plt.plot([min_val, max_val], [min_val, max_val], 'r--')
-                    
-                    # Confidence band
-                    plt.fill_between(treatment_outcomes,
-                                    predictions - 2*prediction_stds,
-                                    predictions + 2*prediction_stds,
-                                    alpha=0.2, color='gray')
-                    
-                    plt.xlabel('Actual Outcome')
-                    plt.ylabel('Predicted Outcome')
-                    
-                    # Get performance metrics
-                    metrics_text = ""
-                    mean_metrics = cv_results.get('mean_metrics', {})
-                    
-                    r2 = mean_metrics.get('r2', 0)
-                    rmse = mean_metrics.get('rmse', 0)
-                    plt.title(f'Treatment Outcome Prediction\nR² = {r2:.3f}, RMSE = {rmse:.3f}')
-                    metrics_text = f"Regression Model Performance:\nR² = {r2:.4f}\nRMSE = {rmse:.4f}"
-                else:
-                    # Handle case with no data
-                    plt.text(0.5, 0.5, "No prediction data available", 
-                            ha='center', va='center', transform=plt.gca().transAxes)
-                    metrics_text = "No performance metrics available"
-                    
-                plt.tight_layout()
-                
-                # Add notice if using synthetic data
-                if app_state.get('synthetic_data', False):
-                    metrics_text = f"{metrics_text}\n\nNOTE: Using synthetic data for demonstration."
-                
-                return [importance_fig, performance_fig, metrics_text]
-                
-            except Exception as e:
-                logger.error(f"Error in RF training: {str(e)}", exc_info=True)
-                error_fig = plt.figure(figsize=(10, 6))
-                plt.text(0.5, 0.5, f"Error: {str(e)}", 
-                        horizontalalignment='center', verticalalignment='center',
-                        fontsize=12, color='red', wrap=True)
-                plt.axis('off')
-                
-                return [error_fig, error_fig, f"Error: {str(e)}"]
-        
-        # Connect the tab handlers
-        
-        # VAE Training tab - add queue=False to prevent timeouts on long operations
-        train_vae_btn.click(
-            fn=handle_vae_training,
-            inputs=[data_dir, local_nii_dir, latent_dim, nepochs, bsize, use_hf_dataset, 
-                   skip_behavioral, use_synthetic_nifti, use_synthetic_fc],
-            outputs=[fc_plot, fc_info, learning_plot],
-            queue=False,  # Don't queue requests, run immediately
-            api_name="train_vae"  # Add API name for direct access
-        )
-        
-        # FC Visualization button
-        visualize_fc_btn.click(
-            fn=handle_fc_visualization,
-            inputs=[],
-            outputs=[fc_plot, fc_info],
-            queue=False,
-            api_name="visualize_fc"
-        )
-        
-        # Random Forest Training tab
-        train_rf_btn.click(
-            fn=handle_rf_training,
-            inputs=[prediction_type, outcome_variable, rf_n_estimators, rf_max_depth, rf_cv_folds],
-            outputs=[importance_plot, prediction_plot, rf_metrics],
-            queue=False,
-            api_name="train_rf"
-        )
-        
-        # Tab 3: Treatment Prediction Handler
-        def handle_treatment_prediction(fmri_file, age, sex, months, wab):
-            """Predict treatment outcome for a new patient"""
-            try:
-                # Import necessary packages
-                import numpy as np
-                
-                # First, check if we have saved models we can use
-                rf_model_path = "results/treatment_predictor.joblib"
-                rf_available = os.path.exists(rf_model_path)
-                
-                # Create prediction app
-                temp_app = AphasiaPredictionApp()
-                
-                # If there are trained models in app_state, use them
-                if app_state.get('vae_trained', False) and app_state.get('rf_trained', False) and app_state.get('vae') is not None and app_state.get('predictor') is not None:
-                    logger.info("Using trained models from current session for prediction")
-                    temp_app.vae = app_state.get('vae')
-                    temp_app.predictor = app_state.get('predictor')
-                    temp_app.trained = True
-                    # Set latent_dim from VAE if available
-                    vae = app_state.get('vae')
-                    if vae is not None:
-                        if hasattr(vae, 'latent_dim'):
-                            temp_app.latent_dim = vae.latent_dim
-                            logger.info(f"Using latent_dim={temp_app.latent_dim} from VAE model")
-                        else:
-                            # Try other attributes that might contain latent_dim
-                            if hasattr(vae, 'vae') and hasattr(vae.vae, 'latent_dim'):
-                                temp_app.latent_dim = vae.vae.latent_dim
-                                logger.info(f"Using latent_dim={temp_app.latent_dim} from VAE.vae model")
-                            else:
-                                temp_app.latent_dim = 32  # Default
-                                logger.warning(f"Could not determine latent_dim from VAE model, using default: {temp_app.latent_dim}")
-                    else:
-                        temp_app.latent_dim = 32  # Default
-                        logger.warning(f"VAE model not available, using default latent_dim: {temp_app.latent_dim}")
-                    
-                # If we don't have trained models, but saved models exist, load them
-                elif rf_available:
-                    logger.info("Loading saved RF model for prediction")
+                # Function to validate training parameters
+                def validate_and_start_training(epochs, bs, lr):
+                    """Validate training parameters before starting training"""
                     try:
-                        # Try to load the RF model from disk
-                        from rcf_prediction import AphasiaTreatmentPredictor
-                        temp_app.predictor = AphasiaTreatmentPredictor.load_model(rf_model_path)
-                        temp_app.trained = True
-                        
-                        # Use the VAE from app_state if available, otherwise use synthetic FC
-                        if app_state.get('vae') is not None:
-                            temp_app.vae = app_state.get('vae')
-                            temp_app.latent_dim = temp_app.vae.latent_dim if hasattr(temp_app.vae, 'latent_dim') else 32
-                        else:
-                            # Create a synthetic FC matrix based on demographics
-                            logger.info("No VAE available - using synthetic FC data")
-                            from visualization import plot_treatment_trajectory
-                            
-                            # Generate synthetic prediction
-                            current_score = wab
-                            
-                            # Calculate predicted score based on demographics (simplified model)
-                            age_factor = -0.1 * (age - 60)  # Age effect (younger is better)
-                            time_factor = 0.7 * months      # More treatment time is better
-                            gender_factor = 2 if sex == "F" else 0  # Small gender effect
+                        # Convert and validate epochs
+                        epochs = int(epochs)
+                        if epochs <= 0:
+                            return "Error: Number of epochs must be positive"
+                        if epochs > 1000:
+                            return "Error: Number of epochs too high (max 1000)"
                             
-                            # Base improvement of 15 points, modified by factors
-                            improvement = 15 + age_factor + time_factor + gender_factor
-                            # Add some randomness
-                            improvement = max(5, min(30, improvement + np.random.normal(0, 3)))
+                        # Convert and validate batch size
+                        bs = int(bs)
+                        if bs <= 0:
+                            return "Error: Batch size must be positive"
+                        if bs > 128:
+                            return "Error: Batch size too high (max 128)"
                             
-                            predicted_score = min(100, current_score + improvement)
-                            prediction_std = 5.0  # Fixed uncertainty for demo
+                        # Convert and validate learning rate
+                        lr = float(lr)
+                        if lr <= 0:
+                            return "Error: Learning rate must be positive"
+                        if lr > 1.0:
+                            return "Error: Learning rate too high (max 1.0)"
                             
-                            # Create a trajectory plot
-                            fig = plot_treatment_trajectory(
-                                current_score=current_score,
-                                predicted_score=predicted_score,
-                                months_post_stroke=months,
-                                prediction_std=prediction_std
-                            )
-                            
-                            # Create prediction text
-                            prediction_text = (
-                                f"Using simplified model (VAE not trained)\n\n"
-                                f"Current WAB-AQ: {current_score:.1f}\n"
-                                f"Predicted WAB-AQ after {months} months: {predicted_score:.1f} ± {1.96*prediction_std:.1f}\n"
-                                f"Expected improvement: {predicted_score - current_score:.1f} points\n\n"
-                                f"Note: This prediction uses a simplified model.\n"
-                                f"Train the VAE for more accurate predictions."
-                            )
-                            
-                            return [prediction_text, fig]
-                    except Exception as load_err:
-                        logger.error(f"Error loading models: {load_err}")
-                        return [f"Error loading models: {load_err}", None]
-                else:
-                    # If no models are available, generate a demo visualization
-                    logger.info("No models available - creating demonstration visualization")
-                    from visualization import plot_treatment_trajectory
-                    
-                    # Generate synthetic prediction with realistic values
-                    current_score = wab
-                    
-                    # Calculate predicted score based on demographics (simplified model)
-                    age_factor = -0.1 * (age - 60)  # Age effect (younger is better)
-                    time_factor = 0.7 * months      # More treatment time is better
-                    gender_factor = 2 if sex == "F" else 0  # Small gender effect
-                    
-                    # Base improvement of 15 points, modified by factors
-                    improvement = 15 + age_factor + time_factor + gender_factor
-                    # Add some randomness
-                    improvement = max(5, min(30, improvement + np.random.normal(0, 3)))
-                    
-                    predicted_score = min(100, current_score + improvement)
-                    prediction_std = 5.0  # Fixed uncertainty for demo
-                    
-                    # Create a demo trajectory plot
-                    fig = plot_treatment_trajectory(
-                        current_score=current_score,
-                        predicted_score=predicted_score,
-                        months_post_stroke=months,
-                        prediction_std=prediction_std
-                    )
-                    
-                    # Create prediction text
-                    prediction_text = (
-                        f"DEMO MODE - No trained models available\n\n"
-                        f"Current WAB-AQ: {current_score:.1f}\n"
-                        f"Predicted WAB-AQ after {months} months: {predicted_score:.1f} ± {1.96*prediction_std:.1f}\n"
-                        f"Expected improvement: {predicted_score - current_score:.1f} points\n\n"
-                        f"Note: This is a demonstration using synthetic data.\n"
-                        f"Train the VAE and RF models for actual predictions."
-                    )
-                    
-                    return [prediction_text, fig]
-                
-                # Make prediction using the available models
-                return temp_app.predict_treatment(
-                    fmri_file=fmri_file, 
-                    age=age, 
-                    sex=sex, 
-                    months_post_stroke=months, 
-                    wab_score=wab
+                        # All parameters are valid, start training
+                        return generator.start_training(
+                            num_epochs=epochs,
+                            batch_size=bs,
+                            learning_rate=lr
+                        )
+                    except ValueError as e:
+                        return f"Error: Invalid parameter values - {str(e)}"
+                    except Exception as e:
+                        logger.error(f"Error starting training: {e}")
+                        return f"Error starting training: {str(e)}"
+                
+                # Connect the training button with validation
+                train_btn.click(
+                    fn=validate_and_start_training,
+                    inputs=[training_epochs, batch_size, learning_rate],
+                    outputs=training_status
                 )
-            except Exception as e:
-                logger.error(f"Error in treatment prediction: {str(e)}", exc_info=True)
-                return [f"Error in prediction: {str(e)}", None]
         
-        # Connect the treatment prediction handler
-        predict_btn.click(
-            fn=handle_treatment_prediction,
-            inputs=[fmri_file, age, sex, months, wab],
-            outputs=[prediction_text, trajectory_plot],
-            queue=False,
-            api_name="predict_treatment"
-        )
-        
-        # Add examples
-        gr.Examples(
-            examples=[
-                ["SreekarB/OSFData", "", 32, 200, 16, True, "regression", "wab_aq", True, False, False],  # Standard training without synthetic data
-                ["SreekarB/OSFData", "", 16, 100, 8, True, "classification", "wab_aq", True, False, False]  # Faster training with classification
-            ],
-            inputs=[data_dir, local_nii_dir, latent_dim, nepochs, bsize, use_hf_dataset, 
-                   prediction_type, outcome_variable, skip_behavioral,
-                   use_synthetic_nifti, use_synthetic_fc],
-        )
-        
-        # Add explanation
-        gr.Markdown("""
-        ## How to use this tool
-        
-        1. **Train Models Tab**: First train the VAE and Random Forest models using your dataset
-            - Provide the path to your data directory or HuggingFace dataset ID (e.g., "SreekarB/OSFData")
-            - You can optionally specify a local directory containing NIfTI files (.nii or .nii.gz format)
-            - The system needs:
-               - fMRI files (NIfTI format, *.nii or *.nii.gz)
-               - FC_graph_covariate_data.csv (with columns: ID, wab_aq, age, mpo, education, gender, handedness)
-               - treatment_outcomes.csv (with columns: subject_id, treatment_type, outcome_score)
-            - Adjust parameters like latent dimensions and training epochs
-            - Choose regression or classification prediction type
-            - Select which variable to predict (WAB score by default)
+        # Add a tab for diagnostics
+        with gr.TabItem("Diagnostics"):
+            gr.Markdown("## Connection Diagnostics")
+            gr.Markdown("Use these tools to diagnose connection issues with external services.")
             
-        2. **Predict Treatment Tab**: Use the trained models to predict treatment outcomes
-            - Upload a patient's fMRI scan (must be in NIfTI format)
-            - Enter the patient's demographic information
-            - Click "Predict Treatment Outcome" to see the projected treatment trajectory
-            - The visualization shows the predicted outcome with confidence intervals
+            # Add a button to test HuggingFace Hub connection
+            test_hf_btn = gr.Button("Test Hugging Face Connection")
+            hf_status = gr.Markdown("Click the button to test connection to Hugging Face Hub")
             
-        ## Required Data Files
-        
-        Your data directory must contain:
-        
-        1. **fMRI Data**: NIfTI files (*.nii or *.nii.gz)
-        
-        2. **FC_graph_covariate_data.csv**: A CSV file with the following columns:
-           - ID: Unique identifier for each patient
-           - wab_aq: Western Aphasia Battery Aphasia Quotient score
-           - age: Patient's age
-           - mpo: Months post onset (time since stroke)
-           - education: Years of education
-           - gender: Patient's gender (M/F)
-           - handedness: Patient's handedness (Left/Right)
-        
-        3. **treatment_outcomes.csv**: A CSV file with the following columns:
-           - subject_id: Matching the IDs in FC_graph_covariate_data.csv
-           - treatment_type: Type of treatment administered
-           - outcome_score: Treatment outcome measure (e.g., WAB score improvement)
-        
-        ## Interpreting Results
+            # Function to test connection and return status
+            def run_hf_test():
+                try:
+                    result = test_huggingface_connection()
+                    if result:
+                        return "✅ **Connection to Hugging Face Hub successful**"
+                    else:
+                        return "❌ **Connection to Hugging Face Hub failed. See console logs for details.**"
+                except Exception as e:
+                    return f"❌ **Error testing connection: {str(e)}**"
+            
+            # Connect the test button
+            test_hf_btn.click(fn=run_hf_test, outputs=hf_status)
+            
+        # About section
+        gr.Markdown("### About")
+        gr.Markdown("""
+        This app uses a latent diffusion model to generate synthetic fMRI data for patients with aphasia.
+        The model is conditioned on demographics and aphasia severity metrics from the SreekarB/OSFData dataset on Hugging Face Hub.
         
-        - The **Feature Importance** plot shows which latent dimensions and demographic variables most strongly predict treatment outcomes
-        - The **Prediction Performance** plot shows how well the model predicts known outcomes
-        - The **Treatment Trajectory** shows the projected change in WAB score over the course of treatment
+        **Note**: In this demo version, we use simplified generation settings for speed. In a real research setting, 
+        generation would use many more diffusion steps (500-1000) and higher resolution, requiring hours of computation 
+        on specialized hardware.
         
-        Note: For optimal results, train with at least 500 epochs and latent dimension of 32 or higher.
+        Created for running on Hugging Face Spaces.
         """)
+        
     
     return interface
 
 if __name__ == "__main__":
     interface = create_interface()
-    
-    # Check if running in Hugging Face Spaces
-    import os
-    
-    # Make sure matplotlib is properly configured for Gradio
-    import matplotlib
-    matplotlib.use('Agg')  # Use non-interactive backend
-    
-    # Set figure format to PNG (more compatible)
-    matplotlib.rcParams['figure.dpi'] = 100
-    matplotlib.rcParams['savefig.dpi'] = 100
-    matplotlib.rcParams['savefig.format'] = 'png'
-    
-    if os.environ.get('SPACE_ID'):
-        # Running in Spaces
-        interface.launch(
-            # These parameters help with plot rendering
-            server_name="0.0.0.0",
-            server_port=7860,
-            show_error=True,
-            share=False,  # No sharing needed in Spaces
-            debug=True    # Show full error tracebacks
-        )
-    else:
-        # Running locally
-        interface.launch(
-            # These parameters help with plot rendering
-            server_name="127.0.0.1",
-            show_error=True,
-            share=True,
-            debug=True    # Show full error tracebacks
-        )
\ No newline at end of file
+    interface.launch(share=True)
+