diff --git "a/app.py" "b/app.py" new file mode 100644--- /dev/null +++ "b/app.py" @@ -0,0 +1,2403 @@ +# ============================================ +# TimeFlow Pro - Data Analysis and Preprocessing +# ============================================ +import streamlit as st +import pandas as pd +import numpy as np +import os +import sys +import glob +import re +from datetime import datetime, timedelta +import plotly.graph_objects as go +import plotly.express as px +from plotly.subplots import make_subplots +from PIL import Image +import matplotlib.pyplot as plt +import warnings + +from pipeline.main_pipeline import EnhancedDataPreprocessingPipeline + +warnings.filterwarnings('ignore') + +# Add project path +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from config.config import Config +from data_loader.data_loader import DataLoader +from visualization.visualization_manager import VisualisationManager + +# ============================================ +# PAGE CONFIGURATION +# ============================================ +st.set_page_config( + page_title="TimeFlow Pro - Data Analysis and Preprocessing", + page_icon="๐Ÿ“Š", + layout="wide", + initial_sidebar_state="expanded" +) + +# ============================================ +# STATE MANAGEMENT CLASS +# ============================================ +class StreamlitApp: + """Main Streamlit application class""" + + def __init__(self): + self.init_session_state() + self.config = None + self.pipeline = None + self.data = None + + def init_session_state(self): + """Initialise session state""" + if 'pipeline_completed' not in st.session_state: + st.session_state.pipeline_completed = False + if 'processed_data' not in st.session_state: + st.session_state.processed_data = None + if 'modeling_data' not in st.session_state: + st.session_state.modeling_data = None + if 'current_step' not in st.session_state: + st.session_state.current_step = 1 + if 'uploaded_file' not in st.session_state: + 
st.session_state.uploaded_file = None + if 'config_params' not in st.session_state: + st.session_state.config_params = self.get_default_config() + if 'plots_path' not in st.session_state: + st.session_state.plots_path = None + if 'available_plots' not in st.session_state: + st.session_state.available_plots = {} + if 'synthetic_data_generated' not in st.session_state: + st.session_state.synthetic_data_generated = False + if 'auto_pipeline_ready' not in st.session_state: + st.session_state.auto_pipeline_ready = False + if 'quick_test_mode' not in st.session_state: + st.session_state.quick_test_mode = False + + def get_default_config(self): + """Get default configuration""" + return { + 'data_path': '', + 'results_dir': 'streamlit_results', + 'target_column': '', + 'start_year': 1970, + 'end_year': 1990, + 'max_lags': 5, + 'seasonal_period': 365, + 'rolling_windows': [7, 30, 90], + 'expanding_windows': [30, 90], + 'test_size': 0.2, + 'validation_size': 0.1, + 'scaling_method': 'robust', + 'feature_selection_method': 'correlation', + 'max_features': 20, + 'missing_threshold': 0.3, + 'outlier_method': 'iqr', + 'enable_validation': True, + 'split_method': 'time_based' + } + + def create_sidebar(self): + """Create sidebar""" + with st.sidebar: + st.title("๐ŸŽฏ TimeFlowPro") + st.markdown("---") + + # Navigation + st.subheader("Navigation") + steps = { + 1: "๐Ÿ“ Data Loading", + 2: "โš™๏ธ Configuration", + 3: "๐Ÿ” Data Analysis", + 4: "โšก Pipeline Execution", + 5: "๐Ÿ“Š Results", + 6: "๐Ÿ“ˆ Visualisations", + 7: "๐Ÿค– Modelling" + } + + for step_num, step_name in steps.items(): + if st.button( + f"{step_name}", + key=f"nav_{step_num}", + type="primary" if st.session_state.current_step == step_num else "secondary", + width='stretch' + ): + st.session_state.current_step = step_num + st.rerun() + + st.markdown("---") + + # Quick start with synthetic data + st.subheader("โšก Quick Test") + + if st.button("๐Ÿš€ Quick Start with Synthetic Data", + type="primary", + 
width='stretch', + help="Generate synthetic data and run pipeline immediately"): + st.session_state.quick_test_mode = True + st.session_state.current_step = 1 + st.rerun() + + st.markdown("---") + + # Project information + st.subheader("๐Ÿ“ˆ About the Project") + st.info(""" + TimeFlow Pro - Data Analysis and Preprocessing. + + **New Features:** + - Synthetic data generation for testing + - Automatic pipeline execution + - Quick testing without file upload + + **Standard Features:** + - Missing data analysis and processing + - Outlier detection + - Feature engineering + - Stationarity analysis + - Data scaling + - Feature selection + """) + + # Progress indicator + if st.session_state.pipeline_completed: + st.success("โœ… Pipeline completed") + else: + st.warning("โš ๏ธ Pipeline not started") + + # Quick test indicator + if st.session_state.quick_test_mode: + st.info("โšก Quick test mode active") + + def generate_synthetic_data(self, n_days=1095, include_seasonality=True, include_trend=True, + include_noise=True, include_exogenous=True, data_type="complex"): + """ + Generate synthetic data for testing + + Args: + n_days (int): Number of days of data + include_seasonality (bool): Include seasonality + include_trend (bool): Include trend + include_noise (bool): Include noise + include_exogenous (bool): Include exogenous variables + data_type (str): Data type (simple, medium, complex) + + Returns: + pd.DataFrame: Generated synthetic data + """ + try: + # Base parameters depending on data type + if data_type == "simple": + n_days = min(n_days, 365) # Limit for simple type + trend_strength = 0.005 + noise_std = 2 + include_exogenous = False + elif data_type == "medium": + n_days = min(n_days, 730) # Limit for medium type + trend_strength = 0.01 + noise_std = 5 + include_exogenous = True + else: # complex + n_days = min(n_days, 1095) # Limit for complex type + trend_strength = 0.02 + noise_std = 10 + include_exogenous = True + + # Create dates + start_date = 
datetime.now() - timedelta(days=n_days) + dates = pd.date_range(start=start_date, periods=n_days, freq='D') + + # Base trend + if include_trend: + trend = np.linspace(0, trend_strength * n_days, n_days) + else: + trend = np.zeros(n_days) + + # Seasonality + if include_seasonality: + # Annual seasonality + seasonal = 10 * np.sin(2 * np.pi * np.arange(n_days) / 365) + # Quarterly seasonality + seasonal += 5 * np.sin(2 * np.pi * np.arange(n_days) / 90) + # Monthly seasonality + seasonal += 3 * np.sin(2 * np.pi * np.arange(n_days) / 30) + # Weekly seasonality + seasonal += 2 * np.sin(2 * np.pi * np.arange(n_days) / 7) + else: + seasonal = np.zeros(n_days) + + # Main target variable (water consumption) + base_value = 100 + raskhodvoda = base_value + trend + seasonal + + # Add noise + if include_noise: + noise = np.random.normal(0, noise_std, n_days) + raskhodvoda += noise + + # Create DataFrame + data = pd.DataFrame({ + 'date': dates, + 'raskhodvoda': raskhodvoda + }) + + # Add exogenous variables + if include_exogenous: + # Temperature (seasonal) + data['temperature'] = 15 + 10 * np.sin(2 * np.pi * np.arange(n_days) / 365) + np.random.normal(0, 3, n_days) + + # Precipitation (random spikes) + precipitation = np.random.exponential(2, n_days) + # Add seasonality to precipitation + precipitation_seasonality = 5 * np.sin(2 * np.pi * np.arange(n_days) / 365 + np.pi/2) + data['precipitation'] = np.maximum(0, precipitation + precipitation_seasonality) + + # Pressure + data['pressure'] = 760 + np.random.normal(0, 5, n_days) + + # Humidity + data['humidity'] = 60 + 20 * np.sin(2 * np.pi * np.arange(n_days) / 180) + np.random.normal(0, 10, n_days) + + # Electricity consumption (correlated with target variable) + data['electricity_consumption'] = raskhodvoda * 0.8 + np.random.normal(0, 5, n_days) + + # Day of week (categorical variable) + data['day_of_week'] = dates.dayofweek + data['is_weekend'] = (data['day_of_week'] >= 5).astype(int) + + # Holidays (random) + holidays = 
np.random.choice([0, 1], size=n_days, p=[0.95, 0.05]) + data['is_holiday'] = holidays + + # Lag variables + for lag in [1, 7, 30]: + data[f'raskhodvoda_lag_{lag}'] = data['raskhodvoda'].shift(lag) + + # Moving averages + for window in [7, 30]: + data[f'raskhodvoda_ma_{window}'] = data['raskhodvoda'].rolling(window=window).mean() + + # Add missing values for realism (5% random missing values) + # CORRECTION: proper creation of missing value mask + for col in data.columns: + if col != 'date': # Don't add missing values to dates + mask = np.random.random(len(data)) < 0.05 + data.loc[mask, col] = np.nan + + # Add outliers (1% of data) + # CORRECTION: proper creation of outlier mask + numeric_cols = data.select_dtypes(include=[np.number]).columns.tolist() + for col in numeric_cols: + outlier_mask = np.random.random(len(data)) < 0.01 + if outlier_mask.any(): + # Find outlier indices + outlier_indices = data.index[outlier_mask] + for idx in outlier_indices: + if col in data.columns: + mean_val = data[col].mean(skipna=True) + std_val = data[col].std(skipna=True) + if not np.isnan(mean_val) and not np.isnan(std_val) and std_val > 0: + outlier_value = mean_val + 5 * std_val * np.random.choice([-1, 1]) + data.at[idx, col] = outlier_value + + # Reset index + data.reset_index(drop=True, inplace=True) + + st.session_state.synthetic_data_generated = True + return data + + except Exception as e: + st.error(f"Error generating synthetic data: {str(e)}") + import traceback + st.error(f"Error traceback: {traceback.format_exc()}") + return None + + def quick_test_pipeline(self): + """Quick pipeline execution with synthetic data""" + with st.spinner("๐Ÿš€ Running quick test with synthetic data..."): + try: + # Step 1: Generate synthetic data + st.info("Step 1: Generating synthetic data...") + synthetic_data = self.generate_synthetic_data( + n_days=365, # Reduced for speed + include_seasonality=True, + include_trend=True, + include_noise=True, + include_exogenous=True, + 
data_type="medium" # Changed to medium for balance between speed and quality + ) + + if synthetic_data is None: + st.error("Failed to generate synthetic data") + return + + # Save data to temporary file + temp_file = "temp_synthetic_data.csv" + synthetic_data.to_csv(temp_file, index=False) + + # Step 2: Configure settings + st.info("Step 2: Configuring settings...") + config_params = st.session_state.config_params.copy() + config_params.update({ + 'data_path': temp_file, + 'target_column': 'raskhodvoda', + 'start_year': 2020, + 'end_year': 2023, + 'max_lags': 7, + 'seasonal_period': 365, + 'rolling_windows': [7, 30], + 'expanding_windows': [30], + 'test_size': 0.2, + 'validation_size': 0.1, + 'scaling_method': 'robust', + 'feature_selection_method': 'correlation', + 'max_features': 10, # Reduced for speed + 'missing_threshold': 0.3, + 'outlier_method': 'iqr', + 'enable_validation': True, + 'split_method': 'time_based' + }) + + # Step 3: Create and run pipeline + st.info("Step 3: Creating and running pipeline...") + + # Create progress bar + progress_bar = st.progress(0) + status_text = st.empty() + + # Update configuration + st.session_state.config_params = config_params + st.session_state.uploaded_file = temp_file + st.session_state.data_preview = synthetic_data + + # Create configuration + status_text.text("Creating configuration...") + progress_bar.progress(20) + + config = Config(**config_params) + + # Create pipeline + status_text.text("Initialising pipeline...") + progress_bar.progress(40) + + self.pipeline = EnhancedDataPreprocessingPipeline(config) + + # Run pipeline + status_text.text("Running preprocessing pipeline...") + progress_bar.progress(60) + + processed_data = self.pipeline.run_full_pipeline( + use_synthetic=False, # Synthetic data already loaded + save_intermediate=True, + create_reports=True + ) + + # Update progress + if processed_data is not None: + status_text.text("Getting data for modelling...") + progress_bar.progress(80) + + modeling_data 
= self.pipeline.get_final_data_for_modeling() + + # Save to session state + st.session_state.processed_data = processed_data + st.session_state.modeling_data = modeling_data + st.session_state.pipeline_completed = True + st.session_state.plots_path = os.path.join(config.results_dir, 'plots') + st.session_state.auto_pipeline_ready = True + + # Collect information about available plots + self.collect_available_plots() + + # Completion + status_text.text("Completing...") + progress_bar.progress(100) + + st.success("โœ… Quick test completed successfully!") + + # Show results + col1, col2, col3 = st.columns(3) + + with col1: + st.metric("Records generated", f"{synthetic_data.shape[0]:,}") + + with col2: + st.metric("Processed data", f"{processed_data.shape[0]:,} rows") + + with col3: + st.metric("Final features", f"{processed_data.shape[1]} columns") + + # Automatic transition to results + st.session_state.current_step = 5 + st.rerun() + + else: + st.error("โŒ Error running pipeline") + st.error("Check logs for more information") + + except Exception as e: + st.error(f"โŒ Error during quick test: {str(e)}") + import traceback + st.error(f"Error traceback: {traceback.format_exc()}") + + + def render_step_1_data_loading(self): + """Step 1: Data Loading""" + st.header("๐Ÿ“ Data Loading") + + # Check quick test mode + if st.session_state.quick_test_mode and not st.session_state.auto_pipeline_ready: + st.info("โšก Quick test mode activated. 
Generating synthetic data and running pipeline...") + self.quick_test_pipeline() + return + + col1, col2 = st.columns([2, 1]) + + with col1: + # File upload + uploaded_file = st.file_uploader( + "Upload CSV file with data", + type=['csv', 'xlsx', 'parquet'], + help="Supported formats: CSV, Excel, Parquet" + ) + + if uploaded_file is not None: + # Save file temporarily + file_path = f"temp_data.{uploaded_file.name.split('.')[-1]}" + with open(file_path, "wb") as f: + f.write(uploaded_file.getbuffer()) + + st.session_state.uploaded_file = file_path + st.session_state.config_params['data_path'] = file_path + + # Load and preview data + try: + if file_path.endswith('.csv'): + data = pd.read_csv(file_path) + elif file_path.endswith('.xlsx'): + data = pd.read_excel(file_path) + elif file_path.endswith('.parquet'): + data = pd.read_parquet(file_path) + else: + st.error("Unsupported file format") + return + + st.session_state.data_preview = data + + # Data preview + st.subheader("Data Preview") + st.dataframe(data.head(50), width='stretch') + + # Basic information + st.subheader("๐Ÿ“‹ Data Information") + + info_col1, info_col2, info_col3 = st.columns(3) + + with info_col1: + st.metric("Rows", data.shape[0]) + st.metric("Columns", data.shape[1]) + + with info_col2: + numeric_cols = data.select_dtypes(include=[np.number]).columns.tolist() + st.metric("Numeric columns", len(numeric_cols)) + categorical_cols = data.select_dtypes(include=['object', 'category']).columns.tolist() + st.metric("Categorical columns", len(categorical_cols)) + + with info_col3: + total_missing = data.isnull().sum().sum() + missing_percentage = (total_missing / (data.shape[0] * data.shape[1])) * 100 + st.metric("Missing values", f"{total_missing:,}") + st.metric("Missing percentage", f"{missing_percentage:.2f}%") + + # Automatic target column selection if not set + if 'target_column' not in st.session_state.config_params or not st.session_state.config_params['target_column']: + numeric_columns = 
data.select_dtypes(include=[np.number]).columns.tolist() + if numeric_columns: + # Automatically select column with typical name + target_keywords = ['target', 'y', 'value', 'price', 'sales', 'demand', 'raskhod', 'ั€ะฐัั…ะพะด'] + selected_target = None + + for col in numeric_columns: + if any(keyword in col.lower() for keyword in target_keywords): + selected_target = col + break + + # If not found by keywords, take last numeric column + if not selected_target and numeric_columns: + selected_target = numeric_columns[-1] + + if selected_target: + st.session_state.config_params['target_column'] = selected_target + st.info(f"Target variable automatically selected: **{selected_target}**") + st.info("You can change it in the next step") + + # Button to proceed to next step + if st.button("โžก๏ธ Go to Configuration", type="primary", width='stretch'): + st.session_state.current_step = 2 + st.rerun() + + except Exception as e: + st.error(f"Error loading data: {str(e)}") + + with col2: + # Demo data + st.subheader("๐ŸŽฎ Demo Mode") + + demo_option = st.radio( + "Choose demo data:", + ["Synthetic Data", "Time Series Example"] + ) + + # Synthetic data settings + with st.expander("โš™๏ธ Synthetic Data Settings", expanded=False): + data_type = st.selectbox( + "Data Type", + options=["Simple", "Medium", "Complex"], + index=1, + help="Simple: 1 year, few features\nMedium: 2 years, main features\nComplex: 3 years, all features" + ) + + n_days = st.slider( + "Number of days", + min_value=90, + max_value=1825, + value=1095, + step=30, + help="Number of days in synthetic data" + ) + + include_trend = st.checkbox("Include trend", value=True) + include_seasonality = st.checkbox("Include seasonality", value=True) + include_noise = st.checkbox("Include noise", value=True) + include_exogenous = st.checkbox("Include additional features", value=True) + + if st.button("Generate and Load Synthetic Data", width='stretch'): + with st.spinner("Creating synthetic data..."): + try: + # Data type 
mapping + data_type_map = { + "Simple": "simple", + "Medium": "medium", + "Complex": "complex" + } + + # Generate synthetic data + synthetic_data = self.generate_synthetic_data( + n_days=n_days, + include_seasonality=include_seasonality, + include_trend=include_trend, + include_noise=include_noise, + include_exogenous=include_exogenous, + data_type=data_type_map[data_type] + ) + + if synthetic_data is not None: + st.session_state.data_preview = synthetic_data + st.session_state.uploaded_file = "synthetic_data" + st.session_state.config_params['data_path'] = 'synthetic_data' + + # Automatically select target variable + if 'raskhodvoda' in synthetic_data.columns: + st.session_state.config_params['target_column'] = 'raskhodvoda' + + st.success(f"โœ… Synthetic data created: {synthetic_data.shape[0]} rows, {synthetic_data.shape[1]} columns") + + # Show preview + st.subheader("Synthetic Data Preview") + st.dataframe(synthetic_data.head(20), width='stretch') + + # Statistics + st.subheader("๐Ÿ“Š Synthetic Data Statistics") + + stat_col1, stat_col2 = st.columns(2) + + with stat_col1: + st.metric("Period", f"{synthetic_data.shape[0]} days") + # CORRECTION: convert dates to strings for display + if 'date' in synthetic_data.columns: + min_date = synthetic_data['date'].min() + max_date = synthetic_data['date'].max() + if isinstance(min_date, (pd.Timestamp, datetime)): + st.text(f"Start: {min_date.strftime('%Y-%m-%d')}") + else: + st.text(f"Start: {str(min_date)}") + + if isinstance(max_date, (pd.Timestamp, datetime)): + st.text(f"End: {max_date.strftime('%Y-%m-%d')}") + else: + st.text(f"End: {str(max_date)}") + + with stat_col2: + if 'raskhodvoda' in synthetic_data.columns: + st.metric("Average consumption", f"{synthetic_data['raskhodvoda'].mean():.2f}") + st.metric("Max consumption", f"{synthetic_data['raskhodvoda'].max():.2f}") + st.metric("Min consumption", f"{synthetic_data['raskhodvoda'].min():.2f}") + + # Quick pipeline execution + st.markdown("---") + if 
st.button("๐Ÿš€ Quick Run Pipeline with This Data", type="primary", width='stretch'): + st.session_state.quick_test_mode = True + st.session_state.auto_pipeline_ready = False + st.rerun() + + st.rerun() + else: + st.error("Failed to generate synthetic data") + + except Exception as e: + st.error(f"Error creating synthetic data: {str(e)}") + + st.markdown("---") + + # Instructions + st.subheader("๐Ÿ“– Instructions") + st.markdown(""" + 1. Upload CSV file with data **OR** + 2. Generate synthetic data for testing + 3. Check data preview + 4. Target variable will be selected automatically + 5. Go to configuration to specify parameters + + **Data Requirements:** + - Date in separate column or index + - Clean column names + - Time series with regular intervals + """) + + def render_step_2_configuration(self): + """Step 2: Pipeline Configuration""" + st.header("โš™๏ธ Pipeline Configuration") + + # Automatic configuration for synthetic data + if st.session_state.uploaded_file == "synthetic_data" or st.session_state.config_params['data_path'] == 'synthetic_data': + st.info("โšก Synthetic data detected. 
Optimised configuration applied.") + + # Automatic parameter setup for synthetic data + if st.button("Apply Recommended Settings for Synthetic Data", width='stretch'): + st.session_state.config_params.update({ + 'target_column': 'raskhodvoda', + 'max_lags': 7, + 'seasonal_period': 365, + 'rolling_windows': [7, 30, 90], + 'expanding_windows': [30, 90], + 'test_size': 0.2, + 'validation_size': 0.1, + 'scaling_method': 'robust', + 'feature_selection_method': 'correlation', + 'max_features': 15, + 'missing_threshold': 0.3, + 'outlier_method': 'iqr', + 'enable_validation': True + }) + st.success("Settings applied!") + st.rerun() + + # Configuration sections + tab1, tab2, tab3, tab4 = st.tabs([ + "๐Ÿ“Š Basic Parameters", + "๐Ÿ”ง Data Processing", + "๐ŸŽฏ Features and Selection", + "๐Ÿ“ˆ Temporal Parameters" + ]) + + with tab1: + col1, col2 = st.columns(2) + + with col1: + st.subheader("Basic Parameters") + st.session_state.config_params['results_dir'] = st.text_input( + "Results Directory", + value=st.session_state.config_params['results_dir'] + ) + + # CORRECTION: replace text_input with selectbox for target variable selection + if hasattr(st.session_state, 'data_preview') and st.session_state.data_preview is not None: + # Get all data columns + all_columns = st.session_state.data_preview.columns.tolist() + + # If target variable already set and present in data, use it + current_target = st.session_state.config_params.get('target_column', '') + default_index = 0 + + if current_target in all_columns: + default_index = all_columns.index(current_target) + elif len(all_columns) > 0: + # Try to find suitable default column + numeric_columns = st.session_state.data_preview.select_dtypes(include=[np.number]).columns.tolist() + if numeric_columns: + # Look for columns with typical target variable names + target_keywords = ['target', 'y', 'value', 'price', 'sales', 'demand', 'raskhod', 'ั€ะฐัั…ะพะด'] + for i, col in enumerate(all_columns): + if any(keyword in col.lower() for 
keyword in target_keywords): + default_index = i + break + # If not found by keywords, take first numeric column + if default_index == 0 and numeric_columns[0] in all_columns: + default_index = all_columns.index(numeric_columns[0]) + + st.session_state.config_params['target_column'] = st.selectbox( + "Select Target Variable", + options=all_columns, + index=default_index, + help="Select column to be predicted" + ) + else: + # If data not loaded, keep text field + st.session_state.config_params['target_column'] = st.text_input( + "Target Variable", + value=st.session_state.config_params.get('target_column', ''), + help="Enter target column name" + ) + + st.session_state.config_params['enable_validation'] = st.checkbox( + "Enable Data Validation", + value=st.session_state.config_params['enable_validation'] + ) + + with col2: + st.subheader("Data Split") + st.session_state.config_params['test_size'] = st.slider( + "Test Set Size (%)", + min_value=5, + max_value=40, + value=int(st.session_state.config_params['test_size'] * 100), + step=5, + format="%d%%" + ) / 100 + + st.session_state.config_params['validation_size'] = st.slider( + "Validation Set Size (%)", + min_value=5, + max_value=30, + value=int(st.session_state.config_params['validation_size'] * 100), + step=5, + format="%d%%" + ) / 100 + + split_methods = ['time_based', 'random'] + st.session_state.config_params['split_method'] = st.selectbox( + "Split Method", + options=split_methods, + index=split_methods.index(st.session_state.config_params['split_method']) + ) + + with tab2: + col1, col2 = st.columns(2) + + with col1: + st.subheader("Missing Value Processing") + st.session_state.config_params['missing_threshold'] = st.slider( + "Missing Value Column Removal Threshold", + min_value=0.0, + max_value=0.5, + value=st.session_state.config_params['missing_threshold'], + step=0.05, + format="%.2f" + ) + + st.subheader("Outlier Processing") + outlier_methods = ['iqr', 'zscore', 'isolation_forest'] + 
st.session_state.config_params['outlier_method'] = st.selectbox( + "Outlier Detection Method", + options=outlier_methods, + index=outlier_methods.index(st.session_state.config_params['outlier_method']) + ) + + with col2: + st.subheader("Data Scaling") + scaling_methods = ['robust', 'standard', 'minmax', 'none'] + st.session_state.config_params['scaling_method'] = st.selectbox( + "Scaling Method", + options=scaling_methods, + index=scaling_methods.index(st.session_state.config_params['scaling_method']) + ) + + if st.session_state.config_params['scaling_method'] == 'none': + st.info("โš ๏ธ Data will not be scaled") + + with tab3: + col1, col2 = st.columns(2) + + with col1: + st.subheader("Feature Engineering") + st.session_state.config_params['max_lags'] = st.slider( + "Maximum Number of Lags", + min_value=1, + max_value=20, + value=st.session_state.config_params['max_lags'], + step=1 + ) + + rolling_windows_input = st.text_input( + "Windows for Rolling Statistics (comma-separated)", + value=', '.join(map(str, st.session_state.config_params['rolling_windows'])) + ) + if rolling_windows_input: + st.session_state.config_params['rolling_windows'] = [ + int(x.strip()) for x in rolling_windows_input.split(',') if x.strip().isdigit() + ] + + with col2: + st.subheader("Feature Selection") + feature_methods = ['correlation', 'variance', 'mutual_info', 'rf', 'none'] + st.session_state.config_params['feature_selection_method'] = st.selectbox( + "Feature Selection Method", + options=feature_methods, + index=feature_methods.index(st.session_state.config_params['feature_selection_method']) + ) + + st.session_state.config_params['max_features'] = st.slider( + "Maximum Number of Features", + min_value=5, + max_value=100, + value=st.session_state.config_params['max_features'], + step=5 + ) + + with tab4: + col1, col2 = st.columns(2) + + with col1: + st.subheader("Temporal Parameters") + + # If there is data for preview, show date range + if hasattr(st.session_state, 
'data_preview'): + if 'date' in st.session_state.data_preview.columns: + date_col = 'date' + elif isinstance(st.session_state.data_preview.index, pd.DatetimeIndex): + dates = st.session_state.data_preview.index + else: + # Try to find date column + date_cols = [col for col in st.session_state.data_preview.columns + if 'date' in col.lower() or 'time' in col.lower()] + date_col = date_cols[0] if date_cols else None + + if date_col: + if date_col in st.session_state.data_preview.columns: + dates = pd.to_datetime(st.session_state.data_preview[date_col]) + else: + dates = st.session_state.data_preview.index + + if len(dates) > 0: + min_date = dates.min() + max_date = dates.max() + + col1_date, col2_date = st.columns(2) + with col1_date: + st.session_state.config_params['start_year'] = st.number_input( + "Start Year", + min_value=1900, + max_value=2100, + value=min_date.year, + step=1 + ) + with col2_date: + st.session_state.config_params['end_year'] = st.number_input( + "End Year", + min_value=1900, + max_value=2100, + value=max_date.year, + step=1 + ) + + with col2: + st.subheader("Seasonality") + st.session_state.config_params['seasonal_period'] = st.selectbox( + "Seasonal Period", + options=[7, 30, 90, 365, 12, 24], + index=[7, 30, 90, 365, 12, 24].index( + st.session_state.config_params['seasonal_period'] + ) if st.session_state.config_params['seasonal_period'] in [7, 30, 90, 365, 12, 24] else 0 + ) + + expanding_windows_input = st.text_input( + "Windows for Expanding Statistics (comma-separated)", + value=', '.join(map(str, st.session_state.config_params['expanding_windows'])) + ) + if expanding_windows_input: + st.session_state.config_params['expanding_windows'] = [ + int(x.strip()) for x in expanding_windows_input.split(',') if x.strip().isdigit() + ] + + # Navigation buttons + col1, col2, col3 = st.columns([1, 1, 1]) + + with col1: + if st.button("โฌ…๏ธ Back to Loading", width='stretch'): + st.session_state.current_step = 1 + st.rerun() + + with col3: + if 
st.button("Go to Analysis โžก๏ธ", type="primary", width='stretch'): + st.session_state.current_step = 3 + st.rerun() + + def render_step_3_data_analysis(self): + """Step 3: Data Analysis""" + st.header("๐Ÿ” Data Analysis") + + if not hasattr(st.session_state, 'data_preview') or st.session_state.data_preview is None: + st.warning("First load data in Step 1") + if st.button("Return to Data Loading"): + st.session_state.current_step = 1 + st.rerun() + return + + data = st.session_state.data_preview + + # Analysis tabs + tab1, tab2, tab3, tab4 = st.tabs([ + "๐Ÿ“ˆ Statistics", + "๐Ÿ” Distributions", + "๐Ÿ“… Temporal Analysis", + "โ“ Missing Values and Outliers" + ]) + + with tab1: + col1, col2 = st.columns(2) + + with col1: + st.subheader("Basic Statistics") + st.dataframe(data.describe().round(2), width='stretch') + + with col2: + st.subheader("Data Types") + dtype_info = pd.DataFrame({ + 'Column': data.columns, + 'Type': data.dtypes.values, + 'Unique Values': [data[col].nunique() for col in data.columns] + }) + st.dataframe(dtype_info, width='stretch') + + with tab2: + # Select column for visualisation + numeric_cols = data.select_dtypes(include=[np.number]).columns.tolist() + + if numeric_cols: + selected_col = st.selectbox( + "Select Column for Analysis", + options=numeric_cols + ) + + col1, col2 = st.columns(2) + + with col1: + # Histogram + fig = px.histogram( + data, + x=selected_col, + title=f"Distribution of {selected_col}", + nbins=50, + color_discrete_sequence=['#636EFA'] + ) + st.plotly_chart(fig, width='stretch') + + with col2: + # Box plot + fig = go.Figure() + fig.add_trace(go.Box( + y=data[selected_col], + name=selected_col, + boxpoints='outliers', + marker_color='#EF553B' + )) + fig.update_layout( + title=f"Box plot {selected_col}", + yaxis_title=selected_col + ) + st.plotly_chart(fig, width='stretch') + else: + st.warning("No numeric columns for distribution analysis") + + with tab3: + # Time series analysis + date_cols = [col for col in 
data.columns if 'date' in col.lower()] + + if date_cols or isinstance(data.index, pd.DatetimeIndex): + if date_cols: + date_col = date_cols[0] + dates = pd.to_datetime(data[date_col]) + else: + dates = data.index + date_col = 'index' + + # Check for numeric columns + if len(numeric_cols) > 0: + # Select column for time series + ts_col = st.selectbox( + "Select Column for Time Series", + options=numeric_cols + ) + + # Time series + fig = go.Figure() + fig.add_trace(go.Scatter( + x=dates, + y=data[ts_col], + mode='lines', + name=ts_col, + line=dict(color='#636EFA', width=2) + )) + + fig.update_layout( + title=f"Time Series: {ts_col}", + xaxis_title="Date", + yaxis_title=ts_col, + hovermode='x unified' + ) + + st.plotly_chart(fig, width='stretch') + + # Seasonality (if sufficient data) + if len(dates) > 30: + # Monthly trend + if hasattr(dates, 'month'): + monthly_data = data.groupby(dates.dt.month)[ts_col].mean() + + fig2 = px.bar( + x=monthly_data.index, + y=monthly_data.values, + title=f"Monthly Seasonality: {ts_col}", + labels={'x': 'Month', 'y': 'Average Value'} + ) + st.plotly_chart(fig2, width='stretch') + else: + st.warning("No numeric columns for temporal analysis") + else: + st.info("For temporal analysis, date column or DatetimeIndex required") + + with tab4: + col1, col2 = st.columns(2) + + with col1: + # Missing value analysis + st.subheader("Missing Values") + missing_data = data.isnull().sum() + missing_percentage = (missing_data / len(data)) * 100 + + missing_df = pd.DataFrame({ + 'Column': missing_data.index, + 'Missing Count': missing_data.values, + 'Missing Percentage': missing_percentage.values + }).sort_values('Missing Count', ascending=False) + + st.dataframe(missing_df, width='stretch') + + # Missing values visualisation + if missing_data.sum() > 0: + fig = px.bar( + missing_df, + x='Column', + y='Missing Percentage', + title="Missing Percentage by Column", + color='Missing Percentage', + color_continuous_scale='Reds' + ) + st.plotly_chart(fig, 
                                 width='stretch')

            with col2:
                # Quick outlier analysis
                st.subheader("Quick Outlier Analysis")

                if len(numeric_cols) > 0:
                    outlier_summary = []

                    for col in numeric_cols[:5]:  # Limit to 5 columns for speed
                        # Classic IQR fence: anything outside [Q1-1.5*IQR, Q3+1.5*IQR]
                        q1 = data[col].quantile(0.25)
                        q3 = data[col].quantile(0.75)
                        iqr = q3 - q1
                        lower_bound = q1 - 1.5 * iqr
                        upper_bound = q3 + 1.5 * iqr

                        outliers = data[(data[col] < lower_bound) | (data[col] > upper_bound)]
                        outlier_pct = (len(outliers) / len(data)) * 100

                        outlier_summary.append({
                            'Column': col,
                            'Outliers': len(outliers),
                            'Percentage': f"{outlier_pct:.2f}%"
                        })

                    outlier_df = pd.DataFrame(outlier_summary)
                    st.dataframe(outlier_df, width='stretch')
                else:
                    st.warning("No numeric columns for outlier analysis")

        # Navigation buttons
        col1, col2, col3 = st.columns([1, 1, 1])

        with col1:
            if st.button("โฌ…๏ธ Back to Configuration", width='stretch'):
                st.session_state.current_step = 2
                st.rerun()

        with col3:
            if st.button("Run Pipeline โžก๏ธ", type="primary", width='stretch'):
                st.session_state.current_step = 4
                st.rerun()

    def render_step_4_pipeline_execution(self) -> None:
        """Step 4: Pipeline Execution.

        Validates that data and a target column are configured, shows the
        current configuration, and runs ``EnhancedDataPreprocessingPipeline``
        with a progress bar.  Results are stored in ``st.session_state``
        (``processed_data``, ``modeling_data``, ``pipeline_completed``,
        ``plots_path``).
        """
        st.header("โšก Pipeline Execution")

        # Readiness check
        # NOTE(review): ready_to_run is assigned here but never read in the
        # visible code — the gate below tests `issues` instead; confirm and
        # remove if truly unused.
        ready_to_run = True
        issues = []

        if not st.session_state.uploaded_file and st.session_state.config_params['data_path'] != 'demo' and st.session_state.config_params['data_path'] != 'synthetic_data':
            issues.append("Data not loaded")
            ready_to_run = False

        if not st.session_state.config_params['target_column']:
            issues.append("Target variable not selected")
            ready_to_run = False

        # Automatic synthetic data generation if quick test enabled
        if st.session_state.quick_test_mode and not st.session_state.auto_pipeline_ready:
            st.info("โšก Quick test mode activated. Generating synthetic data...")
            self.quick_test_pipeline()
            return

        # Display warnings and bail out — pipeline cannot run yet
        if issues:
            st.error("โš ๏ธ Fix before running:")
            for issue in issues:
                st.write(f"- {issue}")

            # Suggest using synthetic data
            st.markdown("---")
            st.subheader("๐ŸŽฎ Quick Solution")

            col1, col2 = st.columns(2)

            with col1:
                if st.button("Generate Synthetic Data", width='stretch'):
                    st.session_state.current_step = 1
                    st.rerun()

            with col2:
                if st.button("To Data Loading", width='stretch'):
                    st.session_state.current_step = 1
                    st.rerun()

            col3, col4 = st.columns(2)
            with col3:
                if st.button("To Configuration", width='stretch'):
                    st.session_state.current_step = 2
                    st.rerun()

            return

        # Display configuration
        st.subheader("Execution Configuration")

        config_col1, config_col2 = st.columns(2)

        with config_col1:
            st.metric("Target Variable", st.session_state.config_params['target_column'])
            st.metric("Test Set", f"{st.session_state.config_params['test_size']*100:.0f}%")
            st.metric("Scaling Method", st.session_state.config_params['scaling_method'])

        with config_col2:
            st.metric("Max Lags", st.session_state.config_params['max_lags'])
            st.metric("Feature Selection Method", st.session_state.config_params['feature_selection_method'])
            st.metric("Validation Enabled", "Yes" if st.session_state.config_params['enable_validation'] else "No")

        # Execution options
        st.subheader("Execution Options")

        col1, col2 = st.columns(2)

        with col1:
            # Forced on (and locked) when the configured data source is
            # already synthetic.
            use_synthetic = st.checkbox(
                "Use Synthetic Data",
                value=(st.session_state.config_params['data_path'] == 'demo' or
                       st.session_state.config_params['data_path'] == 'synthetic_data'),
                disabled=(st.session_state.config_params['data_path'] == 'demo' or
                          st.session_state.config_params['data_path'] == 'synthetic_data')
            )

            save_intermediate = st.checkbox(
                "Save Intermediate Results",
                value=True
            )

        with col2:
            create_reports = st.checkbox(
                "Create Reports",
                value=True
            )

            # NOTE(review): create_visualisations is collected here but is not
            # passed to run_full_pipeline() below — confirm whether the
            # pipeline honours it some other way or the flag is dead.
            create_visualisations = st.checkbox(
                "Create Visualisations",
                value=True,
                help="Create data analysis plots"
            )

        # Run button
        if st.button("๐Ÿš€ Run Preprocessing Pipeline", type="primary", width='stretch'):

            # Create progress bar
            progress_bar = st.progress(0)
            status_text = st.empty()

            try:
                # Create configuration
                status_text.text("Creating configuration...")
                progress_bar.progress(10)

                config = Config(**st.session_state.config_params)

                # Create pipeline
                status_text.text("Initialising pipeline...")
                progress_bar.progress(20)

                self.pipeline = EnhancedDataPreprocessingPipeline(config)

                # Determine whether to use synthetic data
                use_synthetic_flag = (use_synthetic or
                                      st.session_state.config_params['data_path'] == 'demo' or
                                      st.session_state.config_params['data_path'] == 'synthetic_data')

                # Run pipeline
                status_text.text("Running preprocessing pipeline...")
                progress_bar.progress(30)

                processed_data = self.pipeline.run_full_pipeline(
                    use_synthetic=use_synthetic_flag,
                    save_intermediate=save_intermediate,
                    create_reports=create_reports
                )

                # Update progress
                if processed_data is not None:
                    status_text.text("Getting data for modelling...")
                    progress_bar.progress(80)

                    modeling_data = self.pipeline.get_final_data_for_modelling()

                    # Save to session state
                    st.session_state.processed_data = processed_data
                    st.session_state.modeling_data = modeling_data
                    st.session_state.pipeline_completed = True
                    st.session_state.plots_path = os.path.join(config.results_dir, 'plots')

                    # Collect information about available plots
                    self.collect_available_plots()

                    # Completion
                    status_text.text("Completing...")
                    progress_bar.progress(100)

                    st.success("โœ… Pipeline completed successfully!")

                    # Show results
                    col1, col2, col3 = st.columns(3)

                    with col1:
                        if hasattr(self.pipeline, 'results') and 'data_loading' in self.pipeline.results:
                            st.metric("Original Data",
f"{self.pipeline.results['data_loading']['shape'][0]:,} rows") + else: + st.metric("Original Data", "Information unavailable") + + with col2: + st.metric("Processed Data", f"{processed_data.shape[0]:,} rows") + + with col3: + st.metric("Final Features", f"{processed_data.shape[1]} columns") + + # Button to proceed to results + if st.button("๐Ÿ“Š Go to Results", type="primary", width='stretch'): + st.session_state.current_step = 5 + st.rerun() + + else: + st.error("โŒ Error executing pipeline") + st.error("Check logs for more information") + + except Exception as e: + progress_bar.progress(0) + status_text.text("") + st.error(f"โŒ Error: {str(e)}") + st.exception(e) + + # Back button + if st.button("โฌ…๏ธ Back to Analysis", width='stretch'): + st.session_state.current_step = 3 + st.rerun() + + def collect_available_plots(self): + """Collect information about available plots""" + if not st.session_state.plots_path or not os.path.exists(st.session_state.plots_path): + st.session_state.available_plots = {} + return + + plots_categories = { + 'summary': ['summary_dashboard.png'], + 'missing_values': ['missing_values_analysis.png'], + 'outliers': ['outliers_analysis.png', 'outlier_handling_results.png', 'temporal_outliers.png'], + 'stationarity': ['stationarity_*.png'], + 'data_split': ['data_split.png'], + 'scaling': ['scaling_results.png'], + 'feature_selection': ['feature_selection_*.png'], + 'correlations': ['correlation_matrix.png', 'high_correlations.png', 'target_correlations.png', 'vif_scores.png'] + } + + available_plots = {} + + for category, patterns in plots_categories.items(): + category_plots = [] + + # Search for files for each pattern + for pattern in patterns: + # For general patterns + if '*' in pattern: + search_path = os.path.join(st.session_state.plots_path, pattern) + files = glob.glob(search_path) + + # Also search in subfolders + for root, dirs, filenames in os.walk(st.session_state.plots_path): + for filename in filenames: + if 
pattern.replace('*', '') in filename and filename.endswith('.png'): + full_path = os.path.join(root, filename) + if full_path not in files: + files.append(full_path) + else: + # For specific file names + file_path = os.path.join(st.session_state.plots_path, pattern) + + # Check in main folder + if os.path.exists(file_path): + files = [file_path] + else: + # Check in subfolders + files = [] + for root, dirs, filenames in os.walk(st.session_state.plots_path): + for filename in filenames: + if filename == pattern: + files.append(os.path.join(root, filename)) + + for file in files: + if os.path.exists(file): + # Get relative path for display + rel_path = os.path.relpath(file, st.session_state.plots_path) + category_plots.append({ + 'path': file, + 'name': os.path.basename(file), + 'rel_path': rel_path, + 'size': os.path.getsize(file) + }) + + if category_plots: + available_plots[category] = category_plots + + # Also add all found PNG files in general folder + all_png_files = [] + for root, dirs, filenames in os.walk(st.session_state.plots_path): + for filename in filenames: + if filename.endswith('.png'): + file_path = os.path.join(root, filename) + # Check if this file already added + already_added = False + for category_plots in available_plots.values(): + for plot in category_plots: + if plot['path'] == file_path: + already_added = True + break + + if not already_added: + rel_path = os.path.relpath(file_path, st.session_state.plots_path) + all_png_files.append({ + 'path': file_path, + 'name': filename, + 'rel_path': rel_path, + 'size': os.path.getsize(file_path) + }) + + if all_png_files: + available_plots['other'] = all_png_files + + st.session_state.available_plots = available_plots + + def render_step_5_results(self): + """Step 5: Results""" + st.header("๐Ÿ“Š Pipeline Results") + + if not st.session_state.pipeline_completed or st.session_state.processed_data is None: + st.warning("Pipeline not yet run or not completed successfully") + + # Suggest using quick test 
            st.markdown("---")
            st.subheader("๐ŸŽฎ Quick Start")

            col1, col2 = st.columns(2)
            with col1:
                if st.button("๐Ÿš€ Run Quick Test", type="primary", width='stretch'):
                    st.session_state.quick_test_mode = True
                    st.session_state.current_step = 1
                    st.rerun()

            with col2:
                if st.button("Load Data", width='stretch'):
                    st.session_state.current_step = 1
                    st.rerun()

            return

        data = st.session_state.processed_data
        modeling_data = st.session_state.modeling_data

        # Results tabs
        tab1, tab2, tab3, tab4 = st.tabs([
            "๐Ÿ“ˆ Data Overview",
            "๐Ÿ“Š Feature Analysis",
            "๐Ÿ“‰ Validation",
            "๐Ÿ’พ Export"
        ])

        with tab1:
            st.subheader("Processed Data")

            # Basic information
            info_col1, info_col2, info_col3, info_col4 = st.columns(4)

            with info_col1:
                st.metric("Total Records", f"{data.shape[0]:,}")
            with info_col2:
                st.metric("Total Features", data.shape[1])
            with info_col3:
                numeric_cols = data.select_dtypes(include=[np.number]).columns.tolist()
                st.metric("Numeric Features", len(numeric_cols))
            with info_col4:
                missing_total = data.isnull().sum().sum()
                st.metric("Missing Values", missing_total)

            # Data preview
            st.subheader("Data Preview")
            st.dataframe(data.head(100), width='stretch')

            # Statistics
            st.subheader("Processed Data Statistics")
            st.dataframe(data.describe().round(4), width='stretch')

        with tab2:
            st.subheader("Feature Analysis")

            if modeling_data and 'feature_names' in modeling_data:
                features = modeling_data['feature_names']

                # Feature list
                st.write(f"**Selected Features:** {len(features)}")

                # Display features as cards, 4 per row
                cols_per_row = 4
                for i in range(0, len(features), cols_per_row):
                    cols = st.columns(cols_per_row)
                    for j in range(cols_per_row):
                        idx = i + j
                        if idx < len(features):
                            with cols[j]:
                                st.info(features[idx])

                # Feature importance (if available)
                if (self.pipeline is not None and
                        hasattr(self.pipeline, 'feature_selector') and
                        self.pipeline.feature_selector is not None):

                    # Check for feature_importances_
                    if hasattr(self.pipeline.feature_selector, 'feature_importances_'):
                        importances = self.pipeline.feature_selector.feature_importances_

                        if importances is not None and len(importances) > 0:
                            # Truncate whichever list is longer so both columns
                            # have equal length before building the frame.
                            importance_df = pd.DataFrame({
                                'Feature': features[:len(importances)] if len(features) >= len(importances) else features,
                                'Importance': importances[:len(features)] if len(importances) >= len(features) else importances
                            }).sort_values('Importance', ascending=False)

                            st.subheader("Feature Importance")

                            fig = px.bar(
                                importance_df.head(20),
                                x='Importance',
                                y='Feature',
                                orientation='h',
                                title="Top-20 Features by Importance",
                                color='Importance',
                                color_continuous_scale='Viridis'
                            )
                            st.plotly_chart(fig, width='stretch')

            # Correlation matrix (limited for performance)
            if data.shape[1] <= 50:  # Performance limit
                st.subheader("Correlation Matrix (first 20 features)")

                # Select only numeric columns and limit quantity
                numeric_data = data.select_dtypes(include=[np.number])
                if len(numeric_data.columns) > 20:
                    numeric_data = numeric_data.iloc[:, :20]

                if not numeric_data.empty and len(numeric_data.columns) > 1:
                    corr_matrix = numeric_data.corr()

                    fig = go.Figure(data=go.Heatmap(
                        z=corr_matrix.values,
                        x=corr_matrix.columns,
                        y=corr_matrix.columns,
                        colorscale='RdBu',
                        zmin=-1,
                        zmax=1,
                        text=corr_matrix.round(2).values,
                        texttemplate='%{text}',
                        textfont={"size": 10}
                    ))

                    fig.update_layout(
                        title="Correlation Matrix",
                        width=800,
                        height=800
                    )

                    st.plotly_chart(fig, width='stretch')
                else:
                    st.info("Insufficient data for correlation matrix")

        with tab3:
            st.subheader("Validation Results")

            # Improved validation result availability check: the report may
            # live under several different pipeline attributes/keys.
            validation_available = False
            validation_data = None

            if self.pipeline is not None:
                # Check for results in pipeline
                if hasattr(self.pipeline, 'results'):
                    # Look for validation results under different keys
                    validation_keys = ['final_validation', 'validation_results', 'validation', 'validation_checks']
                    for key in validation_keys:
                        if key in self.pipeline.results:
                            validation_data = self.pipeline.results[key]
                            validation_available = True
                            break

                # If not found in results, check other attributes
                if not validation_available and hasattr(self.pipeline, 'validation_report'):
                    validation_data = self.pipeline.validation_report
                    validation_available = True

                # Or check processing results
                if not validation_available and hasattr(self.pipeline, 'get_validation_summary'):
                    try:
                        validation_data = self.pipeline.get_validation_summary()
                        validation_available = True
                    except:
                        # NOTE(review): bare except silently swallows any
                        # failure from get_validation_summary — deliberate
                        # best-effort fallback, but consider narrowing.
                        pass

            # If validation results available
            if validation_available and validation_data:
                st.success("โœ… Validation results available")

                # Check validation data format
                if isinstance(validation_data, dict):
                    # Display as dictionary
                    col1, col2 = st.columns(2)

                    with col1:
                        # Status
                        status = validation_data.get('status', 'UNKNOWN')
                        if status == 'PASS':
                            st.success(f"Status: {status}")
                        elif status == 'WARNING':
                            st.warning(f"Status: {status}")
                        else:
                            st.error(f"Status: {status}")

                        # Overall score
                        score = validation_data.get('overall_score', validation_data.get('score', 0))
                        if score:
                            st.metric("Overall Score", f"{score}/100")

                    with col2:
                        # Check counters: checks may sit under 'checks',
                        # 'basic_checks', or be the dict itself.
                        if 'checks' in validation_data:
                            checks = validation_data['checks']
                        elif 'basic_checks' in validation_data:
                            checks = validation_data['basic_checks']
                        else:
                            checks = validation_data

                        if isinstance(checks, dict):
                            passed = sum(1 for check in checks.values()
                                         if isinstance(check, dict) and check.get('passed', False))
                            total = len(checks)
                            st.metric("Checks Passed", f"{passed}/{total}")

                    # Check details
                    st.subheader("Check Details")

                    # Determine where checks are located
                    checks_to_display = None
                    if 'checks' in validation_data:
                        checks_to_display = validation_data['checks']
                    elif 'basic_checks' in validation_data:
                        checks_to_display = validation_data['basic_checks']
                    elif any(isinstance(v, dict) and 'passed' in v for v in validation_data.values()):
                        checks_to_display = validation_data

                    if checks_to_display and isinstance(checks_to_display, dict):
                        for check_name, check_info in checks_to_display.items():
                            if isinstance(check_info, dict):
                                col1, col2, col3 = st.columns([3, 1, 3])

                                with col1:
                                    # Check description
                                    description = check_info.get('description', check_name)
                                    st.write(f"**{description}**")

                                with col2:
                                    # Status
                                    if check_info.get('passed', False):
                                        st.success("โœ…")
                                    else:
                                        st.error("โŒ")

                                with col3:
                                    # Message
                                    if 'message' in check_info:
                                        st.caption(check_info['message'])
                            else:
                                # Simple format
                                st.write(f"**{check_name}**: {check_info}")
                    else:
                        # Display all validation data
                        st.json(validation_data)
                else:
                    # If not dictionary, display as is
                    st.write("Validation results:")
                    st.write(validation_data)
            else:
                # If no validation results, show pipeline information
                st.info("Validation results in report format not available, but pipeline execution statistics presented below")

                # Pipeline stage statistics
                st.subheader("Pipeline Execution Statistics")

                # Create stage table
                # NOTE(review): most stage statuses below are hard-coded
                # "Completed" and are not derived from actual pipeline state.
                stages = [
                    ("Data Loading", "โœ… Successful" if data is not None else "โŒ Error"),
                    ("Missing Value Processing", "โœ… Completed"),
                    ("Outlier Processing", "โœ… Completed"),
                    ("Feature Engineering", "โœ… Completed"),
                    ("Scaling", "โœ… Completed"),
                    ("Feature Selection", "โœ… Completed"),
                    ("Data Split", "โœ… Completed" if modeling_data else "โŒ Not completed")
                ]

                for stage_name, status in stages:
                    col1, col2 = st.columns([3, 1])
                    with col1:
                        st.write(f"**{stage_name}**")
                    with col2:
                        if "โœ…" in status:
                            st.success(status)
                        else:
                            st.error(status)

                # If pipeline exists, show available metrics
                if self.pipeline is not None:
                    # Check for various metrics
                    st.subheader("Data Quality Metrics")

                    col1, col2, col3 = st.columns(3)

                    with col1:
                        # Data quality
                        if data is not None:
                            missing_pct = (data.isnull().sum().sum() / (data.shape[0] * data.shape[1])) * 100
                            st.metric("Missing Values", f"{missing_pct:.2f}%")

                    with col2:
                        # Feature information
                        if data is not None:
                            numeric_cols = len(data.select_dtypes(include=[np.number]).columns)
                            st.metric("Numeric Features", numeric_cols)

                    with col3:
                        # Split information
                        if modeling_data and 'X_train' in modeling_data:
                            train_size = len(modeling_data['X_train'])
                            total_size = train_size
                            if 'X_test' in modeling_data:
                                total_size += len(modeling_data['X_test'])
                            if 'X_val' in modeling_data:
                                total_size += len(modeling_data['X_val'])

                            if total_size > 0:
                                train_pct = (train_size / total_size) * 100
                                st.metric("Training Set", f"{train_pct:.1f}%")

        with tab4:
            st.subheader("Data Export")

            # Export formats
            export_format = st.radio(
                "Export Format",
                options=['CSV', 'Parquet', 'Excel'],
                horizontal=True
            )

            # Export buttons
            if data is not None:
                # Export processed data
                st.write("**Processed Data**")

                if export_format == 'CSV':
                    csv = data.to_csv(index=True)
                    st.download_button(
                        label="๐Ÿ“ฅ Download CSV",
                        data=csv,
                        file_name="streamlit_processed_data.csv",
                        mime="text/csv",
                        width='stretch'
                    )

                elif export_format == 'Parquet':
                    # For Parquet need to save to an in-memory buffer
                    import io
                    buffer = io.BytesIO()
                    data.to_parquet(buffer)
                    buffer.seek(0)

                    st.download_button(
                        label="๐Ÿ“ฅ Download Parquet",
                        data=buffer,
                        file_name="streamlit_processed_data.parquet",
                        mime="application/octet-stream",
                        width='stretch'
                    )

                elif export_format == 'Excel':
                    import io
                    buffer = io.BytesIO()
                    # openpyxl must be installed for Excel export
                    with pd.ExcelWriter(buffer, engine='openpyxl') as writer:
                        data.to_excel(writer, sheet_name='Processed_Data')

                    buffer.seek(0)

                    st.download_button(
                        label="๐Ÿ“ฅ Download Excel",
                        data=buffer,
                        file_name="streamlit_processed_data.xlsx",
mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + width='stretch' + ) + + # Export modeling data + if modeling_data: + st.write("**Modeling Data**") + + col1, col2, col3 = st.columns(3) + + with col1: + if 'X_train' in modeling_data and modeling_data['X_train'] is not None: + train_df = pd.concat([ + modeling_data['X_train'], + modeling_data['y_train'].rename('target') + ], axis=1) if 'y_train' in modeling_data else modeling_data['X_train'] + + st.download_button( + label="๐Ÿ“ฅ Training Set", + data=train_df.to_csv(), + file_name="train_data.csv", + mime="text/csv", + width='stretch' + ) + + with col2: + if 'X_val' in modeling_data and modeling_data['X_val'] is not None: + val_df = pd.concat([ + modeling_data['X_val'], + modeling_data['y_val'].rename('target') + ], axis=1) if 'y_val' in modeling_data else modeling_data['X_val'] + + st.download_button( + label="๐Ÿ“ฅ Validation Set", + data=val_df.to_csv(), + file_name="validation_data.csv", + mime="text/csv", + width='stretch' + ) + + with col3: + if 'X_test' in modeling_data and modeling_data['X_test'] is not None: + test_df = pd.concat([ + modeling_data['X_test'], + modeling_data['y_test'].rename('target') + ], axis=1) if 'y_test' in modeling_data else modeling_data['X_test'] + + st.download_button( + label="๐Ÿ“ฅ Test Set", + data=test_df.to_csv(), + file_name="test_data.csv", + mime="text/csv", + width='stretch' + ) + + # Navigation + st.markdown("---") + col1, col2, col3 = st.columns([1, 1, 1]) + + with col1: + if st.button("โฌ…๏ธ Back to Pipeline", width='stretch'): + st.session_state.current_step = 4 + st.rerun() + + with col3: + if st.button("Go to Visualisations โžก๏ธ", type="primary", width='stretch'): + st.session_state.current_step = 6 + st.rerun() + + def render_step_6_visualisations(self): + """Step 6: Visualisations""" + st.header("๐Ÿ“ˆ Pipeline Visualisations") + + if not st.session_state.pipeline_completed: + st.warning("First run pipeline in Step 4") + + # Suggest quick 
test + st.markdown("---") + st.subheader("๐ŸŽฎ Quick Test") + + col1, col2 = st.columns(2) + with col1: + if st.button("๐Ÿš€ Run Quick Test", type="primary", width='stretch'): + st.session_state.quick_test_mode = True + st.session_state.current_step = 1 + st.rerun() + + with col2: + if st.button("Run Pipeline", width='stretch'): + st.session_state.current_step = 4 + st.rerun() + + return + + # Check for plots + if not st.session_state.available_plots: + st.warning("Plots not found. Ensure pipeline was run with visualisation option enabled.") + + # Try to collect plots again + if st.button("Try to Find Plots", width='stretch'): + self.collect_available_plots() + st.rerun() + + return + + # Plot statistics + total_plots = sum(len(plots) for plots in st.session_state.available_plots.values()) + st.success(f"โœ… Found {total_plots} plots") + + # Plot category tabs + categories = list(st.session_state.available_plots.keys()) + + if 'summary' in categories: + categories.remove('summary') + categories.insert(0, 'summary') + + tabs = st.tabs([cat.capitalize().replace('_', ' ') for cat in categories]) + + for i, category in enumerate(categories): + with tabs[i]: + self.display_category_plots(category) + + # All plots in one gallery + st.markdown("---") + st.subheader("๐Ÿ–ผ๏ธ All Plots Gallery") + + # Collect all plots + all_plots = [] + for category, plots in st.session_state.available_plots.items(): + for plot in plots: + all_plots.append((category, plot)) + + # Display plots in grid + cols_per_row = 3 + for i in range(0, len(all_plots), cols_per_row): + cols = st.columns(cols_per_row) + for j in range(cols_per_row): + idx = i + j + if idx < len(all_plots): + category, plot_info = all_plots[idx] + with cols[j]: + self.display_plot_card(plot_info, category) + + def display_category_plots(self, category): + """Display plots in category""" + plots = st.session_state.available_plots.get(category, []) + + if not plots: + st.info(f"No plots in category '{category}'") + return + 
+ st.subheader(f"{category.capitalize().replace('_', ' ')} ({len(plots)} plots)") + + # Sort plots by name + plots_sorted = sorted(plots, key=lambda x: x['name']) + + # Display plots in accordions for convenience + for plot_info in plots_sorted: + with st.expander(f"๐Ÿ“Š {plot_info['name'].replace('_', ' ').replace('.png', '')}", expanded=True): + self.display_plot_image(plot_info) + + def display_plot_card(self, plot_info, category): + """Display plot card""" + try: + # Load image + image = Image.open(plot_info['path']) + + # Create safe key for state + safe_key = plot_info['path'].replace('/', '_').replace('\\', '_').replace('.', '_') + + # Initialise state for this plot if not exists + if f"show_{safe_key}" not in st.session_state: + st.session_state[f"show_{safe_key}"] = False + + # Create card + with st.container(): + st.markdown(f"**{plot_info['name'].replace('_', ' ').replace('.png', '')}**") + st.image(image, width='stretch', caption=plot_info['rel_path']) + + # File information + size_kb = plot_info['size'] / 1024 + st.caption(f"Size: {size_kb:.1f} KB | Category: {category}") + + # Zoom control buttons + col1, col2 = st.columns(2) + + with col1: + # Zoom button + if st.button("๐Ÿ” Zoom", key=f"zoom_{safe_key}", width='stretch'): + st.session_state[f"show_{safe_key}"] = True + # Don't use st.rerun() here + + with col2: + # Hide zoomed image button (if shown) + if st.session_state[f"show_{safe_key}"]: + if st.button("โœ• Hide", key=f"hide_{safe_key}", width='stretch'): + st.session_state[f"show_{safe_key}"] = False + # Don't use st.rerun() here + + # If zoom button clicked, show zoomed image + if st.session_state[f"show_{safe_key}"]: + st.markdown("---") + st.subheader(f"๐Ÿ” {plot_info['name'].replace('_', ' ').replace('.png', '')}") + st.image(image, width='stretch') + + except Exception as e: + st.error(f"Error loading plot: {str(e)}") + st.code(f"Path: {plot_info['path']}") + + + def display_plot_image(self, plot_info): + """Display plot image""" + try: 
+ # Load image + image = Image.open(plot_info['path']) + + # Display with information + col1, col2 = st.columns([3, 1]) + + with col1: + st.image(image, width='stretch') + + with col2: + # File information + st.metric("Size", f"{plot_info['size'] / 1024:.1f} KB") + st.metric("Resolution", f"{image.width}ร—{image.height}") + + # File format + st.write(f"**Format:** {image.format}") + + # Download button + with open(plot_info['path'], 'rb') as file: + btn = st.download_button( + label="๐Ÿ“ฅ Download", + data=file, + file_name=plot_info['name'], + mime="image/png", + width='stretch' + ) + + except Exception as e: + st.error(f"Error loading plot: {str(e)}") + st.code(f"Path: {plot_info['path']}") + + def render_step_7_modeling(self): + """Step 7: Modelling Preparation""" + st.header("๐Ÿค– Modelling Preparation") + + if not st.session_state.pipeline_completed or st.session_state.modeling_data is None: + st.warning("First run pipeline in Step 4") + + # Suggest quick test + st.markdown("---") + st.subheader("๐ŸŽฎ Quick Test") + + col1, col2 = st.columns(2) + with col1: + if st.button("๐Ÿš€ Run Quick Test", type="primary", width='stretch'): + st.session_state.quick_test_mode = True + st.session_state.current_step = 1 + st.rerun() + + with col2: + if st.button("Run Pipeline", width='stretch'): + st.session_state.current_step = 4 + st.rerun() + + return + + modeling_data = st.session_state.modeling_data + + # Basic information + col1, col2, col3, col4 = st.columns(4) + + with col1: + if 'X_train' in modeling_data and modeling_data['X_train'] is not None: + st.metric("Training Set", f"{modeling_data['X_train'].shape[0]:,} records") + with col2: + if 'X_val' in modeling_data and modeling_data['X_val'] is not None: + st.metric("Validation Set", f"{modeling_data['X_val'].shape[0]:,} records") + with col3: + if 'X_test' in modeling_data and modeling_data['X_test'] is not None: + st.metric("Test Set", f"{modeling_data['X_test'].shape[0]:,} records") + with col4: + if 
'feature_names' in modeling_data and modeling_data['feature_names'] is not None: + st.metric("Number of Features", len(modeling_data['feature_names'])) + + # Tabs + tab1, tab2, tab3 = st.tabs([ + "๐Ÿ“ Data Structure", + "๐Ÿ“Š Target Variable Distribution", + "๐Ÿ”— ML Integration" + ]) + + with tab1: + st.subheader("Modeling Data Structure") + + # Information table + data_info = [] + + if 'X_train' in modeling_data and modeling_data['X_train'] is not None: + data_info.append({ + 'Dataset': 'Training', + 'Samples': modeling_data['X_train'].shape[0], + 'Features': modeling_data['X_train'].shape[1], + 'Target Variable': 'Yes' if 'y_train' in modeling_data and modeling_data['y_train'] is not None else 'No' + }) + + if 'X_val' in modeling_data and modeling_data['X_val'] is not None: + data_info.append({ + 'Dataset': 'Validation', + 'Samples': modeling_data['X_val'].shape[0], + 'Features': modeling_data['X_val'].shape[1], + 'Target Variable': 'Yes' if 'y_val' in modeling_data and modeling_data['y_val'] is not None else 'No' + }) + + if 'X_test' in modeling_data and modeling_data['X_test'] is not None: + data_info.append({ + 'Dataset': 'Test', + 'Samples': modeling_data['X_test'].shape[0], + 'Features': modeling_data['X_test'].shape[1], + 'Target Variable': 'Yes' if 'y_test' in modeling_data and modeling_data['y_test'] is not None else 'No' + }) + + if data_info: + st.table(pd.DataFrame(data_info)) + else: + st.info("Modeling data not available") + + # Data sample + st.subheader("Training Data Sample") + + if ('X_train' in modeling_data and modeling_data['X_train'] is not None and + 'y_train' in modeling_data and modeling_data['y_train'] is not None): + sample_data = pd.concat([ + modeling_data['X_train'].head(10), + modeling_data['y_train'].head(10).rename('target') + ], axis=1) + + st.dataframe(sample_data, width='stretch') + + with tab2: + st.subheader("Target Variable Distribution") + + if 'y_train' in modeling_data and modeling_data['y_train'] is not None: + # Target 
variable histogram + fig = px.histogram( + x=modeling_data['y_train'], + nbins=50, + title="Target Variable Distribution (Training Set)", + labels={'x': 'Target Variable', 'y': 'Frequency'}, + color_discrete_sequence=['#00CC96'] + ) + + st.plotly_chart(fig, width='stretch') + + # Statistics + col1, col2, col3, col4 = st.columns(4) + + with col1: + st.metric("Mean", f"{modeling_data['y_train'].mean():.2f}") + with col2: + st.metric("Standard Deviation", f"{modeling_data['y_train'].std():.2f}") + with col3: + st.metric("Minimum", f"{modeling_data['y_train'].min():.2f}") + with col4: + st.metric("Maximum", f"{modeling_data['y_train'].max():.2f}") + else: + st.info("Target variable not available") + + with tab3: + st.subheader("Machine Learning Library Integration") + + st.info(""" + Your data is ready for use with any Python ML libraries. + Below are code examples for various libraries. + """) + + # Library selection + ml_library = st.selectbox( + "Select ML Library", + options=["Scikit-learn", "XGBoost", "LightGBM", "CatBoost", "PyTorch", "TensorFlow"] + ) + + # Code generation + code_placeholder = st.empty() + + if ml_library == "Scikit-learn": + code = """# Example usage with Scikit-learn +from sklearn.ensemble import RandomForestRegressor +from sklearn.metrics import mean_squared_error, r2_score +import numpy as np + +# Use prepared data +X_train = modeling_data['X_train'] +y_train = modeling_data['y_train'] +X_val = modeling_data['X_val'] +y_val = modeling_data['y_val'] + +# Create and train model +model = RandomForestRegressor( + n_estimators=100, + max_depth=10, + random_state=42 +) + +model.fit(X_train, y_train) + +# Predictions and evaluation +y_pred = model.predict(X_val) + +print(f"RMSE: {np.sqrt(mean_squared_error(y_val, y_pred)):.4f}") +print(f"Rยฒ Score: {r2_score(y_val, y_pred):.4f}") +print(f"Feature Importance: {model.feature_importances_}")""" + + elif ml_library == "XGBoost": + code = """# Example usage with XGBoost +import xgboost as xgb +from 
sklearn.metrics import mean_squared_error +import numpy as np + +# Prepare data in DMatrix format +dtrain = xgb.DMatrix(modeling_data['X_train'], label=modeling_data['y_train']) +dval = xgb.DMatrix(modeling_data['X_val'], label=modeling_data['y_val']) + +# Model parameters +params = { + 'objective': 'reg:squarederror', + 'max_depth': 6, + 'learning_rate': 0.1, + 'subsample': 0.8, + 'colsample_bytree': 0.8, + 'seed': 42 +} + +# Train model +model = xgb.train( + params, + dtrain, + num_boost_round=100, + evals=[(dval, 'validation')], + early_stopping_rounds=10, + verbose_eval=False +) + +# Predictions +y_pred = model.predict(dval) + +print(f"RMSE: {np.sqrt(mean_squared_error(modeling_data['y_val'], y_pred)):.4f}") +print(f"Number of Trees: {model.best_ntree_limit}")""" + + elif ml_library == "LightGBM": + code = """# Example usage with LightGBM +import lightgbm as lgb +from sklearn.metrics import mean_squared_error +import numpy as np + +# Prepare data +train_data = lgb.Dataset( + modeling_data['X_train'], + label=modeling_data['y_train'] +) + +val_data = lgb.Dataset( + modeling_data['X_val'], + label=modeling_data['y_val'], + reference=train_data +) + +# Model parameters +params = { + 'objective': 'regression', + 'metric': 'rmse', + 'num_leaves': 31, + 'learning_rate': 0.05, + 'feature_fraction': 0.9, + 'bagging_fraction': 0.8, + 'bagging_freq': 5, + 'verbose': 0 +} + +# Train model +model = lgb.train( + params, + train_data, + valid_sets=[val_data], + num_boost_round=100, + callbacks=[lgb.early_stopping(10)] +) + +# Predictions +y_pred = model.predict(modeling_data['X_val']) + +print(f"RMSE: {np.sqrt(mean_squared_error(modeling_data['y_val'], y_pred)):.4f}") +print(f"Best Iteration: {model.best_iteration}")""" + + else: + code = f"""# Template for {ml_library} +# Your data available in modeling_data variable + +X_train = modeling_data['X_train'] +y_train = modeling_data['y_train'] +X_val = modeling_data['X_val'] +y_val = modeling_data['y_val'] +X_test = 
modeling_data['X_test'] +y_test = modeling_data['y_test'] + +# Code for {ml_library}... +print(f"Data sizes:") +print(f" X_train: {{X_train.shape}}") +print(f" y_train: {{y_train.shape}}") +print(f" X_val: {{X_val.shape}}") +print(f" X_test: {{X_test.shape}}")""" + + # Display code + code_placeholder.code(code, language='python') + + # Copy code button + try: + import pyperclip + if st.button("๐Ÿ“‹ Copy Code", width='stretch'): + try: + pyperclip.copy(code) + st.success("Code copied to clipboard!") + except: + st.warning("Failed to copy code. Copy manually.") + except: + st.warning("To copy code, install pyperclip library: pip install pyperclip") + + # Final information + st.markdown("---") + st.success(""" + ๐ŸŽ‰ Congratulations! You have successfully prepared data for machine learning. + + **Next Steps:** + 1. Use code above for integration with chosen ML library + 2. Experiment with various models + 3. Optimise hyperparameters + 4. Evaluate results on test set + """) + + # Navigation + col1, col2 = st.columns([1, 1]) + + with col1: + if st.button("โฌ…๏ธ Back to Visualisations", width='stretch'): + st.session_state.current_step = 6 + st.rerun() + + with col2: + if st.button("๐Ÿ”„ Run New Pipeline", type="primary", width='stretch'): + # Reset state + st.session_state.pipeline_completed = False + st.session_state.processed_data = None + st.session_state.modeling_data = None + st.session_state.current_step = 1 + st.session_state.uploaded_file = None + st.session_state.plots_path = None + st.session_state.available_plots = {} + st.session_state.synthetic_data_generated = False + st.session_state.auto_pipeline_ready = False + st.session_state.quick_test_mode = False + st.rerun() + + def render_footer(self): + """Application footer""" + st.markdown("---") + + col1, col2, col3 = st.columns(3) + + with col1: + st.markdown("**TimeFlowPro** v1.1.0") + st.caption("Added synthetic data generation") + + with col2: + st.markdown("๐Ÿ“ง Contacts: cool.araby@gmail.com") + + with 
col3: + st.markdown("ยฉ 2026 All Rights Reserved") + + def run(self): + """Run application""" + # Header + st.title("๐Ÿ“Š TimeFlow Pro - Data Analysis and Preprocessing") + st.markdown("---") + + # Sidebar + self.create_sidebar() + + # Main content depending on step + if st.session_state.current_step == 1: + self.render_step_1_data_loading() + elif st.session_state.current_step == 2: + self.render_step_2_configuration() + elif st.session_state.current_step == 3: + self.render_step_3_data_analysis() + elif st.session_state.current_step == 4: + self.render_step_4_pipeline_execution() + elif st.session_state.current_step == 5: + self.render_step_5_results() + elif st.session_state.current_step == 6: + self.render_step_6_visualisations() + elif st.session_state.current_step == 7: + self.render_step_7_modeling() + + # Footer + self.render_footer() + +# ============================================ +# APPLICATION LAUNCH +# ============================================ +if __name__ == "__main__": + app = StreamlitApp() + app.run() \ No newline at end of file