# Hugging Face Space: Navya-Sree / Data_sheets (status shown on page: Sleeping)
# File size: 11,424 Bytes
# Commit hashes shown on the page: e706da8, 96cab9a

import streamlit as st

# Page-level settings, applied before any other Streamlit call in this script.
_page_settings = {
    "page_title": "Advanced Forecasting",
    "page_icon": "📈",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_page_settings)
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import yaml
import os
import sys

# Add src to path
sys.path.append('src')

from src.data_processing.processor import AdvancedDataProcessor
from src.modeling.advanced_models import AdvancedModelTrainer
from src.agents.genai_integration import ForecastingAIAssistant

# Page configuration
# The page is configured by the st.set_page_config(...) call that directly
# follows `import streamlit as st` at the top of this script. Do not call it
# again here: Streamlit documents it as a command that must come first, and
# releases that allow only one call per page raise on a second one.

# Custom CSS
# Style classes used by the HTML snippets rendered further down
# (main-header, section-header, highlight).
st.markdown("""
<style>
    .main-header {font-size: 3rem; color: #1f77b4;}
    .section-header {font-size: 2rem; color: #ff7f0e; margin-top: 2rem;}
    .highlight {background-color: #f7f7f7; padding: 15px; border-radius: 5px; margin: 10px 0;}
</style>
""", unsafe_allow_html=True)

# Load configuration
@st.cache_resource
def load_config():
    """Read ``config/config.yaml`` and return its parsed contents.

    The file is parsed with ``yaml.safe_load`` and the function is wrapped in
    ``st.cache_resource``.

    Returns:
        Whatever ``yaml.safe_load`` produces for the file. The rest of this
        script indexes it with the keys ``'data_processing'`` and
        ``'modeling'``.

    Raises:
        OSError: If the configuration file cannot be opened.
    """
    # Explicit encoding so parsing does not depend on the platform's default
    # locale encoding.
    with open('config/config.yaml', 'r', encoding='utf-8') as f:
        return yaml.safe_load(f)

config = load_config()

# Initialize components
@st.cache_resource
def init_components():
    """Create the data processor, the model trainer and the optional assistant.

    Reads the module-level ``config`` for the ``'data_processing'`` and
    ``'modeling'`` sections.

    Returns:
        A ``(processor, trainer, ai_assistant)`` tuple. ``ai_assistant`` is
        ``None`` when the ``OPENAI_API_KEY`` environment variable is unset or
        empty.
    """
    data_processor = AdvancedDataProcessor(config['data_processing'])
    model_trainer = AdvancedModelTrainer(config['modeling'])

    # The assistant is only built when an API key is present in the environment.
    api_key = os.environ.get('OPENAI_API_KEY')
    if api_key:
        assistant = ForecastingAIAssistant(api_key)
    else:
        assistant = None

    return data_processor, model_trainer, assistant

processor, trainer, ai_assistant = init_components()

# App title and introduction text
_title_html = '<h1 class="main-header">Advanced Time Series Forecasting</h1>'
_intro_text = """
A comprehensive forecasting system with advanced features including deep learning models, 
automated feature engineering, and AI-powered insights.
"""
st.markdown(_title_html, unsafe_allow_html=True)
st.write(_intro_text)

# Sidebar headings
_sidebar = st.sidebar
_sidebar.title("Configuration")
_sidebar.header("Data Input")

# Data input options
data_option = st.sidebar.radio(
    "Choose data source:",
    ["Use example data", "Upload your own data"]
)

# `df` stays None until a usable frame with 'date' and 'value' columns exists;
# the main content below is only rendered when it is not None.
df = None
if data_option == "Use example data":
    st.sidebar.info("Using example sales data")
    df = pd.read_csv('assets/example_data.csv')
    df['date'] = pd.to_datetime(df['date'])
else:
    uploaded_file = st.sidebar.file_uploader(
        "Upload your time series data (CSV)",
        type=['csv']
    )
    if uploaded_file is not None:
        uploaded_df = pd.read_csv(uploaded_file)
        column_names = list(uploaded_df.columns)
        date_col = st.sidebar.selectbox("Select date column", column_names)
        # Preselect the second column so the two selectors do not start out
        # pointing at the same column.
        value_col = st.sidebar.selectbox(
            "Select value column",
            column_names,
            index=1 if len(column_names) > 1 else 0,
        )

        if date_col == value_col:
            # A rename mapping with one key twice would keep only 'value',
            # leaving no 'date' column for the rest of the app.
            st.sidebar.error("Select two different columns for the date and the value.")
        else:
            try:
                parsed_dates = pd.to_datetime(uploaded_df[date_col])
            except (ValueError, TypeError, OverflowError) as exc:
                st.sidebar.error(f"Could not parse column '{date_col}' as dates: {exc}")
            else:
                # Unselected columns already called 'date'/'value' would
                # become duplicate labels after the rename; drop them.
                clashing = [
                    name for name in ('date', 'value')
                    if name in column_names and name not in (date_col, value_col)
                ]
                if clashing:
                    st.sidebar.warning(
                        f"Ignoring existing column(s) {clashing} to avoid duplicate names."
                    )
                    uploaded_df = uploaded_df.drop(columns=clashing)
                uploaded_df[date_col] = parsed_dates
                df = uploaded_df.rename(columns={date_col: 'date', value_col: 'value'})

if df is not None:
    # Later sections treat the last rows as the most recent observations, so
    # keep the frame in chronological order (stable sort keeps tie order).
    df = df.sort_values('date', kind='stable').reset_index(drop=True)

# Main content
if df is not None:
    # Headline figures about the loaded data
    st.markdown('<h2 class="section-header">Data Overview</h2>', unsafe_allow_html=True)

    first_day = df['date'].min().date()
    last_day = df['date'].max().date()
    mean_value = df['value'].mean()

    # One (label, value) pair per metric column, left to right.
    # NOTE(review): the frequency label is a fixed string, not derived from df.
    overview_metrics = [
        ("Total Records", len(df)),
        ("Date Range", f"{first_day} to {last_day}"),
        ("Average Value", f"{mean_value:.2f}"),
        ("Data Frequency", "Daily"),
    ]
    for metric_column, (label, shown_value) in zip(st.columns(4), overview_metrics):
        metric_column.metric(label, shown_value)

    # Preview of the first ten rows
    preview_rows = df.head(10)
    st.dataframe(preview_rows)
    
    # Line chart of the raw series
    st.markdown('<h2 class="section-header">Data Visualization</h2>', unsafe_allow_html=True)

    history_trace = go.Scatter(x=df['date'], y=df['value'], mode='lines', name='Value')
    fig = go.Figure(data=[history_trace])
    fig.update_layout(title='Time Series Data', xaxis_title='Date', yaxis_title='Value', height=500)
    st.plotly_chart(fig, use_container_width=True)
    
    # Feature engineering
    st.markdown('<h2 class="section-header">Feature Engineering</h2>', unsafe_allow_html=True)

    if st.button("Generate Features"):
        with st.spinner("Creating advanced features..."):
            df_engineered = processor.engineer_features(df, 'date', 'value')

            st.success(f"Created {len(processor.feature_columns)} features!")

            # Show feature importance (simplified)
            st.write("Top 10 features by correlation with target:")
            # Correlate numeric/bool columns only: the frame may still carry
            # the datetime column, which DataFrame.corr() rejects on pandas 2.x.
            numeric_part = df_engineered.select_dtypes(include=['number', 'bool'])
            correlations = numeric_part.corr()['value'].abs().sort_values(ascending=False)
            # Remove the target by label rather than by position, so a feature
            # tied with it at 1.0 cannot push it out of the first slot.
            top_features = correlations.drop(labels='value').head(10)

            fig, ax = plt.subplots(figsize=(10, 6))
            top_features.plot(kind='bar', ax=ax)
            ax.set_title('Top Feature Correlations with Target')
            ax.set_ylabel('Absolute Correlation')
            st.pyplot(fig)
            # Close the figure so it does not accumulate across reruns.
            plt.close(fig)

            # Prepare data for modeling
            # NOTE(review): 30 and 7 are passed positionally to
            # create_sequences; presumably the input window and forecast
            # horizon lengths — confirm against the processor.
            sequence_length = 30
            horizon_length = 7
            X, y = processor.create_sequences(
                df_engineered, 'value', processor.feature_columns,
                sequence_length, horizon_length
            )

            # Keep the results across reruns; the training section below is
            # shown once 'X' is present in the session state.
            st.session_state.X = X
            st.session_state.y = y
            st.session_state.df_engineered = df_engineered
    
    # Model training (shown once feature sequences exist in the session state)
    if 'X' in st.session_state:
        st.markdown('<h2 class="section-header">Model Training</h2>', unsafe_allow_html=True)

        model_option = st.selectbox(
            "Select model type:",
            ["LSTM", "Prophet", "ARIMA", "Ensemble"]
        )

        if st.button("Train Model"):
            with st.spinner(f"Training {model_option} model..."):
                model = None
                if model_option == "LSTM":
                    X_all = st.session_state.X
                    y_all = st.session_state.y
                    # Hold out the last 100 sequences for validation, but
                    # never more than 20% of them: with 100 or fewer
                    # sequences, X[:-100] would leave nothing to train on.
                    n_val = min(100, len(X_all) // 5)
                    if n_val == 0:
                        st.error("Not enough sequences to train an LSTM model.")
                    else:
                        model = trainer.train_lstm(
                            X_all[:-n_val],
                            y_all[:-n_val],
                            X_all[-n_val:],
                            y_all[-n_val:]
                        )
                elif model_option == "Prophet":
                    model = trainer.train_prophet(df, 'date', 'value')
                elif model_option == "ARIMA":
                    model = trainer.train_auto_arima(df['value'])
                else:
                    st.warning("Ensemble model not implemented in this demo")

                # Compare against None explicitly: a trained model object is
                # not guaranteed to be truthy.
                if model is not None:
                    st.session_state.model = model
                    st.session_state.model_type = model_option.lower()
                    st.success(f"{model_option} model trained successfully!")
    
    # Forecasting (shown once a trained model exists in the session state)
    if 'model' in st.session_state:
        st.markdown('<h2 class="section-header">Forecasting</h2>', unsafe_allow_html=True)

        forecast_days = st.slider("Forecast horizon (days)", 7, 90, 30)

        if st.button("Generate Forecast"):
            with st.spinner("Generating forecast..."):
                # For demo purposes, we'll create a simple forecast: the mean
                # of the last 30 values plus noise and a small linear trend.
                # The model stored in the session state is not used here.
                value_std = df['value'].std()
                last_values = df['value'].values[-30:]
                forecast = np.full(forecast_days, last_values.mean())

                # Add some randomness to simulate a forecast.
                # A private RandomState(42) yields the same numbers as
                # np.random.seed(42) followed by np.random.normal, without
                # reseeding the process-wide generator on every click.
                rng = np.random.RandomState(42)
                noise = rng.normal(0, value_std * 0.1, forecast_days)
                trend = np.linspace(0, forecast_days * 0.01, forecast_days)
                forecast = forecast + noise + trend

                # Create forecast dates: one per day after the last observed date
                last_date = df['date'].max()
                forecast_dates = [last_date + timedelta(days=i) for i in range(1, forecast_days+1)]

                # Plot forecast
                fig = go.Figure()
                fig.add_trace(go.Scatter(
                    x=df['date'],
                    y=df['value'],
                    mode='lines',
                    name='Historical Data'
                ))
                fig.add_trace(go.Scatter(
                    x=forecast_dates,
                    y=forecast,
                    mode='lines',
                    name='Forecast',
                    line=dict(dash='dash')
                ))

                # Add confidence interval (a fixed band of +/- half the
                # standard deviation of the historical values)
                upper_bound = forecast + value_std * 0.5
                lower_bound = forecast - value_std * 0.5

                # Closed polygon: upper bound left-to-right, then lower bound
                # right-to-left, filled with 'toself'.
                fig.add_trace(go.Scatter(
                    x=forecast_dates + forecast_dates[::-1],
                    y=np.concatenate([upper_bound, lower_bound[::-1]]),
                    fill='toself',
                    fillcolor='rgba(0,100,80,0.2)',
                    line=dict(color='rgba(255,255,255,0)'),
                    name='Confidence Interval'
                ))

                fig.update_layout(
                    title=f'{forecast_days}-Day Forecast',
                    xaxis_title='Date',
                    yaxis_title='Value',
                    height=500
                )

                st.plotly_chart(fig, use_container_width=True)

                # Display forecast values
                forecast_df = pd.DataFrame({
                    'Date': forecast_dates,
                    'Forecast': forecast,
                    'Lower Bound': lower_bound,
                    'Upper Bound': upper_bound
                })

                st.dataframe(forecast_df)
    
    # AI Insights
    if ai_assistant and 'model' in st.session_state:
        st.markdown('<h2 class="section-header">AI-Powered Insights</h2>', unsafe_allow_html=True)
        
        if st.button("Generate AI Insights"):
            with st.spinner("Generating AI insights..."):
                # Prepare data for AI analysis
                data_summary = {
                    'period': f"{df['date'].min().date()} to {df['date'].max().date()}",
                    'data_points': len(df),
                    'mean': df['value'].mean(),
                    'std': df['value'].std(),
                    'trend': 'upward' if df['value'].iloc[-1] > df['value'].iloc[0] else 'downward'
                }
                
                # Generate interpretation
                interpretation = ai_assistant.generate_forecast_interpretation(
                    data_summary, 
                    {'model_type': st.session_state.model_type},
                    {'rmse': 0.05, 'mae': 0.03}  # Placeholder metrics
                )
                
                st.markdown('<div class="highlight">', unsafe_allow_html=True)
                st.write("### AI Interpretation")
                st.write(interpretation)
                st.markdown('</div>', unsafe_allow_html=True)
                
                # Generate recommendations
                recommendations = ai_assistant.generate_business_recommendations(
                    "Time series forecasting for business planning",
                    {'forecast_horizon': 30, 'confidence': 0.8},
                    df['value']
                )
                
                st.markdown('<div class="highlight">', unsafe_allow_html=True)
                st.write("### AI Recommendations")
                st.write(recommendations)
                st.markdown('</div>', unsafe_allow_html=True)

else:
    st.info("Please load data to get started. Use the sidebar to upload a file or use example data.")

# Footer: horizontal rule followed by a centred credit line
_footer_html = """
<div style="text-align: center;">
    <p>Advanced Time Series Forecasting System | Built with Streamlit</p>
</div>
"""
st.markdown("---")
st.markdown(_footer_html, unsafe_allow_html=True)