File size: 11,424 Bytes
e706da8
96cab9a
 
 
 
 
 
 
e706da8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
import streamlit as st

# Page-level settings. This call sits directly after `import streamlit` so it
# runs before any other Streamlit command in the script.
st.set_page_config(
    page_title="Advanced Forecasting",
    # Chart-increasing emoji (U+1F4C8), written as an escape sequence so the
    # icon cannot be corrupted if this file is re-encoded.
    page_icon="\U0001F4C8",
    layout="wide",
    initial_sidebar_state="expanded",
)
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import yaml
import os
import sys

# Add src to path
sys.path.append('src')

from src.data_processing.processor import AdvancedDataProcessor
from src.modeling.advanced_models import AdvancedModelTrainer
from src.agents.genai_integration import ForecastingAIAssistant

# Page configuration is applied once, by the `st.set_page_config(...)` call
# that directly follows `import streamlit` at the top of this script. It is
# deliberately not repeated here: the second call was redundant, and Streamlit
# releases that permit only one `set_page_config` call per run raise
# StreamlitAPIException when it is called again.

# Stylesheet for the CSS classes referenced by the HTML snippets this script
# renders: `main-header`, `section-header` and `highlight`.
_CUSTOM_CSS = """
<style>
    .main-header {font-size: 3rem; color: #1f77b4;}
    .section-header {font-size: 2rem; color: #ff7f0e; margin-top: 2rem;}
    .highlight {background-color: #f7f7f7; padding: 15px; border-radius: 5px; margin: 10px 0;}
</style>
"""
# unsafe_allow_html is needed so the <style> element is emitted as HTML.
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)

# Load configuration
@st.cache_resource
def load_config():
    """Read the application settings from ``config/config.yaml``.

    The path is relative to the current working directory. The function is
    wrapped in ``st.cache_resource``, so Streamlit reuses the loaded object
    instead of re-reading the file on every script rerun.

    Returns:
        Whatever ``yaml.safe_load`` produces for the file's contents. The rest
        of this script indexes it with the keys ``'data_processing'`` and
        ``'modeling'``.

    Raises:
        OSError: If the file cannot be opened.
        yaml.YAMLError: If the file is not valid YAML.
    """
    # Explicit encoding: without it the file is decoded with the platform's
    # locale encoding, which is not UTF-8 on every system.
    with open('config/config.yaml', 'r', encoding='utf-8') as f:
        return yaml.safe_load(f)


config = load_config()

# Initialize components
@st.cache_resource
def init_components():
    """Construct the data processor, the model trainer and the AI assistant.

    Reads the module-level ``config`` for the ``'data_processing'`` and
    ``'modeling'`` sections. The AI assistant is only created when the
    ``OPENAI_API_KEY`` environment variable holds a non-empty value.

    Returns:
        A ``(processor, trainer, ai_assistant)`` tuple; ``ai_assistant`` is
        ``None`` when no API key is available.
    """
    data_processor = AdvancedDataProcessor(config['data_processing'])
    model_trainer = AdvancedModelTrainer(config['modeling'])

    # The assistant is optional: without a key the app runs without it.
    api_key = os.environ.get('OPENAI_API_KEY')
    if api_key:
        assistant = ForecastingAIAssistant(api_key)
    else:
        assistant = None

    return data_processor, model_trainer, assistant


processor, trainer, ai_assistant = init_components()

# Page heading and introduction
_TITLE_HTML = '<h1 class="main-header">Advanced Time Series Forecasting</h1>'
st.markdown(_TITLE_HTML, unsafe_allow_html=True)
# Spelled out as adjacent literals so the trailing space and the newlines of
# the introduction text are visible.
st.write(
    "\n"
    "A comprehensive forecasting system with advanced features including deep learning models, \n"
    "automated feature engineering, and AI-powered insights.\n"
)

# Sidebar headings
sidebar = st.sidebar
sidebar.title("Configuration")
sidebar.header("Data Input")

# Where the data comes from; the chosen label is compared against further down.
_DATA_SOURCES = ["Use example data", "Upload your own data"]
data_option = sidebar.radio("Choose data source:", _DATA_SOURCES)

# Load the working DataFrame. `df` stays None until a usable dataset with a
# datetime 'date' column and a 'value' column is available.
df = None
if data_option == "Use example data":
    st.sidebar.info("Using example sales data")
    df = pd.read_csv('assets/example_data.csv')
    df['date'] = pd.to_datetime(df['date'])
else:
    uploaded_file = st.sidebar.file_uploader(
        "Upload your time series data (CSV)",
        type=['csv']
    )
    if uploaded_file is not None:
        try:
            uploaded_df = pd.read_csv(uploaded_file)
        except (pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
            st.sidebar.error(f"Could not read the uploaded CSV: {exc}")
            uploaded_df = None

        if uploaded_df is not None:
            columns = list(uploaded_df.columns)
            date_col = st.sidebar.selectbox("Select date column", columns)
            # Start the value picker on the second column so the two pickers
            # do not both default to the same column.
            value_col = st.sidebar.selectbox(
                "Select value column",
                columns,
                index=1 if len(columns) > 1 else 0,
            )

            if date_col == value_col:
                # With identical selections the rename mapping below would
                # collapse to a single key and no 'date' column would exist.
                st.sidebar.error(
                    "The date column and the value column must be different."
                )
            else:
                try:
                    uploaded_df[date_col] = pd.to_datetime(uploaded_df[date_col])
                except (ValueError, TypeError) as exc:
                    st.sidebar.error(
                        f"Could not parse column '{date_col}' as dates: {exc}"
                    )
                else:
                    # Drop unselected columns that are already called 'date'
                    # or 'value'; otherwise the rename would create duplicate
                    # column labels.
                    clashing = [
                        name for name in ('date', 'value')
                        if name in columns and name not in (date_col, value_col)
                    ]
                    df = uploaded_df.drop(columns=clashing).rename(
                        columns={date_col: 'date', value_col: 'value'}
                    )

# Main content
if df is not None:
    # ---- Data overview: four headline metrics and a preview ----
    st.markdown('<h2 class="section-header">Data Overview</h2>', unsafe_allow_html=True)

    records_col, range_col, mean_col, freq_col = st.columns(4)
    records_col.metric("Total Records", len(df))
    first_day = df['date'].min().date()
    last_day = df['date'].max().date()
    range_col.metric("Date Range", f"{first_day} to {last_day}")
    mean_value = df['value'].mean()
    mean_col.metric("Average Value", f"{mean_value:.2f}")
    # NOTE(review): the frequency label is a fixed string, not derived from the data.
    freq_col.metric("Data Frequency", "Daily")

    # First ten rows of the dataset
    st.dataframe(df.head(10))

    # ---- Line chart of the raw series ----
    st.markdown('<h2 class="section-header">Data Visualization</h2>', unsafe_allow_html=True)

    raw_trace = go.Scatter(x=df['date'], y=df['value'], mode='lines', name='Value')
    layout_options = {
        'title': 'Time Series Data',
        'xaxis_title': 'Date',
        'yaxis_title': 'Value',
        'height': 500,
    }
    fig = go.Figure()
    fig.add_trace(raw_trace)
    fig.update_layout(**layout_options)
    st.plotly_chart(fig, use_container_width=True)
    
    # Feature engineering: build features on demand, chart the ones most
    # correlated with the target, and stash model inputs in session state.
    st.markdown('<h2 class="section-header">Feature Engineering</h2>', unsafe_allow_html=True)

    if st.button("Generate Features"):
        with st.spinner("Creating advanced features..."):
            df_engineered = processor.engineer_features(df, 'date', 'value')

            st.success(f"Created {len(processor.feature_columns)} features!")

            # Show feature importance (simplified)
            st.write("Top 10 features by correlation with target:")
            # Correlate numeric/boolean columns only: a datetime or text column
            # in the engineered frame would make DataFrame.corr() raise on
            # recent pandas versions.
            numeric_features = df_engineered.select_dtypes(include=['number', 'bool'])
            correlations = numeric_features.corr()['value'].abs()
            # Remove the target by label instead of assuming it sorts first,
            # and skip features whose correlation is undefined (NaN).
            top_features = (
                correlations.drop(labels='value')
                .dropna()
                .sort_values(ascending=False)
                .head(10)
            )

            if top_features.empty:
                st.info("No numeric features are available to correlate with the target.")
            else:
                fig, ax = plt.subplots(figsize=(10, 6))
                top_features.plot(kind='bar', ax=ax)
                ax.set_title('Top Feature Correlations with Target')
                ax.set_ylabel('Absolute Correlation')
                st.pyplot(fig)
                # Release the figure; pyplot otherwise keeps every figure
                # created across reruns alive.
                plt.close(fig)

            # Prepare data for modeling
            # NOTE(review): 30 and 7 are presumably the input window length and
            # the prediction horizon — confirm against create_sequences.
            X, y = processor.create_sequences(
                df_engineered, 'value', processor.feature_columns, 30, 7
            )

            # Persist across reruns so the training section can pick them up.
            st.session_state.X = X
            st.session_state.y = y
            st.session_state.df_engineered = df_engineered
    
    # Model training: available once feature engineering has stored X/y.
    if 'X' in st.session_state:
        st.markdown('<h2 class="section-header">Model Training</h2>', unsafe_allow_html=True)

        model_option = st.selectbox(
            "Select model type:",
            ["LSTM", "Prophet", "ARIMA", "Ensemble"]
        )

        if st.button("Train Model"):
            with st.spinner(f"Training {model_option} model..."):
                model = None
                if model_option == "LSTM":
                    X_all = st.session_state.X
                    y_all = st.session_state.y
                    n_sequences = len(X_all)
                    if n_sequences < 2:
                        st.error(
                            "Not enough sequences to train an LSTM: at least 2 "
                            "are needed for a train/validation split."
                        )
                    else:
                        # Hold out the trailing 20% for validation, capped at
                        # 100 sequences. A fixed 100-sequence hold-out left
                        # the training set empty for short series.
                        val_size = min(100, max(1, n_sequences // 5))
                        model = trainer.train_lstm(
                            X_all[:-val_size],
                            y_all[:-val_size],
                            X_all[-val_size:],
                            y_all[-val_size:]
                        )
                elif model_option == "Prophet":
                    model = trainer.train_prophet(df, 'date', 'value')
                elif model_option == "ARIMA":
                    model = trainer.train_auto_arima(df['value'])
                else:
                    st.warning("Ensemble model not implemented in this demo")

                # Compare against None explicitly: a trained model object is
                # not guaranteed to be truthy.
                if model is not None:
                    st.session_state.model = model
                    st.session_state.model_type = model_option.lower()
                    st.success(f"{model_option} model trained successfully!")
    
    # Forecasting: shown once a model has been stored in session state.
    if 'model' in st.session_state:
        st.markdown('<h2 class="section-header">Forecasting</h2>', unsafe_allow_html=True)

        forecast_days = st.slider("Forecast horizon (days)", 7, 90, 30)

        if st.button("Generate Forecast"):
            with st.spinner("Generating forecast..."):
                # NOTE: demo placeholder. The stored model is not consulted;
                # the forecast is the mean of the last 30 observations plus
                # seeded noise and a small linear drift.
                value_std = df['value'].std()
                baseline = df['value'].values[-30:].mean()

                # A private, seeded generator yields the same numbers as
                # seeding the global one, without resetting NumPy's
                # process-wide random state on every click.
                rng = np.random.RandomState(42)
                noise = rng.normal(0, value_std * 0.1, forecast_days)
                trend = np.linspace(0, forecast_days * 0.01, forecast_days)
                forecast = baseline + noise + trend

                # Create forecast dates: one per day after the last observation
                last_date = df['date'].max()
                forecast_dates = [
                    last_date + timedelta(days=offset)
                    for offset in range(1, forecast_days + 1)
                ]

                # Plot forecast
                fig = go.Figure()
                fig.add_trace(go.Scatter(
                    x=df['date'],
                    y=df['value'],
                    mode='lines',
                    name='Historical Data'
                ))
                fig.add_trace(go.Scatter(
                    x=forecast_dates,
                    y=forecast,
                    mode='lines',
                    name='Forecast',
                    line=dict(dash='dash')
                ))

                # Band of +/- half a standard deviation of the historical
                # values around the forecast (labelled "Confidence Interval";
                # it is not a statistically derived interval).
                band_half_width = value_std * 0.5
                upper_bound = forecast + band_half_width
                lower_bound = forecast - band_half_width

                # Closed polygon: upper edge left-to-right, then lower edge
                # right-to-left, filled with 'toself'.
                fig.add_trace(go.Scatter(
                    x=forecast_dates + forecast_dates[::-1],
                    y=np.concatenate([upper_bound, lower_bound[::-1]]),
                    fill='toself',
                    fillcolor='rgba(0,100,80,0.2)',
                    line=dict(color='rgba(255,255,255,0)'),
                    name='Confidence Interval'
                ))

                fig.update_layout(
                    title=f'{forecast_days}-Day Forecast',
                    xaxis_title='Date',
                    yaxis_title='Value',
                    height=500
                )

                st.plotly_chart(fig, use_container_width=True)

                # Display forecast values
                forecast_df = pd.DataFrame({
                    'Date': forecast_dates,
                    'Forecast': forecast,
                    'Lower Bound': lower_bound,
                    'Upper Bound': upper_bound
                })

                st.dataframe(forecast_df)
    
    # AI Insights: only offered when an assistant was created (API key set)
    # and a model has been stored in session state.
    if ai_assistant is not None and 'model' in st.session_state:
        st.markdown('<h2 class="section-header">AI-Powered Insights</h2>', unsafe_allow_html=True)

        if st.button("Generate AI Insights"):
            with st.spinner("Generating AI insights..."):
                # Prepare data for AI analysis
                data_summary = {
                    'period': f"{df['date'].min().date()} to {df['date'].max().date()}",
                    'data_points': len(df),
                    'mean': df['value'].mean(),
                    'std': df['value'].std(),
                    # Direction is judged from the first and last rows only.
                    'trend': 'upward' if df['value'].iloc[-1] > df['value'].iloc[0] else 'downward'
                }

                # Generate interpretation
                interpretation = ai_assistant.generate_forecast_interpretation(
                    data_summary,
                    {'model_type': st.session_state.model_type},
                    {'rmse': 0.05, 'mae': 0.03}  # Placeholder metrics
                )

                # NOTE(review): each st.markdown call is rendered as its own
                # element, so this opening/closing <div> pair likely does not
                # wrap the text written in between — confirm in the browser.
                st.markdown('<div class="highlight">', unsafe_allow_html=True)
                st.write("### AI Interpretation")
                st.write(interpretation)
                st.markdown('</div>', unsafe_allow_html=True)

                # Generate recommendations. The horizon is the value chosen on
                # the "Forecast horizon (days)" slider rather than a fixed 30,
                # so the recommendations match the forecast the user sees.
                recommendations = ai_assistant.generate_business_recommendations(
                    "Time series forecasting for business planning",
                    {'forecast_horizon': forecast_days, 'confidence': 0.8},
                    df['value']
                )

                st.markdown('<div class="highlight">', unsafe_allow_html=True)
                st.write("### AI Recommendations")
                st.write(recommendations)
                st.markdown('</div>', unsafe_allow_html=True)

else:
    st.info("Please load data to get started. Use the sidebar to upload a file or use example data.")

# Page footer: a horizontal rule followed by a centred credit line
_FOOTER_HTML = """
<div style="text-align: center;">
    <p>Advanced Time Series Forecasting System | Built with Streamlit</p>
</div>
"""
st.markdown("---")
st.markdown(_FOOTER_HTML, unsafe_allow_html=True)