# Data_sheets / app.py
# Navya-Sree's picture
# Update app.py
# 96cab9a verified
import streamlit as st
# Page configuration. This runs immediately after importing Streamlit so that
# it is issued before any other Streamlit command in the script.
st.set_page_config(
    page_title="Advanced Forecasting",
    # The icon was stored as mojibake (UTF-8 bytes of the chart emoji decoded
    # with a legacy code page); use the intended character.
    page_icon="📈",
    layout="wide",
    initial_sidebar_state="expanded"
)
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import yaml
import os
import sys
# Add src to path
sys.path.append('src')
from src.data_processing.processor import AdvancedDataProcessor
from src.modeling.advanced_models import AdvancedModelTrainer
from src.agents.genai_integration import ForecastingAIAssistant
# Page configuration is applied once, right after `import streamlit as st` at
# the top of this file. The call is intentionally not repeated here: a second
# st.set_page_config() is redundant, and Streamlit releases that enforce the
# "only once per page" rule raise StreamlitAPIException for it.
# Custom CSS: style rules for the page title (.main-header), the section
# headings (.section-header) and the highlighted boxes (.highlight).
_CUSTOM_CSS = """
<style>
.main-header {font-size: 3rem; color: #1f77b4;}
.section-header {font-size: 2rem; color: #ff7f0e; margin-top: 2rem;}
.highlight {background-color: #f7f7f7; padding: 15px; border-radius: 5px; margin: 10px 0;}
</style>
"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
# Load configuration
@st.cache_resource
def load_config():
    """Read and parse the application settings from ``config/config.yaml``.

    The path is relative to the current working directory. The function is
    wrapped in ``st.cache_resource``, so the parsed object is reused instead
    of re-reading the file on every script rerun.

    Returns:
        The parsed YAML document, exactly as produced by ``yaml.safe_load``.

    Raises:
        OSError: If the configuration file cannot be opened.
        yaml.YAMLError: If the file is not valid YAML.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform's default
    # encoding, which would mis-read non-ASCII settings on some systems.
    with open('config/config.yaml', 'r', encoding='utf-8') as f:
        return yaml.safe_load(f)


config = load_config()
# Initialize components
@st.cache_resource
def init_components():
    """Build the data processor, the model trainer and the optional AI assistant.

    Returns:
        A ``(processor, trainer, ai_assistant)`` tuple. ``ai_assistant`` is
        ``None`` when the ``OPENAI_API_KEY`` environment variable is unset or
        empty.
    """
    data_processor = AdvancedDataProcessor(config['data_processing'])
    model_trainer = AdvancedModelTrainer(config['modeling'])

    # The assistant is only created when an OpenAI API key is available.
    api_key = os.getenv('OPENAI_API_KEY')
    if not api_key:
        return data_processor, model_trainer, None
    return data_processor, model_trainer, ForecastingAIAssistant(api_key)


processor, trainer, ai_assistant = init_components()
# App title: page heading followed by a short description of the app.
st.markdown(
    '<h1 class="main-header">Advanced Time Series Forecasting</h1>',
    unsafe_allow_html=True,
)
_APP_INTRO = """
A comprehensive forecasting system with advanced features including deep learning models,
automated feature engineering, and AI-powered insights.
"""
st.write(_APP_INTRO)
# Sidebar: headings plus the selector that decides where the data comes from.
_sidebar = st.sidebar
_sidebar.title("Configuration")
_sidebar.header("Data Input")

# Data input options
_DATA_SOURCES = ["Use example data", "Upload your own data"]
data_option = _sidebar.radio("Choose data source:", _DATA_SOURCES)
# Resolve the working DataFrame. `df` stays None until a dataset with a parsed
# 'date' column and a 'value' column is available.
df = None
if data_option == "Use example data":
    st.sidebar.info("Using example sales data")
    df = pd.read_csv('assets/example_data.csv')
    df['date'] = pd.to_datetime(df['date'])
else:
    uploaded_file = st.sidebar.file_uploader(
        "Upload your time series data (CSV)",
        type=['csv']
    )
    if uploaded_file is not None:
        uploaded_df = pd.read_csv(uploaded_file)
        column_names = list(uploaded_df.columns)
        date_col = st.sidebar.selectbox("Select date column", column_names)
        # Default the value selector to the second column so the two selectors
        # do not both start on the first column.
        value_col = st.sidebar.selectbox(
            "Select value column",
            column_names,
            index=1 if len(column_names) > 1 else 0,
        )
        if date_col == value_col:
            # With identical selections the rename mapping collapses to a
            # single entry and no 'date' column would exist afterwards.
            st.sidebar.error("Please select different columns for date and value.")
        else:
            try:
                parsed_dates = pd.to_datetime(uploaded_df[date_col])
            except (ValueError, TypeError, OverflowError) as exc:
                st.sidebar.error(f"Could not parse column '{date_col}' as dates: {exc}")
            else:
                uploaded_df[date_col] = parsed_dates
                df = uploaded_df.rename(columns={date_col: 'date', value_col: 'value'})
# Main content.
# Everything below runs top-to-bottom on each Streamlit rerun. Later sections
# only appear once an earlier step has stored its result in st.session_state
# ('X'/'y'/'df_engineered' after feature generation, 'model'/'model_type'
# after training).
if df is not None:
    # Display data info
    st.markdown('<h2 class="section-header">Data Overview</h2>', unsafe_allow_html=True)
    col1, col2, col3, col4 = st.columns(4)
    col1.metric("Total Records", len(df))
    col2.metric("Date Range", f"{df['date'].min().date()} to {df['date'].max().date()}")
    col3.metric("Average Value", f"{df['value'].mean():.2f}")
    # NOTE(review): the frequency label is hard-coded; the spacing of the
    # loaded dates is not inspected.
    col4.metric("Data Frequency", "Daily")

    # Data preview
    st.dataframe(df.head(10))

    # Plot raw data
    st.markdown('<h2 class="section-header">Data Visualization</h2>', unsafe_allow_html=True)
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=df['date'], y=df['value'], mode='lines', name='Value'))
    fig.update_layout(
        title='Time Series Data',
        xaxis_title='Date',
        yaxis_title='Value',
        height=500
    )
    st.plotly_chart(fig, use_container_width=True)

    # Feature engineering
    st.markdown('<h2 class="section-header">Feature Engineering</h2>', unsafe_allow_html=True)
    if st.button("Generate Features"):
        with st.spinner("Creating advanced features..."):
            df_engineered = processor.engineer_features(df, 'date', 'value')
        st.success(f"Created {len(processor.feature_columns)} features!")

        # Show feature importance (simplified)
        st.write("Top 10 features by correlation with target:")
        # Correlate numeric columns only; the engineered frame may still hold
        # the datetime column, which cannot take part in a correlation matrix.
        numeric_features = df_engineered.select_dtypes(include='number')
        correlations = numeric_features.corr()['value'].abs().sort_values(ascending=False)
        # Remove the target by label instead of assuming it sorts first, and
        # skip features whose correlation is undefined (e.g. constant columns).
        top_features = correlations.drop(labels='value', errors='ignore').dropna().head(10)
        if top_features.empty:
            st.info("No numeric features available to correlate with the target.")
        else:
            fig, ax = plt.subplots(figsize=(10, 6))
            top_features.plot(kind='bar', ax=ax)
            ax.set_title('Top Feature Correlations with Target')
            ax.set_ylabel('Absolute Correlation')
            st.pyplot(fig)
            # Release the figure; pyplot otherwise keeps it alive across reruns.
            plt.close(fig)

        # Prepare data for modeling
        X, y = processor.create_sequences(
            df_engineered, 'value', processor.feature_columns, 30, 7
        )
        st.session_state.X = X
        st.session_state.y = y
        st.session_state.df_engineered = df_engineered

    # Model training
    if 'X' in st.session_state:
        st.markdown('<h2 class="section-header">Model Training</h2>', unsafe_allow_html=True)
        model_option = st.selectbox(
            "Select model type:",
            ["LSTM", "Prophet", "ARIMA", "Ensemble"]
        )
        if st.button("Train Model"):
            model = None
            with st.spinner(f"Training {model_option} model..."):
                if model_option == "LSTM":
                    X_all = st.session_state.X
                    y_all = st.session_state.y
                    n_samples = len(X_all)
                    # Hold out the last 100 sequences for validation. With 100
                    # or fewer sequences that slice would leave nothing to
                    # train on, so fall back to a 20% hold-out instead.
                    holdout = 100 if n_samples > 100 else n_samples // 5
                    if holdout == 0:
                        st.warning("Not enough sequences to train the LSTM model")
                    else:
                        model = trainer.train_lstm(
                            X_all[:-holdout],
                            y_all[:-holdout],
                            X_all[-holdout:],
                            y_all[-holdout:]
                        )
                elif model_option == "Prophet":
                    model = trainer.train_prophet(df, 'date', 'value')
                elif model_option == "ARIMA":
                    model = trainer.train_auto_arima(df['value'])
                else:
                    st.warning("Ensemble model not implemented in this demo")

            # Compare against None explicitly: a trained model object is not
            # guaranteed to be truthy.
            if model is not None:
                st.session_state.model = model
                st.session_state.model_type = model_option.lower()
                st.success(f"{model_option} model trained successfully!")

    # Forecasting
    if 'model' in st.session_state:
        st.markdown('<h2 class="section-header">Forecasting</h2>', unsafe_allow_html=True)
        forecast_days = st.slider("Forecast horizon (days)", 7, 90, 30)
        if st.button("Generate Forecast"):
            with st.spinner("Generating forecast..."):
                # For demo purposes, we'll create a simple forecast: the mean
                # of the last 30 observations. The stored model is not used.
                last_values = df['value'].values[-30:]
                forecast = np.full(forecast_days, last_values.mean())

                # Add some randomness to simulate a forecast. A private
                # RandomState(42) yields the same numbers as seeding the
                # global generator, without resetting global random state.
                rng = np.random.RandomState(42)
                noise = rng.normal(0, df['value'].std() * 0.1, forecast_days)
                trend = np.linspace(0, forecast_days * 0.01, forecast_days)
                forecast = forecast + noise + trend

                # Create forecast dates (one per day after the last observation)
                last_date = df['date'].max()
                forecast_dates = [
                    last_date + timedelta(days=i) for i in range(1, forecast_days + 1)
                ]

            # Plot forecast
            fig = go.Figure()
            fig.add_trace(go.Scatter(
                x=df['date'],
                y=df['value'],
                mode='lines',
                name='Historical Data'
            ))
            fig.add_trace(go.Scatter(
                x=forecast_dates,
                y=forecast,
                mode='lines',
                name='Forecast',
                line=dict(dash='dash')
            ))

            # Add confidence interval: a fixed band of half a standard
            # deviation of the historical values around the forecast.
            band = df['value'].std() * 0.5
            upper_bound = forecast + band
            lower_bound = forecast - band
            fig.add_trace(go.Scatter(
                # Trace the upper edge forwards and the lower edge backwards
                # so that 'toself' fills the closed polygon between them.
                x=forecast_dates + forecast_dates[::-1],
                y=np.concatenate([upper_bound, lower_bound[::-1]]),
                fill='toself',
                fillcolor='rgba(0,100,80,0.2)',
                line=dict(color='rgba(255,255,255,0)'),
                name='Confidence Interval'
            ))
            fig.update_layout(
                title=f'{forecast_days}-Day Forecast',
                xaxis_title='Date',
                yaxis_title='Value',
                height=500
            )
            st.plotly_chart(fig, use_container_width=True)

            # Display forecast values
            forecast_df = pd.DataFrame({
                'Date': forecast_dates,
                'Forecast': forecast,
                'Lower Bound': lower_bound,
                'Upper Bound': upper_bound
            })
            st.dataframe(forecast_df)

    # AI Insights (only when an assistant was created and a model is stored)
    if ai_assistant is not None and 'model' in st.session_state:
        st.markdown('<h2 class="section-header">AI-Powered Insights</h2>', unsafe_allow_html=True)
        if st.button("Generate AI Insights"):
            with st.spinner("Generating AI insights..."):
                # Prepare data for AI analysis
                data_summary = {
                    'period': f"{df['date'].min().date()} to {df['date'].max().date()}",
                    'data_points': len(df),
                    'mean': df['value'].mean(),
                    'std': df['value'].std(),
                    # Direction is judged from the first and last values only.
                    'trend': 'upward' if df['value'].iloc[-1] > df['value'].iloc[0] else 'downward'
                }

                # Generate interpretation
                interpretation = ai_assistant.generate_forecast_interpretation(
                    data_summary,
                    {'model_type': st.session_state.model_type},
                    {'rmse': 0.05, 'mae': 0.03}  # Placeholder metrics
                )
                # NOTE(review): these opening/closing <div> calls are separate
                # Streamlit elements, so the highlight box most likely does
                # not enclose the text written between them -- confirm.
                st.markdown('<div class="highlight">', unsafe_allow_html=True)
                st.write("### AI Interpretation")
                st.write(interpretation)
                st.markdown('</div>', unsafe_allow_html=True)

                # Generate recommendations, passing the horizon currently
                # selected on the slider rather than a fixed 30 days.
                recommendations = ai_assistant.generate_business_recommendations(
                    "Time series forecasting for business planning",
                    {'forecast_horizon': forecast_days, 'confidence': 0.8},
                    df['value']
                )
                st.markdown('<div class="highlight">', unsafe_allow_html=True)
                st.write("### AI Recommendations")
                st.write(recommendations)
                st.markdown('</div>', unsafe_allow_html=True)
else:
    st.info("Please load data to get started. Use the sidebar to upload a file or use example data.")
# Footer: a horizontal rule followed by a centered attribution line.
_FOOTER_HTML = """
<div style="text-align: center;">
<p>Advanced Time Series Forecasting System | Built with Streamlit</p>
</div>
"""
st.markdown("---")
st.markdown(_FOOTER_HTML, unsafe_allow_html=True)