# Source: pythonnew / app.py — revision 3af41db ("Update app.py"), uploaded by bangaboy, 15.6 kB
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
# Page configuration: title, icon, wide layout and an expanded sidebar.
_page_settings = {
    "page_title": "Data Analytics Hub",
    "page_icon": "📊",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_page_settings)
# Custom CSS overrides (buttons, sidebar navigation, alerts, metric values).
# The stylesheet is kept in a named variable and injected as raw HTML, which
# is why unsafe_allow_html=True is required.
_custom_css = """
<style>
.main {
padding-top: 2rem;
}
.stButton>button {
width: 100%;
border-radius: 5px;
height: 3em;
background-color: #ff4b4b;
color: white;
border: none;
}
.stButton>button:hover {
background-color: #ff6b6b;
color: white;
}
div[data-testid="stSidebarNav"] {
background-image: linear-gradient(#f0f2f6, #e0e2e6);
padding: 2rem 0;
border-radius: 10px;
}
.css-1d391kg {
padding: 2rem 1rem;
}
.stAlert {
padding: 1rem;
border-radius: 5px;
}
div[data-testid="stMetricValue"] {
background-color: #f0f2f6;
padding: 1rem;
border-radius: 5px;
}
</style>
"""
st.markdown(_custom_css, unsafe_allow_html=True)
# Seed the session with a synthetic 100-day dataset the first time the script
# runs for a session; later reruns keep whatever is already stored.
if 'data' not in st.session_state:
    n_rows = 100
    np.random.seed(42)  # fixed seed so the sample data is reproducible

    # Columns are generated in this exact order so the random draws match.
    sample = {'date': pd.date_range('2023-01-01', periods=n_rows, freq='D')}
    sample['sales'] = np.random.normal(1000, 200, n_rows)
    sample['visitors'] = np.random.normal(500, 100, n_rows)
    sample['conversion_rate'] = np.random.uniform(0.01, 0.05, n_rows)
    sample['customer_satisfaction'] = np.random.normal(4.2, 0.5, n_rows)
    sample['region'] = np.random.choice(['North', 'South', 'East', 'West'], n_rows)

    st.session_state.data = pd.DataFrame(sample)
# Sidebar: logo, title and the page selector that drives the page branches below.
_page_labels = ["🏠 Dashboard", "🔍 Data Explorer", "📊 Visualization", "🤖 ML Predictions"]
with st.sidebar:
    st.image("https://via.placeholder.com/150?text=Analytics+Hub", width=150)
    st.title("Analytics Hub")
    selected_page = st.radio("📑 Navigation", _page_labels, key="navigation")
# Dashboard page: headline metrics, CSV upload and a three-row preview.
if selected_page == "🏠 Dashboard":
    st.title("📊 Data Analytics Dashboard")

    # Reserve the slot for the quick-stats row now, but fill it only after the
    # upload below has been processed, so the metrics always describe the
    # dataset that is in session state on this run (not the previous one).
    stats_slot = st.container()

    # Data upload section
    st.markdown("### 📁 Upload Your Dataset")
    upload_col1, upload_col2 = st.columns([2, 3])
    with upload_col1:
        uploaded_file = st.file_uploader(
            "Choose a CSV file",
            type="csv",
            help="Upload your CSV file to begin analysis"
        )
        if uploaded_file is not None:
            try:
                st.session_state.data = pd.read_csv(uploaded_file)
            except Exception as e:  # UI boundary: report any parse failure to the user
                st.error(f"❌ Error uploading file: {e}")
            else:
                st.success("✅ Data uploaded successfully!")
    with upload_col2:
        st.markdown("#### Dataset Preview")
        st.dataframe(
            st.session_state.data.head(3),
            use_container_width=True
        )

    data = st.session_state.data

    def _numeric_or_none(name):
        """Return column *name* when it exists and is numeric, otherwise None."""
        if name in data.columns and pd.api.types.is_numeric_dtype(data[name]):
            return data[name]
        return None

    # An uploaded CSV is not guaranteed to carry the sample dataset's columns;
    # missing or non-numeric ones are shown as "N/A" instead of raising KeyError.
    sales = _numeric_or_none('sales')
    visitors = _numeric_or_none('visitors')
    satisfaction = _numeric_or_none('customer_satisfaction')

    # Quick stats in a grid
    with stats_slot:
        col1, col2, col3, col4 = st.columns(4)
        with col1:
            st.metric(
                "Total Records",
                f"{len(data):,}",
                "Current dataset size"
            )
        with col2:
            if sales is None:
                st.metric("Avg Sales", "N/A")
            else:
                st.metric(
                    "Avg Sales",
                    f"${sales.mean():,.2f}",
                    f"{sales.pct_change().mean()*100:.1f}%"
                )
        with col3:
            if visitors is None:
                st.metric("Avg Visitors", "N/A")
            else:
                st.metric(
                    "Avg Visitors",
                    f"{visitors.mean():,.0f}",
                    f"{visitors.pct_change().mean()*100:.1f}%"
                )
        with col4:
            if satisfaction is None:
                st.metric("Satisfaction", "N/A")
            else:
                st.metric(
                    "Satisfaction",
                    f"{satisfaction.mean():.2f}",
                    "Average rating"
                )
# Data Explorer page: dataset overview, per-column statistics and a plot.
# Written as a standalone `if`: the page labels are mutually exclusive, so this
# is equivalent to continuing the previous branch with `elif`.
if selected_page == "🔍 Data Explorer":
    st.title("🔍 Data Explorer")
    data = st.session_state.data

    # Dataset summary
    col1, col2 = st.columns([1, 2])
    with col1:
        st.markdown("### 📊 Dataset Overview")
        # deep=True so that object (string) columns are measured by their
        # actual contents rather than by pointer size only.
        st.info(f"""
- **Rows:** {data.shape[0]:,}
- **Columns:** {data.shape[1]}
- **Memory Usage:** {data.memory_usage(deep=True).sum() / 1024**2:.2f} MB
""")
    with col2:
        st.markdown("### 📈 Quick Stats")
        st.dataframe(
            data.describe(),
            use_container_width=True
        )

    # Column analysis
    st.markdown("### 🔬 Column Analysis")
    col1, col2, col3 = st.columns([1, 1, 2])
    with col1:
        column = st.selectbox(
            "Select column:",
            data.columns,
            help="Choose a column to analyze"
        )
    series = data[column]
    is_numeric = pd.api.types.is_numeric_dtype(series)
    with col2:
        if is_numeric:
            # NOTE(review): the selected analysis type is not read by the
            # plotting code below, which always draws a distribution for
            # numeric columns. The widget is kept so the UI is unchanged.
            analysis_type = st.selectbox(
                "Analysis type:",
                ["Distribution", "Time Series"] if "date" in str(column).lower() else ["Distribution"],
                help="Choose type of analysis"
            )
        else:
            analysis_type = "Value Counts"
    with col3:
        if is_numeric:
            stats_col1, stats_col2 = st.columns(2)
            with stats_col1:
                st.metric("Mean", f"{series.mean():.2f}")
                st.metric("Std Dev", f"{series.std():.2f}")
            with stats_col2:
                st.metric("Median", f"{series.median():.2f}")
                st.metric("IQR", f"{series.quantile(0.75) - series.quantile(0.25):.2f}")

    # Visualization. The style must be set before the axes are created for it
    # to apply to this figure.
    sns.set_style("whitegrid")
    fig, ax = plt.subplots(figsize=(12, 6))
    try:
        if is_numeric:
            sns.histplot(data=data, x=column, kde=True, ax=ax)
            ax.set_title(f"Distribution of {column}", pad=20)
        else:
            value_counts = series.value_counts()
            sns.barplot(x=value_counts.index, y=value_counts.values, ax=ax)
            ax.set_title(f"Value Counts for {column}", pad=20)
            ax.tick_params(axis='x', rotation=45)
        st.pyplot(fig)
    finally:
        # pyplot keeps every figure alive until it is closed; without this the
        # long-running server process accumulates one figure per rerun.
        plt.close(fig)
# Visualization page: bar / line / scatter charts and a correlation heatmap.
# Written as a standalone `if`: the page labels are mutually exclusive, so this
# is equivalent to continuing the previous branch with `elif`.
if selected_page == "📊 Visualization":
    st.title("📊 Advanced Visualizations")
    data = st.session_state.data
    numeric_columns = [
        col for col in data.columns
        if pd.api.types.is_numeric_dtype(data[col])
    ]

    # Chart selection
    chart_type = st.selectbox(
        "Select visualization type:",
        ["📊 Bar Chart", "📈 Line Chart", "🔵 Scatter Plot", "🌡️ Heatmap"],
        help="Choose the type of visualization you want to create"
    )

    if chart_type in ["📊 Bar Chart", "📈 Line Chart"]:
        col1, col2, col3 = st.columns([1, 1, 1])
        with col1:
            x_column = st.selectbox("X-axis:", data.columns)
        with col2:
            y_column = st.selectbox("Y-axis:", numeric_columns)
        with col3:
            color_theme = st.selectbox(
                "Color theme:",
                ["viridis", "magma", "plasma", "inferno"]
            )

        # A selectbox with no options yields None; plotting would then fail.
        if x_column is None or y_column is None:
            st.warning("⚠️ This chart needs at least one numeric column for the Y-axis")
        else:
            # Style and palette are read when the axes are created, so they
            # must be set before plt.subplots() to affect this figure.
            sns.set_style("whitegrid")
            sns.set_palette(color_theme)
            fig, ax = plt.subplots(figsize=(12, 6))
            try:
                if not pd.api.types.is_numeric_dtype(data[x_column]):
                    # Categorical X: plot the mean of Y per category.
                    plot_data = data.groupby(x_column)[y_column].mean().reset_index()
                    line_kwargs = {"marker": "o"}
                else:
                    plot_data = data
                    line_kwargs = {}
                if "Bar" in chart_type:
                    sns.barplot(x=x_column, y=y_column, data=plot_data, ax=ax)
                else:
                    sns.lineplot(x=x_column, y=y_column, data=plot_data, ax=ax, **line_kwargs)
                ax.tick_params(axis='x', rotation=45)
                ax.set_title(f"{y_column} by {x_column}", pad=20)
                st.pyplot(fig)
            finally:
                # Release the figure; pyplot otherwise keeps it for the
                # lifetime of the server process.
                plt.close(fig)

    elif "Scatter" in chart_type:
        col1, col2, col3 = st.columns([1, 1, 1])
        with col1:
            x_column = st.selectbox("X-axis:", numeric_columns)
        with col2:
            y_column = st.selectbox(
                "Y-axis:",
                [col for col in numeric_columns if col != x_column]
            )
        with col3:
            hue_column = st.selectbox(
                "Color by:",
                ["None"] + list(data.columns)
            )

        if x_column is None or y_column is None:
            st.warning("⚠️ A scatter plot needs at least two numeric columns")
        else:
            sns.set_style("whitegrid")
            fig, ax = plt.subplots(figsize=(12, 6))
            try:
                hue_kwargs = {"hue": hue_column} if hue_column != "None" else {}
                sns.scatterplot(x=x_column, y=y_column, data=data, ax=ax, **hue_kwargs)
                ax.set_title(f"{y_column} vs {x_column}", pad=20)
                st.pyplot(fig)
            finally:
                plt.close(fig)

    elif "Heatmap" in chart_type:
        st.markdown("### 🌡️ Correlation Heatmap")
        numeric_cols = data.select_dtypes(include=['number']).columns.tolist()
        if len(numeric_cols) < 2:
            # With fewer than two columns there is no off-diagonal cell to show.
            st.warning("⚠️ A correlation heatmap needs at least two numeric columns")
        else:
            correlation = data[numeric_cols].corr()
            fig, ax = plt.subplots(figsize=(12, 8))
            try:
                # Hide the diagonal and upper triangle (duplicates of the lower).
                mask = np.triu(np.ones_like(correlation, dtype=bool))
                sns.heatmap(
                    correlation,
                    mask=mask,
                    annot=True,
                    cmap='coolwarm',
                    ax=ax,
                    center=0,
                    square=True,
                    fmt='.2f',
                    linewidths=1
                )
                ax.set_title("Correlation Heatmap", pad=20)
                st.pyplot(fig)
            finally:
                plt.close(fig)
# ML Predictions page: configure, train and query a regression model.
# Written as a standalone `if`: the page labels are mutually exclusive, so this
# is equivalent to continuing the previous branch with `elif`.
if selected_page == "🤖 ML Predictions":
    st.title("🤖 Machine Learning Predictions")
    data = st.session_state.data

    # Model configuration
    st.markdown("### ⚙️ Model Configuration")
    config_col1, config_col2 = st.columns(2)
    with config_col1:
        numeric_cols = data.select_dtypes(include=['number']).columns.tolist()
        target_column = st.selectbox(
            "Target variable:",
            numeric_cols,
            help="Select the variable you want to predict"
        )
    with config_col2:
        model_type = st.selectbox(
            "Model type:",
            ["📊 Linear Regression", "🌲 Random Forest"],
            help="Choose the type of model to train"
        )

    # Feature selection
    st.markdown("### 🎯 Feature Selection")
    feature_cols = [col for col in numeric_cols if col != target_column]
    selected_features = st.multiselect(
        "Select features for the model:",
        feature_cols,
        default=feature_cols,
        help="Choose the variables to use as predictors"
    )

    # Model training section
    train_col1, train_col2 = st.columns([2, 1])
    with train_col1:
        if st.button("🚀 Train Model", use_container_width=True):
            if not selected_features:
                st.error("⚠️ Please select at least one feature")
            else:
                # Rows with missing values are dropped up front: the
                # estimators used here reject NaN in fit().
                training_rows = data[selected_features + [target_column]].dropna()
                dropped = len(data) - len(training_rows)
                if len(training_rows) < 2:
                    # train_test_split needs at least one row on each side.
                    st.error("⚠️ Not enough complete rows to train a model")
                else:
                    with st.spinner("Training model..."):
                        X = training_rows[selected_features]
                        y = training_rows[target_column]
                        X_train, X_test, y_train, y_test = train_test_split(
                            X, y, test_size=0.2, random_state=42
                        )
                        scaler = StandardScaler()
                        X_train_scaled = scaler.fit_transform(X_train)
                        X_test_scaled = scaler.transform(X_test)
                        if "Linear" in model_type:
                            model = LinearRegression()
                        else:
                            model = RandomForestRegressor(n_estimators=100, random_state=42)
                        model.fit(X_train_scaled, y_train)

                        # Keep everything the prediction section needs,
                        # including the target the model was trained on.
                        st.session_state.model = model
                        st.session_state.scaler = scaler
                        st.session_state.features = selected_features
                        st.session_state.target = target_column

                        # Model evaluation
                        train_score = model.score(X_train_scaled, y_train)
                        test_score = model.score(X_test_scaled, y_test)

                    st.success("✨ Model trained successfully!")
                    if dropped:
                        st.caption(f"Ignored {dropped} row(s) with missing values")

                    # Display metrics
                    metric_col1, metric_col2 = st.columns(2)
                    with metric_col1:
                        st.metric("Training R² Score", f"{train_score:.4f}")
                    with metric_col2:
                        st.metric("Testing R² Score", f"{test_score:.4f}")

                    # Feature importance for Random Forest
                    if "Random" in model_type:
                        st.markdown("### 📊 Feature Importance")
                        importance = pd.DataFrame({
                            'Feature': selected_features,
                            'Importance': model.feature_importances_
                        }).sort_values('Importance', ascending=False)
                        fig, ax = plt.subplots(figsize=(10, 6))
                        try:
                            sns.barplot(x='Importance', y='Feature', data=importance, ax=ax)
                            ax.set_title("Feature Importance")
                            st.pyplot(fig)
                        finally:
                            # Release the figure so reruns do not pile up
                            # open figures in the server process.
                            plt.close(fig)

    # Prediction section
    st.markdown("### 🎯 Make Predictions")
    if 'model' in st.session_state:
        trained_features = st.session_state.features
        # The dataset may have been replaced since training; the stored model
        # is only usable if its features still exist as numeric columns.
        unusable = [
            feature for feature in trained_features
            if feature not in data.columns
            or not pd.api.types.is_numeric_dtype(data[feature])
        ]
        if unusable:
            st.info("ℹ️ The trained model does not match the current dataset. Train a new model to make predictions")
        else:
            # Label the result with the target used at training time, not with
            # whatever the target selectbox currently shows. Falls back to the
            # selectbox for models stored before the target was recorded.
            trained_target = st.session_state.get('target', target_column)
            pred_col1, pred_col2 = st.columns([2, 1])
            with pred_col1:
                st.markdown("#### Input Features")
                input_data = {}
                # One slider per feature, spanning the observed range and
                # starting at the mean.
                for feature in trained_features:
                    min_val = float(data[feature].min())
                    max_val = float(data[feature].max())
                    mean_val = float(data[feature].mean())
                    input_data[feature] = st.slider(
                        f"{feature}:",
                        min_value=min_val,
                        max_value=max_val,
                        value=mean_val,
                        help=f"Range: {min_val:.2f} to {max_val:.2f}"
                    )
            with pred_col2:
                if st.button("🎯 Predict", use_container_width=True):
                    input_df = pd.DataFrame([input_data])
                    input_scaled = st.session_state.scaler.transform(input_df)
                    prediction = st.session_state.model.predict(input_scaled)[0]
                    st.success(f"Predicted {trained_target}: {prediction:.2f}")
    else:
        st.info("ℹ️ Train a model first to make predictions")