Spaces:
Build error
Build error
| import streamlit as st | |
| import pandas as pd | |
| import numpy as np | |
| from statsmodels.tsa.arima.model import ARIMA | |
| import matplotlib.pyplot as plt | |
| # Load and cache dataset | |
| def load_data(): | |
| return pd.read_excel('./gcp_usage_data_2024.xlsx') | |
| df = load_data() | |
| # Aggregate costs by service description | |
| service_costs = df.groupby('Service Description')['Cost ($)'].sum() | |
| # Calculate average cost | |
| average_cost = service_costs.mean() | |
| # Filter services with costs greater than the average cost | |
| services_above_average = service_costs[service_costs > average_cost].sort_values(ascending=False) | |
| # Forecast future costs for a specific service using ARIMA | |
| def forecast_costs(service_name, steps=3): | |
| service_data = df[df['Service Description'] == service_name].copy() | |
| service_data['Date'] = pd.to_datetime(service_data['Date']) | |
| service_data.set_index('Date', inplace=True) | |
| monthly_costs = service_data['Cost ($)'].resample('M').sum() | |
| model = ARIMA(monthly_costs, order=(1, 1, 1)) | |
| model_fit = model.fit() | |
| forecast = model_fit.forecast(steps=steps) | |
| return monthly_costs, forecast | |
| # Streamlit UI | |
| st.title('GCP Cost Analysis and Optimization') | |
| # Display the dataset | |
| if st.checkbox('Show Raw Data'): | |
| st.write(df) | |
| # Display aggregate costs by service | |
| st.write("### Aggregated Costs by Service") | |
| st.dataframe(service_costs.sort_values(ascending=False)) | |
| # Show services with costs greater than the average | |
| st.write(f"### Average Cost: ${average_cost:.2f}") | |
| st.write("### Services with Costs Greater Than Average:") | |
| st.dataframe(services_above_average) | |
| # Forecast costs | |
| st.write("### Cost Forecasting") | |
| service_name = st.selectbox('Select a Service for Forecasting', df['Service Description'].unique()) | |
| if st.button('Forecast Costs'): | |
| monthly_costs, forecast = forecast_costs(service_name) | |
| st.write("### Forecasted Costs") | |
| st.write(forecast) | |
| # Plot the results | |
| fig, ax = plt.subplots(figsize=(10, 6)) | |
| ax.plot(monthly_costs, label='Observed Costs') | |
| ax.plot(pd.date_range(start=monthly_costs.index[-1], periods=len(forecast) + 1, freq='M')[1:], forecast, label='Forecast', color='red') | |
| ax.set_title('Monthly Cost Forecast') | |
| ax.set_xlabel('Date') | |
| ax.set_ylabel('Cost ($)') | |
| ax.legend() | |
| st.pyplot(fig) | |
| # Cost Optimization | |
| st.write("### Cost Optimization Analysis") | |
| optimization_factor = st.slider('Optimization Factor (%)', min_value=0, max_value=100, value=25) | |
| df['Optimized Cost ($)'] = df['Cost ($)'] * (1 - optimization_factor / 100) | |
| total_cost_before = df['Cost ($)'].sum() | |
| total_cost_after = df['Optimized Cost ($)'].sum() | |
| cost_change_percentage = ((total_cost_before - total_cost_after) / total_cost_before) * 100 | |
| dollar_saving = total_cost_before - total_cost_after | |
| st.write(f"Total Cost Before Optimization: ${total_cost_before:.2f}") | |
| st.write(f"Total Cost After Optimization: ${total_cost_after:.2f}") | |
| st.write(f"Percentage Change in Cost: {cost_change_percentage:.2f}%") | |
| st.write(f"Dollar Saving: ${dollar_saving:.2f}") | |
| # Optionally, show a chart of cost before and after optimization | |
| fig, ax = plt.subplots(figsize=(10, 6)) | |
| services = df['Service Description'].unique() | |
| costs_before = df.groupby('Service Description')['Cost ($)'].sum() | |
| costs_after = df.groupby('Service Description')['Optimized Cost ($)'].sum() | |
| ax.barh(services, costs_before, label='Before Optimization', alpha=0.7) | |
| ax.barh(services, costs_after, label='After Optimization', alpha=0.7) | |
| ax.set_title('Cost Before and After Optimization') | |
| ax.set_xlabel('Cost ($)') | |
| ax.legend() | |
| st.pyplot(fig) | |