Spaces:
Build error
Build error
Create app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import numpy as np
|
| 4 |
+
from statsmodels.tsa.arima.model import ARIMA
|
| 5 |
+
import matplotlib.pyplot as plt
|
| 6 |
+
|
| 7 |
+
# Load and cache dataset
|
| 8 |
+
@st.cache_data
def load_data():
    """Read the GCP usage workbook into a DataFrame, cached across reruns.

    Uses ``st.cache_data`` rather than the deprecated ``st.cache``.
    ``st.cache_data`` hands every caller its own copy of the cached frame,
    so code that adds columns to the returned DataFrame does not modify the
    cached value.

    Returns:
        pandas.DataFrame: the contents of ``gcp_usage_data_2024.xlsx`` as
        parsed by ``pd.read_excel``.
    """
    return pd.read_excel('gcp_usage_data_2024.xlsx')
|
| 11 |
+
|
| 12 |
+
df = load_data()

# Total spend per service: sum 'Cost ($)' over the rows that share a
# 'Service Description'.
service_costs = df['Cost ($)'].groupby(df['Service Description']).sum()

# Mean of the per-service totals (not of the individual rows).
average_cost = service_costs.mean()

# Services whose total exceeds that mean, most expensive first.
services_above_average = (
    service_costs.loc[service_costs.gt(average_cost)]
    .sort_values(ascending=False)
)
|
| 22 |
+
|
| 23 |
+
# Forecast future costs for a specific service using ARIMA
|
| 24 |
+
def forecast_costs(service_name, steps=3):
    """Fit an ARIMA(1, 1, 1) model to one service's monthly cost and forecast ahead.

    Rows of the module-level ``df`` whose 'Service Description' equals
    ``service_name`` are selected, their 'Date' column is parsed to
    datetimes, and 'Cost ($)' is summed into month-end bins.

    Args:
        service_name: value to match against the 'Service Description' column.
        steps: number of future periods to forecast.

    Returns:
        tuple: ``(monthly_costs, forecast)`` -- the resampled monthly cost
        series the model was fitted on, and the model's forecast for the
        next ``steps`` periods.
    """
    matching_rows = df.loc[df['Service Description'] == service_name]
    monthly_costs = (
        # assign() returns a new frame, so the module-level df is untouched.
        matching_rows.assign(Date=pd.to_datetime(matching_rows['Date']))
        .set_index('Date')['Cost ($)']
        .resample('M')
        .sum()
    )
    fitted_model = ARIMA(monthly_costs, order=(1, 1, 1)).fit()
    return monthly_costs, fitted_model.forecast(steps=steps)
|
| 33 |
+
|
| 34 |
+
# Streamlit UI
|
| 35 |
+
st.title('GCP Cost Analysis and Optimization')

# Raw rows are shown only while the checkbox is ticked.
show_raw_data = st.checkbox('Show Raw Data')
if show_raw_data:
    st.write(df)

# Per-service totals, largest first.
st.write("### Aggregated Costs by Service")
ranked_service_costs = service_costs.sort_values(ascending=False)
st.dataframe(ranked_service_costs)

# Mean of the per-service totals, followed by the services that exceed it.
st.write(f"### Average Cost: ${average_cost:.2f}")
st.write("### Services with Costs Greater Than Average:")
st.dataframe(services_above_average)

# Service picker; the chosen value is what the forecast button acts on.
st.write("### Cost Forecasting")
service_options = df['Service Description'].unique()
service_name = st.selectbox('Select a Service for Forecasting', service_options)
|
| 53 |
+
if st.button('Forecast Costs'):
    # Fit on the selected service's monthly history and project ahead;
    # forecast_costs is called with its default number of steps.
    monthly_costs, forecast = forecast_costs(service_name)

    st.write("### Forecasted Costs")
    st.write(forecast)

    # Plot the observed history and the forecast on one axis.
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(monthly_costs, label='Observed Costs')
    # X positions for the forecast: the date range starts at the last
    # observed date, so its first element is dropped with [1:] to leave
    # only the periods that follow it.
    forecast_dates = pd.date_range(
        start=monthly_costs.index[-1],
        periods=len(forecast) + 1,
        freq='M',
    )[1:]
    ax.plot(forecast_dates, forecast, label='Forecast', color='red')
    ax.set_title('Monthly Cost Forecast')
    ax.set_xlabel('Date')
    ax.set_ylabel('Cost ($)')
    ax.legend()
    st.pyplot(fig)
    # pyplot keeps every figure it creates alive until it is closed; release
    # this one so figures do not accumulate each time the script reruns.
    plt.close(fig)
|
| 68 |
+
|
| 69 |
+
# Cost Optimization
|
| 70 |
+
st.write("### Cost Optimization Analysis")
optimization_factor = st.slider('Optimization Factor (%)', min_value=0, max_value=100, value=25)
# Apply the same percentage reduction to every row.
df['Optimized Cost ($)'] = df['Cost ($)'] * (1 - optimization_factor / 100)

total_cost_before = df['Cost ($)'].sum()
total_cost_after = df['Optimized Cost ($)'].sum()
dollar_saving = total_cost_before - total_cost_after
# Guard the ratio: when the total is zero there is nothing to divide by,
# so report 0% instead of NaN.
if total_cost_before:
    cost_change_percentage = (dollar_saving / total_cost_before) * 100
else:
    cost_change_percentage = 0.0

st.write(f"Total Cost Before Optimization: ${total_cost_before:.2f}")
st.write(f"Total Cost After Optimization: ${total_cost_after:.2f}")
st.write(f"Percentage Change in Cost: {cost_change_percentage:.2f}%")
st.write(f"Dollar Saving: ${dollar_saving:.2f}")

# Optionally, show a chart of cost before and after optimization
fig, ax = plt.subplots(figsize=(10, 6))
costs_before = df.groupby('Service Description')['Cost ($)'].sum()
costs_after = df.groupby('Service Description')['Optimized Cost ($)'].sum()
# Take the bar labels from the grouped index so each label lines up with its
# own total. groupby orders (and NaN-filters) its keys, whereas
# df['Service Description'].unique() is in first-appearance order, which
# would pair labels with the wrong bars.
services = costs_before.index.to_numpy()

ax.barh(services, costs_before, label='Before Optimization', alpha=0.7)
ax.barh(services, costs_after, label='After Optimization', alpha=0.7)
ax.set_title('Cost Before and After Optimization')
ax.set_xlabel('Cost ($)')
ax.legend()
st.pyplot(fig)
# pyplot keeps every figure it creates alive until it is closed; release
# this one so figures do not accumulate each time the script reruns.
plt.close(fig)
|
| 96 |
+
|
| 97 |
+
|