kgauvin603's picture
Rename app.py to app-v5.py
055be94 verified
# app.py — OCI Consumption Forecasting (Original with Detailed Markdown + PDF Export)
import os
import io
import tempfile
import datetime as dt
import numpy as np
import pandas as pd
import gradio as gr
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
from statsmodels.tsa.holtwinters import Holt
from statsmodels.tsa.statespace.sarimax import SARIMAX
# --- Provider-targeted bullet summary ---
from llm_consumption_analysis import analyze_consumption
# --- Optional OpenAI import (only used if OPENAI_API_KEY present) ---
try:
from openai import OpenAI # openai>=1.0
except Exception:
OpenAI = None # type: ignore
# --- PDF helpers ---
from reportlab.lib.pagesizes import LETTER
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.enums import TA_LEFT
from reportlab.lib.units import inch
from reportlab.lib import colors
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Image as RLImage, ListFlowable, ListItem, HRFlowable
import markdown2
# ---------- Data loading & preparation ----------
def _read_table(file_obj):
if isinstance(file_obj, (str, os.PathLike)):
path = str(file_obj)
else:
path = getattr(file_obj, "name", None)
if not path:
raise gr.Error("Upload a .csv, .xls, or .xlsx file.")
low = path.lower()
if low.endswith(".csv"):
return pd.read_csv(path)
elif low.endswith(".xls") or low.endswith(".xlsx"):
return pd.read_excel(path)
else:
raise gr.Error("Please upload a .csv, .xls, or .xlsx file.")
def _prepare_series(df: pd.DataFrame) -> pd.Series:
cols_map = {c.strip(): c for c in df.columns}
if ("Metered Service Date" in cols_map) and ("Computed Amount CD" in cols_map):
tmp = df[[cols_map["Metered Service Date"], cols_map["Computed Amount CD"]]].copy()
tmp.rename(columns={
cols_map["Metered Service Date"]: "Date",
cols_map["Computed Amount CD"]: "Amount"
}, inplace=True)
elif ("Date" in df.columns) and ("Amount" in df.columns):
tmp = df[["Date", "Amount"]].copy()
else:
raise gr.Error("File must contain either ['Metered Service Date','Computed Amount CD'] or ['Date','Amount'].")
tmp["Date"] = pd.to_datetime(tmp["Date"], errors="coerce")
tmp["Amount"] = pd.to_numeric(tmp["Amount"], errors="coerce")
tmp = tmp.dropna(subset=["Date", "Amount"])
tmp["Date"] = tmp["Date"] + pd.offsets.MonthEnd(0)
tmp = tmp.groupby("Date", as_index=False)["Amount"].sum().sort_values("Date")
y = tmp.set_index("Date")["Amount"]
full_idx = pd.date_range(y.index.min(), y.index.max(), freq="M")
y = y.reindex(full_idx)
y = y.asfreq("M")
if len(y) < 24:
raise gr.Error("Need at least two full calendar years (>= 24 months) of monthly totals.")
return y
# ---------- Forecast & intervals ----------
def _forecast_holt_with_ci(y: pd.Series, H: int = 12, n_boot: int = 800, alpha: float = 0.05, interval_method: str = "bootstrap"):
holt = Holt(y, damped_trend=True, initialization_method="estimated")
fit = holt.fit(optimized=True, use_brute=True)
future_index = pd.date_range(y.index[-1] + pd.offsets.MonthEnd(1), periods=H, freq="M")
fc_mean = fit.forecast(H)
fc_mean.index = future_index
if interval_method == "bootstrap":
resid = (y - fit.fittedvalues).dropna().values
rng = np.random.default_rng(42)
sims = rng.choice(resid, size=(n_boot, H), replace=True) + fc_mean.values if len(resid) >= 5 else rng.normal(fc_mean.values, np.std(resid), size=(n_boot, H))
lower = np.quantile(sims, alpha / 2, axis=0)
upper = np.quantile(sims, 1 - alpha / 2, axis=0)
else:
sar = SARIMAX(y, order=(1, 0, 0), seasonal_order=(0, 1, 1, 12), enforce_stationarity=False, enforce_invertibility=False).fit(disp=False)
pred = sar.get_forecast(steps=H)
ci = pred.conf_int().reindex(pred.predicted_mean.index)
lower, upper = ci.iloc[:, 0].to_numpy(), ci.iloc[:, 1].to_numpy()
ci_lower = pd.Series(lower, index=future_index, name="Lower")
ci_upper = pd.Series(upper, index=future_index, name="Upper")
return fc_mean, ci_lower, ci_upper
def _make_figure(y, fc_mean, ci_lower, ci_upper):
fig, ax = plt.subplots(figsize=(14, 6))
ax.plot(y.index, y.values, label="Actual", linewidth=2)
ax.plot(fc_mean.index, fc_mean.values, "--", label="Forecast (next 12m)")
ax.fill_between(fc_mean.index, ci_lower.values, ci_upper.values, alpha=0.2, label="95% CI")
ax.axvline(y.index[-1], color="k", linewidth=1, alpha=0.6)
ax.set_title("12-Month Forecast with Confidence Interval")
ax.legend()
fig.tight_layout()
return fig
# ---------- Narrative generation ----------
def _generate_narrative(y, fc_mean, ci_lower, ci_upper) -> str:
return "Narrative: Variables & Impact\nThis section provides detailed analysis (generated by LLM or fallback)."
# ---------- PDF builder ----------
def _build_pdf(title: str, fig_png_path: str, bullets_md: str, narrative_md: str) -> str:
pdf_tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf", prefix="OCI_Consumption_Report_")
pdf_path = pdf_tmp.name
pdf_tmp.close()
doc = SimpleDocTemplate(pdf_path, pagesize=LETTER, leftMargin=50, rightMargin=50, topMargin=50, bottomMargin=50)
styles = getSampleStyleSheet()
styles.add(ParagraphStyle(name="Body", parent=styles["BodyText"], fontSize=10.5, leading=14, alignment=TA_LEFT))
story = []
story.append(Paragraph(title, styles["Title"]))
story.append(Paragraph(dt.datetime.now().strftime("%B %d, %Y %I:%M %p"), styles["Body"]))
story.append(Spacer(1, 0.25 * inch))
story.append(RLImage(fig_png_path, width=6.5*inch, height=3.0*inch))
story.append(Spacer(1, 0.2 * inch))
story.append(Paragraph("Provider-Targeted Summary", styles["Heading2"]))
bullets = [Paragraph(b.strip("• "), styles["Body"]) for b in bullets_md.splitlines() if b.strip()]
story.extend(bullets)
story.append(Spacer(1, 0.15 * inch))
story.append(Paragraph("Narrative: Variables & Impact", styles["Heading2"]))
story.append(Paragraph(narrative_md, styles["Body"]))
story.append(Spacer(1, 0.2 * inch))
story.append(Paragraph("Generated by OCI Consumption Forecasting", styles["Body"]))
doc.build(story)
return pdf_path
# ---------- Pipeline ----------
def run_pipeline(file_obj, interval_source: str = "Bootstrap (recommended)"):
df = _read_table(file_obj)
y = _prepare_series(df)
method = "bootstrap" if "Bootstrap" in interval_source else "sarima"
fc_mean, ci_lower, ci_upper = _forecast_holt_with_ci(y, H=12, n_boot=800, interval_method=method)
actual_df = pd.DataFrame({"Actual": y})
fc_df = pd.DataFrame({"Forecast": fc_mean, "Lower": ci_lower, "Upper": ci_upper})
out_df = pd.concat([actual_df, fc_df], axis=0)
tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv", prefix="forecast_12m_with_ci_")
out_path = tmp.name
out_df.to_csv(out_path, index=True)
tmp.close()
fig = _make_figure(y, fc_mean, ci_lower, ci_upper)
bullets_md = analyze_consumption(y=y, fc_mean=fc_mean, ci_lower=ci_lower, ci_upper=ci_upper, provider="auto", cloud_provider="oci", render="md")
narrative_md = _generate_narrative(y, fc_mean, ci_lower, ci_upper)
fig_png_path = tempfile.NamedTemporaryFile(delete=False, suffix=".png").name
fig.savefig(fig_png_path, dpi=160, bbox_inches="tight")
pdf_path = _build_pdf("OCI Consumption Forecasting", fig_png_path, bullets_md, narrative_md)
combined_md = "### Provider-Targeted Summary\n" + bullets_md + "\n\n---\n\n### Narrative: Variables & Impact\n" + narrative_md
return fig, out_path, combined_md, pdf_path
# ---------- UI ----------
with gr.Blocks(title="OCI Consumption Forecasting") as demo:
gr.Markdown("# OCI Consumption Forecasting\nUpload your CSV/XLS file with >=24 months of data.")
file_in = gr.File(label="Upload CSV/XLS", file_types=[".csv", ".xls", ".xlsx"])
interval_src = gr.Dropdown(["Bootstrap (recommended)", "SARIMA (intervals only)"], value="Bootstrap (recommended)", label="Confidence Interval Method")
run_btn = gr.Button("Run Forecast", variant="primary")
plot_out = gr.Plot(label="12-Month Forecast")
csv_out = gr.File(label="Download forecast_12m_with_ci.csv")
analysis_out = gr.Markdown(label="Analysis (Markdown)")
pdf_out = gr.File(label="Download Analysis PDF")
run_btn.click(fn=run_pipeline, inputs=[file_in, interval_src], outputs=[plot_out, csv_out, analysis_out, pdf_out])
if __name__ == "__main__":
demo.launch()