Spaces:
Sleeping
Sleeping
| # app.py — OCI Consumption Forecasting (Original with Detailed Markdown + PDF Export) | |
| import os | |
| import io | |
| import tempfile | |
| import datetime as dt | |
| import numpy as np | |
| import pandas as pd | |
| import gradio as gr | |
| import matplotlib | |
| matplotlib.use("Agg") | |
| import matplotlib.pyplot as plt | |
| from statsmodels.tsa.holtwinters import Holt | |
| from statsmodels.tsa.statespace.sarimax import SARIMAX | |
| # --- Provider-targeted bullet summary --- | |
| from llm_consumption_analysis import analyze_consumption | |
| # --- Optional OpenAI import (only used if OPENAI_API_KEY present) --- | |
| try: | |
| from openai import OpenAI # openai>=1.0 | |
| except Exception: | |
| OpenAI = None # type: ignore | |
| # --- PDF helpers --- | |
| from reportlab.lib.pagesizes import LETTER | |
| from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle | |
| from reportlab.lib.enums import TA_LEFT | |
| from reportlab.lib.units import inch | |
| from reportlab.lib import colors | |
| from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Image as RLImage, ListFlowable, ListItem, HRFlowable | |
| import markdown2 | |
| # ---------- Data loading & preparation ---------- | |
| def _read_table(file_obj): | |
| if isinstance(file_obj, (str, os.PathLike)): | |
| path = str(file_obj) | |
| else: | |
| path = getattr(file_obj, "name", None) | |
| if not path: | |
| raise gr.Error("Upload a .csv, .xls, or .xlsx file.") | |
| low = path.lower() | |
| if low.endswith(".csv"): | |
| return pd.read_csv(path) | |
| elif low.endswith(".xls") or low.endswith(".xlsx"): | |
| return pd.read_excel(path) | |
| else: | |
| raise gr.Error("Please upload a .csv, .xls, or .xlsx file.") | |
| def _prepare_series(df: pd.DataFrame) -> pd.Series: | |
| cols_map = {c.strip(): c for c in df.columns} | |
| if ("Metered Service Date" in cols_map) and ("Computed Amount CD" in cols_map): | |
| tmp = df[[cols_map["Metered Service Date"], cols_map["Computed Amount CD"]]].copy() | |
| tmp.rename(columns={ | |
| cols_map["Metered Service Date"]: "Date", | |
| cols_map["Computed Amount CD"]: "Amount" | |
| }, inplace=True) | |
| elif ("Date" in df.columns) and ("Amount" in df.columns): | |
| tmp = df[["Date", "Amount"]].copy() | |
| else: | |
| raise gr.Error("File must contain either ['Metered Service Date','Computed Amount CD'] or ['Date','Amount'].") | |
| tmp["Date"] = pd.to_datetime(tmp["Date"], errors="coerce") | |
| tmp["Amount"] = pd.to_numeric(tmp["Amount"], errors="coerce") | |
| tmp = tmp.dropna(subset=["Date", "Amount"]) | |
| tmp["Date"] = tmp["Date"] + pd.offsets.MonthEnd(0) | |
| tmp = tmp.groupby("Date", as_index=False)["Amount"].sum().sort_values("Date") | |
| y = tmp.set_index("Date")["Amount"] | |
| full_idx = pd.date_range(y.index.min(), y.index.max(), freq="M") | |
| y = y.reindex(full_idx) | |
| y = y.asfreq("M") | |
| if len(y) < 24: | |
| raise gr.Error("Need at least two full calendar years (>= 24 months) of monthly totals.") | |
| return y | |
| # ---------- Forecast & intervals ---------- | |
| def _forecast_holt_with_ci(y: pd.Series, H: int = 12, n_boot: int = 800, alpha: float = 0.05, interval_method: str = "bootstrap"): | |
| holt = Holt(y, damped_trend=True, initialization_method="estimated") | |
| fit = holt.fit(optimized=True, use_brute=True) | |
| future_index = pd.date_range(y.index[-1] + pd.offsets.MonthEnd(1), periods=H, freq="M") | |
| fc_mean = fit.forecast(H) | |
| fc_mean.index = future_index | |
| if interval_method == "bootstrap": | |
| resid = (y - fit.fittedvalues).dropna().values | |
| rng = np.random.default_rng(42) | |
| sims = rng.choice(resid, size=(n_boot, H), replace=True) + fc_mean.values if len(resid) >= 5 else rng.normal(fc_mean.values, np.std(resid), size=(n_boot, H)) | |
| lower = np.quantile(sims, alpha / 2, axis=0) | |
| upper = np.quantile(sims, 1 - alpha / 2, axis=0) | |
| else: | |
| sar = SARIMAX(y, order=(1, 0, 0), seasonal_order=(0, 1, 1, 12), enforce_stationarity=False, enforce_invertibility=False).fit(disp=False) | |
| pred = sar.get_forecast(steps=H) | |
| ci = pred.conf_int().reindex(pred.predicted_mean.index) | |
| lower, upper = ci.iloc[:, 0].to_numpy(), ci.iloc[:, 1].to_numpy() | |
| ci_lower = pd.Series(lower, index=future_index, name="Lower") | |
| ci_upper = pd.Series(upper, index=future_index, name="Upper") | |
| return fc_mean, ci_lower, ci_upper | |
| def _make_figure(y, fc_mean, ci_lower, ci_upper): | |
| fig, ax = plt.subplots(figsize=(14, 6)) | |
| ax.plot(y.index, y.values, label="Actual", linewidth=2) | |
| ax.plot(fc_mean.index, fc_mean.values, "--", label="Forecast (next 12m)") | |
| ax.fill_between(fc_mean.index, ci_lower.values, ci_upper.values, alpha=0.2, label="95% CI") | |
| ax.axvline(y.index[-1], color="k", linewidth=1, alpha=0.6) | |
| ax.set_title("12-Month Forecast with Confidence Interval") | |
| ax.legend() | |
| fig.tight_layout() | |
| return fig | |
| # ---------- Narrative generation ---------- | |
| def _generate_narrative(y, fc_mean, ci_lower, ci_upper) -> str: | |
| return "Narrative: Variables & Impact\nThis section provides detailed analysis (generated by LLM or fallback)." | |
| # ---------- PDF builder ---------- | |
| def _build_pdf(title: str, fig_png_path: str, bullets_md: str, narrative_md: str) -> str: | |
| pdf_tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf", prefix="OCI_Consumption_Report_") | |
| pdf_path = pdf_tmp.name | |
| pdf_tmp.close() | |
| doc = SimpleDocTemplate(pdf_path, pagesize=LETTER, leftMargin=50, rightMargin=50, topMargin=50, bottomMargin=50) | |
| styles = getSampleStyleSheet() | |
| styles.add(ParagraphStyle(name="Body", parent=styles["BodyText"], fontSize=10.5, leading=14, alignment=TA_LEFT)) | |
| story = [] | |
| story.append(Paragraph(title, styles["Title"])) | |
| story.append(Paragraph(dt.datetime.now().strftime("%B %d, %Y %I:%M %p"), styles["Body"])) | |
| story.append(Spacer(1, 0.25 * inch)) | |
| story.append(RLImage(fig_png_path, width=6.5*inch, height=3.0*inch)) | |
| story.append(Spacer(1, 0.2 * inch)) | |
| story.append(Paragraph("Provider-Targeted Summary", styles["Heading2"])) | |
| bullets = [Paragraph(b.strip("• "), styles["Body"]) for b in bullets_md.splitlines() if b.strip()] | |
| story.extend(bullets) | |
| story.append(Spacer(1, 0.15 * inch)) | |
| story.append(Paragraph("Narrative: Variables & Impact", styles["Heading2"])) | |
| story.append(Paragraph(narrative_md, styles["Body"])) | |
| story.append(Spacer(1, 0.2 * inch)) | |
| story.append(Paragraph("Generated by OCI Consumption Forecasting", styles["Body"])) | |
| doc.build(story) | |
| return pdf_path | |
| # ---------- Pipeline ---------- | |
| def run_pipeline(file_obj, interval_source: str = "Bootstrap (recommended)"): | |
| df = _read_table(file_obj) | |
| y = _prepare_series(df) | |
| method = "bootstrap" if "Bootstrap" in interval_source else "sarima" | |
| fc_mean, ci_lower, ci_upper = _forecast_holt_with_ci(y, H=12, n_boot=800, interval_method=method) | |
| actual_df = pd.DataFrame({"Actual": y}) | |
| fc_df = pd.DataFrame({"Forecast": fc_mean, "Lower": ci_lower, "Upper": ci_upper}) | |
| out_df = pd.concat([actual_df, fc_df], axis=0) | |
| tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv", prefix="forecast_12m_with_ci_") | |
| out_path = tmp.name | |
| out_df.to_csv(out_path, index=True) | |
| tmp.close() | |
| fig = _make_figure(y, fc_mean, ci_lower, ci_upper) | |
| bullets_md = analyze_consumption(y=y, fc_mean=fc_mean, ci_lower=ci_lower, ci_upper=ci_upper, provider="auto", cloud_provider="oci", render="md") | |
| narrative_md = _generate_narrative(y, fc_mean, ci_lower, ci_upper) | |
| fig_png_path = tempfile.NamedTemporaryFile(delete=False, suffix=".png").name | |
| fig.savefig(fig_png_path, dpi=160, bbox_inches="tight") | |
| pdf_path = _build_pdf("OCI Consumption Forecasting", fig_png_path, bullets_md, narrative_md) | |
| combined_md = "### Provider-Targeted Summary\n" + bullets_md + "\n\n---\n\n### Narrative: Variables & Impact\n" + narrative_md | |
| return fig, out_path, combined_md, pdf_path | |
| # ---------- UI ---------- | |
| with gr.Blocks(title="OCI Consumption Forecasting") as demo: | |
| gr.Markdown("# OCI Consumption Forecasting\nUpload your CSV/XLS file with >=24 months of data.") | |
| file_in = gr.File(label="Upload CSV/XLS", file_types=[".csv", ".xls", ".xlsx"]) | |
| interval_src = gr.Dropdown(["Bootstrap (recommended)", "SARIMA (intervals only)"], value="Bootstrap (recommended)", label="Confidence Interval Method") | |
| run_btn = gr.Button("Run Forecast", variant="primary") | |
| plot_out = gr.Plot(label="12-Month Forecast") | |
| csv_out = gr.File(label="Download forecast_12m_with_ci.csv") | |
| analysis_out = gr.Markdown(label="Analysis (Markdown)") | |
| pdf_out = gr.File(label="Download Analysis PDF") | |
| run_btn.click(fn=run_pipeline, inputs=[file_in, interval_src], outputs=[plot_out, csv_out, analysis_out, pdf_out]) | |
| if __name__ == "__main__": | |
| demo.launch() | |