ALM_LLM / tools /ts_preprocess.py
AshenH's picture
Update tools/ts_preprocess.py
980e633 verified
# tools/ts_preprocess.py
import pandas as pd
import numpy as np
from datetime import date
from typing import Tuple
# -------------------------
# Cash-flow projections
# -------------------------
def estimated_monthly_interest(principal: float, annual_rate: float, n_months: int) -> float:
return principal * (annual_rate / 12.0) * n_months
def project_deposit(principal: float, annual_rate: float, months: int) -> pd.DataFrame:
start = pd.Timestamp(date.today()).normalize().replace(day=1)
periods = pd.period_range(start=start, periods=max(int(months), 1), freq="M")
df = pd.DataFrame({"period": periods.to_timestamp()})
df["interest"] = principal * (annual_rate / 12.0)
df["principal_cf"] = 0.0
df.loc[df.index[-1], "principal_cf"] = principal
df["cash_flow"] = df["interest"] + df["principal_cf"]
return df
def project_asset(principal: float, annual_rate: float, months: int) -> pd.DataFrame:
m = max(int(months), 1)
r = annual_rate / 12.0
if r == 0.0:
pmt = principal / m
else:
pmt = principal * (r * (1 + r) ** m) / ((1 + r) ** m - 1)
rows = []
start = pd.Timestamp(date.today()).normalize().replace(day=1)
bal = principal
for i in range(1, m + 1):
interest = bal * r
principal_cf = pmt - interest
bal = max(0.0, bal - principal_cf)
rows.append({
"period": start + pd.offsets.MonthEnd(i),
"interest": float(interest),
"principal_cf": float(principal_cf),
"cash_flow": float(pmt),
"balance": float(bal)
})
return pd.DataFrame(rows)
def liquidity_gap(cf: pd.DataFrame) -> pd.DataFrame:
"""
Input columns: period, product, cash_flow
"""
cf = cf.copy()
cf["bucket"] = pd.PeriodIndex(cf["period"], freq="M").astype(str)
piv = cf.pivot_table(index="bucket", columns="product", values="cash_flow",
aggfunc="sum", fill_value=0.0)
# assets - liabilities(FD)
piv["gap"] = piv.get("assets", 0.0) - piv.get("fd", 0.0)
piv["cumulative_gap"] = piv["gap"].cumsum()
return piv.reset_index()
# -------------------------
# Adapter + main builder
# -------------------------
REQUIRED_MASTER_COLS = {"Portfolio_value", "Interest_rate", "months", "product"}
def _adapt_masterdataset(df: pd.DataFrame) -> pd.DataFrame:
"""
Accepts rows from main.masterdataset_v and returns a normalized frame
with a monthly projection per row.
"""
df = df.copy()
assert REQUIRED_MASTER_COLS.issubset(df.columns), (
f"DataFrame must include {REQUIRED_MASTER_COLS}, got {set(df.columns)}"
)
all_cf = []
for _, r in df.iterrows():
principal = float(r["Portfolio_value"])
rate = float(r["Interest_rate"])
months = int(r["months"]) if pd.notna(r["months"]) else max(int((r.get("days_to_maturity", 0) or 0) // 30), 1)
prod = str(r["product"]).lower().strip()
if prod == "fd":
cf = project_deposit(principal, rate, months)
else:
cf = project_asset(principal, rate, months)
# carry attributes
cf["product"] = prod
cf["contract_number"] = r.get("contract_number", None)
cf["segments"] = r.get("segments", None)
cf["currency"] = r.get("currency", None)
cf["board_currency"] = r.get("board_currency", None)
cf["Portfolio_value"] = principal
cf["Interest_rate"] = rate
all_cf.append(cf)
return pd.concat(all_cf, ignore_index=True) if all_cf else pd.DataFrame(
columns=["period", "interest", "principal_cf", "cash_flow", "product",
"contract_number", "segments", "currency", "board_currency",
"Portfolio_value", "Interest_rate"]
)
def build_timeseries(df: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame]:
"""
Main entrypoint used by app.py:
- If frame matches masterdataset_v, generate cashflows
- Compute liquidity gap on the result
Returns: (cashflows_df, gap_df)
"""
if REQUIRED_MASTER_COLS.issubset(df.columns):
cf = _adapt_masterdataset(df)
else:
# if already a projected cashflow frame (period/product/cash_flow), pass-through
if {"period", "product", "cash_flow"}.issubset(df.columns):
cf = df.copy()
else:
# unsupported schema
return pd.DataFrame(), pd.DataFrame()
gap = liquidity_gap(cf[["period", "product", "cash_flow"]])
return cf, gap