File size: 4,520 Bytes
980e633 74bc39f 980e633 74bc39f 980e633 74bc39f 980e633 500d236 980e633 74bc39f 980e633 500d236 980e633 74bc39f 980e633 500d236 980e633 74bc39f 980e633 74bc39f 980e633 74bc39f 980e633 500d236 980e633 74bc39f 980e633 74bc39f 980e633 74bc39f 980e633 74bc39f 980e633 74bc39f 980e633 74bc39f 980e633 74bc39f 980e633 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 |
# tools/ts_preprocess.py
import pandas as pd
import numpy as np
from datetime import date
from typing import Tuple
# -------------------------
# Cash-flow projections
# -------------------------
def estimated_monthly_interest(principal: float, annual_rate: float, n_months: int) -> float:
return principal * (annual_rate / 12.0) * n_months
def project_deposit(principal: float, annual_rate: float, months: int) -> pd.DataFrame:
start = pd.Timestamp(date.today()).normalize().replace(day=1)
periods = pd.period_range(start=start, periods=max(int(months), 1), freq="M")
df = pd.DataFrame({"period": periods.to_timestamp()})
df["interest"] = principal * (annual_rate / 12.0)
df["principal_cf"] = 0.0
df.loc[df.index[-1], "principal_cf"] = principal
df["cash_flow"] = df["interest"] + df["principal_cf"]
return df
def project_asset(principal: float, annual_rate: float, months: int) -> pd.DataFrame:
m = max(int(months), 1)
r = annual_rate / 12.0
if r == 0.0:
pmt = principal / m
else:
pmt = principal * (r * (1 + r) ** m) / ((1 + r) ** m - 1)
rows = []
start = pd.Timestamp(date.today()).normalize().replace(day=1)
bal = principal
for i in range(1, m + 1):
interest = bal * r
principal_cf = pmt - interest
bal = max(0.0, bal - principal_cf)
rows.append({
"period": start + pd.offsets.MonthEnd(i),
"interest": float(interest),
"principal_cf": float(principal_cf),
"cash_flow": float(pmt),
"balance": float(bal)
})
return pd.DataFrame(rows)
def liquidity_gap(cf: pd.DataFrame) -> pd.DataFrame:
"""
Input columns: period, product, cash_flow
"""
cf = cf.copy()
cf["bucket"] = pd.PeriodIndex(cf["period"], freq="M").astype(str)
piv = cf.pivot_table(index="bucket", columns="product", values="cash_flow",
aggfunc="sum", fill_value=0.0)
# assets - liabilities(FD)
piv["gap"] = piv.get("assets", 0.0) - piv.get("fd", 0.0)
piv["cumulative_gap"] = piv["gap"].cumsum()
return piv.reset_index()
# -------------------------
# Adapter + main builder
# -------------------------
REQUIRED_MASTER_COLS = {"Portfolio_value", "Interest_rate", "months", "product"}
def _adapt_masterdataset(df: pd.DataFrame) -> pd.DataFrame:
"""
Accepts rows from main.masterdataset_v and returns a normalized frame
with a monthly projection per row.
"""
df = df.copy()
assert REQUIRED_MASTER_COLS.issubset(df.columns), (
f"DataFrame must include {REQUIRED_MASTER_COLS}, got {set(df.columns)}"
)
all_cf = []
for _, r in df.iterrows():
principal = float(r["Portfolio_value"])
rate = float(r["Interest_rate"])
months = int(r["months"]) if pd.notna(r["months"]) else max(int((r.get("days_to_maturity", 0) or 0) // 30), 1)
prod = str(r["product"]).lower().strip()
if prod == "fd":
cf = project_deposit(principal, rate, months)
else:
cf = project_asset(principal, rate, months)
# carry attributes
cf["product"] = prod
cf["contract_number"] = r.get("contract_number", None)
cf["segments"] = r.get("segments", None)
cf["currency"] = r.get("currency", None)
cf["board_currency"] = r.get("board_currency", None)
cf["Portfolio_value"] = principal
cf["Interest_rate"] = rate
all_cf.append(cf)
return pd.concat(all_cf, ignore_index=True) if all_cf else pd.DataFrame(
columns=["period", "interest", "principal_cf", "cash_flow", "product",
"contract_number", "segments", "currency", "board_currency",
"Portfolio_value", "Interest_rate"]
)
def build_timeseries(df: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame]:
"""
Main entrypoint used by app.py:
- If frame matches masterdataset_v, generate cashflows
- Compute liquidity gap on the result
Returns: (cashflows_df, gap_df)
"""
if REQUIRED_MASTER_COLS.issubset(df.columns):
cf = _adapt_masterdataset(df)
else:
# if already a projected cashflow frame (period/product/cash_flow), pass-through
if {"period", "product", "cash_flow"}.issubset(df.columns):
cf = df.copy()
else:
# unsupported schema
return pd.DataFrame(), pd.DataFrame()
gap = liquidity_gap(cf[["period", "product", "cash_flow"]])
return cf, gap
|