File size: 4,520 Bytes
980e633
74bc39f
 
980e633
 
74bc39f
980e633
 
 
 
 
74bc39f
980e633
 
 
 
 
 
 
 
 
500d236
980e633
 
74bc39f
980e633
 
 
 
500d236
980e633
 
74bc39f
980e633
 
 
 
 
 
 
 
 
 
 
 
500d236
980e633
74bc39f
980e633
74bc39f
980e633
 
 
 
 
 
 
 
74bc39f
980e633
 
 
 
500d236
980e633
74bc39f
980e633
 
74bc39f
 
980e633
 
 
 
 
 
 
 
 
74bc39f
980e633
 
74bc39f
980e633
74bc39f
980e633
 
 
 
 
 
 
 
 
74bc39f
980e633
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74bc39f
980e633
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
# tools/ts_preprocess.py
import pandas as pd
import numpy as np
from datetime import date
from typing import Tuple

# -------------------------
# Cash-flow projections
# -------------------------
def estimated_monthly_interest(principal: float, annual_rate: float, n_months: int) -> float:
    return principal * (annual_rate / 12.0) * n_months

def project_deposit(principal: float, annual_rate: float, months: int) -> pd.DataFrame:
    start = pd.Timestamp(date.today()).normalize().replace(day=1)
    periods = pd.period_range(start=start, periods=max(int(months), 1), freq="M")
    df = pd.DataFrame({"period": periods.to_timestamp()})
    df["interest"] = principal * (annual_rate / 12.0)
    df["principal_cf"] = 0.0
    df.loc[df.index[-1], "principal_cf"] = principal
    df["cash_flow"] = df["interest"] + df["principal_cf"]
    return df

def project_asset(principal: float, annual_rate: float, months: int) -> pd.DataFrame:
    m = max(int(months), 1)
    r = annual_rate / 12.0
    if r == 0.0:
        pmt = principal / m
    else:
        pmt = principal * (r * (1 + r) ** m) / ((1 + r) ** m - 1)

    rows = []
    start = pd.Timestamp(date.today()).normalize().replace(day=1)
    bal = principal
    for i in range(1, m + 1):
        interest = bal * r
        principal_cf = pmt - interest
        bal = max(0.0, bal - principal_cf)
        rows.append({
            "period": start + pd.offsets.MonthEnd(i),
            "interest": float(interest),
            "principal_cf": float(principal_cf),
            "cash_flow": float(pmt),
            "balance": float(bal)
        })
    return pd.DataFrame(rows)

def liquidity_gap(cf: pd.DataFrame) -> pd.DataFrame:
    """
    Input columns: period, product, cash_flow
    """
    cf = cf.copy()
    cf["bucket"] = pd.PeriodIndex(cf["period"], freq="M").astype(str)
    piv = cf.pivot_table(index="bucket", columns="product", values="cash_flow",
                         aggfunc="sum", fill_value=0.0)
    # assets - liabilities(FD)
    piv["gap"] = piv.get("assets", 0.0) - piv.get("fd", 0.0)
    piv["cumulative_gap"] = piv["gap"].cumsum()
    return piv.reset_index()

# -------------------------
# Adapter + main builder
# -------------------------
REQUIRED_MASTER_COLS = {"Portfolio_value", "Interest_rate", "months", "product"}

def _adapt_masterdataset(df: pd.DataFrame) -> pd.DataFrame:
    """
    Accepts rows from main.masterdataset_v and returns a normalized frame
    with a monthly projection per row.
    """
    df = df.copy()
    assert REQUIRED_MASTER_COLS.issubset(df.columns), (
        f"DataFrame must include {REQUIRED_MASTER_COLS}, got {set(df.columns)}"
    )
    all_cf = []
    for _, r in df.iterrows():
        principal = float(r["Portfolio_value"])
        rate = float(r["Interest_rate"])
        months = int(r["months"]) if pd.notna(r["months"]) else max(int((r.get("days_to_maturity", 0) or 0) // 30), 1)
        prod = str(r["product"]).lower().strip()

        if prod == "fd":
            cf = project_deposit(principal, rate, months)
        else:
            cf = project_asset(principal, rate, months)

        # carry attributes
        cf["product"] = prod
        cf["contract_number"] = r.get("contract_number", None)
        cf["segments"] = r.get("segments", None)
        cf["currency"] = r.get("currency", None)
        cf["board_currency"] = r.get("board_currency", None)
        cf["Portfolio_value"] = principal
        cf["Interest_rate"] = rate
        all_cf.append(cf)

    return pd.concat(all_cf, ignore_index=True) if all_cf else pd.DataFrame(
        columns=["period", "interest", "principal_cf", "cash_flow", "product",
                 "contract_number", "segments", "currency", "board_currency",
                 "Portfolio_value", "Interest_rate"]
    )

def build_timeseries(df: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """
    Main entrypoint used by app.py:
      - If frame matches masterdataset_v, generate cashflows
      - Compute liquidity gap on the result
    Returns: (cashflows_df, gap_df)
    """
    if REQUIRED_MASTER_COLS.issubset(df.columns):
        cf = _adapt_masterdataset(df)
    else:
        # if already a projected cashflow frame (period/product/cash_flow), pass-through
        if {"period", "product", "cash_flow"}.issubset(df.columns):
            cf = df.copy()
        else:
            # unsupported schema
            return pd.DataFrame(), pd.DataFrame()

    gap = liquidity_gap(cf[["period", "product", "cash_flow"]])
    return cf, gap