File size: 12,622 Bytes
5f22115
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
# app.py
# Hugging Face Space - Freddie Mac PMMS Visualizer
# Downloads the CSV at runtime and provides several interactive views.
#
# Source CSV: https://www.freddiemac.com/pmms/docs/PMMS_history.csv

import io
import os
from functools import lru_cache
from typing import List, Tuple

import gradio as gr
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import requests


# Address of the PMMS history CSV that load_pmms() downloads at runtime.
PMMS_URL = "https://www.freddiemac.com/pmms/docs/PMMS_history.csv"


# ---------- Data Loading & Utilities ----------
@lru_cache(maxsize=1)
def load_pmms() -> pd.DataFrame:
    """
    Download the PMMS CSV and return a cleaned DataFrame.

    - The first column is exposed as a datetime 'Date' column (it is renamed
      when the file does not already call it 'Date'); rows whose date cannot
      be parsed are dropped and the remainder is sorted chronologically.
    - Every other column has '%' and ',' stripped and is coerced to numeric;
      columns that end up entirely NaN are dropped.

    The result is memoised for the lifetime of the process, so every caller
    receives the same DataFrame object and must not mutate it in place.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    resp = requests.get(PMMS_URL, timeout=30)
    resp.raise_for_status()
    raw = resp.content

    # Parse with the default (UTF-8) codec first; if the payload contains
    # bytes that are not valid UTF-8, retry with latin-1, which accepts any
    # byte sequence.
    try:
        df = pd.read_csv(io.BytesIO(raw))
    except UnicodeDecodeError:
        df = pd.read_csv(io.BytesIO(raw), encoding="latin-1")

    # If 'Date' isn't present but a first column exists, rename it
    if "Date" not in df.columns:
        df = df.rename(columns={df.columns[0]: "Date"})

    # Normalize date. `infer_datetime_format` is intentionally not passed: it
    # is deprecated since pandas 2.0, where format inference is the default.
    df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
    df = df.dropna(subset=["Date"]).sort_values("Date")

    # Standardize numeric columns
    for c in df.columns:
        if c == "Date":
            continue
        # Remove typical artifacts (%, commas, etc.) then to numeric;
        # anything that still is not a number becomes NaN.
        cleaned = (
            df[c]
            .astype(str)
            .str.replace("%", "", regex=False)
            .str.replace(",", "", regex=False)
        )
        df[c] = pd.to_numeric(cleaned, errors="coerce")

    # Drop columns that hold no usable value after coercion
    non_empty = [c for c in df.columns if c == "Date" or df[c].notna().any()]
    df = df[non_empty]

    return df.reset_index(drop=True)


def available_series(df: pd.DataFrame) -> List[str]:
    """List the names of all numeric columns, skipping the 'Date' column."""
    names: List[str] = []
    for name in df.columns:
        if name == "Date":
            continue
        if pd.api.types.is_numeric_dtype(df[name]):
            names.append(name)
    return names


def clip_by_date(df: pd.DataFrame, start: pd.Timestamp, end: pd.Timestamp) -> pd.DataFrame:
    """
    Keep the rows whose 'Date' lies in the inclusive range [start, end].

    Either bound may be None to leave that side open; when both are None the
    input frame itself is returned.
    """
    has_start = start is not None
    has_end = end is not None
    if not (has_start or has_end):
        return df

    dates = df["Date"]
    if has_start and has_end:
        keep = (dates >= start) & (dates <= end)
    elif has_start:
        keep = dates >= start
    else:
        keep = dates <= end
    return df[keep]


def resample_df(df: pd.DataFrame, how: str) -> pd.DataFrame:
    """
    Resample the numeric series by a pandas rule and average within each bin.

    Args:
        df: frame with a datetime 'Date' column.
        how: resampling rule such as 'W', 'M', 'Q' or 'A'. An empty value or
            the string 'None' returns `df` unchanged.

    Returns:
        A frame with 'Date' (the bin labels) followed by the numeric columns;
        non-numeric columns are not carried over.
    """
    if not how or how == "None":
        return df

    # pandas 2.2 deprecated the period-end aliases 'M', 'Q', 'A'/'Y' in favour
    # of 'ME', 'QE', 'YE'. Prefer the new spelling when this pandas accepts
    # it, and keep the legacy alias on older releases that do not know it.
    rule = how
    modern_alias = {"M": "ME", "Q": "QE", "A": "YE", "Y": "YE"}.get(how)
    if modern_alias is not None:
        try:
            pd.tseries.frequencies.to_offset(modern_alias)
        except ValueError:
            pass
        else:
            rule = modern_alias

    # Use mean for typical rate series
    numeric_cols = available_series(df)
    tmp = df.set_index("Date")[numeric_cols].resample(rule).mean()
    return tmp.reset_index()


def moving_average(df: pd.DataFrame, window: int, cols: List[str]) -> pd.DataFrame:
    """
    Smooth the selected columns with a trailing moving average.

    Args:
        df: input frame; it is never modified.
        window: number of periods to average over. Fractional values are
            truncated. None or anything that truncates to <= 1 disables
            smoothing and returns `df` itself.
        cols: columns to smooth; names missing from `df` are ignored.

    Returns:
        `df` unchanged when smoothing is disabled, otherwise a copy in which
        the selected columns hold the rolling mean.
    """
    if window is None:
        return df
    # UI widgets may deliver whole numbers as floats (8.0); rolling() only
    # accepts integer windows and would raise ValueError on a float.
    window = int(window)
    if window <= 1:
        return df
    out = df.copy()
    for c in cols:
        if c in out.columns:
            # min_periods=1 keeps the leading rows instead of turning them into NaN
            out[c] = out[c].rolling(window=window, min_periods=1).mean()
    return out


def _periods_per_year(dates: pd.Series) -> int:
    """Estimate how many rows span one year from the median date spacing (52 if unknown)."""
    step = dates.diff().median()
    if pd.isna(step):
        # Fewer than two dated rows: keep the weekly default.
        return 52
    step_days = step / pd.Timedelta(days=1)
    if step_days <= 0:
        return 52
    return max(1, int(round(365.25 / step_days)))


def yoy_change(df: pd.DataFrame, cols: List[str]) -> pd.DataFrame:
    """
    Year-over-year change in percentage points for selected columns.

    Each value is replaced by its difference from the value one year earlier.
    The lag is expressed in rows and derived from the spacing of the 'Date'
    column, so weekly data uses 52 rows, monthly 12, quarterly 4 and annual 1;
    a fixed lag of 52 would compare monthly data against 52 months earlier.

    Args:
        df: frame with a datetime 'Date' column.
        cols: columns to convert; names missing from `df` are ignored.

    Returns:
        A new frame with 'Date' restored as a regular column; rows without a
        year-earlier counterpart are NaN.
    """
    lag = _periods_per_year(df["Date"])
    out = df.set_index("Date").copy()
    for c in cols:
        if c in out.columns:
            out[c] = out[c] - out[c].shift(lag)
    return out.reset_index()


def monthly_heatmap_df(df: pd.DataFrame, col: str) -> pd.DataFrame:
    """
    Build the Year x Month table of monthly means that feeds the heatmap.

    Rows are years in descending order; columns are abbreviated month names
    for the months present in the data. The input frame is left untouched.
    """
    dates = df["Date"]
    keyed = df.assign(Year=dates.dt.year, Month=dates.dt.month)
    averages = keyed.groupby(["Year", "Month"], as_index=False)[col].mean()

    table = averages.pivot(index="Year", columns="Month", values=col)
    table = table.sort_index(ascending=False)

    # Translate month numbers (1..12) into short names such as 'Jan'.
    month_labels = {}
    for month_no in table.columns:
        month_labels[month_no] = pd.to_datetime(str(month_no), format="%m").strftime("%b")
    return table.rename(columns=month_labels)


def make_download(df: pd.DataFrame) -> str:
    """
    Write `df` as CSV into a fresh temporary directory and return the path.

    Every call gets its own directory, so concurrent sessions cannot
    overwrite each other's export; the file name itself stays
    'filtered_pmms.csv' so downloads keep a recognisable name.

    Note: the temporary directories are not removed by this function; they
    are left to the operating system's temp-dir cleanup.
    """
    import tempfile  # local import: only this helper needs it

    out_dir = tempfile.mkdtemp(prefix="pmms_")
    path = os.path.join(out_dir, "filtered_pmms.csv")
    df.to_csv(path, index=False)
    return path


# ---------- Plot Builders ----------
def make_line_chart(df: pd.DataFrame, cols: List[str], title: str) -> go.Figure:
    """Draw one line per requested column against 'Date'; unknown columns are skipped."""
    traces = [
        go.Scatter(x=df["Date"], y=df[name], mode="lines", name=name)
        for name in cols
        if name in df.columns
    ]
    figure = go.Figure(data=traces)

    layout = dict(
        title=title,
        xaxis_title="Date",
        yaxis_title="Rate (%)",
        hovermode="x unified",
        template="plotly",
    )
    figure.update_layout(**layout)
    return figure


def make_histogram(df: pd.DataFrame, cols: List[str], title: str) -> go.Figure:
    """Overlay one semi-transparent 50-bin histogram per requested column; unknown columns are skipped."""
    traces = [
        go.Histogram(x=df[name], name=name, opacity=0.75, nbinsx=50)
        for name in cols
        if name in df.columns
    ]
    figure = go.Figure(data=traces)

    layout = dict(
        title=title,
        xaxis_title="Rate (%)",
        yaxis_title="Count",
        barmode="overlay",
        template="plotly",
    )
    figure.update_layout(**layout)
    return figure


def make_heatmap(pivot: pd.DataFrame, series_name: str) -> go.Figure:
    """Render a pivot table as a heatmap: columns on x, index (as text) on y, cells as colour."""
    x_labels = list(pivot.columns)
    y_labels = list(pivot.index.astype(str))
    cells = go.Heatmap(
        z=pivot.values,
        x=x_labels,
        y=y_labels,
        coloraxis="coloraxis",
    )

    figure = go.Figure(data=cells)
    layout = dict(
        title=f"Monthly Average Heatmap — {series_name}",
        xaxis_title="Month",
        yaxis_title="Year",
        coloraxis=dict(colorscale="Viridis"),
        template="plotly",
    )
    figure.update_layout(**layout)
    return figure


# ---------- Gradio Callbacks ----------
def update_overview(series: List[str], resample: str, ma_window: int, date_range: Tuple[str, str]):
    """
    Callback for the Overview tab.

    Clips the data to the date range, resamples it, smooths the selected
    series and returns (line figure, path of the exported CSV, first 10 rows).
    With no series selected, the first available one is used.
    """
    frame = load_pmms()
    chosen = series if series else available_series(frame)[:1]  # fallback to first series

    # Empty or missing bounds leave that side of the range open.
    lower = pd.to_datetime(date_range[0]) if date_range and date_range[0] else None
    upper = pd.to_datetime(date_range[1]) if date_range and date_range[1] else None

    clipped = clip_by_date(frame, lower, upper)
    resampled = resample_df(clipped, resample)
    smoothed = moving_average(resampled, ma_window, chosen)

    figure = make_line_chart(smoothed, chosen, "Mortgage Rates Over Time")
    export_cols = ["Date"] + [name for name in chosen if name in smoothed.columns]
    csv_path = make_download(smoothed[export_cols])
    preview = smoothed.head(10)
    return figure, csv_path, preview


def update_yoy(series: List[str], resample: str, date_range: Tuple[str, str]):
    """
    Callback for the YoY tab: year-over-year change of the selected series.

    Args:
        series: columns to plot; with none selected, the first available
            series is used.
        resample: pandas resampling rule, or 'None' to keep the raw data.
        date_range: (start, end) strings; an empty or missing bound leaves
            that side of the range open.

    Returns:
        A line figure covering the requested date range.
    """
    df = load_pmms()
    if not series:
        series = available_series(df)[:1]
    start, end = None, None
    if date_range and date_range[0]:
        start = pd.to_datetime(date_range[0])
    if date_range and date_range[1]:
        end = pd.to_datetime(date_range[1])

    # The first year of any window needs the year before it as a baseline.
    # Clipping at `start` before computing the lag would blank that year out,
    # so keep two extra years of history (enough for complete weekly, monthly,
    # quarterly and annual baseline bins) and trim them after the calculation.
    history_start = None if start is None else start - pd.DateOffset(years=2)

    df = clip_by_date(df, history_start, end)
    df = resample_df(df, resample)
    df_yoy = yoy_change(df, series)
    df_yoy = clip_by_date(df_yoy, start, None)

    fig = make_line_chart(df_yoy, series, "Year-over-Year Change (percentage points)")
    return fig


def update_distribution(series: List[str], resample: str, date_range: Tuple[str, str]):
    """
    Callback for the Distribution tab.

    Clips the data to the date range, resamples it and returns a histogram of
    the selected series. With no series selected, the first available one is
    used.
    """
    frame = load_pmms()
    chosen = series if series else available_series(frame)[:1]

    # Empty or missing bounds leave that side of the range open.
    lower = pd.to_datetime(date_range[0]) if date_range and date_range[0] else None
    upper = pd.to_datetime(date_range[1]) if date_range and date_range[1] else None

    prepared = resample_df(clip_by_date(frame, lower, upper), resample)
    return make_histogram(prepared, chosen, "Distribution of Rates")


def update_heatmap(series_one: str, resample: str, date_range: Tuple[str, str]):
    """
    Callback for the Monthly Heatmap tab.

    Returns the Year x Month heatmap of one series. With no series given, the
    first available one is used; if there is none at all, an empty figure is
    returned.
    """
    frame = load_pmms()
    if not series_one:
        candidates = available_series(frame)
        if not candidates:
            return go.Figure()
        series_one = candidates[0]

    # Empty or missing bounds leave that side of the range open.
    lower = pd.to_datetime(date_range[0]) if date_range and date_range[0] else None
    upper = pd.to_datetime(date_range[1]) if date_range and date_range[1] else None

    prepared = resample_df(clip_by_date(frame, lower, upper), resample)
    table = monthly_heatmap_df(prepared, series_one)
    return make_heatmap(table, series_one)


def get_defaults():
    """Return (data frame, numeric series names, (first date, last date) as strings) for seeding the UI."""
    frame = load_pmms()
    names = available_series(frame)
    dates = frame["Date"]
    first_day = dates.min().date()
    last_day = dates.max().date()
    return frame, names, (str(first_day), str(last_day))


# ---------- UI ----------
# Layout: shared controls on top, one tab per view, each with its own refresh
# button. All four views are also rendered once when the page loads.
with gr.Blocks(title="Freddie Mac PMMS — Interactive Visualizer") as demo:
    gr.Markdown(
        """
        # Freddie Mac Primary Mortgage Market Survey (PMMS) — Interactive Visualizer
        - Data source: Freddie Mac PMMS (downloaded live at runtime)
        - Explore line charts, YoY deltas, distributions, and a monthly heatmap.
        - Use resampling and moving averages to smooth the series.
        """
    )

    # Downloads the data once at start-up to seed the controls.
    df0, cols0, full_range = get_defaults()

    with gr.Row():
        series = gr.CheckboxGroup(choices=cols0, value=cols0[:1], label="Select rate series (multi-select)")
        series_one = gr.Dropdown(choices=cols0, value=(cols0[0] if cols0 else None), label="Heatmap series")
    with gr.Row():
        resample = gr.Dropdown(
            choices=["None", "W (Weekly)", "M (Monthly)", "Q (Quarterly)", "A (Annual)"],
            value="W (Weekly)",
            label="Resample frequency",
            info="Choose an aggregation frequency for the chart calculations."
        )
        ma_window = gr.Slider(1, 52, value=8, step=1, label="Moving average window (periods)", info="Set to 1 for no smoothing.")
        # Gradio has no date-range component, so the range is entered as two
        # plain text boxes; the callbacks parse them with pd.to_datetime.
        start_date = gr.Textbox(
            value=full_range[0],
            label="Start date (YYYY-MM-DD, inclusive)",
            info="Leave empty for no lower bound."
        )
        end_date = gr.Textbox(
            value=full_range[1],
            label="End date (YYYY-MM-DD, inclusive)",
            info="Leave empty for no upper bound."
        )

    # Normalize the resample selection into pandas rule inside callbacks
    def _normalize_resample(x: str) -> str:
        mapping = {
            "None": "None",
            "W (Weekly)": "W",
            "M (Monthly)": "M",
            "Q (Quarterly)": "Q",
            "A (Annual)": "A",
        }
        return mapping.get(x or "None", "None")

    # Hidden helpers to route normalized resample to callbacks
    resample_hidden = gr.State(value=_normalize_resample(resample.value))

    def _resample_state(x):
        return _normalize_resample(x)

    resample.change(_resample_state, inputs=resample, outputs=resample_hidden)

    # The callbacks expect the range as one (start, end) tuple, so the two text
    # boxes are mirrored into a State that is handed to them as `date_range`.
    date_range = gr.State(value=full_range)

    def _date_range_state(start, end):
        # Blank input means "no bound on this side".
        return ((start or "").strip() or None, (end or "").strip() or None)

    start_date.change(_date_range_state, inputs=[start_date, end_date], outputs=date_range)
    end_date.change(_date_range_state, inputs=[start_date, end_date], outputs=date_range)

    with gr.Tab("Overview"):
        fig_overview = gr.Plot(label="Mortgage Rates Over Time")
        download_csv = gr.File(label="Download filtered CSV")
        head_df = gr.Dataframe(interactive=False, label="Preview (first 10 rows)")
        gr.Markdown("Tip: Use the controls above to pick series, resample, smoothing, and date range.")
        btn_update_1 = gr.Button("Refresh Overview")

    with gr.Tab("YoY Change"):
        fig_yoy = gr.Plot(label="Year-over-Year Change")
        btn_update_2 = gr.Button("Refresh YoY")

    with gr.Tab("Distribution"):
        fig_hist = gr.Plot(label="Histogram")
        btn_update_3 = gr.Button("Refresh Distribution")

    with gr.Tab("Monthly Heatmap"):
        fig_heat = gr.Plot(label="Monthly Average Heatmap")
        btn_update_4 = gr.Button("Refresh Heatmap")

    # Wire callbacks
    btn_update_1.click(
        update_overview,
        inputs=[series, resample_hidden, ma_window, date_range],
        outputs=[fig_overview, download_csv, head_df],
        show_progress="minimal",
    )

    btn_update_2.click(
        update_yoy,
        inputs=[series, resample_hidden, date_range],
        outputs=[fig_yoy],
        show_progress="minimal",
    )

    btn_update_3.click(
        update_distribution,
        inputs=[series, resample_hidden, date_range],
        outputs=[fig_hist],
        show_progress="minimal",
    )

    btn_update_4.click(
        update_heatmap,
        inputs=[series_one, resample_hidden, date_range],
        outputs=[fig_heat],
        show_progress="minimal",
    )

    # Auto-run once on load for a nice first view. Page load is exposed as the
    # Blocks-level `load` event.
    demo.load(
        update_overview,
        inputs=[series, resample_hidden, ma_window, date_range],
        outputs=[fig_overview, download_csv, head_df],
    )
    demo.load(
        update_yoy,
        inputs=[series, resample_hidden, date_range],
        outputs=[fig_yoy],
    )
    demo.load(
        update_distribution,
        inputs=[series, resample_hidden, date_range],
        outputs=[fig_hist],
    )
    demo.load(
        update_heatmap,
        inputs=[series_one, resample_hidden, date_range],
        outputs=[fig_heat],
    )

# Start the Gradio server only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    demo.launch()