dibend committed on
Commit
5f22115
·
verified ·
1 Parent(s): 4309516

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +385 -0
app.py ADDED
@@ -0,0 +1,385 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ # Hugging Face Space - Freddie Mac PMMS Visualizer
3
+ # Downloads the CSV at runtime and provides several interactive views.
4
+ #
5
+ # Source CSV: https://www.freddiemac.com/pmms/docs/PMMS_history.csv
6
+
7
+ import io
8
+ import os
9
+ from functools import lru_cache
10
+ from typing import List, Tuple
11
+
12
+ import gradio as gr
13
+ import numpy as np
14
+ import pandas as pd
15
+ import plotly.express as px
16
+ import plotly.graph_objects as go
17
+ import requests
18
+
19
+
20
+ PMMS_URL = "https://www.freddiemac.com/pmms/docs/PMMS_history.csv"
21
+
22
+
23
+ # ---------- Data Loading & Utilities ----------
24
@lru_cache(maxsize=1)
def load_pmms() -> pd.DataFrame:
    """
    Download the PMMS CSV and return a cleaned DataFrame.

    - Ensures the first column is a datetime 'Date'; rows whose date cannot
      be parsed are dropped and the frame is sorted chronologically.
    - Coerces every other column to numeric after stripping '%' and ','.
    - Drops columns that hold no numeric value at all.

    The result is memoized for the lifetime of the process (lru_cache), so
    every caller receives the SAME DataFrame object and must not modify it
    in place.

    Raises:
        requests.RequestException: on network failure or an HTTP error status.
    """
    resp = requests.get(PMMS_URL, timeout=30)
    resp.raise_for_status()

    df = pd.read_csv(io.BytesIO(resp.content))

    # If 'Date' isn't present, treat the first column as the date column.
    if "Date" not in df.columns:
        df = df.rename(columns={df.columns[0]: "Date"})

    # `infer_datetime_format` is deprecated since pandas 2.0 (format inference
    # is now the default behavior), so it is intentionally not passed.
    df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
    df = df.dropna(subset=["Date"]).sort_values("Date")

    # Standardize numeric columns: strip typical artifacts, then coerce.
    for c in df.columns:
        if c == "Date":
            continue
        cleaned = (
            df[c]
            .astype(str)
            .str.replace("%", "", regex=False)
            .str.replace(",", "", regex=False)
        )
        df[c] = pd.to_numeric(cleaned, errors="coerce")

    # Keep 'Date' plus every column that has at least one numeric value.
    non_empty = [c for c in df.columns if c == "Date" or df[c].notna().any()]
    return df[non_empty].reset_index(drop=True)
63
+
64
+
65
def available_series(df: pd.DataFrame) -> List[str]:
    """List the names of all numeric columns in `df`, skipping 'Date'."""
    names: List[str] = []
    for column in df.columns:
        if column == "Date":
            continue
        if pd.api.types.is_numeric_dtype(df[column]):
            names.append(column)
    return names
68
+
69
+
70
def clip_by_date(df: pd.DataFrame, start: pd.Timestamp, end: pd.Timestamp) -> pd.DataFrame:
    """
    Keep the rows whose 'Date' lies within [start, end] (both inclusive).

    A bound given as None is treated as open; when both bounds are None the
    input frame itself is returned unfiltered.
    """
    if start is None and end is None:
        return df

    dates = df["Date"]
    if start is None:
        keep = dates <= end
    elif end is None:
        keep = dates >= start
    else:
        keep = (dates >= start) & (dates <= end)
    return df[keep]
78
+
79
+
80
def resample_df(df: pd.DataFrame, how: str) -> pd.DataFrame:
    """
    Resample by rule if provided ('W','M','Q','A'); otherwise return original.

    Numeric columns are aggregated with the mean of each period. When `how`
    is empty or the literal string "None", `df` is returned unchanged.

    pandas 2.2 deprecated the period-end aliases 'M', 'Q' and 'A'/'Y' in
    favour of 'ME', 'QE' and 'YE'. The legacy spellings are still accepted
    here and translated when the installed pandas understands the new ones,
    so the same call works on both older and newer pandas.
    """
    if not how or how == "None":
        return df

    legacy_to_modern = {"M": "ME", "Q": "QE", "A": "YE", "Y": "YE"}
    rule = how
    modern = legacy_to_modern.get(how)
    if modern is not None:
        try:
            pd.tseries.frequencies.to_offset(modern)
        except ValueError:
            # Older pandas does not know the new alias; keep the legacy one.
            pass
        else:
            rule = modern

    # Use mean for typical rate series
    numeric_cols = available_series(df)
    tmp = df.set_index("Date")[numeric_cols].resample(rule).mean()
    return tmp.reset_index()
88
+
89
+
90
def moving_average(df: pd.DataFrame, window: int, cols: List[str]) -> pd.DataFrame:
    """
    Apply a trailing moving average to the selected columns.

    Args:
        df: Input frame; it is never modified in place.
        window: Number of periods to average over. UI sliders may deliver the
            value as a float (e.g. 8.0), so it is coerced to int because
            `rolling()` rejects non-integer windows. None or a value <= 1
            disables smoothing and returns `df` itself.
        cols: Columns to smooth; names missing from `df` are ignored.

    Returns:
        A copy of `df` with the selected columns smoothed. `min_periods=1`
        is used, so the first rows average over the values available so far
        instead of becoming NaN.
    """
    if window is None:
        return df
    window = int(window)
    if window <= 1:
        return df

    out = df.copy()
    for c in cols:
        if c in out.columns:
            out[c] = out[c].rolling(window=window, min_periods=1).mean()
    return out
99
+
100
+
101
def yoy_change(df: pd.DataFrame, cols: List[str], tolerance_days: int = 7) -> pd.DataFrame:
    """
    Year-over-year change in percentage points for selected columns.

    Each row is compared with the observation closest to the same calendar
    date one year earlier. Matching by date rather than by a fixed row
    offset keeps the result correct for any sampling frequency (weekly,
    monthly, quarterly, annual); a fixed 52-row shift is only one year for
    weekly data.

    Args:
        df: Frame with a datetime 'Date' column; it is not modified.
        cols: Columns to convert; names missing from `df` are ignored.
        tolerance_days: Maximum distance, in days, between the ideal
            "one year ago" date and the observation used for it. Rows with
            no observation inside that window get NaN.

    Returns:
        A copy of `df`, sorted by 'Date', in which each selected column
        holds `value - value_one_year_earlier`.
    """
    out = df.sort_values("Date", kind="stable").reset_index(drop=True)
    present = [c for c in dict.fromkeys(cols) if c in out.columns and c != "Date"]
    if not present or out.empty:
        return out

    # Right side: the same observations, keyed by their own date.
    prior = out[["Date"] + present].rename(columns={"Date": "_prior_date"})
    # Left side: for every row, the date we would like a prior value for.
    targets = pd.DataFrame({"_target": out["Date"] - pd.DateOffset(years=1)})

    matched = pd.merge_asof(
        targets,
        prior,
        left_on="_target",
        right_on="_prior_date",
        direction="nearest",
        tolerance=pd.Timedelta(days=tolerance_days),
    )

    # merge_asof preserves the left row order, so positions line up with `out`.
    for c in present:
        out[c] = out[c].to_numpy() - matched[c].to_numpy()
    return out
108
+
109
+
110
def monthly_heatmap_df(df: pd.DataFrame, col: str) -> pd.DataFrame:
    """
    Build a Year x Month table of monthly means of `col` for a heatmap.

    Rows are years, most recent first; columns are abbreviated month names
    ('Jan', 'Feb', ...) for the months present in the data.
    """
    dates = df["Date"]
    keyed = df.assign(Year=dates.dt.year, Month=dates.dt.month)
    monthly = keyed.groupby(["Year", "Month"], as_index=False)[col].mean()

    table = monthly.pivot(index="Year", columns="Month", values=col)
    table = table.sort_index(ascending=False)

    # Translate month numbers (1..12) into short month names.
    labels = {
        m: pd.Timestamp(year=1900, month=int(m), day=1).strftime("%b")
        for m in table.columns
    }
    return table.rename(columns=labels)
119
+
120
+
121
def make_download(df: pd.DataFrame) -> str:
    """
    Write `df` as CSV into a fresh temporary directory and return the path.

    Every call gets its own directory, so concurrent sessions cannot
    overwrite each other's download (a fixed file name in the working
    directory would be shared by all users). The file keeps the friendly
    name 'filtered_pmms.csv'. The directory is not removed here; cleanup is
    left to the operating system's temp-file handling.
    """
    import tempfile

    out_dir = tempfile.mkdtemp(prefix="pmms_")
    path = os.path.join(out_dir, "filtered_pmms.csv")
    df.to_csv(path, index=False)
    return path
126
+
127
+
128
+ # ---------- Plot Builders ----------
129
def make_line_chart(df: pd.DataFrame, cols: List[str], title: str) -> go.Figure:
    """
    Draw one line per requested column against 'Date'.

    Columns named in `cols` that are absent from `df` are skipped.
    """
    traces = [
        go.Scatter(x=df["Date"], y=df[name], mode="lines", name=name)
        for name in cols
        if name in df.columns
    ]
    chart = go.Figure(data=traces)
    layout = dict(
        title=title,
        xaxis_title="Date",
        yaxis_title="Rate (%)",
        hovermode="x unified",
        template="plotly",
    )
    chart.update_layout(**layout)
    return chart
142
+
143
+
144
def make_histogram(df: pd.DataFrame, cols: List[str], title: str) -> go.Figure:
    """
    Overlay one semi-transparent 50-bin histogram per requested column.

    Columns named in `cols` that are absent from `df` are skipped.
    """
    bars = [
        go.Histogram(x=df[name], name=name, opacity=0.75, nbinsx=50)
        for name in cols
        if name in df.columns
    ]
    chart = go.Figure(data=bars)
    layout = dict(
        title=title,
        xaxis_title="Rate (%)",
        yaxis_title="Count",
        barmode="overlay",
        template="plotly",
    )
    chart.update_layout(**layout)
    return chart
157
+
158
+
159
def make_heatmap(pivot: pd.DataFrame, series_name: str) -> go.Figure:
    """
    Render a pivot table as a heatmap: columns on x, index (as text) on y.

    `series_name` is only used in the chart title.
    """
    x_labels = list(pivot.columns)
    y_labels = list(pivot.index.astype(str))
    cells = go.Heatmap(z=pivot.values, x=x_labels, y=y_labels, coloraxis="coloraxis")

    chart = go.Figure(data=cells)
    chart.update_layout(
        title=f"Monthly Average Heatmap — {series_name}",
        xaxis_title="Month",
        yaxis_title="Year",
        coloraxis=dict(colorscale="Viridis"),
        template="plotly",
    )
    return chart
176
+
177
+
178
+ # ---------- Gradio Callbacks ----------
179
def _parse_date_range(date_range: Tuple[str, str]):
    """Turn a (start, end) pair into Timestamps; empty entries become None."""
    start, end = None, None
    if date_range and date_range[0]:
        start = pd.to_datetime(date_range[0])
    if date_range and date_range[1]:
        end = pd.to_datetime(date_range[1])
    return start, end


def _filtered_frame(resample: str, date_range: Tuple[str, str]) -> pd.DataFrame:
    """Load the PMMS data, clip it to the date range, then resample it."""
    start, end = _parse_date_range(date_range)
    df = clip_by_date(load_pmms(), start, end)
    return resample_df(df, resample)


def _series_or_default(series: List[str]) -> List[str]:
    """Return `series`, or the first available series when none is selected."""
    if series:
        return series
    return available_series(load_pmms())[:1]


def update_overview(series: List[str], resample: str, ma_window: int, date_range: Tuple[str, str]):
    """
    Build the Overview tab outputs.

    Returns a tuple of (line chart, path of a CSV holding 'Date' plus the
    selected series, first 10 rows of the processed frame).
    """
    series = _series_or_default(series)
    df = _filtered_frame(resample, date_range)
    df = moving_average(df, ma_window, series)

    fig = make_line_chart(df, series, "Mortgage Rates Over Time")
    download_path = make_download(df[["Date"] + [c for c in series if c in df.columns]])
    head = df.head(10)
    return fig, download_path, head


def update_yoy(series: List[str], resample: str, date_range: Tuple[str, str]):
    """Build the year-over-year change line chart for the selected series."""
    series = _series_or_default(series)
    df_yoy = yoy_change(_filtered_frame(resample, date_range), series)
    return make_line_chart(df_yoy, series, "Year-over-Year Change (percentage points)")


def update_distribution(series: List[str], resample: str, date_range: Tuple[str, str]):
    """Build the overlaid histogram of the selected series."""
    series = _series_or_default(series)
    df = _filtered_frame(resample, date_range)
    return make_histogram(df, series, "Distribution of Rates")


def update_heatmap(series_one: str, resample: str, date_range: Tuple[str, str]):
    """
    Build the Year x Month heatmap for one series.

    Falls back to the first available series when none is chosen and
    returns an empty figure when the data has no numeric series at all.
    """
    if not series_one:
        candidates = available_series(load_pmms())
        if not candidates:
            return go.Figure()
        series_one = candidates[0]

    df = _filtered_frame(resample, date_range)
    pivot = monthly_heatmap_df(df, series_one)
    return make_heatmap(pivot, series_one)
252
+
253
+
254
def get_defaults():
    """
    Return the initial UI state: the loaded frame, its numeric series names,
    and the full date span as a pair of ISO date strings (first, last).
    """
    frame = load_pmms()
    first_day = frame["Date"].min().date()
    last_day = frame["Date"].max().date()
    span = (str(first_day), str(last_day))
    return frame, available_series(frame), span
260
+
261
+
262
+ # ---------- UI ----------
263
with gr.Blocks(title="Freddie Mac PMMS — Interactive Visualizer") as demo:
    gr.Markdown(
        """
        # Freddie Mac Primary Mortgage Market Survey (PMMS) — Interactive Visualizer
        - Data source: Freddie Mac PMMS (downloaded live at runtime)
        - Explore line charts, YoY deltas, distributions, and a monthly heatmap.
        - Use resampling and moving averages to smooth the series.
        """
    )

    df0, cols0, full_range = get_defaults()

    with gr.Row():
        series = gr.CheckboxGroup(choices=cols0, value=cols0[:1], label="Select rate series (multi-select)")
        series_one = gr.Dropdown(choices=cols0, value=(cols0[0] if cols0 else None), label="Heatmap series")
    with gr.Row():
        resample = gr.Dropdown(
            choices=["None", "W (Weekly)", "M (Monthly)", "Q (Quarterly)", "A (Annual)"],
            value="W (Weekly)",
            label="Resample frequency",
            info="Choose an aggregation frequency for the chart calculations."
        )
        ma_window = gr.Slider(1, 52, value=8, step=1, label="Moving average window (periods)", info="Set to 1 for no smoothing.")
        # Gradio has no date-range component, so the range is entered with two
        # date pickers. type="string" makes them deliver 'YYYY-MM-DD' text.
        start_date = gr.DateTime(
            value=full_range[0],
            include_time=False,
            type="string",
            label="Start date (inclusive)"
        )
        end_date = gr.DateTime(
            value=full_range[1],
            include_time=False,
            type="string",
            label="End date (inclusive)"
        )

    # Normalize the resample selection into pandas rule inside callbacks
    def _normalize_resample(x: str) -> str:
        mapping = {
            "None": "None",
            "W (Weekly)": "W",
            "M (Monthly)": "M",
            "Q (Quarterly)": "Q",
            "A (Annual)": "A",
        }
        return mapping.get(x or "None", "None")

    # Hidden state that carries the normalized resample rule to the callbacks
    resample_hidden = gr.State(value=_normalize_resample(resample.value))
    resample.change(_normalize_resample, inputs=resample, outputs=resample_hidden)

    def _with_date_range(fn):
        """Adapt `fn(..., (start, end))` so the two pickers can feed it."""
        def _wrapped(*args):
            *leading, start, end = args
            return fn(*leading, (start, end))
        _wrapped.__name__ = fn.__name__
        return _wrapped

    with gr.Tab("Overview"):
        fig_overview = gr.Plot(label="Mortgage Rates Over Time")
        download_csv = gr.File(label="Download filtered CSV")
        head_df = gr.Dataframe(interactive=False, label="Preview (first 10 rows)")
        gr.Markdown("Tip: Use the controls above to pick series, resample, smoothing, and date range.")
        btn_update_1 = gr.Button("Refresh Overview")

    with gr.Tab("YoY Change"):
        fig_yoy = gr.Plot(label="Year-over-Year Change")
        btn_update_2 = gr.Button("Refresh YoY")

    with gr.Tab("Distribution"):
        fig_hist = gr.Plot(label="Histogram")
        btn_update_3 = gr.Button("Refresh Distribution")

    with gr.Tab("Monthly Heatmap"):
        fig_heat = gr.Plot(label="Monthly Average Heatmap")
        btn_update_4 = gr.Button("Refresh Heatmap")

    # Each view is described once and wired twice: to its refresh button and
    # to the page-load event, so the first render already shows every chart.
    views = [
        (
            btn_update_1,
            dict(
                fn=_with_date_range(update_overview),
                inputs=[series, resample_hidden, ma_window, start_date, end_date],
                outputs=[fig_overview, download_csv, head_df],
            ),
        ),
        (
            btn_update_2,
            dict(
                fn=_with_date_range(update_yoy),
                inputs=[series, resample_hidden, start_date, end_date],
                outputs=[fig_yoy],
            ),
        ),
        (
            btn_update_3,
            dict(
                fn=_with_date_range(update_distribution),
                inputs=[series, resample_hidden, start_date, end_date],
                outputs=[fig_hist],
            ),
        ),
        (
            btn_update_4,
            dict(
                fn=_with_date_range(update_heatmap),
                inputs=[series_one, resample_hidden, start_date, end_date],
                outputs=[fig_heat],
            ),
        ),
    ]

    for button, wiring in views:
        button.click(show_progress="minimal", **wiring)
        # Auto-run once on load for a nice first view
        demo.load(**wiring)

if __name__ == "__main__":
    demo.launch()