dibend committed on
Commit
c8d21d7
·
verified ·
1 Parent(s): e278d1d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -42
app.py CHANGED
@@ -2,9 +2,13 @@ import gradio as gr
2
  import pandas as pd
3
  import plotly.graph_objects as go
4
  import numpy as np
 
 
 
 
 
5
 
6
  def plot_zip_code_correlation(zip_codes_str, start_date, end_date):
7
- # Validate dates
8
  start_year = pd.to_datetime(start_date).year
9
  end_year = pd.to_datetime(end_date).year
10
  if start_year < 2000 or end_year < 2000:
@@ -12,87 +16,60 @@ def plot_zip_code_correlation(zip_codes_str, start_date, end_date):
12
  if start_year > end_year:
13
  raise ValueError("Start date must be before end date.")
14
 
15
- # Process ZIP codes (ensure 5-digit format)
16
  zip_codes = [z.strip().zfill(5) for z in zip_codes_str.split(",")]
17
-
18
- # Load data
19
  df = pd.read_csv('https://files.zillowstatic.com/research/public_csvs/zhvi/Zip_zhvi_uc_sfrcondo_tier_0.33_0.67_sm_sa_month.csv')
20
-
21
- # Ensure ZIP codes in dataframe are strings with leading zeros
22
  df['RegionName'] = df['RegionName'].astype(str).str.zfill(5)
23
  df = df[df['RegionName'].isin(zip_codes)]
24
-
25
  if df.empty:
26
  raise ValueError("No data found for the provided ZIP codes.")
27
 
28
- # Extract date columns within the selected range
29
- date_columns = []
30
- for col in df.columns[7:]:
31
- try:
32
- date = pd.to_datetime(col)
33
- if start_date <= str(date.date()) <= end_date:
34
- date_columns.append(col)
35
- except:
36
- continue
37
-
38
  if not date_columns:
39
  raise ValueError("No data available within the selected date range.")
40
 
41
- # Build price matrix
42
  price_matrix = []
43
  valid_zip_list = []
44
-
45
  for zip_code in zip_codes:
46
  df_zip = df[df['RegionName'] == zip_code]
47
  if not df_zip.empty:
48
- prices = df_zip.loc[:, date_columns].values.flatten()
49
  if not np.isnan(prices).all():
50
  price_matrix.append(prices)
51
  valid_zip_list.append(zip_code)
52
 
53
  if len(price_matrix) < 2:
54
- raise ValueError(f"Not enough data for correlation calculation. Ensure at least two valid ZIP codes with overlapping data between {start_date} and {end_date}.")
55
 
56
- price_matrix_df = pd.DataFrame(price_matrix, index=valid_zip_list, columns=date_columns)
57
- price_matrix_df = price_matrix_df.T.dropna()
58
-
59
- # Calculate correlation matrix
60
  corr_matrix = price_matrix_df.corr()
61
-
62
- # Prepare 3D plot
63
  z_data = corr_matrix.values
64
  x_data, y_data = np.meshgrid(valid_zip_list, valid_zip_list)
65
 
66
  fig = go.Figure(data=[go.Surface(z=z_data, x=x_data, y=y_data)])
67
  fig.update_layout(
68
  title=f'3D Correlation Matrix of Housing Prices ({start_date} to {end_date})',
69
- scene=dict(
70
- xaxis_title='ZIP Code',
71
- yaxis_title='ZIP Code',
72
- zaxis_title='Correlation',
73
- ),
74
  autosize=True
75
  )
76
-
77
  return fig
78
 
79
  iface = gr.Interface(
80
  fn=plot_zip_code_correlation,
81
- gr.Markdown(
82
- f"""
83
- # 🇺🇸 US Real Estate Zip ZHVI Price Movement Correlation Matrix Gen
84
- Track housing price correlations by ZIP code to make informed decisions as a property owner or buyer.
85
- **Data up to {latest_data_date_str}**. Enter a two-letter state abbreviation below (e.g., CA, NY, TX).
86
- ### [Contact a real estate broker](https://micheled.com)
87
- """
88
- ),
89
  inputs=[
90
  gr.Textbox(label="Enter comma-separated ZIP codes (e.g., 07001,07002,07003)"),
91
  gr.Textbox(label="Start Date (YYYY-MM-DD) - No earlier than 2000"),
92
  gr.Textbox(label="End Date (YYYY-MM-DD) - No earlier than 2000")
93
  ],
94
  outputs=gr.Plot(),
95
- title="3D ZIP Code Housing Price Correlation Matrix"
 
 
 
 
 
 
 
 
96
  )
97
 
98
  iface.launch(share=False, debug=True)
 
2
  import pandas as pd
3
  import plotly.graph_objects as go
4
  import numpy as np
5
+ from datetime import datetime
6
+
7
+ # Fetch the latest data date from the CSV
8
+ df_sample = pd.read_csv('https://files.zillowstatic.com/research/public_csvs/zhvi/Zip_zhvi_uc_sfrcondo_tier_0.33_0.67_sm_sa_month.csv', nrows=1)
9
+ latest_data_date_str = df_sample.columns[-1]
10
 
11
  def plot_zip_code_correlation(zip_codes_str, start_date, end_date):
 
12
  start_year = pd.to_datetime(start_date).year
13
  end_year = pd.to_datetime(end_date).year
14
  if start_year < 2000 or end_year < 2000:
 
16
  if start_year > end_year:
17
  raise ValueError("Start date must be before end date.")
18
 
 
19
  zip_codes = [z.strip().zfill(5) for z in zip_codes_str.split(",")]
 
 
20
  df = pd.read_csv('https://files.zillowstatic.com/research/public_csvs/zhvi/Zip_zhvi_uc_sfrcondo_tier_0.33_0.67_sm_sa_month.csv')
 
 
21
  df['RegionName'] = df['RegionName'].astype(str).str.zfill(5)
22
  df = df[df['RegionName'].isin(zip_codes)]
 
23
  if df.empty:
24
  raise ValueError("No data found for the provided ZIP codes.")
25
 
26
+ date_columns = [col for col in df.columns[7:] if start_date <= col <= end_date]
 
 
 
 
 
 
 
 
 
27
  if not date_columns:
28
  raise ValueError("No data available within the selected date range.")
29
 
 
30
  price_matrix = []
31
  valid_zip_list = []
 
32
  for zip_code in zip_codes:
33
  df_zip = df[df['RegionName'] == zip_code]
34
  if not df_zip.empty:
35
+ prices = df_zip[date_columns].values.flatten()
36
  if not np.isnan(prices).all():
37
  price_matrix.append(prices)
38
  valid_zip_list.append(zip_code)
39
 
40
  if len(price_matrix) < 2:
41
+ raise ValueError("Not enough data for correlation calculation.")
42
 
43
+ price_matrix_df = pd.DataFrame(price_matrix, index=valid_zip_list, columns=date_columns).T.dropna()
 
 
 
44
  corr_matrix = price_matrix_df.corr()
 
 
45
  z_data = corr_matrix.values
46
  x_data, y_data = np.meshgrid(valid_zip_list, valid_zip_list)
47
 
48
  fig = go.Figure(data=[go.Surface(z=z_data, x=x_data, y=y_data)])
49
  fig.update_layout(
50
  title=f'3D Correlation Matrix of Housing Prices ({start_date} to {end_date})',
51
+ scene=dict(xaxis_title='ZIP Code', yaxis_title='ZIP Code', zaxis_title='Correlation'),
 
 
 
 
52
  autosize=True
53
  )
 
54
  return fig
55
 
56
  iface = gr.Interface(
57
  fn=plot_zip_code_correlation,
 
 
 
 
 
 
 
 
58
  inputs=[
59
  gr.Textbox(label="Enter comma-separated ZIP codes (e.g., 07001,07002,07003)"),
60
  gr.Textbox(label="Start Date (YYYY-MM-DD) - No earlier than 2000"),
61
  gr.Textbox(label="End Date (YYYY-MM-DD) - No earlier than 2000")
62
  ],
63
  outputs=gr.Plot(),
64
+ title="3D ZIP Code Housing Price Correlation Matrix",
65
+ description=f"""
66
+ ## US Real Estate Zip ZHVI Price Movement Correlation Matrix Gen
67
+
68
+ Track housing price correlations by ZIP code to make informed decisions as a property owner or buyer.
69
+ **Data up to {latest_data_date_str}**. Enter ZIP codes below.
70
+
71
+ [Contact a real estate broker](https://micheled.com)
72
+ """
73
  )
74
 
75
  iface.launch(share=False, debug=True)