dibend committed on
Commit
89af670
·
verified ·
1 Parent(s): caffa37

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -19
app.py CHANGED
@@ -9,23 +9,23 @@ def plot_zip_code_correlation(zip_codes_str, start_date, end_date):
9
  end_year = pd.to_datetime(end_date).year
10
  if start_year < 2000 or end_year < 2000:
11
  raise ValueError("Please select dates no earlier than the year 2000.")
12
-
13
  if start_year > end_year:
14
  raise ValueError("Start date must be before end date.")
15
 
16
- # Convert input string to list of zip codes
17
- zip_codes = [z.strip() for z in zip_codes_str.split(",")]
18
 
19
- # Read the CSV file
20
  df = pd.read_csv('https://files.zillowstatic.com/research/public_csvs/zhvi/Zip_zhvi_uc_sfrcondo_tier_0.33_0.67_sm_sa_month.csv')
21
 
22
- # Filter for the given ZIP codes
23
- df = df[df['RegionName'].astype(str).isin(zip_codes)]
 
24
 
25
  if df.empty:
26
  raise ValueError("No data found for the provided ZIP codes.")
27
 
28
- # Extract valid date columns within the selected date range
29
  date_columns = []
30
  for col in df.columns[7:]:
31
  try:
@@ -38,35 +38,34 @@ def plot_zip_code_correlation(zip_codes_str, start_date, end_date):
38
  if not date_columns:
39
  raise ValueError("No data available within the selected date range.")
40
 
41
- # Initialize price matrix
42
  price_matrix = []
43
- zip_list = []
44
 
45
  for zip_code in zip_codes:
46
- df_zip = df[df['RegionName'].astype(str) == zip_code]
47
  if not df_zip.empty:
48
  prices = df_zip.loc[:, date_columns].values.flatten()
49
  if not np.isnan(prices).all():
50
  price_matrix.append(prices)
51
- zip_list.append(zip_code)
52
 
53
  if len(price_matrix) < 2:
54
- raise ValueError("Not enough data for correlation calculation.")
55
 
56
- price_matrix_df = pd.DataFrame(price_matrix, index=zip_list, columns=date_columns)
57
  price_matrix_df = price_matrix_df.T.dropna()
58
 
59
  # Calculate correlation matrix
60
  corr_matrix = price_matrix_df.corr()
61
 
62
- # Prepare 3D plot data
63
  z_data = corr_matrix.values
64
- x_data, y_data = np.meshgrid(zip_list, zip_list)
65
 
66
- # Plot
67
  fig = go.Figure(data=[go.Surface(z=z_data, x=x_data, y=y_data)])
68
  fig.update_layout(
69
- title=f'3D Correlation Matrix ({start_date} to {end_date})',
70
  scene=dict(
71
  xaxis_title='ZIP Code',
72
  yaxis_title='ZIP Code',
@@ -80,12 +79,12 @@ def plot_zip_code_correlation(zip_codes_str, start_date, end_date):
80
  iface = gr.Interface(
81
  fn=plot_zip_code_correlation,
82
  inputs=[
83
- gr.Textbox(label="Enter comma-separated ZIP codes (e.g., 07001, 07002, 07003)"),
84
  gr.Textbox(label="Start Date (YYYY-MM-DD) - No earlier than 2000"),
85
  gr.Textbox(label="End Date (YYYY-MM-DD) - No earlier than 2000")
86
  ],
87
  outputs=gr.Plot(),
88
- title="ZIP Code 3D Correlation Matrix with Date Range"
89
  )
90
 
91
  iface.launch(share=False, debug=True)
 
9
  end_year = pd.to_datetime(end_date).year
10
  if start_year < 2000 or end_year < 2000:
11
  raise ValueError("Please select dates no earlier than the year 2000.")
 
12
  if start_year > end_year:
13
  raise ValueError("Start date must be before end date.")
14
 
15
+ # Process ZIP codes (ensure 5-digit format)
16
+ zip_codes = [z.strip().zfill(5) for z in zip_codes_str.split(",")]
17
 
18
+ # Load data
19
  df = pd.read_csv('https://files.zillowstatic.com/research/public_csvs/zhvi/Zip_zhvi_uc_sfrcondo_tier_0.33_0.67_sm_sa_month.csv')
20
 
21
+ # Ensure ZIP codes in dataframe are strings with leading zeros
22
+ df['RegionName'] = df['RegionName'].astype(str).str.zfill(5)
23
+ df = df[df['RegionName'].isin(zip_codes)]
24
 
25
  if df.empty:
26
  raise ValueError("No data found for the provided ZIP codes.")
27
 
28
+ # Extract date columns within the selected range
29
  date_columns = []
30
  for col in df.columns[7:]:
31
  try:
 
38
  if not date_columns:
39
  raise ValueError("No data available within the selected date range.")
40
 
41
+ # Build price matrix
42
  price_matrix = []
43
+ valid_zip_list = []
44
 
45
  for zip_code in zip_codes:
46
+ df_zip = df[df['RegionName'] == zip_code]
47
  if not df_zip.empty:
48
  prices = df_zip.loc[:, date_columns].values.flatten()
49
  if not np.isnan(prices).all():
50
  price_matrix.append(prices)
51
+ valid_zip_list.append(zip_code)
52
 
53
  if len(price_matrix) < 2:
54
+ raise ValueError(f"Not enough data for correlation calculation. Ensure at least two valid ZIP codes with overlapping data between {start_date} and {end_date}.")
55
 
56
+ price_matrix_df = pd.DataFrame(price_matrix, index=valid_zip_list, columns=date_columns)
57
  price_matrix_df = price_matrix_df.T.dropna()
58
 
59
  # Calculate correlation matrix
60
  corr_matrix = price_matrix_df.corr()
61
 
62
+ # Prepare 3D plot
63
  z_data = corr_matrix.values
64
+ x_data, y_data = np.meshgrid(valid_zip_list, valid_zip_list)
65
 
 
66
  fig = go.Figure(data=[go.Surface(z=z_data, x=x_data, y=y_data)])
67
  fig.update_layout(
68
+ title=f'3D Correlation Matrix of Housing Prices ({start_date} to {end_date})',
69
  scene=dict(
70
  xaxis_title='ZIP Code',
71
  yaxis_title='ZIP Code',
 
79
  iface = gr.Interface(
80
  fn=plot_zip_code_correlation,
81
  inputs=[
82
+ gr.Textbox(label="Enter comma-separated ZIP codes (e.g., 07001,07002,07003)"),
83
  gr.Textbox(label="Start Date (YYYY-MM-DD) - No earlier than 2000"),
84
  gr.Textbox(label="End Date (YYYY-MM-DD) - No earlier than 2000")
85
  ],
86
  outputs=gr.Plot(),
87
+ title="3D ZIP Code Housing Price Correlation Matrix"
88
  )
89
 
90
  iface.launch(share=False, debug=True)