Update app.py
Browse files
app.py
CHANGED
|
@@ -10,10 +10,19 @@ def plot_real_estate_correlation(state):
|
|
| 10 |
# Filter for the given state
|
| 11 |
df = df[df['State'] == state.upper()]
|
| 12 |
|
| 13 |
-
# Extract the list of ZIP codes and
|
| 14 |
zip_codes = df['RegionName'].unique()
|
| 15 |
-
|
| 16 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
# Initialize a list to collect each ZIP code's price series (converted to a DataFrame below)
|
| 18 |
price_matrix = []
|
| 19 |
|
|
@@ -21,15 +30,15 @@ def plot_real_estate_correlation(state):
|
|
| 21 |
for zip_code in zip_codes:
|
| 22 |
df_zip = df[df['RegionName'] == zip_code]
|
| 23 |
|
| 24 |
-
# Extract only the columns with date data (price values)
|
| 25 |
-
prices = df_zip.
|
| 26 |
|
| 27 |
# Append prices to the matrix unless every value is missing (all-NaN series are skipped)
|
| 28 |
if not np.isnan(prices).all():
|
| 29 |
price_matrix.append(prices)
|
| 30 |
|
| 31 |
# Convert to DataFrame for easier manipulation
|
| 32 |
-
price_matrix_df = pd.DataFrame(price_matrix, index=zip_codes)
|
| 33 |
|
| 34 |
# Transpose to align for correlation calculation (each column = ZIP code)
|
| 35 |
price_matrix_df = price_matrix_df.T.dropna()
|
|
|
|
| 10 |
# Filter for the given state
|
| 11 |
df = df[df['State'] == state.upper()]
|
| 12 |
|
| 13 |
+
# Extract the list of ZIP codes and filter only columns that are date strings
|
| 14 |
zip_codes = df['RegionName'].unique()
|
| 15 |
+
|
| 16 |
+
# Extract columns that are valid date strings only
|
| 17 |
+
date_columns = []
|
| 18 |
+
for col in df.columns[7:]:
|
| 19 |
+
try:
|
| 20 |
+
# Try to parse column names as dates
|
| 21 |
+
pd.to_datetime(col)
|
| 22 |
+
date_columns.append(col)
|
| 23 |
+
except:
|
| 24 |
+
continue
|
| 25 |
+
|
| 26 |
# Initialize a list to collect each ZIP code's price series (converted to a DataFrame below)
|
| 27 |
price_matrix = []
|
| 28 |
|
|
|
|
| 30 |
for zip_code in zip_codes:
|
| 31 |
df_zip = df[df['RegionName'] == zip_code]
|
| 32 |
|
| 33 |
+
# Extract only the columns with valid date data (price values)
|
| 34 |
+
prices = df_zip.loc[:, date_columns].values.flatten()
|
| 35 |
|
| 36 |
# Append prices to the matrix unless every value is missing (all-NaN series are skipped)
|
| 37 |
if not np.isnan(prices).all():
|
| 38 |
price_matrix.append(prices)
|
| 39 |
|
| 40 |
# Convert to DataFrame for easier manipulation
|
| 41 |
+
price_matrix_df = pd.DataFrame(price_matrix, index=zip_codes, columns=date_columns)
|
| 42 |
|
| 43 |
# Transpose to align for correlation calculation (each column = ZIP code)
|
| 44 |
price_matrix_df = price_matrix_df.T.dropna()
|