vitormonteirodebarros committed on
Commit
f2e25c6
·
1 Parent(s): 79fd4ca

fix chart

Browse files
app.py CHANGED
@@ -4,73 +4,94 @@ import pickle
4
  from tensorflow.keras.models import load_model
5
  from sklearn.preprocessing import MinMaxScaler
6
  import matplotlib.pyplot as plt
 
7
  from io import BytesIO
8
  from PIL import Image
9
  import json
 
10
 
11
- # Load the model and scaler
12
- model = load_model('models/new_merged_weather_financial_q_l4q_l24q.keras')
13
  scaler = MinMaxScaler()
14
 
15
- with open('data/PRISM.pkl', 'rb') as f:
16
  data = pickle.load(f)
17
 
18
- data = data.sort_values(by=['YYYYQ', 'CBSA_Name'])
 
 
 
19
 
20
- features = data.drop(
21
- columns=['Date', 'CBSA_Name', 'Id', 'iname', 'type', 'Cluster', 'region', 'division', 'state', 'msa']
22
- )
23
  scaler.fit(features)
24
 
25
 
26
- def get_actual_values_2015_2023(cbsa_name):
27
- with open('data/2024_11_7_total_return_sample.pkl', 'rb') as f:
28
- cbsa_filtered_data = pickle.load(f)
 
 
 
29
 
30
- cbsa_data = cbsa_filtered_data[cbsa_filtered_data['CBSA_Name'] == cbsa_name]
31
- cbsa_data = cbsa_data.sort_values(by='YYYYQ')
32
- cbsa_data['YYYYQ'] = cbsa_data['YYYYQ'].astype(str).str.strip()
33
 
34
- actual_values = cbsa_data[(cbsa_data['YYYYQ'].str[:4].astype(int) >= 2015) &
35
- (cbsa_data['YYYYQ'].str[:4].astype(int) <= 2023)]['tret'].values
36
 
37
- return actual_values
38
 
 
 
39
 
40
- def predict_and_plot(cbsa_name):
41
- cbsa_data = data[data['CBSA_Name'] == cbsa_name]
42
- cbsa_data = cbsa_data.sort_values(by='YYYYQ')
43
- cbsa_data['YYYYQ'] = cbsa_data['YYYYQ'].astype(str).str.strip()
44
 
45
- actual_values = get_actual_values_2015_2023(cbsa_name)
46
 
47
- cbsa_features = cbsa_data.drop(
48
- columns=['Date', 'CBSA_Name', 'Id', 'iname', 'type', 'Cluster', 'region', 'division', 'state', 'msa']
49
- )
50
- cbsa_features = cbsa_features[features.columns]
51
  cbsa_scaled_features = scaler.transform(cbsa_features)
52
 
53
- num_steps = 24
54
- X_cbsa = []
55
- for i in range(len(cbsa_scaled_features) - num_steps + 1):
56
- X_cbsa.append(cbsa_scaled_features[i:i + num_steps])
57
- X_cbsa = np.array(X_cbsa)
 
 
 
 
 
 
 
 
 
 
58
 
59
- future_predictions = model.predict(X_cbsa)
60
- future_predictions = np.squeeze(future_predictions)
 
 
 
61
 
62
- combined_values = np.concatenate([actual_values, future_predictions[len(actual_values):]])
63
- combined_values = combined_values[
64
- :len([f"{year} Q{quarter}" for year in range(2015, 2026) for quarter in range(1, 5)])]
65
 
66
- years_quarters = [f"{year} Q{quarter}" for year in range(2015, 2026) for quarter in range(1, 5)]
67
- years_quarters = years_quarters[:len(combined_values)]
 
 
 
 
 
 
 
 
 
 
68
 
69
- predicted_json = []
70
- for i, value in enumerate(combined_values[len(actual_values):]):
71
- if len(actual_values) + i < len(years_quarters):
72
- year, quarter = years_quarters[len(actual_values) + i].split()
73
- predicted_json.append({"Year": int(year), "Quarter": quarter, "Value": round(value, 3)})
 
 
 
74
 
75
  json_output = {
76
  "CBSA": cbsa_name,
@@ -78,15 +99,18 @@ def predict_and_plot(cbsa_name):
78
  "Predicted": predicted_json
79
  }
80
 
81
- plt.figure(figsize=(14, 6))
82
- plt.plot(years_quarters[:len(actual_values)], actual_values, label="Actual (2015-2023)", marker='o', color='blue')
83
- plt.plot(years_quarters, combined_values, label="Predicted (2015-2025)", linestyle='--', color='orange')
84
-
85
- plt.title(f'Total Return Prediction for {cbsa_name} (2015-2025)')
86
- plt.xlabel('Quarter')
87
  plt.ylabel('Total Return')
88
- plt.xticks(rotation=45)
 
 
 
89
  plt.legend()
 
90
 
91
  buf = BytesIO()
92
  plt.savefig(buf, format='png')
@@ -105,17 +129,15 @@ with gr.Blocks() as demo:
105
  with gr.Column():
106
  gr.Markdown("""
107
  **Total Return**: Total return is a measure of the performance of an asset or investment over a specific period, defined as the sum of **Income Return** and **Asset Return**.
108
-
109
  - **Income Return**: The net income generated by a property, calculated as rental income minus operating and capital expenditures.
110
  - **Asset Return**: The appreciation in the market value of a property from purchase to sale.
111
-
112
  **CBSA (Core-Based Statistical Area)**: Represents a geographical area defined by the Office of Management and Budget, typically used for statistical purposes in the U.S. It consists of counties and county equivalents centered around an urban center with a high degree of social and economic integration.
113
  """)
114
  with gr.Row():
115
- with gr.Column():
116
- output_image = gr.Image(type="numpy", label="Actual vs Predicted Total Return (2015-2025)")
117
- with gr.Column():
118
- json_display = gr.JSON(label="Prediction JSON Output")
119
 
120
  predict_button.click(fn=predict_and_plot, inputs=cbsa_dropdown, outputs=[output_image, json_display])
121
 
 
4
  from tensorflow.keras.models import load_model
5
  from sklearn.preprocessing import MinMaxScaler
6
  import matplotlib.pyplot as plt
7
+ import matplotlib.ticker as ticker
8
  from io import BytesIO
9
  from PIL import Image
10
  import json
11
+ import pandas as pd
12
 
13
+ model = load_model('models/new_merged_weather_financial_q_l4q_l24q_general.keras')
 
14
  scaler = MinMaxScaler()
15
 
16
+ with open('data/2024_11_7_total_return_sample.pkl', 'rb') as f:
17
  data = pickle.load(f)
18
 
19
+ columns_to_drop = ['Date', 'CBSA_Name', 'Id', 'iname', 'type', 'Cluster', 'region', 'division', 'state', 'msa', 'tret',
20
+ 'treturn', 'tot_index', 'inc_index', 'app_index', 'count', 'emv', 'bmv', 'income', 'psales',
21
+ 'capimp', 'ireturn', 'areturn', 'Latitude_x', 'Longitude_x', 'Latitude_y', 'Longitude_y', 'tmin',
22
+ 'tmean', 'tmax', 'tdmean', 'ppt', 'vpdmin', 'vpdmax', 'tmin_low', 'tmax_high']
23
 
24
+ features = data.drop(columns=columns_to_drop)
 
 
25
  scaler.fit(features)
26
 
27
 
28
def create_sequences(data_param, target_param, input_steps=12, forecast_steps=4):
    """Build sliding input windows and the mean target over the following steps.

    For every start index ``i`` at which both an input window and a forecast
    window fit inside ``data_param``, one sample is produced:

    * ``X[k] = data_param[i : i + input_steps]``
    * ``y[k] = target_param[i + input_steps : i + input_steps + forecast_steps].mean()``

    Args:
        data_param: Sliceable, sized sequence of feature rows (e.g. a 2-D
            NumPy array).
        target_param: Sliceable sequence of target values whose slices expose
            ``.mean()`` (e.g. a NumPy array or pandas Series). It is sliced
            with the same integer offsets as ``data_param``.
        input_steps: Number of consecutive rows in each input window.
        forecast_steps: Number of target values averaged for each sample.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays built from the collected windows and
        target means. When the input is too short to hold a single complete
        window, both arrays are empty.
    """
    # The "+ 1" keeps the final complete window: with exactly
    # input_steps + forecast_steps rows there is one valid sample, and the
    # last target value must be reachable by the forecast slice.
    num_windows = len(data_param) - input_steps - forecast_steps + 1

    X, y = [], []
    # max(..., 0) makes the "too short" case explicit instead of relying on
    # range() silently accepting a negative bound.
    for start in range(max(num_windows, 0)):
        split = start + input_steps
        X.append(data_param[start:split])
        y.append(target_param[split:split + forecast_steps].mean())
    return np.array(X), np.array(y)
34
 
 
 
 
35
 
36
+ def predict_and_plot(cbsa_name):
37
+ print(f"Processing predictions for CBSA: {cbsa_name}")
38
 
39
+ cbsa_data = data[data['CBSA_Name'] == cbsa_name]
40
 
41
+ cbsa_features = cbsa_data.drop(columns=columns_to_drop)
42
+ cbsa_features = cbsa_features[features.columns]
43
 
44
+ cbsa_target = cbsa_data['tret']
 
 
 
45
 
46
+ print(f"Feature shape for {cbsa_name} before scaling: {cbsa_features.shape}")
47
 
 
 
 
 
48
  cbsa_scaled_features = scaler.transform(cbsa_features)
49
 
50
+ print(f"Feature shape for {cbsa_name} after scaling: {cbsa_scaled_features.shape}")
51
+
52
+ X_cbsa, y_cbsa = create_sequences(cbsa_scaled_features, cbsa_target)
53
+
54
+ predictions = model.predict(X_cbsa)
55
+ predictions = np.squeeze(predictions)
56
+
57
+ shift_steps = 5
58
+ predictions = np.roll(predictions, -shift_steps)
59
+
60
+ future_quarters = [f"{year}-Q{quarter}" for year in range(2024, 2026) for quarter in range(1, 5)]
61
+ num_future_steps = len(future_quarters)
62
+
63
+ future_predictions = []
64
+ current_input = X_cbsa[-1]
65
 
66
+ for _ in range(num_future_steps):
67
+ next_prediction = model.predict(current_input.reshape(1, -1, X_cbsa.shape[2]))
68
+ future_predictions.append(next_prediction.squeeze())
69
+ current_input = np.roll(current_input, -1, axis=0)
70
+ current_input[-1] = next_prediction.squeeze()
71
 
72
+ predictions = np.concatenate((predictions, np.array(future_predictions)))
 
 
73
 
74
+ time_index = (
75
+ cbsa_data['YYYYQ'].iloc[-len(y_cbsa):]
76
+ .apply(lambda x: f"{str(x)[:4]}-Q{str(x)[4]}")
77
+ .sort_values()
78
+ )
79
+ future_time_index = pd.Series(future_quarters)
80
+ full_time_index = pd.concat([time_index, future_time_index]).reset_index(drop=True)
81
+
82
+ actual_index = full_time_index[:len(y_cbsa)]
83
+ predicted_index = full_time_index[:len(predictions)]
84
+
85
+ print("time_index", predicted_index)
86
 
87
+ predicted_json = [
88
+ {
89
+ "year": int(year_quarter.split("-")[0]),
90
+ "quarter": int(year_quarter.split("-")[1][1]),
91
+ "total_return": round(float(pred), 4)
92
+ }
93
+ for year_quarter, pred in zip(full_time_index[-num_future_steps:], future_predictions)
94
+ ]
95
 
96
  json_output = {
97
  "CBSA": cbsa_name,
 
99
  "Predicted": predicted_json
100
  }
101
 
102
+ plt.figure(figsize=(20, 6))
103
+ plt.plot(actual_index, y_cbsa, label='Actual Total Return', color='green', linestyle='-')
104
+ plt.plot(predicted_index, predictions, label='Predicted Total Return', color='red', linestyle='-')
105
+ plt.title(f'Model Predictions vs Actual for {cbsa_name}')
106
+ plt.xlabel('Time (YYYYQ)')
 
107
  plt.ylabel('Total Return')
108
+
109
+ plt.xticks(rotation=90, fontsize=8)
110
+ plt.gca().tick_params(axis='x', pad=15)
111
+
112
  plt.legend()
113
+ plt.tight_layout()
114
 
115
  buf = BytesIO()
116
  plt.savefig(buf, format='png')
 
129
  with gr.Column():
130
  gr.Markdown("""
131
  **Total Return**: Total return is a measure of the performance of an asset or investment over a specific period, defined as the sum of **Income Return** and **Asset Return**.
 
132
  - **Income Return**: The net income generated by a property, calculated as rental income minus operating and capital expenditures.
133
  - **Asset Return**: The appreciation in the market value of a property from purchase to sale.
 
134
  **CBSA (Core-Based Statistical Area)**: Represents a geographical area defined by the Office of Management and Budget, typically used for statistical purposes in the U.S. It consists of counties and county equivalents centered around an urban center with a high degree of social and economic integration.
135
  """)
136
  with gr.Row():
137
+ output_image = gr.Image(type="numpy", label="Actual vs Predicted Total Return (2015-2025)")
138
+
139
+ with gr.Row():
140
+ json_display = gr.JSON(label="Prediction JSON Output")
141
 
142
  predict_button.click(fn=predict_and_plot, inputs=cbsa_dropdown, outputs=[output_image, json_display])
143
 
data/2024_11_7_total_return_sample.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f0043164c3837ad6e8d742088852919f908a1514ea980e7bd3c8aaee260c7b4e
3
- size 3905447
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db7f0e5cc21cd3204570e0c6816c369707b81cace27746700fbc2548597628ee
3
+ size 4585941
models/new_merged_weather_financial_q_l4q_l24q.keras DELETED
Binary file (565 kB)
 
models/new_merged_weather_financial_q_l4q_l24q_general.keras ADDED
Binary file (387 kB). View file