vitormonteirodebarros committed on
Commit
2e2a262
·
1 Parent(s): f2e25c6

fix missing YYYYQ data in the chart

Browse files
app.py CHANGED
@@ -4,7 +4,6 @@ import pickle
4
  from tensorflow.keras.models import load_model
5
  from sklearn.preprocessing import MinMaxScaler
6
  import matplotlib.pyplot as plt
7
- import matplotlib.ticker as ticker
8
  from io import BytesIO
9
  from PIL import Image
10
  import json
@@ -17,9 +16,10 @@ with open('data/2024_11_7_total_return_sample.pkl', 'rb') as f:
17
  data = pickle.load(f)
18
 
19
  columns_to_drop = ['Date', 'CBSA_Name', 'Id', 'iname', 'type', 'Cluster', 'region', 'division', 'state', 'msa', 'tret',
20
- 'treturn', 'tot_index', 'inc_index', 'app_index', 'count', 'emv', 'bmv', 'income', 'psales',
21
- 'capimp', 'ireturn', 'areturn', 'Latitude_x', 'Longitude_x', 'Latitude_y', 'Longitude_y', 'tmin',
22
- 'tmean', 'tmax', 'tdmean', 'ppt', 'vpdmin', 'vpdmax', 'tmin_low', 'tmax_high']
 
23
 
24
  features = data.drop(columns=columns_to_drop)
25
  scaler.fit(features)
@@ -34,21 +34,17 @@ def create_sequences(data_param, target_param, input_steps=12, forecast_steps=4)
34
 
35
 
36
  def predict_and_plot(cbsa_name):
37
- print(f"Processing predictions for CBSA: {cbsa_name}")
38
-
39
  cbsa_data = data[data['CBSA_Name'] == cbsa_name]
40
 
 
 
41
  cbsa_features = cbsa_data.drop(columns=columns_to_drop)
42
  cbsa_features = cbsa_features[features.columns]
43
 
44
  cbsa_target = cbsa_data['tret']
45
 
46
- print(f"Feature shape for {cbsa_name} before scaling: {cbsa_features.shape}")
47
-
48
  cbsa_scaled_features = scaler.transform(cbsa_features)
49
 
50
- print(f"Feature shape for {cbsa_name} after scaling: {cbsa_scaled_features.shape}")
51
-
52
  X_cbsa, y_cbsa = create_sequences(cbsa_scaled_features, cbsa_target)
53
 
54
  predictions = model.predict(X_cbsa)
@@ -58,6 +54,7 @@ def predict_and_plot(cbsa_name):
58
  predictions = np.roll(predictions, -shift_steps)
59
 
60
  future_quarters = [f"{year}-Q{quarter}" for year in range(2024, 2026) for quarter in range(1, 5)]
 
61
  num_future_steps = len(future_quarters)
62
 
63
  future_predictions = []
@@ -65,25 +62,27 @@ def predict_and_plot(cbsa_name):
65
 
66
  for _ in range(num_future_steps):
67
  next_prediction = model.predict(current_input.reshape(1, -1, X_cbsa.shape[2]))
 
68
  future_predictions.append(next_prediction.squeeze())
 
69
  current_input = np.roll(current_input, -1, axis=0)
 
70
  current_input[-1] = next_prediction.squeeze()
71
 
72
  predictions = np.concatenate((predictions, np.array(future_predictions)))
73
 
74
  time_index = (
75
- cbsa_data['YYYYQ'].iloc[-len(y_cbsa):]
76
  .apply(lambda x: f"{str(x)[:4]}-Q{str(x)[4]}")
77
  .sort_values()
78
  )
 
79
  future_time_index = pd.Series(future_quarters)
80
  full_time_index = pd.concat([time_index, future_time_index]).reset_index(drop=True)
81
 
82
  actual_index = full_time_index[:len(y_cbsa)]
83
  predicted_index = full_time_index[:len(predictions)]
84
 
85
- print("time_index", predicted_index)
86
-
87
  predicted_json = [
88
  {
89
  "year": int(year_quarter.split("-")[0]),
 
4
  from tensorflow.keras.models import load_model
5
  from sklearn.preprocessing import MinMaxScaler
6
  import matplotlib.pyplot as plt
 
7
  from io import BytesIO
8
  from PIL import Image
9
  import json
 
16
  data = pickle.load(f)
17
 
18
  columns_to_drop = ['Date', 'CBSA_Name', 'Id', 'iname', 'type', 'Cluster', 'region', 'division', 'state', 'msa', 'tret',
19
+ 'iret', 'aret', 'treturn', 'tot_index', 'inc_index', 'app_index', 'count', 'emv', 'bmv', 'income',
20
+ 'psales', 'capimp', 'ireturn', 'areturn', 'Latitude_x', 'Longitude_x', 'Latitude_y', 'Longitude_y',
21
+ 'tmin', 'tmean', 'tmax', 'tdmean', 'ppt', 'vpdmin', 'vpdmax', 'tmin_low', 'tmax_high',
22
+ 'num_of_positive_headlines', 'num_of_negative_headlines', 'remote_work_shift']
23
 
24
  features = data.drop(columns=columns_to_drop)
25
  scaler.fit(features)
 
34
 
35
 
36
  def predict_and_plot(cbsa_name):
 
 
37
  cbsa_data = data[data['CBSA_Name'] == cbsa_name]
38
 
39
+ print(cbsa_data['YYYYQ'].sort_values())
40
+
41
  cbsa_features = cbsa_data.drop(columns=columns_to_drop)
42
  cbsa_features = cbsa_features[features.columns]
43
 
44
  cbsa_target = cbsa_data['tret']
45
 
 
 
46
  cbsa_scaled_features = scaler.transform(cbsa_features)
47
 
 
 
48
  X_cbsa, y_cbsa = create_sequences(cbsa_scaled_features, cbsa_target)
49
 
50
  predictions = model.predict(X_cbsa)
 
54
  predictions = np.roll(predictions, -shift_steps)
55
 
56
  future_quarters = [f"{year}-Q{quarter}" for year in range(2024, 2026) for quarter in range(1, 5)]
57
+
58
  num_future_steps = len(future_quarters)
59
 
60
  future_predictions = []
 
62
 
63
  for _ in range(num_future_steps):
64
  next_prediction = model.predict(current_input.reshape(1, -1, X_cbsa.shape[2]))
65
+
66
  future_predictions.append(next_prediction.squeeze())
67
+
68
  current_input = np.roll(current_input, -1, axis=0)
69
+
70
  current_input[-1] = next_prediction.squeeze()
71
 
72
  predictions = np.concatenate((predictions, np.array(future_predictions)))
73
 
74
  time_index = (
75
+ cbsa_data['YYYYQ'].iloc[:len(y_cbsa)]
76
  .apply(lambda x: f"{str(x)[:4]}-Q{str(x)[4]}")
77
  .sort_values()
78
  )
79
+
80
  future_time_index = pd.Series(future_quarters)
81
  full_time_index = pd.concat([time_index, future_time_index]).reset_index(drop=True)
82
 
83
  actual_index = full_time_index[:len(y_cbsa)]
84
  predicted_index = full_time_index[:len(predictions)]
85
 
 
 
86
  predicted_json = [
87
  {
88
  "year": int(year_quarter.split("-")[0]),
models/new_merged_weather_financial_q_l4q_l24q_general.keras CHANGED
Binary files a/models/new_merged_weather_financial_q_l4q_l24q_general.keras and b/models/new_merged_weather_financial_q_l4q_l24q_general.keras differ