Spaces:

climateindexai
/

total_return_prediction

Sleeping

App Files Community

vitormonteirodebarros committed on Nov 13, 2024

Commit

f2e25c6

1 Parent(s): 79fd4ca

fix chart

Browse files

Files changed (4) hide show

app.py +77 -55
data/2024_11_7_total_return_sample.pkl +2 -2
models/new_merged_weather_financial_q_l4q_l24q.keras +0 -0
models/new_merged_weather_financial_q_l4q_l24q_general.keras +0 -0

app.py CHANGED Viewed

@@ -4,73 +4,94 @@ import pickle
 from tensorflow.keras.models import load_model
 from sklearn.preprocessing import MinMaxScaler
 import matplotlib.pyplot as plt
 from io import BytesIO
 from PIL import Image
 import json
-# Load the model and scaler
-model = load_model('models/new_merged_weather_financial_q_l4q_l24q.keras')
 scaler = MinMaxScaler()
-with open('data/PRISM.pkl', 'rb') as f:
     data = pickle.load(f)
-data = data.sort_values(by=['YYYYQ', 'CBSA_Name'])
-features = data.drop(
-    columns=['Date', 'CBSA_Name', 'Id', 'iname', 'type', 'Cluster', 'region', 'division', 'state', 'msa']
-)
 scaler.fit(features)
-def get_actual_values_2015_2023(cbsa_name):
-    """Return the 'tret' values of one CBSA for the years 2015 through 2023.
-
-    Re-reads 'data/2024_11_7_total_return_sample.pkl' on every call, keeps the
-    rows whose 'CBSA_Name' equals ``cbsa_name``, sorts them by 'YYYYQ', and
-    filters on the first four characters of 'YYYYQ' parsed as an integer year
-    (inclusive on both ends). The result is whatever ``.values`` yields for
-    the filtered 'tret' column (presumably a NumPy array, assuming the pickle
-    holds a pandas DataFrame -- TODO confirm).
-    """
-    with open('data/2024_11_7_total_return_sample.pkl', 'rb') as f:
-        cbsa_filtered_data = pickle.load(f)
-    cbsa_data = cbsa_filtered_data[cbsa_filtered_data['CBSA_Name'] == cbsa_name]
-    # Sorting happens before 'YYYYQ' is converted to text, so the order follows
-    # the column's original dtype.
-    cbsa_data = cbsa_data.sort_values(by='YYYYQ')
-    cbsa_data['YYYYQ'] = cbsa_data['YYYYQ'].astype(str).str.strip()
-    actual_values = cbsa_data[(cbsa_data['YYYYQ'].str[:4].astype(int) >= 2015) &
-                              (cbsa_data['YYYYQ'].str[:4].astype(int) <= 2023)]['tret'].values
-    return actual_values
-def predict_and_plot(cbsa_name):
-    cbsa_data = data[data['CBSA_Name'] == cbsa_name]
-    cbsa_data = cbsa_data.sort_values(by='YYYYQ')
-    cbsa_data['YYYYQ'] = cbsa_data['YYYYQ'].astype(str).str.strip()
-    actual_values = get_actual_values_2015_2023(cbsa_name)
-    cbsa_features = cbsa_data.drop(
-        columns=['Date', 'CBSA_Name', 'Id', 'iname', 'type', 'Cluster', 'region', 'division', 'state', 'msa']
-    )
-    cbsa_features = cbsa_features[features.columns]
     cbsa_scaled_features = scaler.transform(cbsa_features)
-    num_steps = 24
-    X_cbsa = []
-    for i in range(len(cbsa_scaled_features) - num_steps + 1):
-        X_cbsa.append(cbsa_scaled_features[i:i + num_steps])
-    X_cbsa = np.array(X_cbsa)
-    future_predictions = model.predict(X_cbsa)
-    future_predictions = np.squeeze(future_predictions)
-    combined_values = np.concatenate([actual_values, future_predictions[len(actual_values):]])
-    combined_values = combined_values[
-                      :len([f"{year} Q{quarter}" for year in range(2015, 2026) for quarter in range(1, 5)])]
-    years_quarters = [f"{year} Q{quarter}" for year in range(2015, 2026) for quarter in range(1, 5)]
-    years_quarters = years_quarters[:len(combined_values)]
-    predicted_json = []
-    for i, value in enumerate(combined_values[len(actual_values):]):
-        if len(actual_values) + i < len(years_quarters):
-            year, quarter = years_quarters[len(actual_values) + i].split()
-            predicted_json.append({"Year": int(year), "Quarter": quarter, "Value": round(value, 3)})
     json_output = {
         "CBSA": cbsa_name,
@@ -78,15 +99,18 @@ def predict_and_plot(cbsa_name):
         "Predicted": predicted_json
     }
-    plt.figure(figsize=(14, 6))
-    plt.plot(years_quarters[:len(actual_values)], actual_values, label="Actual (2015-2023)", marker='o', color='blue')
-    plt.plot(years_quarters, combined_values, label="Predicted (2015-2025)", linestyle='--', color='orange')
-    plt.title(f'Total Return Prediction for {cbsa_name} (2015-2025)')
-    plt.xlabel('Quarter')
     plt.ylabel('Total Return')
-    plt.xticks(rotation=45)
     plt.legend()
     buf = BytesIO()
     plt.savefig(buf, format='png')
@@ -105,17 +129,15 @@ with gr.Blocks() as demo:
         with gr.Column():
             gr.Markdown("""
             **Total Return**: Total return is a measure of the performance of an asset or investment over a specific period, defined as the sum of **Income Return** and **Asset Return**.
             - **Income Return**: The net income generated by a property, calculated as rental income minus operating and capital expenditures.
             - **Asset Return**: The appreciation in the market value of a property from purchase to sale.
             **CBSA (Core-Based Statistical Area)**: Represents a geographical area defined by the Office of Management and Budget, typically used for statistical purposes in the U.S. It consists of counties and county equivalents centered around an urban center with a high degree of social and economic integration.
             """)
     with gr.Row():
-        with gr.Column():
-            output_image = gr.Image(type="numpy", label="Actual vs Predicted Total Return (2015-2025)")
-        with gr.Column():
-            json_display = gr.JSON(label="Prediction JSON Output")
     predict_button.click(fn=predict_and_plot, inputs=cbsa_dropdown, outputs=[output_image, json_display])

 from tensorflow.keras.models import load_model
 from sklearn.preprocessing import MinMaxScaler
 import matplotlib.pyplot as plt
+import matplotlib.ticker as ticker
 from io import BytesIO
 from PIL import Image
 import json
+import pandas as pd
+# Load the Keras model once at import time; predict_and_plot reuses it for every request.
+model = load_model('models/new_merged_weather_financial_q_l4q_l24q_general.keras')
 scaler = MinMaxScaler()
+# NOTE(review): pickle.load can execute arbitrary code from the file. This path is a
+# data file listed in the same commit -- confirm it is never replaced by user-supplied input.
+with open('data/2024_11_7_total_return_sample.pkl', 'rb') as f:
     data = pickle.load(f)
+# Columns excluded from the model inputs. 'tret' is in this list and is read separately
+# by predict_and_plot as the target series.
+columns_to_drop = ['Date', 'CBSA_Name', 'Id', 'iname', 'type', 'Cluster', 'region', 'division', 'state', 'msa', 'tret',
+                   'treturn', 'tot_index', 'inc_index', 'app_index', 'count', 'emv', 'bmv', 'income', 'psales',
+                   'capimp', 'ireturn', 'areturn', 'Latitude_x', 'Longitude_x', 'Latitude_y', 'Longitude_y', 'tmin',
+                   'tmean', 'tmax', 'tdmean', 'ppt', 'vpdmin', 'vpdmax', 'tmin_low', 'tmax_high']
+features = data.drop(columns=columns_to_drop)
+# The scaler is fitted on every row of the dataset, so each CBSA subset is later
+# transformed with the same per-column min/max.
 scaler.fit(features)
+def create_sequences(data_param, target_param, input_steps=12, forecast_steps=4):
+    """Build sliding-window samples and their averaged forward targets.
+
+    For each start offset ``i``, the sample is ``data_param[i:i + input_steps]``
+    and its label is the mean of the ``forecast_steps`` entries of
+    ``target_param`` that immediately follow that window.
+
+    Args:
+        data_param: Sliceable sequence of feature rows.
+        target_param: Sliceable sequence of target values; each slice must
+            provide a ``.mean()`` method.
+        input_steps: Number of consecutive rows in each input window.
+        forecast_steps: Number of following target values averaged per label.
+
+    Returns:
+        A tuple ``(X, y)`` of NumPy arrays built from the collected windows
+        and labels. Both are empty when the input is too short for a window.
+    """
+    X, y = [], []
+    # NOTE(review): the loop bound omits the final offset
+    # (len - input_steps - forecast_steps) whose window and targets would still
+    # fit entirely inside the data -- confirm whether dropping it is intended.
+    for i in range(len(data_param) - input_steps - forecast_steps):
+        X.append(data_param[i:(i + input_steps)])
+        y.append(target_param[(i + input_steps):(i + input_steps + forecast_steps)].mean())
+    return np.array(X), np.array(y)
+def predict_and_plot(cbsa_name):
+    print(f"Processing predictions for CBSA: {cbsa_name}")
+    cbsa_data = data[data['CBSA_Name'] == cbsa_name]
+    cbsa_features = cbsa_data.drop(columns=columns_to_drop)
+    cbsa_features = cbsa_features[features.columns]
+    cbsa_target = cbsa_data['tret']
+    print(f"Feature shape for {cbsa_name} before scaling: {cbsa_features.shape}")
     cbsa_scaled_features = scaler.transform(cbsa_features)
+    print(f"Feature shape for {cbsa_name} after scaling: {cbsa_scaled_features.shape}")
+    X_cbsa, y_cbsa = create_sequences(cbsa_scaled_features, cbsa_target)
+    predictions = model.predict(X_cbsa)
+    predictions = np.squeeze(predictions)
+    shift_steps = 5
+    predictions = np.roll(predictions, -shift_steps)
+    future_quarters = [f"{year}-Q{quarter}" for year in range(2024, 2026) for quarter in range(1, 5)]
+    num_future_steps = len(future_quarters)
+    future_predictions = []
+    current_input = X_cbsa[-1]
+    for _ in range(num_future_steps):
+        next_prediction = model.predict(current_input.reshape(1, -1, X_cbsa.shape[2]))
+        future_predictions.append(next_prediction.squeeze())
+        current_input = np.roll(current_input, -1, axis=0)
+        current_input[-1] = next_prediction.squeeze()
+    predictions = np.concatenate((predictions, np.array(future_predictions)))
+    time_index = (
+        cbsa_data['YYYYQ'].iloc[-len(y_cbsa):]
+        .apply(lambda x: f"{str(x)[:4]}-Q{str(x)[4]}")
+        .sort_values()
+    )
+    future_time_index = pd.Series(future_quarters)
+    full_time_index = pd.concat([time_index, future_time_index]).reset_index(drop=True)
+    actual_index = full_time_index[:len(y_cbsa)]
+    predicted_index = full_time_index[:len(predictions)]
+    print("time_index", predicted_index)
+    predicted_json = [
+        {
+            "year": int(year_quarter.split("-")[0]),
+            "quarter": int(year_quarter.split("-")[1][1]),
+            "total_return": round(float(pred), 4)
+        }
+        for year_quarter, pred in zip(full_time_index[-num_future_steps:], future_predictions)
+    ]
     json_output = {
         "CBSA": cbsa_name,
         "Predicted": predicted_json
     }
+    plt.figure(figsize=(20, 6))
+    plt.plot(actual_index, y_cbsa, label='Actual Total Return', color='green', linestyle='-')
+    plt.plot(predicted_index, predictions, label='Predicted Total Return', color='red', linestyle='-')
+    plt.title(f'Model Predictions vs Actual for {cbsa_name}')
+    plt.xlabel('Time (YYYYQ)')
     plt.ylabel('Total Return')
+    plt.xticks(rotation=90, fontsize=8)
+    plt.gca().tick_params(axis='x', pad=15)
     plt.legend()
+    plt.tight_layout()
     buf = BytesIO()
     plt.savefig(buf, format='png')
         with gr.Column():
             gr.Markdown("""
             **Total Return**: Total return is a measure of the performance of an asset or investment over a specific period, defined as the sum of **Income Return** and **Asset Return**.
             - **Income Return**: The net income generated by a property, calculated as rental income minus operating and capital expenditures.
             - **Asset Return**: The appreciation in the market value of a property from purchase to sale.
             **CBSA (Core-Based Statistical Area)**: Represents a geographical area defined by the Office of Management and Budget, typically used for statistical purposes in the U.S. It consists of counties and county equivalents centered around an urban center with a high degree of social and economic integration.
             """)
     with gr.Row():
+        output_image = gr.Image(type="numpy", label="Actual vs Predicted Total Return (2015-2025)")
+    with gr.Row():
+        json_display = gr.JSON(label="Prediction JSON Output")
     predict_button.click(fn=predict_and_plot, inputs=cbsa_dropdown, outputs=[output_image, json_display])

data/2024_11_7_total_return_sample.pkl CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f0043164c3837ad6e8d742088852919f908a1514ea980e7bd3c8aaee260c7b4e
-size 3905447

 version https://git-lfs.github.com/spec/v1
+oid sha256:db7f0e5cc21cd3204570e0c6816c369707b81cace27746700fbc2548597628ee
+size 4585941

models/new_merged_weather_financial_q_l4q_l24q.keras DELETED Viewed

Binary file (565 kB)

models/new_merged_weather_financial_q_l4q_l24q_general.keras ADDED Viewed

Binary file (387 kB). View file