| import pandas as pd |
| import numpy as np |
| import xgboost as xgb |
| import matplotlib.pyplot as plt |
| from scipy.interpolate import griddata |
| import geopandas as gpd |
| from config import (base_path_data , file_path_LUCAS_LFU_Lfl_00to23_Bavaria_OC , MAX_OC , |
| TIME_BEGINNING , TIME_END , INFERENCE_TIME) |
| import concurrent.futures |
| from concurrent.futures import ThreadPoolExecutor, as_completed |
| from dataloader.dataloaderMapping import MultiRasterDatasetMapping |
| from sklearn.utils import shuffle |
| import copy |
| from torch.utils.data import DataLoader, Subset |
| import torch |
| import tqdm |
| |
|
|
| |
|
|
|
|
|
|
| |
|
|
|
|
def create_prediction_visualizations(year, coordinates, predictions, save_path):
    """
    Create and save three separate map visualizations of predictions in Bavaria plus a triptych,
    with timestamps in filenames.

    Parameters:
        year (int or str): Year tag prepended to every output filename
        coordinates (numpy.array): Array of coordinates (longitude, latitude), shape (n, 2)
        predictions (numpy.array): Array of prediction values, shape (n,)
        save_path (str): Directory path where the images should be saved

    Side effects:
        Writes three PNGs into ``save_path/individual`` and one triptych PNG into
        ``save_path``. Fetches the Bavaria boundary GeoJSON over the network.
    """
    import os
    import numpy as np
    import matplotlib.pyplot as plt
    from scipy.interpolate import griddata
    import geopandas as gpd
    from datetime import datetime

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    os.makedirs(save_path, exist_ok=True)
    individual_path = os.path.join(save_path, 'individual')
    os.makedirs(individual_path, exist_ok=True)

    # Bavaria state boundary for the map outline.
    # NOTE(review): fetched over the network on every call — consider caching locally.
    bavaria = gpd.read_file('https://raw.githubusercontent.com/isellsoap/deutschlandGeoJSON/main/2_bundeslaender/4_niedrig.geo.json')
    bavaria = bavaria[bavaria['name'] == 'Bayern']

    # Regular 300x300 grid spanning the data extent, for the interpolated surface.
    grid_x = np.linspace(coordinates[:, 0].min(), coordinates[:, 0].max(), 300)
    grid_y = np.linspace(coordinates[:, 1].min(), coordinates[:, 1].max(), 300)
    grid_x, grid_y = np.meshgrid(grid_x, grid_y)
    grid_z = griddata(coordinates, predictions, (grid_x, grid_y), method='linear')

    plot_params = {
        'figsize': (12, 10),
        'dpi': 300
    }

    def set_common_elements(ax, title):
        # Shared axis decoration: state outline, title, labels, grid.
        bavaria.boundary.plot(ax=ax, color='black', linewidth=1)
        ax.set_title(title, fontsize=12, pad=20)
        ax.set_xlabel('Longitude')
        ax.set_ylabel('Latitude')
        ax.grid(True)

    def get_filename(base_name):
        # Filename encodes the run configuration plus a timestamp for uniqueness.
        return f"{base_name}_MAX_OC_{str(MAX_OC)}_Beginning_{TIME_BEGINNING}_End_{TIME_END}__InferenceTime{INFERENCE_TIME}_{timestamp}.png"

    # One draw function per panel type; reused for the individual figures and the triptych.
    def draw_interpolated(ax):
        mappable = ax.contourf(grid_x, grid_y, grid_z,
                               levels=50,
                               cmap='viridis',
                               alpha=0.8)
        set_common_elements(ax, 'Interpolated Predicted Values')
        plt.colorbar(mappable, ax=ax, label='Predicted Values')

    def draw_scatter(ax):
        mappable = ax.scatter(coordinates[:, 0], coordinates[:, 1],
                              c=predictions,
                              cmap='viridis',
                              alpha=0.6,
                              s=50)
        set_common_elements(ax, 'Scatter Plot of Predicted Values')
        plt.colorbar(mappable, ax=ax, label='Predicted Values')

    def draw_discrete(ax):
        mappable = ax.scatter(coordinates[:, 0], coordinates[:, 1],
                              c=predictions,
                              cmap='viridis',
                              alpha=1.0,
                              s=20)
        set_common_elements(ax, 'Discrete Points of Predicted Values')
        plt.colorbar(mappable, ax=ax, label='Predicted Values')

    panels = [
        (draw_interpolated, f'{year}_bavaria_interpolated'),
        (draw_scatter, f'{year}_bavaria_scatter'),
        (draw_discrete, f'{year}_bavaria_discrete'),
    ]

    # Individual figures, one per panel type.
    for draw, base_name in panels:
        fig, ax = plt.subplots(**plot_params)
        draw(ax)
        plt.savefig(os.path.join(individual_path, get_filename(base_name)),
                    bbox_inches='tight')
        plt.close()

    # Triptych: all three panels side by side on one wide figure.
    plt.figure(figsize=(30, 10))
    for position, (draw, _) in enumerate(panels, start=131):
        draw(plt.subplot(position))

    plt.tight_layout()
    plt.savefig(os.path.join(save_path, get_filename(f'{year}_bavaria_triptych')),
                dpi=300,
                bbox_inches='tight')
    plt.close()
|
|
| |
| |
|
|
|
|
|
|
|
|
| |
def process_batch(df_chunk, model_copy, bands_yearly, batch_size):
    """
    Run inference for one dataframe chunk and return its coordinates and predictions.

    Parameters:
        df_chunk (pandas.DataFrame): Subset of sample points to predict on.
        model_copy: Fitted model exposing ``predict``; a per-thread copy so threads
            don't share state.
        bands_yearly: Raster band configuration handed to ``MultiRasterDatasetMapping``.
        batch_size (int): DataLoader batch size.

    Returns:
        tuple: (coordinates ndarray of shape (n, 2), predictions ndarray of shape (n,))
    """
    loader = DataLoader(
        MultiRasterDatasetMapping(bands_yearly, df_chunk),
        batch_size=batch_size,
        shuffle=True,
    )

    coord_blocks = []
    feature_blocks = []

    for lons, lats, band_features in loader:
        coord_blocks.append(np.column_stack((lons.numpy(), lats.numpy())))
        # Concatenate all band tensors per sample, then flatten to one feature row each.
        stacked = np.concatenate([tensor.numpy() for tensor in band_features.values()], axis=1)
        feature_blocks.append(stacked.reshape(stacked.shape[0], -1))

    features = np.concatenate(feature_blocks, axis=0)
    coords = np.vstack(coord_blocks)

    return coords, model_copy.predict(features)
|
|
def parallel_predict(df_full, model, bands_yearly, batch_size=4, num_threads=4, worker=None):
    """
    Predict over ``df_full`` by splitting it into chunks processed in parallel threads.

    Parameters:
        df_full (pandas.DataFrame): Full set of sample points to predict on.
        model: Fitted model exposing ``predict``; deep-copied per chunk so worker
            threads never share one model instance.
        bands_yearly: Raster band configuration forwarded to each worker.
        batch_size (int): DataLoader batch size used inside each worker.
        num_threads (int): Maximum number of worker threads (and chunks).
        worker (callable, optional): ``(df_chunk, model, bands_yearly, batch_size)
            -> (coords, preds)``. Defaults to ``process_batch``; injectable for testing.

    Returns:
        tuple: (coordinates ndarray of shape (n, 2), predictions ndarray of shape (n,)),
        in the (shuffled) chunk order.
    """
    if worker is None:
        worker = process_batch

    # Shuffle rows so each chunk gets a representative spatial mix.
    df_shuffled = df_full.sample(frac=1, random_state=42).reset_index(drop=True)

    if df_shuffled.empty:
        return np.empty((0, 2)), np.empty((0,))

    # Ceil division: at most num_threads chunks and never a zero step.
    # (The old ``len // num_threads`` raised ValueError when len(df) < num_threads.)
    chunk_size = max(1, -(-len(df_shuffled) // num_threads))
    df_chunks = [df_shuffled[i:i + chunk_size] for i in range(0, len(df_shuffled), chunk_size)]

    all_coordinates = []
    all_predictions = []

    with concurrent.futures.ThreadPoolExecutor(max_workers=num_threads) as executor:
        futures = [
            executor.submit(
                worker,
                chunk,
                copy.deepcopy(model),
                bands_yearly,
                batch_size
            ) for chunk in df_chunks
        ]
        # Collect in submission order so coordinates and predictions stay aligned.
        for future in futures:
            coordinates, predictions = future.result()
            all_coordinates.append(coordinates)
            all_predictions.append(predictions)

    return np.vstack(all_coordinates), np.concatenate(all_predictions)