|
|
import torch.nn as nn |
|
|
import torch |
|
|
from matplotlib import pyplot as plt |
|
|
from torch.utils.data import DataLoader |
|
|
from data_load import load_soil_data |
|
|
from data_processing import process_spectra |
|
|
from data_processing import preprocess_with_downsampling |
|
|
from resnet1d_multitask import ResNet1D_MultiTask,get_model |
|
|
|
|
|
# Down-sampling bin widths swept by the training loop below — presumably the
# number of adjacent spectral points merged per bin; TODO confirm against
# preprocess_with_downsampling().
bin_sizes = [5,10,15,20]




# Preprocessing pipeline names understood by process_spectra().
# SG<k> looks like a Savitzky-Golay derivative of order k and SNV a standard
# normal variate step — NOTE(review): verify against data_processing.py.
methods = ['Abs-SG0', 'Abs-SG0-SNV', 'Abs-SG1', 'Abs-SG1-SNV', 'Abs-SG2', 'Abs-SG2-SNV']




# Soil-property columns predicted by the multitask model; order defines the
# model's output-column order used when reporting per-indicator metrics.
target_columns = ['pH.in.CaCl2', 'pH.in.H2O', 'OC', 'CaCO3', 'N', 'P', 'K', 'CEC']
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Train and evaluate the multitask 1-D ResNet once per spectral bin size.
#
# BUG FIX: the original read `for j in len(bin_sizes):` (an int is not
# iterable) and none of the following statements were indented, so the
# script raised before doing any work.  The whole pipeline below is now the
# loop body, so a model is trained and evaluated for every bin size.
# ---------------------------------------------------------------------------

import os
from datetime import datetime

import numpy as np
from sklearn.metrics import root_mean_squared_error, r2_score

for j, bin_size in enumerate(bin_sizes):

    # ---- Data loading and preprocessing -----------------------------------
    X_train, X_test, y_train, y_test, wavelengths = load_soil_data(
        '../LUCAS.2009_abs.csv', target_columns)

    X_train, X_test = X_train.squeeze(), X_test.squeeze()

    # Apply the Abs-SG2-SNV pipeline (methods[5]) to both splits.
    X_train = process_spectra(X_train, methods[5])
    X_test = process_spectra(X_test, methods[5])

    X_train, X_train_nwavelengths = preprocess_with_downsampling(
        X_train, wavelengths, bin_size)
    X_test, X_test_nwavelengths = preprocess_with_downsampling(
        X_test, wavelengths, bin_size)

    # Conv1d expects (batch, channels, length); spectra are single-channel.
    X_train = X_train.reshape(X_train.shape[0], 1, X_train.shape[1])
    X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])

    print("X_train shape:", X_train.shape)
    print("y_train shape:", y_train.shape)
    print("X_test shape:", X_test.shape)
    print("y_test shape:", y_test.shape)
    assert X_train.shape[0] == y_train.shape[0], "Mismatch in number of samples between X_train and y_train"
    assert X_test.shape[0] == y_test.shape[0], "Mismatch in number of samples between X_test and y_test"

    train_dataset = torch.utils.data.TensorDataset(
        torch.tensor(X_train, dtype=torch.float32),
        torch.tensor(y_train, dtype=torch.float32))
    test_dataset = torch.utils.data.TensorDataset(
        torch.tensor(X_test, dtype=torch.float32),
        torch.tensor(y_test, dtype=torch.float32))

    train_loader = DataLoader(train_dataset, batch_size=256, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=256, shuffle=False)

    # ---- Model, loss, optimizer -------------------------------------------
    model_name = 'C'
    model = get_model(model_name)

    criterion = nn.SmoothL1Loss()
    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-5)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.81)

    num_epochs = 50
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)

    train_losses = []
    test_losses = []
    train_rmse = []
    train_r2 = []

    # ---- Training loop ----------------------------------------------------
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0
        all_preds = []
        all_targets = []

        for batch_x, batch_y in train_loader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)

            outputs = model(batch_x)
            loss = criterion(outputs, batch_y)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

            all_preds.append(outputs.cpu().detach().numpy())
            all_targets.append(batch_y.cpu().detach().numpy())

        train_losses.append(total_loss / len(train_loader))
        scheduler.step()

        # NOTE: these training metrics come from predictions gathered while
        # the weights were still changing within the epoch.
        all_preds = np.concatenate(all_preds, axis=0)
        all_targets = np.concatenate(all_targets, axis=0)
        train_rmse.append(root_mean_squared_error(all_targets, all_preds))
        train_r2.append(r2_score(all_targets, all_preds))

        # Per-epoch evaluation on the held-out split.
        model.eval()
        test_loss = 0
        with torch.no_grad():
            for batch_x, batch_y in test_loader:
                batch_x, batch_y = batch_x.to(device), batch_y.to(device)
                test_outputs = model(batch_x)
                loss = criterion(test_outputs, batch_y)
                test_loss += loss.item()
        test_losses.append(test_loss / len(test_loader))

        print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_losses[-1]:.4f}, Test Loss: {test_losses[-1]:.4f}')

    # ---- Final evaluation --------------------------------------------------
    model.eval()
    total_test_loss = 0
    test_preds = []
    test_targets = []

    # Map output column index -> soil property name for reporting.
    column_mapping = {i: col for i, col in enumerate(target_columns)}

    with torch.no_grad():
        for batch_x, batch_y in test_loader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)
            test_outputs = model(batch_x)
            test_loss = criterion(test_outputs, batch_y)
            total_test_loss += test_loss.item()

            test_preds.append(test_outputs.cpu().numpy())
            test_targets.append(batch_y.cpu().numpy())

    avg_test_loss = total_test_loss / len(test_loader)
    print(f'Average Test Loss: {avg_test_loss:.4f}')

    test_preds = np.concatenate(test_preds, axis=0)
    test_targets = np.concatenate(test_targets, axis=0)

    # ---- Persist per-indicator metrics -------------------------------------
    current_time = datetime.now().strftime("%Y%m%d_%H%M%S")
    results_file = f'../results3/metrics_model{model_name}_{current_time}.txt'
    os.makedirs('../results3', exist_ok=True)

    with open(results_file, 'w') as f:
        f.write(f"Results for Model {model_name} generated at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
        f.write("-" * 50 + "\n")
        for i in range(test_targets.shape[1]):
            target_i = test_targets[:, i]
            pred_i = test_preds[:, i]

            # BUG FIX: root_mean_squared_error already returns the RMSE;
            # the original wrapped it in np.sqrt and reported sqrt(RMSE).
            rmse_i = root_mean_squared_error(target_i, pred_i)
            r2_i = r2_score(target_i, pred_i)

            result_line = f'Indicator {i + 1} ({column_mapping[i]}) - RMSE: {rmse_i:.4f}, R²: {r2_i:.4f}'
            print(result_line)
            # BUG FIX: the per-indicator line was printed but never written,
            # so the results file used to contain only the header.
            f.write(result_line + "\n")

    # ---- Plots -------------------------------------------------------------
    plt.figure(figsize=(10, 6))
    plt.plot(train_losses, label='Training Loss')
    plt.plot(test_losses, label='Test Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title(f'Training and Test Loss over Epochs (Model {model_name})')
    plt.legend()
    plt.grid(True)
    plt.savefig(f'../results3/loss_curves_model{model_name}_{current_time}_{bin_size}.png', dpi=300, bbox_inches='tight')
    plt.show()

    plt.figure(figsize=(12, 4))
    plt.subplot(1, 2, 1)
    plt.plot(train_rmse, label='Mean_Training RMSE')
    plt.plot(train_r2, label='Mean_Training R2')
    plt.xlabel('Epoch')
    plt.ylabel('Metric')
    plt.title(f'Training Metrics (Model {model_name})')
    plt.legend()

    plt.subplot(1, 2, 2)
    plt.plot(test_losses, label='Test Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title(f'Test Loss (Model {model_name})')
    plt.legend()

    plt.tight_layout()
    plt.savefig(f'../results3/training_metrics_model{model_name}_{current_time}_{bin_size}.png', dpi=300, bbox_inches='tight')
    plt.show()

    print(f"\nResults have been saved to: {results_file}")
    print(f"Figures have been saved to: ../results3/")