import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense, LSTM, Dropout
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import joblib
import os
# 1. Load Data
def load_data(file_path):
    """Read the CSV at *file_path* into a DataFrame and preview it.

    Prints the first rows as a quick sanity check, then returns the frame.
    """
    frame = pd.read_csv(file_path)
    print("Dataset Head:")
    print(frame.head())
    return frame
# 2. Preprocessing
def preprocess_data(df):
    """Scale features/target and produce RNN-ready train/test splits.

    Fixes a data-leakage bug in the original: the MinMax scalers are now
    fitted on the training split only and merely applied to the test split,
    so test-set statistics never influence the transform.

    Args:
        df: DataFrame with 'number_courses', 'time_study' and 'Marks' columns.

    Returns:
        X_train, X_test: float arrays shaped (samples, 1, 2) — one time step.
        y_train, y_test: scaled targets shaped (samples, 1).
        scaler_X, scaler_y: fitted MinMaxScaler instances for inverse transforms.
    """
    X = df[['number_courses', 'time_study']].values
    y = df['Marks'].values.reshape(-1, 1)

    # Split BEFORE scaling so the scalers only ever see training data.
    # random_state=42 keeps the split membership identical to the old code.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    scaler_X = MinMaxScaler()
    scaler_y = MinMaxScaler()
    X_train = scaler_X.fit_transform(X_train)
    X_test = scaler_X.transform(X_test)
    y_train = scaler_y.fit_transform(y_train)
    y_test = scaler_y.transform(y_test)

    # Reshape for the recurrent layers: (samples, time_steps=1, features).
    X_train = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
    X_test = X_test.reshape((X_test.shape[0], 1, X_test.shape[1]))
    return X_train, X_test, y_train, y_test, scaler_X, scaler_y
# 3. Build Model
def build_model(input_shape):
    """Construct and compile the stacked-LSTM regression network.

    Args:
        input_shape: (time_steps, features) tuple fed to the first LSTM.

    Returns:
        A compiled Keras Sequential model (Adam optimizer, MSE loss,
        MAE metric) with a single linear output unit for regression.
    """
    model = Sequential()
    model.add(LSTM(64, activation='relu', input_shape=input_shape,
                   return_sequences=True))
    model.add(Dropout(0.2))
    model.add(LSTM(32, activation='relu'))
    model.add(Dense(16, activation='relu'))
    model.add(Dense(1))  # single linear unit: regression output
    model.compile(optimizer='adam', loss='mse', metrics=['mae'])
    return model
# 4. Main Execution
def main():
    """End-to-end pipeline: load, preprocess, train, evaluate, persist."""
    file_path = 'Student_Marks.csv'
    if not os.path.exists(file_path):
        print(f"Error: {file_path} not found.")
        # exit() is a site.py convenience that may be absent (python -E,
        # frozen apps); raising SystemExit is the robust equivalent.
        raise SystemExit(1)

    df = load_data(file_path)
    X_train, X_test, y_train, y_test, scaler_X, scaler_y = preprocess_data(df)
    print(f"X_train shape: {X_train.shape}")
    print(f"y_train shape: {y_train.shape}")

    model = build_model((X_train.shape[1], X_train.shape[2]))
    model.summary()

    # Training
    print("\nStarting training...")
    history = model.fit(
        X_train, y_train,
        epochs=100,
        batch_size=8,
        validation_split=0.1,
        verbose=1
    )

    # Evaluation on the held-out test split.
    print("\nEvaluating model...")
    loss, mae = model.evaluate(X_test, y_test)
    print(f"Test Loss (MSE): {loss:.4f}")
    print(f"Test MAE: {mae:.4f}")

    # Plot training history and save it to disk.
    plt.figure(figsize=(10, 5))
    plt.plot(history.history['loss'], label='Train Loss')
    plt.plot(history.history['val_loss'], label='Val Loss')
    plt.title('Model Loss (MSE)')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.savefig('loss_plot.png')
    plt.close()  # release the figure so repeated runs don't leak figures
    print("Loss plot saved as 'loss_plot.png'")

    # Predictions, mapped back into the original Marks scale.
    y_pred_scaled = model.predict(X_test)
    y_pred = scaler_y.inverse_transform(y_pred_scaled)
    y_actual = scaler_y.inverse_transform(y_test)

    # Compare the first few samples side by side.
    print("\nSample Predictions:")
    for i in range(5):
        print(f"Actual: {y_actual[i][0]:.2f}, Predicted: {y_pred[i][0]:.2f}")

    # Persist the model and both scalers for later inference.
    model.save('student_marks_rnn_model.h5')
    joblib.dump(scaler_X, 'scaler_X.pkl')
    joblib.dump(scaler_y, 'scaler_y.pkl')
    print("\nModel saved as 'student_marks_rnn_model.h5'")
    print("Scalers saved as 'scaler_X.pkl' and 'scaler_y.pkl'")


if __name__ == "__main__":
    main()