import os

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import Dense, Dropout, LSTM, SimpleRNN
from tensorflow.keras.models import Sequential


# 1. Load Data
def load_data(file_path):
    """Read the CSV at *file_path* into a DataFrame, echo its head, return it."""
    df = pd.read_csv(file_path)
    print("Dataset Head:")
    print(df.head())
    return df


# 2. Preprocessing
def preprocess_data(df):
    """Scale features/target to [0, 1], split 80/20, and reshape for the RNN.

    Returns (X_train, X_test, y_train, y_test, scaler_X, scaler_y), where the
    X arrays have shape (samples, 1, features) — a single time step per sample
    — and the fitted scalers allow inverse-transforming predictions later.
    """
    features = df[['number_courses', 'time_study']].values
    targets = df['Marks'].values.reshape(-1, 1)

    # Fit one MinMax scaler per side so the target can be un-scaled on its own.
    scaler_X = MinMaxScaler()
    scaler_y = MinMaxScaler()
    features_scaled = scaler_X.fit_transform(features)
    targets_scaled = scaler_y.fit_transform(targets)

    # 80/20 split with a fixed seed for reproducibility.
    X_train, X_test, y_train, y_test = train_test_split(
        features_scaled, targets_scaled, test_size=0.2, random_state=42
    )

    # Insert a singleton time-step axis: (samples, features) -> (samples, 1, features).
    X_train = X_train[:, np.newaxis, :]
    X_test = X_test[:, np.newaxis, :]

    return X_train, X_test, y_train, y_test, scaler_X, scaler_y


# 3. Build Model
def build_model(input_shape):
    """Compile and return a stacked-LSTM regressor for the given input shape.

    Architecture: LSTM(64) -> Dropout(0.2) -> LSTM(32) -> Dense(16) -> Dense(1),
    trained with Adam on MSE and tracking MAE.
    """
    model = Sequential([
        LSTM(64, activation='relu', input_shape=input_shape, return_sequences=True),
        Dropout(0.2),
        LSTM(32, activation='relu'),
        Dense(16, activation='relu'),
        Dense(1),  # single linear output for regression
    ])
    model.compile(optimizer='adam', loss='mse', metrics=['mae'])
    return model
# 4. Main Execution
if __name__ == "__main__":
    # Local import keeps the file's top-level import block untouched;
    # bare exit() is a `site`-module convenience and is absent under -S,
    # so sys.exit is the reliable way to abort with a failure status.
    import sys

    file_path = 'Student_Marks.csv'
    if not os.path.exists(file_path):
        print(f"Error: {file_path} not found.")
        sys.exit(1)

    df = load_data(file_path)
    X_train, X_test, y_train, y_test, scaler_X, scaler_y = preprocess_data(df)
    print(f"X_train shape: {X_train.shape}")
    print(f"y_train shape: {y_train.shape}")

    # Input shape is (time_steps, features) as produced by preprocess_data.
    model = build_model((X_train.shape[1], X_train.shape[2]))
    model.summary()

    # Training
    print("\nStarting training...")
    history = model.fit(
        X_train, y_train,
        epochs=100,
        batch_size=8,
        validation_split=0.1,
        verbose=1
    )

    # Evaluation
    print("\nEvaluating model...")
    loss, mae = model.evaluate(X_test, y_test)
    print(f"Test Loss (MSE): {loss:.4f}")
    print(f"Test MAE: {mae:.4f}")

    # Plot History
    plt.figure(figsize=(10, 5))
    plt.plot(history.history['loss'], label='Train Loss')
    plt.plot(history.history['val_loss'], label='Val Loss')
    plt.title('Model Loss (MSE)')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.savefig('loss_plot.png')
    plt.close()  # release the figure so repeated runs don't accumulate memory
    print("Loss plot saved as 'loss_plot.png'")

    # Predictions — inverse-transform back to the original Marks scale.
    y_pred_scaled = model.predict(X_test)
    y_pred = scaler_y.inverse_transform(y_pred_scaled)
    y_actual = scaler_y.inverse_transform(y_test)

    # Compare first 5
    print("\nSample Predictions:")
    for i in range(5):
        print(f"Actual: {y_actual[i][0]:.2f}, Predicted: {y_pred[i][0]:.2f}")

    # Save Model and Scalers
    # NOTE(review): '.h5' is the legacy Keras format; the native '.keras'
    # format is recommended for new code. Filename kept unchanged so any
    # downstream loader of this artifact keeps working.
    model.save('student_marks_rnn_model.h5')
    joblib.dump(scaler_X, 'scaler_X.pkl')
    joblib.dump(scaler_y, 'scaler_y.pkl')
    print("\nModel saved as 'student_marks_rnn_model.h5'")
    print("Scalers saved as 'scaler_X.pkl' and 'scaler_y.pkl'")