File size: 3,476 Bytes
5575a8a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense, LSTM, Dropout
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import joblib
import os

# 1. Load Data
def load_data(file_path):
    """Read the CSV at *file_path* into a DataFrame and preview its first rows."""
    frame = pd.read_csv(file_path)
    print("Dataset Head:")
    print(frame.head())
    return frame

# 2. Preprocessing
def preprocess_data(df):
    """Scale features and target to [0, 1], split 80/20, and add a time axis.

    Returns (X_train, X_test, y_train, y_test, scaler_X, scaler_y); the X
    arrays are shaped (samples, 1, 2) for a single-time-step recurrent model.
    """
    features = df[['number_courses', 'time_study']].values
    target = df['Marks'].values.reshape(-1, 1)

    # Fit one scaler per side so predictions can be inverse-transformed later.
    scaler_X = MinMaxScaler()
    scaler_y = MinMaxScaler()
    features_scaled = scaler_X.fit_transform(features)
    target_scaled = scaler_y.fit_transform(target)

    # Deterministic 80/20 split.
    X_train, X_test, y_train, y_test = train_test_split(
        features_scaled, target_scaled, test_size=0.2, random_state=42
    )

    # RNN layers expect (samples, time_steps, features); time_steps == 1 here.
    X_train = X_train.reshape((-1, 1, X_train.shape[1]))
    X_test = X_test.reshape((-1, 1, X_test.shape[1]))

    return X_train, X_test, y_train, y_test, scaler_X, scaler_y

# 3. Build Model
def build_model(input_shape):
    """Build and compile a stacked-LSTM regressor for the given input shape.

    Two LSTM layers with dropout feed a small dense head ending in a single
    linear unit; compiled with Adam on MSE, tracking MAE.
    """
    model = Sequential()
    model.add(LSTM(64, activation='relu', input_shape=input_shape, return_sequences=True))
    model.add(Dropout(0.2))
    model.add(LSTM(32, activation='relu'))
    model.add(Dense(16, activation='relu'))
    model.add(Dense(1))  # single linear output for regression

    model.compile(optimizer='adam', loss='mse', metrics=['mae'])
    return model

# 4. Main Execution
# 4. Main Execution
if __name__ == "__main__":
    file_path = 'Student_Marks.csv'
    if not os.path.exists(file_path):
        print(f"Error: {file_path} not found.")
        # `exit()` is injected by the `site` module and is not guaranteed to
        # exist (e.g. under `python -S`); SystemExit is always available.
        raise SystemExit(1)

    # Load and prepare the data (scaled, split, reshaped for the LSTM).
    df = load_data(file_path)
    X_train, X_test, y_train, y_test, scaler_X, scaler_y = preprocess_data(df)

    print(f"X_train shape: {X_train.shape}")
    print(f"y_train shape: {y_train.shape}")

    # input_shape is (time_steps, features) — batch dimension is implicit.
    model = build_model((X_train.shape[1], X_train.shape[2]))
    model.summary()

    # Training
    print("\nStarting training...")
    history = model.fit(
        X_train, y_train,
        epochs=100,
        batch_size=8,
        validation_split=0.1,
        verbose=1
    )

    # Evaluation on the held-out test split (metrics are in scaled units).
    print("\nEvaluating model...")
    loss, mae = model.evaluate(X_test, y_test)
    print(f"Test Loss (MSE): {loss:.4f}")
    print(f"Test MAE: {mae:.4f}")

    # Plot training/validation loss curves to a file (no interactive display).
    plt.figure(figsize=(10, 5))
    plt.plot(history.history['loss'], label='Train Loss')
    plt.plot(history.history['val_loss'], label='Val Loss')
    plt.title('Model Loss (MSE)')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.savefig('loss_plot.png')
    print("Loss plot saved as 'loss_plot.png'")

    # Predictions — inverse-transform back to the original Marks scale.
    y_pred_scaled = model.predict(X_test)
    y_pred = scaler_y.inverse_transform(y_pred_scaled)
    y_actual = scaler_y.inverse_transform(y_test)

    # Compare up to the first 5 samples; guard against a test split < 5 rows.
    print("\nSample Predictions:")
    for i in range(min(5, len(y_pred))):
        print(f"Actual: {y_actual[i][0]:.2f}, Predicted: {y_pred[i][0]:.2f}")

    # Persist the model and both scalers so inference can reuse the scaling.
    model.save('student_marks_rnn_model.h5')
    joblib.dump(scaler_X, 'scaler_X.pkl')
    joblib.dump(scaler_y, 'scaler_y.pkl')
    print("\nModel saved as 'student_marks_rnn_model.h5'")
    print("Scalers saved as 'scaler_X.pkl' and 'scaler_y.pkl'")