# students / train_rnn.py
# Uploaded by: d-e-e-k-11
# Commit message: Upload folder using huggingface_hub
# Commit: 5575a8a (verified)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense, LSTM, Dropout
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import joblib
import os
# 1. Load Data
def load_data(file_path) -> pd.DataFrame:
    """Read the CSV at *file_path* into a DataFrame and print a preview.

    Args:
        file_path: Path of the CSV file, passed straight to ``pd.read_csv``.

    Returns:
        The DataFrame produced by ``pd.read_csv``.
    """
    df = pd.read_csv(file_path)
    # Echo the first rows so a wrong file or delimiter is noticed early.
    print("Dataset Head:")
    print(df.head())
    return df
# 2. Preprocessing
def preprocess_data(df, test_size=0.2, random_state=42):
    """Split the dataset, scale it to [0, 1], and shape it for a recurrent model.

    The features are the ``number_courses`` and ``time_study`` columns and the
    target is the ``Marks`` column.

    Args:
        df: DataFrame containing the three columns named above.
        test_size: Fraction of rows held out for testing (forwarded to
            ``train_test_split``).
        random_state: Seed for the shuffle performed by ``train_test_split``.

    Returns:
        A tuple ``(X_train, X_test, y_train, y_test, scaler_X, scaler_y)``.
        The ``X_*`` arrays have shape ``(samples, 1, 2)``, the ``y_*`` arrays
        have shape ``(samples, 1)``, and the two scalers are the fitted
        ``MinMaxScaler`` objects for the features and the target.
    """
    X = df[['number_courses', 'time_study']].values
    y = df['Marks'].values.reshape(-1, 1)

    # Split BEFORE scaling: fitting the scalers on the full dataset would leak
    # the test rows' min/max into training and make the test metrics optimistic.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # Fit on the training rows only; the test rows are merely transformed, so
    # their scaled values may fall slightly outside [0, 1].
    scaler_X = MinMaxScaler()
    scaler_y = MinMaxScaler()
    X_train = scaler_X.fit_transform(X_train)
    X_test = scaler_X.transform(X_test)
    y_train = scaler_y.fit_transform(y_train)
    y_test = scaler_y.transform(y_test)

    # Reshape for RNN: (samples, time_steps, features) with time_steps = 1.
    X_train = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
    X_test = X_test.reshape((X_test.shape[0], 1, X_test.shape[1]))

    return X_train, X_test, y_train, y_test, scaler_X, scaler_y
# 3. Build Model
def build_model(input_shape):
    """Build and compile the stacked-LSTM regression model.

    Args:
        input_shape: ``(time_steps, features)`` tuple describing one sample,
            without the batch dimension.

    Returns:
        A compiled ``Sequential`` model (Adam optimizer, MSE loss, MAE metric)
        with a single linear output unit.
    """
    model = Sequential([
        # Declare the input explicitly instead of passing `input_shape=` to the
        # first layer; newer Keras releases warn about the layer keyword form.
        tf.keras.Input(shape=input_shape),
        # return_sequences=True so the second LSTM receives a 3-D sequence.
        LSTM(64, activation='relu', return_sequences=True),
        Dropout(0.2),
        LSTM(32, activation='relu'),
        Dense(16, activation='relu'),
        Dense(1),  # Output for regression
    ])
    model.compile(optimizer='adam', loss='mse', metrics=['mae'])
    return model
# 4. Main Execution
def main():
    """Train, evaluate, and save the student-marks model.

    Reads ``Student_Marks.csv`` from the working directory, trains the model,
    prints test metrics and a few sample predictions, and writes
    ``loss_plot.png``, ``student_marks_rnn_model.h5``, ``scaler_X.pkl`` and
    ``scaler_y.pkl`` to the working directory.

    Raises:
        SystemExit: With status 1 when the CSV file does not exist.
    """
    file_path = 'Student_Marks.csv'
    if not os.path.exists(file_path):
        print(f"Error: {file_path} not found.")
        # Signal failure to the caller; the bare `exit()` helper comes from the
        # `site` module (not always present) and reported success (status 0).
        raise SystemExit(1)

    df = load_data(file_path)
    X_train, X_test, y_train, y_test, scaler_X, scaler_y = preprocess_data(df)
    print(f"X_train shape: {X_train.shape}")
    print(f"y_train shape: {y_train.shape}")

    model = build_model((X_train.shape[1], X_train.shape[2]))
    model.summary()

    # Training
    print("\nStarting training...")
    history = model.fit(
        X_train, y_train,
        epochs=100,
        batch_size=8,
        validation_split=0.1,
        verbose=1,
    )

    # Evaluation
    print("\nEvaluating model...")
    loss, mae = model.evaluate(X_test, y_test)
    print(f"Test Loss (MSE): {loss:.4f}")
    print(f"Test MAE: {mae:.4f}")

    # Plot History
    fig = plt.figure(figsize=(10, 5))
    plt.plot(history.history['loss'], label='Train Loss')
    plt.plot(history.history['val_loss'], label='Val Loss')
    plt.title('Model Loss (MSE)')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.savefig('loss_plot.png')
    # Release the figure once it is on disk so it does not linger in memory.
    plt.close(fig)
    print("Loss plot saved as 'loss_plot.png'")

    # Predictions, mapped back from the scaled range to the original mark units.
    y_pred_scaled = model.predict(X_test)
    y_pred = scaler_y.inverse_transform(y_pred_scaled)
    y_actual = scaler_y.inverse_transform(y_test)

    # Compare up to the first 5; slicing avoids an IndexError when the test
    # split holds fewer than five rows.
    print("\nSample Predictions:")
    for actual, predicted in zip(y_actual[:5], y_pred[:5]):
        print(f"Actual: {actual[0]:.2f}, Predicted: {predicted[0]:.2f}")

    # Save Model and Scalers
    # NOTE(review): '.h5' is the legacy HDF5 format; the filename is kept
    # because other code may load it by this name — confirm before changing.
    model.save('student_marks_rnn_model.h5')
    joblib.dump(scaler_X, 'scaler_X.pkl')
    joblib.dump(scaler_y, 'scaler_y.pkl')
    print("\nModel saved as 'student_marks_rnn_model.h5'")
    print("Scalers saved as 'scaler_X.pkl' and 'scaler_y.pkl'")


if __name__ == "__main__":
    main()