Upload 2 files
Browse files- TrainImprove.py +57 -0
- ml-st1.py +45 -0
TrainImprove.py
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import numpy as np
|
| 3 |
+
from sklearn.model_selection import train_test_split
|
| 4 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 5 |
+
from sklearn.metrics import accuracy_score
|
| 6 |
+
from tensorflow.keras.models import Sequential
|
| 7 |
+
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
|
| 8 |
+
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
|
| 9 |
+
from tensorflow.keras.regularizers import l2
|
| 10 |
+
from joblib import dump
|
| 11 |
+
|
| 12 |
+
# 1. Read Data
data = pd.read_excel('gender.xlsx')

# 2. Preprocess Data
# Map the label column to integers; any value other than 'M' / 'F' becomes NaN.
data['Gender'] = data['Gender'].map({'M': 1, 'F': 0})
# Drop rows that cannot be used for training: a missing name would crash the
# vectorizer and an unmapped (NaN) label would silently poison the loss.
data = data.dropna(subset=['Name', 'Gender'])
if data.empty:
    raise ValueError("gender.xlsx has no rows with both a name and an 'M'/'F' gender label.")
# Force names to text (a purely numeric cell would otherwise break the
# vectorizer) and trim surrounding whitespace so it is not learned as a feature.
names = data['Name'].astype(str).str.strip()

# 3. Convert text data into numerical data using TF-IDF
# Character 1- to 3-grams; float32 halves the memory of the dense matrix below.
tfidf = TfidfVectorizer(analyzer='char', ngram_range=(1, 3), dtype=np.float32)
X = tfidf.fit_transform(names).toarray()  # Convert names into numerical features
y = data['Gender'].astype('int32').values  # Labels: 1 for Male, 0 for Female

# 4. Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 5. Build the Neural Network Model
model = Sequential()
model.add(Dense(128, activation='relu', kernel_regularizer=l2(0.01), input_shape=(X_train.shape[1],)))  # L2 regularization
model.add(BatchNormalization())  # Batch normalization
model.add(Dropout(0.5))  # Dropout to prevent overfitting
model.add(Dense(64, activation='relu', kernel_regularizer=l2(0.01)))  # L2 regularization
model.add(BatchNormalization())  # Batch normalization
model.add(Dropout(0.5))  # Dropout to prevent overfitting
model.add(Dense(1, activation='sigmoid'))  # Output layer with sigmoid for binary classification

# 6. Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# 7. Define callbacks
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)  # Early stopping
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=0.00001)  # Learning rate reduction

# 8. Train the model with epochs and callbacks
model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2,
          callbacks=[early_stopping, reduce_lr])

# 9. Save the model after training
model.save('gender_prediction_model_Improve.h5')

# 10. Save the TF-IDF vectorizer
# The fitted vectorizer must be shipped with the model: inference has to
# reproduce exactly the same feature columns.
dump(tfidf, 'tfidf_vectorizer_Improve.joblib')

# 11. Evaluate the model
# predict() returns one probability per row in a column; flatten it so the
# predictions have the same 1-D shape as y_test.
y_pred = (model.predict(X_test) > 0.5).astype("int32").ravel()  # Convert probabilities to binary output
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy * 100:.2f}%")
|
ml-st1.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import os
|
| 3 |
+
import pandas as pd
|
| 4 |
+
from tensorflow.keras.models import load_model
|
| 5 |
+
from joblib import load
|
| 6 |
+
|
| 7 |
+
# Configure the browser tab (title and icon) and use the centered page layout.
st.set_page_config(
    page_title="Gender Prediction",
    page_icon="🧑🎓",
    layout="centered",
)
|
| 9 |
+
|
| 10 |
+
# Load the pre-trained model
|
| 11 |
+
@st.cache_resource
def load_prediction_model():
    """Load the trained Keras gender classifier, cached by Streamlit.

    Returns:
        The model returned by ``load_model`` for ``gender_prediction_model.h5``.

    If the file is missing, an error is shown in the app and the script run
    is halted with ``st.stop()`` (the same handling ``load_vectorizer`` uses)
    instead of surfacing a raw traceback from ``load_model``.
    """
    # NOTE(review): this name must match the file written by the training run
    # that produced the model — confirm it against the training script.
    model_file = 'gender_prediction_model.h5'
    if not os.path.exists(model_file):
        st.error(f"❌ {model_file} not found. Please ensure the file exists in the current directory.")
        st.stop()
    return load_model(model_file)
|
| 14 |
+
|
| 15 |
+
# Load the TF-IDF vectorizer
|
| 16 |
+
@st.cache_resource
def load_vectorizer():
    """Load the fitted TF-IDF vectorizer from disk, cached by Streamlit.

    Returns:
        The object deserialized by joblib from ``tfidf_vectorizer.joblib``.

    When the file does not exist, an error message is rendered in the app and
    the script run is halted with ``st.stop()``.
    """
    path = 'tfidf_vectorizer.joblib'
    is_missing = not os.path.exists(path)
    if is_missing:
        st.error(f"❌ {path} not found. Please ensure the file exists in the current directory.")
        st.stop()
    vectorizer = load(path)
    return vectorizer
|
| 23 |
+
|
| 24 |
+
# Prediction function
|
| 25 |
+
def predict_gender(name, model, tfidf):
    """Predict 'Male' or 'Female' for a single name.

    Args:
        name: The name to classify. Leading and trailing whitespace is
            removed before vectorizing so stray spaces typed by the user are
            not turned into character n-gram features.
        model: Object whose ``predict(features)`` returns a 2-D array-like
            holding one probability per input row.
        tfidf: Fitted vectorizer whose ``transform([text])`` result exposes
            ``toarray()``.

    Returns:
        'Male' when the predicted probability is strictly greater than 0.5,
        otherwise 'Female'.
    """
    features = tfidf.transform([name.strip()]).toarray()  # Transform name into feature vector
    probability = model.predict(features)[0][0]  # Single row, single output unit
    return 'Male' if probability > 0.5 else 'Female'
|
| 29 |
+
|
| 30 |
+
# Load model and vectorizer
|
| 31 |
+
# Load the cached model and vectorizer once per script run.
model = load_prediction_model()
tfidf = load_vectorizer()

# Streamlit UI
st.title("Gender Prediction from Name")
st.write("Enter a name to predict the gender using the pre-trained model.")

# Input form
# Trim surrounding whitespace so a whitespace-only entry counts as empty and
# gets the warning below instead of a meaningless prediction.
name = st.text_input("Enter a name:").strip()
if st.button("Predict"):
    if name:
        predicted_gender = predict_gender(name, model, tfidf)
        st.success(f"The predicted gender for '{name}' is: **{predicted_gender}**")
    else:
        st.warning("Please enter a valid name.")
|