"""Train a small neural network that predicts gender from a name.

The script reads ``gender.xlsx`` (columns ``Name`` and ``Gender``), turns each
name into TF-IDF character n-gram features, trains a dense Keras network on
them, saves the trained model and the fitted vectorizer to disk, and prints
the accuracy measured on a held-out test split.
"""

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.models import save_model
from joblib import dump


# --- Load and clean the dataset ---------------------------------------------
data = pd.read_excel('gender.xlsx')

# Encode the label: 'M' -> 1, 'F' -> 0. Any other value (including blanks)
# becomes NaN here and is removed below.
data['Gender'] = data['Gender'].map({'M': 1, 'F': 0})

# Rows with a missing name or an unmapped gender would either crash the
# vectorizer (it calls .lower() on every document) or feed NaN labels into
# the loss, so drop them before doing anything else.
rows_before = len(data)
data = data.dropna(subset=['Name', 'Gender']).copy()
rows_dropped = rows_before - len(data)
if rows_dropped:
    print(f"Dropped {rows_dropped} rows with a missing name or unrecognised gender")
if data.empty:
    raise ValueError("gender.xlsx contains no usable rows after cleaning")

# Non-string cells (e.g. numbers typed into the Name column) must be text for
# the character analyzer; labels are whole numbers once NaN rows are gone.
data['Name'] = data['Name'].astype(str)
data['Gender'] = data['Gender'].astype(int)


# --- Feature extraction -----------------------------------------------------
# Character-level 1- to 3-grams of each name.
tfidf = TfidfVectorizer(analyzer='char', ngram_range=(1, 3))

# Cast while still sparse so the dense matrix handed to Keras is float32
# rather than float64 (half the memory).
X = tfidf.fit_transform(data['Name']).astype(np.float32).toarray()
y = data['Gender'].values


# --- Train / test split -----------------------------------------------------
# Fixed random_state keeps the 80/20 split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


# --- Model definition -------------------------------------------------------
# Two hidden ReLU layers with dropout, and a single sigmoid output unit for
# the binary label.
model = Sequential()
model.add(Dense(128, activation='relu', input_shape=(X_train.shape[1],)))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


# --- Training ---------------------------------------------------------------
# validation_split holds out 20% of the training data for per-epoch validation.
model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2)


# --- Persist artifacts ------------------------------------------------------
# The fitted vectorizer is saved alongside the model: inference needs the same
# vocabulary and IDF weights that the network was trained on.
model.save('gender_prediction_model.h5')
dump(tfidf, 'tfidf_vectorizer.joblib')


# --- Evaluation -------------------------------------------------------------
# predict() returns one column of probabilities with shape (n, 1); threshold
# at 0.5 and flatten so the predictions line up with the 1-D y_test.
y_pred = (model.predict(X_test) > 0.5).astype("int32").ravel()
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy * 100:.2f}%")