| import pandas as pd
|
| from sklearn.model_selection import train_test_split
|
| from sklearn.linear_model import LogisticRegression
|
| from sklearn.metrics import accuracy_score, classification_report
|
| import joblib
|
| import os
|
|
|
|
|
|
|
# Excel workbook read by train_symptom_model(); resolved relative to the
# current working directory.
DATASET_PATH = 'symptoms_dataset.xlsx'

# File the fitted model is written to with joblib.dump().
MODEL_SAVE_PATH = 'symptom_model.joblib'
|
|
|
def train_symptom_model(dataset_path=None, model_save_path=None) -> None:
    """Train a logistic-regression symptom classifier and save it to disk.

    Reads an Excel sheet that must contain the feature columns ``tremor``,
    ``stiffness`` and ``walking_issue`` plus the target column ``label``,
    holds out 20% of the rows as a stratified test set, fits a
    class-weight-balanced ``LogisticRegression``, prints the test accuracy
    and classification report, and dumps the fitted model with ``joblib``.

    Problems with the input (missing file, missing columns, too little
    usable data) are reported on stdout and end the run early; nothing is
    saved in that case.

    Args:
        dataset_path: Excel file to read. Defaults to ``DATASET_PATH``.
        model_save_path: Destination file for the serialized model.
            Defaults to ``MODEL_SAVE_PATH``.

    Returns:
        None.
    """
    # Resolve defaults at call time so the module-level constants remain the
    # single source of truth and can be overridden per call.
    if dataset_path is None:
        dataset_path = DATASET_PATH
    if model_save_path is None:
        model_save_path = MODEL_SAVE_PATH

    # Keep the try body to the one call expected to raise FileNotFoundError.
    try:
        df = pd.read_excel(dataset_path)
    except FileNotFoundError:
        print(f"Error: The file '{dataset_path}' was not found. Please create it and add your data.")
        return
    print(f"Dataset '{dataset_path}' loaded successfully. Shape: {df.shape}")

    features = ['tremor', 'stiffness', 'walking_issue']
    target = 'label'
    required_columns = features + [target]

    missing_columns = [col for col in required_columns if col not in df.columns]
    if missing_columns:
        print("Error: Your Excel file is missing one or more required columns.")
        print(f"Missing columns: {missing_columns}")
        print(f"Please ensure it contains: {required_columns}")
        return

    # Blank spreadsheet cells are loaded as NaN, which LogisticRegression
    # rejects; drop incomplete rows up front and say how many were removed.
    clean_df = df.dropna(subset=required_columns)
    dropped_rows = len(df) - len(clean_df)
    if dropped_rows:
        print(f"Warning: Dropped {dropped_rows} row(s) with missing values in {required_columns}.")

    X = clean_df[features]
    y = clean_df[target]

    # A classifier needs at least two distinct labels to learn from; this
    # also covers the case where no usable rows remain.
    if y.nunique() < 2:
        print(f"Error: Column '{target}' must contain at least two distinct classes to train a model.")
        return

    # The stratified split raises ValueError when a class has too few samples
    # to appear in both partitions; report that instead of crashing.
    try:
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42, stratify=y
        )
    except ValueError as exc:
        print(f"Error: Could not create a stratified train/test split: {exc}")
        return
    print(f"Data split into {len(X_train)} training samples and {len(X_test)} testing samples.")

    print("\nTraining Logistic Regression model...")
    # class_weight='balanced' reweights classes inversely to their frequency.
    model = LogisticRegression(random_state=42, class_weight='balanced')
    model.fit(X_train, y_train)
    print("Model training complete.")

    print("\nEvaluating model performance...")
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Model Accuracy on Test Set: {accuracy:.4f}")
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred))

    joblib.dump(model, model_save_path)
    print(f"\nSymptom model successfully saved to: {model_save_path}")
|
|
|
if __name__ == '__main__':
    # Run the training pipeline only when executed as a script, not on import.
    train_symptom_model()