Spaces:
No application file
No application file
| # scripts/train.py | |
| from src.preprocessing import load_and_preprocess_data | |
| from src.feature_engineering import tokenize_texts | |
| from src.model import train_model, evaluate_model | |
| from src.utils import plot_confusion_matrix | |
def main():
    """Run the training pipeline from raw data to evaluation output.

    Steps, in order:
      1. Load the train/test frames via ``load_and_preprocess_data`` with
         ``sample=True`` (presumably a reduced subset -- confirm in
         ``src.preprocessing``).
      2. Tokenize the ``"text"`` column of each frame.
      3. Train the model on the train encodings and ``"category"`` labels;
         the test encodings and labels are handed to ``train_model`` as
         well (presumably for validation -- confirm in ``src.model``).
      4. Evaluate on the test split, print the classification report and
         plot the confusion matrix labelled with the keys of the label map
         returned by ``train_model``.
    """
    # Data loading / preprocessing.
    train_frame, test_frame = load_and_preprocess_data(sample=True)

    # Tokenization: train split first, then test split.
    train_tokens, test_tokens = [
        tokenize_texts(frame["text"]) for frame in (train_frame, test_frame)
    ]

    # Training.
    classifier, label_map = train_model(
        train_tokens,
        train_frame["category"],
        test_tokens,
        test_frame["category"],
    )

    # Evaluation and reporting.
    report, confusion = evaluate_model(
        classifier, test_tokens, test_frame["category"]
    )
    print("Classification Report:\n", report)

    class_names = list(label_map.keys())
    plot_confusion_matrix(confusion, class_names)
# Script entry point: run the pipeline only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()