Spaces:
Running
Running
| """Train and evaluate the TF-IDF + Logistic Regression baseline model.""" | |
| import sys | |
| from pathlib import Path | |
| import yaml | |
| from loguru import logger | |
| sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) | |
| from src.data.dataset import load_splits | |
| from src.data.preprocessing import set_global_seeds | |
| from src.models.baseline import train, evaluate | |
def main() -> None:
    """Run the baseline training and evaluation pipeline.

    Steps, in order:

    1. Create ``logs/`` if it does not exist and attach a loguru file sink
       (``logs/train_baseline.log``, rotated at 10 MB).
    2. Load the YAML configuration from ``config/config.yaml``.
    3. Call ``set_global_seeds`` with ``cfg["data"]["seed"]``.
    4. Load the train/validation/test splits from
       ``cfg["paths"]["data_processed"]``.
    5. Call ``train`` with the train and validation splits, the full config,
       and ``cfg["paths"]["models_baseline"]`` as ``save_dir``.
    6. Call ``evaluate`` with the returned pipeline, the test split, and
       ``cfg["paths"]["results"]`` as ``results_dir``.
    7. Log the weighted-average F1 score taken from the evaluation report.

    The literal paths ``logs`` and ``config/config.yaml`` are relative, so
    they resolve against the current working directory.

    Raises:
        FileNotFoundError: If ``config/config.yaml`` cannot be found.
        KeyError: If the config lacks one of the keys read here, or the
            evaluation report has no ``"weighted avg"`` / ``"f1-score"``
            entry.
    """
    Path("logs").mkdir(exist_ok=True)
    logger.add("logs/train_baseline.log", rotation="10 MB")

    # Decode the config explicitly as UTF-8. Without ``encoding=``, open()
    # falls back to the platform locale encoding, which can mis-decode
    # non-ASCII values in the YAML file (for example on Windows).
    with open("config/config.yaml", encoding="utf-8") as f:
        cfg = yaml.safe_load(f)

    # Seed before loading data or training so every later step sees it.
    set_global_seeds(cfg["data"]["seed"])

    processed_dir = cfg["paths"]["data_processed"]
    train_df, val_df, test_df = load_splits(processed_dir)

    pipeline = train(
        train_df=train_df,
        val_df=val_df,
        cfg=cfg,
        save_dir=cfg["paths"]["models_baseline"],
    )

    report = evaluate(
        pipeline=pipeline,
        test_df=test_df,
        results_dir=cfg["paths"]["results"],
    )

    # NOTE(review): the report is indexed like scikit-learn's
    # classification_report(output_dict=True) -- confirm against
    # src.models.baseline.evaluate.
    weighted_f1 = report["weighted avg"]["f1-score"]
    logger.info(f"Baseline complete. Test weighted F1: {weighted_f1:.4f}")
# Run the pipeline only when this file is executed as a script, so that
# importing the module has no training side effects.
if __name__ == "__main__":
    main()