bookrec / train-model.py
Shekharmeena's picture
Rename train-model-py.py to train-model.py
1865fc0 verified
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Script to train and save the book recommendation model.
"""
from utils import train_and_save_model
import pandas as pd
# Single source of truth for the dataset location, so the duplicate checks and
# the training step can never end up reading two different files.
DATASET_PATH = 'books_summary.csv'


def main(csv_path=DATASET_PATH):
    """Report duplicates in the dataset, then train and save the model.

    Steps:
      1. Load ``csv_path`` with pandas and print how many rows are exact
         duplicates of an earlier row.
      2. Print how many distinct ``book_name`` values occur more than once,
         with up to five examples.
      3. Call ``train_and_save_model(csv_path)`` and report the outcome.

    Args:
        csv_path: Path to the books CSV. Defaults to ``DATASET_PATH``.

    Returns:
        Process exit status: ``0`` when training succeeded, ``1`` when the
        training result contains an ``"error"`` entry.
    """
    # Check the dataset for duplicates first
    df = pd.read_csv(csv_path)
    duplicates = df.duplicated().sum()
    print(f"Number of duplicate rows in dataset: {duplicates}")

    # Check for books with the same title
    title_counts = df['book_name'].value_counts()
    duplicate_titles = title_counts[title_counts > 1]
    if not duplicate_titles.empty:
        print(f"Found {len(duplicate_titles)} book titles that appear multiple times.")
        print("Examples:")
        # value_counts() sorts by descending count, so head(5) shows the
        # most frequently repeated titles.
        for title, count in duplicate_titles.head(5).items():
            print(f"- '{title}' appears {count} times")
    else:
        print("No duplicate book titles found.")

    # Train and save the model
    # NOTE(review): presumably returns a dict that carries an "error" key on
    # failure -- confirm against utils.train_and_save_model.
    result = train_and_save_model(csv_path)
    if "error" in result:
        print(f"Error: {result['error']}")
        # Signal the failure to the calling shell / CI instead of exiting 0.
        return 1

    print("Model training completed successfully!")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())