# train.py
"""Train a dot-product matrix-factorization recommender on manga like/dislike data."""
import torch
import pandas as pd
import numpy as np
from torch import nn
from torch.utils.data import Dataset, DataLoader


class MangaDataset(Dataset):
    """Wraps a ratings DataFrame (columns: user_idx, manga_idx, rating) as tensors."""

    def __init__(self, ratings_df):
        self.users = torch.tensor(ratings_df['user_idx'].values, dtype=torch.long)
        self.items = torch.tensor(ratings_df['manga_idx'].values, dtype=torch.long)
        self.ratings = torch.tensor(ratings_df['rating'].values, dtype=torch.float)

    def __len__(self):
        return len(self.ratings)

    def __getitem__(self, idx):
        return self.users[idx], self.items[idx], self.ratings[idx]


class MangaRecommender(nn.Module):
    """Matrix factorization: score(u, i) = dot(user_emb[u], item_emb[i])."""

    def __init__(self, num_users, num_items, n_factors=50):
        super().__init__()
        self.user_factors = nn.Embedding(num_users, n_factors)
        self.item_factors = nn.Embedding(num_items, n_factors)
        # Xavier init keeps initial dot products at a reasonable scale.
        nn.init.xavier_normal_(self.user_factors.weight)
        nn.init.xavier_normal_(self.item_factors.weight)

    def forward(self, user, item):
        """Predicted rating for each aligned (user, item) pair in the batch."""
        user_emb = self.user_factors(user)
        item_emb = self.item_factors(item)
        return (user_emb * item_emb).sum(1)

    def predict(self, user_ids):
        """Scores for the given users against ALL items; shape (len(user_ids), num_items)."""
        user_emb = self.user_factors(user_ids)
        all_items = self.item_factors.weight
        return torch.matmul(user_emb, all_items.t())


def train_model():
    """Load manga_ratings.csv, train the recommender, save model weights + ID mappings."""
    df = pd.read_csv('manga_ratings.csv')

    # Map raw IDs to contiguous embedding indices.
    user_mapping = {uid: idx for idx, uid in enumerate(df['user_id'].unique())}
    manga_mapping = {mid: idx for idx, mid in enumerate(df['manga_id'].unique())}

    df['user_idx'] = df['user_id'].map(user_mapping)
    df['manga_idx'] = df['manga_id'].map(manga_mapping)

    # Convert like/dislike to numeric targets. BUG FIX: missing CSV values are
    # NaN (not None), so the original map's `None: 0.0` key never matched and
    # NaN ratings propagated into the MSE loss; fillna handles them explicitly.
    rating_map = {'like': 1.0, 'dislike': -1.0}
    df['rating'] = df['like_status'].map(rating_map).fillna(0.0)

    # 80/20 train/validation split, seeded for reproducibility
    # (replaces sklearn.train_test_split — no extra dependency needed).
    train_df = df.sample(frac=0.8, random_state=42)
    val_df = df.drop(train_df.index)

    train_dataset = MangaDataset(train_df)
    val_dataset = MangaDataset(val_df)
    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=64)

    model = MangaRecommender(
        num_users=len(user_mapping),
        num_items=len(manga_mapping)
    )

    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    num_epochs = 20
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0
        for user, item, rating in train_loader:
            optimizer.zero_grad()
            pred = model(user, item)
            loss = criterion(pred, rating)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        # Validation pass (no gradients).
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for user, item, rating in val_loader:
                pred = model(user, item)
                val_loss += criterion(pred, rating).item()

        print(f'Epoch {epoch+1}/{num_epochs}')
        print(f'Train Loss: {total_loss/len(train_loader):.4f}')
        print(f'Val Loss: {val_loss/len(val_loader):.4f}')

    # Persist weights together with the ID mappings needed at inference time.
    torch.save({
        'model_state_dict': model.state_dict(),
        'user_mapping': user_mapping,
        'manga_mapping': manga_mapping
    }, 'manga_recommender.pt')


if __name__ == '__main__':
    train_model()