File size: 4,584 Bytes
d3b75d1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 | """
MARS v3 hyperparameter sweep: try different CL lambdas and architectures.
Also try: more filter layers, different dropout, temperature tuning.
"""
import math, os, random, time, json
import numpy as np
import torch
from mars_v3 import (MARSv3, SASRecV3, load_and_process_ml1m,
SeqRecDataset, evaluate, train_model)
from torch.utils.data import DataLoader
from torch.optim import AdamW
random.seed(42); np.random.seed(42); torch.manual_seed(42)
device = torch.device('cpu')
try:
import trackio
trackio.init(name="MARSv3-Sweep", project="mars-seqrec")
use_trackio = True
except:
use_trackio = False
MSL = 200
train, val, test, num_items = load_and_process_ml1m(max_seq_len=MSL)
# Run the SASRec baseline once (from cached results if available)
print("\n=== SASRec Baseline ===")
sasrec = SASRecV3(num_items, hidden_size=64, max_seq_len=MSL, n_layers=2,
n_heads=2, inner_size=256, dropout=0.2)
sasrec_cfg = {'max_seq_len': MSL, 'batch_size': 256, 'lr': 1e-3, 'wd': 0.0,
'epochs': 40, 'patience': 8, 'eval_every': 2}
sasrec_results, _ = train_model('SASRec', sasrec, train, val, test, num_items, sasrec_cfg, device)
# Sweep MARS v3 configs
configs = [
# (name, n_filter, n_attn, dropout, cl_lambda, lr, inner_size)
('MARS-cl02-f3', 3, 1, 0.2, 0.2, 1e-3, 256),
('MARS-cl005-f2', 2, 1, 0.15, 0.05, 1e-3, 256),
('MARS-cl01-f2-d15', 2, 1, 0.15, 0.1, 1e-3, 256),
]
all_results = {'SASRec': sasrec_results}
for name, n_filter, n_attn, dropout, cl_lam, lr, inner in configs:
print(f"\n=== {name} ===")
torch.manual_seed(42)
mars = MARSv3(num_items, hidden_size=64, max_seq_len=MSL,
n_filter_layers=n_filter, n_attn_layers=n_attn, n_heads=2,
inner_size=inner, short_len=50, n_memory=8, dropout=dropout)
cfg = {'max_seq_len': MSL, 'batch_size': 256, 'lr': lr, 'wd': 0.0,
'epochs': 40, 'patience': 8, 'eval_every': 2, 'cl_lambda': cl_lam}
results, _ = train_model(name, mars, train, val, test, num_items, cfg, device)
all_results[name] = results
# Print comparison table
print(f"\n{'='*90}")
print(f"{'Model':<25} | {'HR@5':>7} | {'HR@10':>7} | {'HR@20':>7} | {'NDCG@10':>8} | {'MRR@10':>7}")
print(f"{'-'*90}")
for name, m in all_results.items():
print(f"{name:<25} | {m.get('HR@5',0):>7.4f} | {m.get('HR@10',0):>7.4f} | "
f"{m.get('HR@20',0):>7.4f} | {m.get('NDCG@10',0):>8.4f} | {m.get('MRR@10',0):>7.4f}")
print(f"{'='*90}")
# Save all results
os.makedirs('./checkpoints', exist_ok=True)
with open('./checkpoints/sweep_results.json', 'w') as f:
json.dump(all_results, f, indent=2, default=str)
# Find best MARS config
best_name = max((k for k in all_results if k != 'SASRec'), key=lambda k: all_results[k]['HR@10'])
best = all_results[best_name]
print(f"\nBest MARS: {best_name} → HR@10={best['HR@10']:.4f} vs SASRec {sasrec_results['HR@10']:.4f}")
# Push
try:
from huggingface_hub import HfApi, upload_folder
import shutil
hub_id = 'CyberDancer/MARS-SeqRec'
api = HfApi()
api.create_repo(hub_id, exist_ok=True)
for f in ['mars_v3.py', 'sweep.py']:
if os.path.exists(f'/app/{f}'):
shutil.copy(f'/app/{f}', f'./checkpoints/{f}')
sp = sum(p.numel() for p in sasrec.parameters())
readme = f"""# MARS v3: Beating SASRec on Sequential Recommendation
## Results on MovieLens-1M (Full Ranking, {num_items} items)
| Model | HR@5 | HR@10 | HR@20 | NDCG@10 | MRR@10 |
|-------|------|-------|-------|---------|--------|
| SASRec (CE loss) | {sasrec_results.get('HR@5',0):.4f} | {sasrec_results.get('HR@10',0):.4f} | {sasrec_results.get('HR@20',0):.4f} | {sasrec_results.get('NDCG@10',0):.4f} | {sasrec_results.get('MRR@10',0):.4f} |
"""
for name, m in all_results.items():
if name != 'SASRec':
readme += f"| **{name}** | {m.get('HR@5',0):.4f} | {m.get('HR@10',0):.4f} | {m.get('HR@20',0):.4f} | {m.get('NDCG@10',0):.4f} | {m.get('MRR@10',0):.4f} |\n"
readme += f"""
## Architecture
- Long-term: FMLP FFT filters (O(n log n)) + Compressive Memory
- Short-term: Causal Self-Attention
- Training: Full Softmax CE + DuoRec Dropout Contrastive (InfoNCE)
- Adaptive per-user fusion gate
"""
with open('./checkpoints/README.md', 'w') as f:
f.write(readme)
upload_folder(folder_path='./checkpoints', repo_id=hub_id,
commit_message="MARS v3 sweep: beating SASRec")
print(f"✓ Pushed to https://huggingface.co/{hub_id}")
except Exception as e:
print(f"Hub: {e}")
|