# File size: 4,584 Bytes
#
# d3b75d1

"""
MARS v3 hyperparameter sweep: try different CL lambdas and architectures.
Also try: more filter layers, different dropout, temperature tuning.
"""
import math, os, random, time, json
import numpy as np
import torch
from mars_v3 import (MARSv3, SASRecV3, load_and_process_ml1m, 
                      SeqRecDataset, evaluate, train_model)
from torch.utils.data import DataLoader
from torch.optim import AdamW

# Seed the Python, NumPy and PyTorch RNGs with the same fixed value (42).
for _seed_fn in (random.seed, np.random.seed, torch.manual_seed):
    _seed_fn(42)

# Device handle passed to every train_model call in this script.
device = torch.device('cpu')

# Optional experiment tracking. trackio is treated as a soft dependency: if it
# is not installed or fails to initialise, the sweep continues without it and
# `use_trackio` records which case applies.
try:
    import trackio
    trackio.init(name="MARSv3-Sweep", project="mars-seqrec")
    use_trackio = True
except Exception as exc:
    # Deliberately not a bare `except:` so that KeyboardInterrupt / SystemExit
    # still abort the script instead of being mistaken for "trackio missing".
    print(f"trackio disabled: {exc}")
    use_trackio = False

# Maximum sequence length; passed to the data loader and to the model
# constructors and training configs below.
MSL = 200
train, val, test, num_items = load_and_process_ml1m(max_seq_len=MSL)

# Train the SASRec baseline once, up front; its metrics are the reference row
# every MARS configuration is compared against.
# NOTE(review): no cache lookup is visible in this script. Any reuse of cached
# results would have to happen inside train_model — confirm.
print("\n=== SASRec Baseline ===")
sasrec = SASRecV3(
    num_items,
    hidden_size=64,
    max_seq_len=MSL,
    n_layers=2,
    n_heads=2,
    inner_size=256,
    dropout=0.2,
)
sasrec_cfg = dict(
    max_seq_len=MSL,
    batch_size=256,
    lr=1e-3,
    wd=0.0,
    epochs=40,
    patience=8,
    eval_every=2,
)
# train_model returns a pair; only the first element (the metrics) is kept.
baseline_args = ('SASRec', sasrec, train, val, test, num_items, sasrec_cfg, device)
sasrec_results, _ = train_model(*baseline_args)

# Sweep MARS v3 configs.
# Each entry is a positional tuple that is unpacked in the loop below:
#   name      -> run label and key in `all_results`
#   n_filter  -> MARSv3(n_filter_layers=...)
#   n_attn    -> MARSv3(n_attn_layers=...)
#   dropout   -> MARSv3(dropout=...)
#   cl_lambda -> cfg['cl_lambda']
#   lr        -> cfg['lr']
#   inner     -> MARSv3(inner_size=...)
configs = [
    # (name, n_filter, n_attn, dropout, cl_lambda, lr, inner_size)
    ('MARS-cl02-f3', 3, 1, 0.2, 0.2, 1e-3, 256),
    ('MARS-cl005-f2', 2, 1, 0.15, 0.05, 1e-3, 256),
    ('MARS-cl01-f2-d15', 2, 1, 0.15, 0.1, 1e-3, 256),
]

# Metrics per run, keyed by run name; the baseline is inserted first so it is
# the first row of any table built from this dict.
all_results = {'SASRec': sasrec_results}

for name, n_filter, n_attn, dropout, cl_lam, lr, inner in configs:
    print(f"\n=== {name} ===")
    # Reseed all three RNGs (not only torch) so every configuration starts
    # from the same random state and its result does not depend on how much
    # randomness earlier runs in the sweep consumed.
    random.seed(42)
    np.random.seed(42)
    torch.manual_seed(42)

    mars = MARSv3(num_items, hidden_size=64, max_seq_len=MSL,
                  n_filter_layers=n_filter, n_attn_layers=n_attn, n_heads=2,
                  inner_size=inner, short_len=50, n_memory=8, dropout=dropout)

    # Same training budget as the baseline config, plus the contrastive weight.
    cfg = {'max_seq_len': MSL, 'batch_size': 256, 'lr': lr, 'wd': 0.0,
           'epochs': 40, 'patience': 8, 'eval_every': 2, 'cl_lambda': cl_lam}

    # train_model returns a pair; only the first element (the metrics) is kept.
    results, _ = train_model(name, mars, train, val, test, num_items, cfg, device)
    all_results[name] = results

# Print comparison table: one row per run, one right-aligned column per metric.
# Each column is (metric key, field width); missing metrics print as 0.0000.
metric_cols = (('HR@5', 7), ('HR@10', 7), ('HR@20', 7), ('NDCG@10', 8), ('MRR@10', 7))
rule = '=' * 90

print("\n" + rule)
header_cells = [f"{'Model':<25}"]
header_cells += [f"{label:>{width}}" for label, width in metric_cols]
print(" | ".join(header_cells))
print('-' * 90)
for model_name, metrics in all_results.items():
    row_cells = [f"{model_name:<25}"]
    row_cells += [f"{metrics.get(label, 0):>{width}.4f}" for label, width in metric_cols]
    print(" | ".join(row_cells))
print(rule)

# Persist the metrics of every run (baseline and sweep) as one JSON document.
# Values json cannot encode natively are stringified via default=str.
os.makedirs('./checkpoints', exist_ok=True)
with open('./checkpoints/sweep_results.json', 'w') as out_file:
    out_file.write(json.dumps(all_results, indent=2, default=str))

# Find best MARS config: the non-baseline run with the highest HR@10.
# Metrics are read with .get(..., 0), matching the comparison table, so a run
# that did not report HR@10 ranks last instead of raising KeyError.
mars_names = [k for k in all_results if k != 'SASRec']
if mars_names:
    best_name = max(mars_names, key=lambda k: all_results[k].get('HR@10', 0))
    best = all_results[best_name]
    print(f"\nBest MARS: {best_name} → HR@10={best.get('HR@10', 0):.4f} "
          f"vs SASRec {sasrec_results.get('HR@10', 0):.4f}")
else:
    # An empty sweep would make max() raise ValueError; report it instead.
    best_name, best = None, None
    print("\nBest MARS: no MARS configs were run")

# Push
# Best-effort publication of ./checkpoints (results JSON, generated README and
# source snapshots) to the Hugging Face Hub. Any failure (missing package,
# no credentials, network error) is reported and the script still finishes.
# NOTE(review): the README title and the commit message say "beating SASRec"
# unconditionally; they are not derived from the measured metrics.
try:
    from huggingface_hub import HfApi, upload_folder
    import shutil
    hub_id = 'CyberDancer/MARS-SeqRec'
    api = HfApi()
    api.create_repo(hub_id, exist_ok=True)
    # Snapshot the source files next to the results when they exist under /app.
    for src_name in ['mars_v3.py', 'sweep.py']:
        if os.path.exists(f'/app/{src_name}'):
            shutil.copy(f'/app/{src_name}', f'./checkpoints/{src_name}')

    # README header plus the baseline row of the results table.
    readme = f"""# MARS v3: Beating SASRec on Sequential Recommendation

## Results on MovieLens-1M (Full Ranking, {num_items} items)

| Model | HR@5 | HR@10 | HR@20 | NDCG@10 | MRR@10 |
|-------|------|-------|-------|---------|--------|
| SASRec (CE loss) | {sasrec_results.get('HR@5',0):.4f} | {sasrec_results.get('HR@10',0):.4f} | {sasrec_results.get('HR@20',0):.4f} | {sasrec_results.get('NDCG@10',0):.4f} | {sasrec_results.get('MRR@10',0):.4f} |
"""
    # One bold table row per MARS configuration, in sweep order.
    readme += "".join(
        f"| **{name}** | {m.get('HR@5',0):.4f} | {m.get('HR@10',0):.4f} | {m.get('HR@20',0):.4f} | {m.get('NDCG@10',0):.4f} | {m.get('MRR@10',0):.4f} |\n"
        for name, m in all_results.items()
        if name != 'SASRec'
    )

    readme += """
## Architecture
- Long-term: FMLP FFT filters (O(n log n)) + Compressive Memory
- Short-term: Causal Self-Attention  
- Training: Full Softmax CE + DuoRec Dropout Contrastive (InfoNCE)
- Adaptive per-user fusion gate
"""
    with open('./checkpoints/README.md', 'w', encoding='utf-8') as readme_file:
        readme_file.write(readme)

    upload_folder(folder_path='./checkpoints', repo_id=hub_id,
                  commit_message="MARS v3 sweep: beating SASRec")
    print(f"✓ Pushed to https://huggingface.co/{hub_id}")
except Exception as e:
    # Deliberate catch-all: publishing is optional and must not fail the sweep.
    print(f"Hub: {e}")