---
language:
- en
license: mit
tags:
- generated_from_trainer
datasets:
- tomekkorbak/pii-pile-chunk3-0-50000
- tomekkorbak/pii-pile-chunk3-50000-100000
- tomekkorbak/pii-pile-chunk3-100000-150000
- tomekkorbak/pii-pile-chunk3-150000-200000
- tomekkorbak/pii-pile-chunk3-200000-250000
- tomekkorbak/pii-pile-chunk3-250000-300000
- tomekkorbak/pii-pile-chunk3-300000-350000
- tomekkorbak/pii-pile-chunk3-350000-400000
- tomekkorbak/pii-pile-chunk3-400000-450000
- tomekkorbak/pii-pile-chunk3-450000-500000
- tomekkorbak/pii-pile-chunk3-500000-550000
- tomekkorbak/pii-pile-chunk3-550000-600000
- tomekkorbak/pii-pile-chunk3-600000-650000
- tomekkorbak/pii-pile-chunk3-650000-700000
- tomekkorbak/pii-pile-chunk3-700000-750000
- tomekkorbak/pii-pile-chunk3-750000-800000
- tomekkorbak/pii-pile-chunk3-800000-850000
- tomekkorbak/pii-pile-chunk3-850000-900000
- tomekkorbak/pii-pile-chunk3-900000-950000
- tomekkorbak/pii-pile-chunk3-950000-1000000
- tomekkorbak/pii-pile-chunk3-1000000-1050000
- tomekkorbak/pii-pile-chunk3-1050000-1100000
- tomekkorbak/pii-pile-chunk3-1100000-1150000
- tomekkorbak/pii-pile-chunk3-1150000-1200000
- tomekkorbak/pii-pile-chunk3-1200000-1250000
- tomekkorbak/pii-pile-chunk3-1250000-1300000
- tomekkorbak/pii-pile-chunk3-1300000-1350000
- tomekkorbak/pii-pile-chunk3-1350000-1400000
- tomekkorbak/pii-pile-chunk3-1400000-1450000
- tomekkorbak/pii-pile-chunk3-1450000-1500000
- tomekkorbak/pii-pile-chunk3-1500000-1550000
- tomekkorbak/pii-pile-chunk3-1550000-1600000
- tomekkorbak/pii-pile-chunk3-1600000-1650000
- tomekkorbak/pii-pile-chunk3-1650000-1700000
- tomekkorbak/pii-pile-chunk3-1700000-1750000
- tomekkorbak/pii-pile-chunk3-1750000-1800000
- tomekkorbak/pii-pile-chunk3-1800000-1850000
- tomekkorbak/pii-pile-chunk3-1850000-1900000
- tomekkorbak/pii-pile-chunk3-1900000-1950000
model-index:
- name: hungry_carson
  results: []
---

<!-- This model card has been generated automatically according to the information the Trainer had access to. You
should probably proofread and complete it, then remove this comment. -->

# hungry_carson

This model was trained on the 39 tomekkorbak/pii-pile-chunk3-* dataset shards (index ranges 0-50000 through 1900000-1950000; the full list appears in the metadata above and in the config below), continuing from the tomekkorbak/cranky_lichterman checkpoint rather than from scratch (see `from_scratch: False` and `tokens_already_seen` in the full config).
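
A minimal sketch of loading and concatenating these shards with the `datasets` library (the `train` split name is an assumption; adjust to the shards' actual layout):

```python
from datasets import load_dataset, concatenate_datasets

# Shard names follow tomekkorbak/pii-pile-chunk3-<start>-<end> in steps of 50,000.
shard_names = [
    f"tomekkorbak/pii-pile-chunk3-{start}-{start + 50_000}"
    for start in range(0, 1_950_000, 50_000)
]

# Load each shard's train split and concatenate into a single dataset.
shards = [load_dataset(name, split="train") for name in shard_names]
pile_chunk3 = concatenate_datasets(shards)
```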

## Model description

More information needed

## Intended uses & limitations

More information needed

## Training and evaluation data

More information needed

## Training procedure

### Training hyperparameters

The following hyperparameters were used during training:
- learning_rate: 0.0001
- train_batch_size: 16
- eval_batch_size: 8
- seed: 42
- gradient_accumulation_steps: 8
- total_train_batch_size: 128
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
- lr_scheduler_type: linear
- lr_scheduler_warmup_ratio: 0.01
- training_steps: 2362
- mixed_precision_training: Native AMP

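The per-device batch size of 16 combined with 8 gradient-accumulation steps gives the effective batch size of 128 listed above. A minimal sketch of equivalent `TrainingArguments` (argument names follow Transformers 4.24; Adam betas and epsilon are the library defaults, and `output_dir`/`weight_decay` are taken from the full config below):

```python
from transformers import TrainingArguments

# Mirrors the hyperparameters listed above; everything else stays at defaults.
training_args = TrainingArguments(
    output_dir="training_output2",
    learning_rate=1e-4,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=8,
    gradient_accumulation_steps=8,   # 16 * 8 = 128 effective batch size
    max_steps=2362,
    lr_scheduler_type="linear",
    warmup_ratio=0.01,
    weight_decay=0.1,
    seed=42,
    fp16=True,                       # "Native AMP" mixed precision
    evaluation_strategy="no",
    logging_first_step=True,
    logging_steps=1,
    save_strategy="steps",
    save_steps=251,
)
```
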
### Framework versions

- Transformers 4.24.0
- Pytorch 1.11.0+cu113
- Datasets 2.5.1
- Tokenizers 0.11.6

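For reproduction it can help to match these versions; a small check along these lines (the `+cu113` suffix on PyTorch depends on the local CUDA build):

```python
import datasets
import tokenizers
import torch
import transformers

# Versions this model was trained with; warn if the local environment differs.
expected = {
    transformers: "4.24.0",
    torch: "1.11.0+cu113",
    datasets: "2.5.1",
    tokenizers: "0.11.6",
}
for module, version in expected.items():
    if module.__version__ != version:
        print(f"warning: {module.__name__} is {module.__version__}, expected {version}")
```
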
# Full config
```python
{
    'dataset': {
        'conditional_training_config': {
            'aligned_prefix': '<|aligned|>',
            'drop_token_fraction': 0.01,
            'misaligned_prefix': '<|misaligned|>',
            'threshold': 0.0,
        },
        'datasets': [
            'tomekkorbak/pii-pile-chunk3-0-50000',
            'tomekkorbak/pii-pile-chunk3-50000-100000',
            'tomekkorbak/pii-pile-chunk3-100000-150000',
            'tomekkorbak/pii-pile-chunk3-150000-200000',
            'tomekkorbak/pii-pile-chunk3-200000-250000',
            'tomekkorbak/pii-pile-chunk3-250000-300000',
            'tomekkorbak/pii-pile-chunk3-300000-350000',
            'tomekkorbak/pii-pile-chunk3-350000-400000',
            'tomekkorbak/pii-pile-chunk3-400000-450000',
            'tomekkorbak/pii-pile-chunk3-450000-500000',
            'tomekkorbak/pii-pile-chunk3-500000-550000',
            'tomekkorbak/pii-pile-chunk3-550000-600000',
            'tomekkorbak/pii-pile-chunk3-600000-650000',
            'tomekkorbak/pii-pile-chunk3-650000-700000',
            'tomekkorbak/pii-pile-chunk3-700000-750000',
            'tomekkorbak/pii-pile-chunk3-750000-800000',
            'tomekkorbak/pii-pile-chunk3-800000-850000',
            'tomekkorbak/pii-pile-chunk3-850000-900000',
            'tomekkorbak/pii-pile-chunk3-900000-950000',
            'tomekkorbak/pii-pile-chunk3-950000-1000000',
            'tomekkorbak/pii-pile-chunk3-1000000-1050000',
            'tomekkorbak/pii-pile-chunk3-1050000-1100000',
            'tomekkorbak/pii-pile-chunk3-1100000-1150000',
            'tomekkorbak/pii-pile-chunk3-1150000-1200000',
            'tomekkorbak/pii-pile-chunk3-1200000-1250000',
            'tomekkorbak/pii-pile-chunk3-1250000-1300000',
            'tomekkorbak/pii-pile-chunk3-1300000-1350000',
            'tomekkorbak/pii-pile-chunk3-1350000-1400000',
            'tomekkorbak/pii-pile-chunk3-1400000-1450000',
            'tomekkorbak/pii-pile-chunk3-1450000-1500000',
            'tomekkorbak/pii-pile-chunk3-1500000-1550000',
            'tomekkorbak/pii-pile-chunk3-1550000-1600000',
            'tomekkorbak/pii-pile-chunk3-1600000-1650000',
            'tomekkorbak/pii-pile-chunk3-1650000-1700000',
            'tomekkorbak/pii-pile-chunk3-1700000-1750000',
            'tomekkorbak/pii-pile-chunk3-1750000-1800000',
            'tomekkorbak/pii-pile-chunk3-1800000-1850000',
            'tomekkorbak/pii-pile-chunk3-1850000-1900000',
            'tomekkorbak/pii-pile-chunk3-1900000-1950000',
        ],
        'is_split_by_sentences': True,
        'skip_tokens': 2990407680,
    },
    'generation': {
        'force_call_on': [25177],
        'metrics_configs': [{}, {'n': 1}, {'n': 2}, {'n': 5}],
        'scenario_configs': [
            {
                'generate_kwargs': {
                    'bad_words_ids': [[50257], [50258]],
                    'do_sample': True,
                    'max_length': 128,
                    'min_length': 10,
                    'temperature': 0.7,
                    'top_k': 0,
                    'top_p': 0.9,
                },
                'name': 'unconditional',
                'num_samples': 4096,
                'prefix': '<|aligned|>',
            }
        ],
        'scorer_config': {},
    },
    'kl_gpt3_callback': {
        'force_call_on': [25177],
        'gpt3_kwargs': {'model_name': 'davinci'},
        'max_tokens': 64,
        'num_samples': 4096,
        'prefix': '<|aligned|>',
    },
    'model': {
        'from_scratch': False,
        'gpt2_config_kwargs': {'reorder_and_upcast_attn': True, 'scale_attn_by': True},
        'model_kwargs': {'revision': '5c64636da035c40bb8b1186648a39822071476cb'},
        'num_additional_tokens': 2,
        'path_or_name': 'tomekkorbak/cranky_lichterman',
    },
    'objective': {'name': 'MLE'},
    'tokenizer': {
        'path_or_name': 'gpt2',
        'special_tokens': ['<|aligned|>', '<|misaligned|>'],
    },
    'training': {
        'dataloader_num_workers': 0,
        'effective_batch_size': 128,
        'evaluation_strategy': 'no',
        'fp16': True,
        'hub_model_id': 'hungry_carson',
        'hub_strategy': 'all_checkpoints',
        'learning_rate': 0.0001,
        'logging_first_step': True,
        'logging_steps': 1,
        'num_tokens': 3300000000,
        'output_dir': 'training_output2',
        'per_device_train_batch_size': 16,
        'push_to_hub': True,
        'remove_unused_columns': False,
        'save_steps': 251,
        'save_strategy': 'steps',
        'seed': 42,
        'tokens_already_seen': 2990407680,
        'warmup_ratio': 0.01,
        'weight_decay': 0.1,
    },
}
```
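
Because of the conditional-training setup above, sampling is typically conditioned on the `<|aligned|>` control token. A minimal generation sketch using the `generate_kwargs` from the config (the Hub repo id `tomekkorbak/hungry_carson` is an assumption based on `hub_model_id`; ids 50257/50258 are the two added control tokens):

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

# Rebuild the GPT-2 tokenizer with the two control tokens used during training;
# they receive ids 50257 and 50258, matching bad_words_ids in the config.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.add_special_tokens(
    {"additional_special_tokens": ["<|aligned|>", "<|misaligned|>"]}
)

# The checkpoint should already include the two extra embedding rows
# (num_additional_tokens: 2), so no resize is needed here.
model = AutoModelForCausalLM.from_pretrained("tomekkorbak/hungry_carson")

# Condition on the <|aligned|> prefix and block the control tokens from
# being sampled, mirroring generate_kwargs in the config above.
inputs = tokenizer("<|aligned|>", return_tensors="pt")
outputs = model.generate(
    **inputs,
    do_sample=True,
    max_length=128,
    min_length=10,
    temperature=0.7,
    top_k=0,
    top_p=0.9,
    bad_words_ids=[[50257], [50258]],
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```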

# Wandb URL:
https://wandb.ai/tomekkorbak/apo/runs/1s8q2lyx