| --- |
| language: |
| - en |
| license: apache-2.0 |
| tags: |
| - generated_from_trainer |
| datasets: |
| - kejian/codeparrot-train-more-filter-3.3b-cleaned |
| model-index: |
| - name: blurry-conditional |
| results: [] |
| --- |
| |
| <!-- This model card has been generated automatically according to the information the Trainer had access to. You |
| should probably proofread and complete it, then remove this comment. --> |
|
|
| # blurry-conditional |
|
|
| This model is a fine-tuned version of [kejian/mighty-mle](https://huggingface.co/kejian/mighty-mle) on the kejian/codeparrot-train-more-filter-3.3b-cleaned dataset. |
|
|
| ## Model description |
|
|
| More information needed |
|
|
| ## Intended uses & limitations |
|
|
| More information needed |
|
|
| ## Training and evaluation data |
|
|
| More information needed |
|
|
| ## Training procedure |
|
|
| ### Training hyperparameters |
|
|
| The following hyperparameters were used during training: |
| - learning_rate: 0.0001 |
| - train_batch_size: 32 |
| - eval_batch_size: 16 |
| - seed: 42 |
| - gradient_accumulation_steps: 4 |
| - total_train_batch_size: 128 |
| - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08 |
| - lr_scheduler_type: linear |
| - lr_scheduler_warmup_ratio: 0.01 |
| - training_steps: 12588 |
| - mixed_precision_training: Native AMP |
|
|
| ### Framework versions |
|
|
| - Transformers 4.23.0 |
| - PyTorch 1.13.0+cu116 |
| - Datasets 2.0.0 |
| - Tokenizers 0.12.1 |
|
|
|
|
| # Full config |
| {'dataset': {'conditional_training_config': {'aligned_prefix': '<|aligned|>', |
| 'drop_token_fraction': 0.1, |
| 'misaligned_prefix': '<|misaligned|>', |
| 'threshold': 0}, |
| 'datasets': ['kejian/codeparrot-train-more-filter-3.3b-cleaned'], |
| 'is_split_by_sentences': True, |
| 'skip_tokens': 1649999872}, |
| 'generation': {'batch_size': 128, |
| 'every_n_steps': 384, |
| 'force_call_on': [12588], |
| 'metrics_configs': [{}, {'n': 1}, {}], |
| 'scenario_configs': [{'display_as_html': True, |
| 'generate_kwargs': {'bad_words_ids': [[32769]], |
| 'do_sample': True, |
| 'eos_token_id': 0, |
| 'max_length': 640, |
| 'min_length': 10, |
| 'temperature': 0.7, |
| 'top_k': 0, |
| 'top_p': 0.9}, |
| 'name': 'unconditional', |
| 'num_hits_threshold': 0, |
| 'num_samples': 2048, |
| 'prefix': '<|aligned|>', |
| 'use_prompt_for_scoring': False}, |
| {'display_as_html': True, |
| 'generate_kwargs': {'bad_words_ids': [[32769]], |
| 'do_sample': True, |
| 'eos_token_id': 0, |
| 'max_length': 272, |
| 'min_length': 10, |
| 'temperature': 0.7, |
| 'top_k': 0, |
| 'top_p': 0.9}, |
| 'name': 'functions', |
| 'num_hits_threshold': 0, |
| 'num_samples': 2048, |
| 'prefix': '<|aligned|>', |
| 'prompt_before_control': True, |
| 'prompts_path': 'resources/functions_csnet.jsonl', |
| 'use_prompt_for_scoring': True}], |
| 'scorer_config': {}}, |
| 'kl_gpt3_callback': {'every_n_steps': 384, |
| 'force_call_on': [12588], |
| 'gpt3_kwargs': {'model_name': 'code-cushman-001'}, |
| 'max_tokens': 64, |
| 'num_samples': 4096, |
| 'prefix': '<|aligned|>', |
| 'should_insert_prefix': True}, |
| 'model': {'from_scratch': False, |
| 'gpt2_config_kwargs': {'reorder_and_upcast_attn': True, |
| 'scale_attn_by': True}, |
| 'model_kwargs': {'revision': 'cf05a2b0558c03b08c78f07662c22989785b9520'}, |
| 'num_additional_tokens': 2, |
| 'path_or_name': 'kejian/mighty-mle'}, |
| 'objective': {'name': 'MLE'}, |
| 'tokenizer': {'path_or_name': 'kejian/mighty-mle', |
| 'special_tokens': ['<|aligned|>', '<|misaligned|>']}, |
| 'training': {'dataloader_num_workers': 0, |
| 'effective_batch_size': 128, |
| 'evaluation_strategy': 'no', |
| 'fp16': True, |
| 'hub_model_id': 'blurry-conditional', |
| 'hub_strategy': 'all_checkpoints', |
| 'learning_rate': 0.0001, |
| 'logging_first_step': True, |
| 'logging_steps': 1, |
| 'num_tokens': 3300000000.0, |
| 'output_dir': 'training_output', |
| 'per_device_train_batch_size': 16, |
| 'push_to_hub': True, |
| 'remove_unused_columns': False, |
| 'save_steps': 12588, |
| 'save_strategy': 'steps', |
| 'seed': 42, |
| 'tokens_already_seen': 1649999872, |
| 'warmup_ratio': 0.01, |
| 'weight_decay': 0.1}} |
| |
| # Wandb URL: |
| https://wandb.ai/kejian/uncategorized/runs/1gpnt88g |