{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"gpuType":"T4","authorship_tag":"ABX9TyM4go9YRdDjJxsG0aHzVRo/"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"},"accelerator":"GPU","widgets":{"application/vnd.jupyter.widget-state+json":{"102580f3567e4e0ab4f06bb9344b2696":{"model_module":"@jupyter-widgets/controls","model_name":"VBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"VBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"VBoxView","box_style":"","children":[],"layout":"IPY_MODEL_dbb1a0891bb64b7aaa10fa011b99108f"}},"dff5048def1c4ac291260be3ba668df7":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_f1da02bcdd9547d0ae85521f09d7c58a","placeholder":"","style":"IPY_MODEL_b5229ad1f7c84e19a998dd4475144002","value":"
| Step | \n","Training Loss | \n","
|---|---|
| 500 | \n","1.700700 | \n","
| 1000 | \n","1.305100 | \n","
| 1500 | \n","1.234500 | \n","
"],"text/plain":[" "]},"metadata":{}},{"output_type":"execute_result","data":{"text/plain":["TrainOutput(global_step=3000, training_loss=1.3097034505208334, metrics={'train_runtime': 4274.3363, 'train_samples_per_second': 0.702, 'train_steps_per_second': 0.702, 'total_flos': 5.3114566606848e+16, 'train_loss': 1.3097034505208334, 'epoch': 3.0})"]},"metadata":{},"execution_count":54}]},{"cell_type":"code","source":["trainer.save_model(\"codex-finetune-final\")\n","tokenizer.save_pretrained(\"codex-finetune-final\")\n"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"b4fUezv2uyxn","executionInfo":{"status":"ok","timestamp":1750581102402,"user_tz":-330,"elapsed":296,"user":{"displayName":"al nove","userId":"12028300340288841300"}},"outputId":"e0bb7043-4f85-4f19-8e24-a1e02d1e1d34"},"execution_count":55,"outputs":[{"output_type":"execute_result","data":{"text/plain":["('codex-finetune-final/tokenizer_config.json',\n"," 'codex-finetune-final/special_tokens_map.json',\n"," 'codex-finetune-final/vocab.json',\n"," 'codex-finetune-final/merges.txt',\n"," 'codex-finetune-final/added_tokens.json',\n"," 'codex-finetune-final/tokenizer.json')"]},"metadata":{},"execution_count":55}]},{"cell_type":"code","source":["trainer.push_to_hub(\"khushimalik53/coding_copilot\")\n","tokenizer.push_to_hub(\"khushimalik53/coding_copilot\")\n"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":243,"referenced_widgets":["6b3b310c31fb40b4ab5e2a53a0adb2f4","117653515b884a1682857c1d947d8744","b0c5599906a740968f42051e18b21925","5a321919709a452abbf566b7aa5a484e","8c6b75a551994b43a75d620d2a5988d6","60cc9464d0044469b5d81c69226bb3bf","8098fd00de504996971606155be8a884","6e798ae3aca446109f118de8646edbc9","56c33e40415b4d05b1c7a10bec07aa88","4d0d568f3c9f4fa9b678f58f95bb373d","495f85eff9bb41998d90e33ec40b6660","fffbfc9ed8fd43399cc920faf8231de4","63c97080030545a49de90ea4575415b3","f01e15ec1fa14e0781045432656189fa","2122d0244f194c47a8bcea20b962a2eb","0e662e0cdcd44b5092e4dbe5787566f3","2f6ff16a99be45069e4c3c74d1d56738","46eaef18a6f1481996df4a8cb630b117","2a3363f76bd24e28bd1e44ca4da2492d","ba48efe3c4474e898a1504cd2a34e2c2","ecb6f2eaf57945b284d532cf623e6b95","3502612c862040d980808f2aaac5a5d3"]},"id":"h2hpBWdvxFJB","executionInfo":{"status":"ok","timestamp":1750581172393,"user_tz":-330,"elapsed":8515,"user":{"displayName":"al nove","userId":"12028300340288841300"}},"outputId":"d11f63e4-52eb-464c-8413-5cb3c2bf54b1"},"execution_count":57,"outputs":[{"output_type":"display_data","data":{"text/plain":["Uploading...: 0%| | 0.00/9.12M [00:00, ?B/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"6b3b310c31fb40b4ab5e2a53a0adb2f4"}},"metadata":{}},{"output_type":"stream","name":"stderr","text":["No files have been modified since last commit. Skipping to prevent empty commit.\n","WARNING:huggingface_hub.hf_api:No files have been modified since last commit. Skipping to prevent empty commit.\n"]},{"output_type":"display_data","data":{"text/plain":["README.md: 0%| | 0.00/28.0 [00:00, ?B/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"fffbfc9ed8fd43399cc920faf8231de4"}},"metadata":{}},{"output_type":"execute_result","data":{"text/plain":["CommitInfo(commit_url='https://huggingface.co/khushimalik53/coding_copilot/commit/f5978c6f8c8790e76ce1fdcaa843f83c51f1bb83', commit_message='Upload tokenizer', commit_description='', oid='f5978c6f8c8790e76ce1fdcaa843f83c51f1bb83', pr_url=None, repo_url=RepoUrl('https://huggingface.co/khushimalik53/coding_copilot', endpoint='https://huggingface.co', repo_type='model', repo_id='khushimalik53/coding_copilot'), pr_revision=None, pr_num=None)"],"application/vnd.google.colaboratory.intrinsic+json":{"type":"string"}},"metadata":{},"execution_count":57}]},{"cell_type":"code","source":["metrics = trainer.evaluate(eval_dataset=formatted_dataset[\"test\"])\n","print(metrics)"],"metadata":{"id":"Q9wjkdIax8X1"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["input_text = \"### Instruction:\\nExplain what this function does:\\ndef add(x, y):\\n return x + y\\n\\n### Response:\\n\"\n","inputs = tokenizer(input_text, return_tensors=\"pt\").to(model.device)\n","outputs = model.generate(**inputs, max_new_tokens=100)\n","print(tokenizer.decode(outputs[0], skip_special_tokens=True))\n"],"metadata":{"id":"D6P3S8sXvbqW"},"execution_count":null,"outputs":[]}]}\n"," \n","
\n"," \n"," \n"," \n"," Step \n"," Training Loss \n"," \n"," \n"," 500 \n"," 1.700700 \n"," \n"," \n"," 1000 \n"," 1.305100 \n"," \n"," \n"," 1500 \n"," 1.234500 \n"," \n"," \n"," 2000 \n"," 1.229400 \n"," \n"," \n"," 2500 \n"," 1.185200 \n"," \n"," \n"," \n","3000 \n"," 1.203400 \n","