Vardaan98 committed on May 22, 2025

Commit

a9dd270

verified ·

1 Parent(s): 5d37320

Upload 23 files

Browse files

Files changed (23) hide show

app.py +60 -0
main.ipynb +232 -0
results/checkpoint-750/config.json +45 -0
results/checkpoint-750/generation_config.json +6 -0
results/checkpoint-750/merges.txt +0 -0
results/checkpoint-750/model.safetensors +3 -0
results/checkpoint-750/optimizer.pt +3 -0
results/checkpoint-750/rng_state.pth +3 -0
results/checkpoint-750/scheduler.pt +3 -0
results/checkpoint-750/special_tokens_map.json +6 -0
results/checkpoint-750/tokenizer.json +0 -0
results/checkpoint-750/tokenizer_config.json +21 -0
results/checkpoint-750/trainer_state.json +559 -0
results/checkpoint-750/training_args.bin +3 -0
results/checkpoint-750/vocab.json +0 -0
results/runs/May21_21-01-39_Vardaan_Aspire/events.out.tfevents.1747841540.Vardaan_Aspire.6608.0 +3 -0
results/runs/May21_21-07-12_Vardaan_Aspire/events.out.tfevents.1747841837.Vardaan_Aspire.16648.0 +3 -0
results/runs/May21_21-14-05_Vardaan_Aspire/events.out.tfevents.1747842248.Vardaan_Aspire.7928.0 +3 -0
results/runs/May21_21-20-51_Vardaan_Aspire/events.out.tfevents.1747842654.Vardaan_Aspire.7928.1 +3 -0
results/runs/May21_21-23-46_Vardaan_Aspire/events.out.tfevents.1747842827.Vardaan_Aspire.11864.0 +3 -0
results/runs/May21_21-27-09_Vardaan_Aspire/events.out.tfevents.1747843029.Vardaan_Aspire.14288.0 +3 -0
results/runs/May21_21-28-25_Vardaan_Aspire/events.out.tfevents.1747843114.Vardaan_Aspire.14288.1 +3 -0
results/runs/May21_21-50-19_Vardaan_Aspire/events.out.tfevents.1747844420.Vardaan_Aspire.14288.2 +3 -0

app.py ADDED Viewed

	@@ -0,0 +1,60 @@

+import streamlit as st
+from transformers import AutoTokenizer, AutoModelForCausalLM
+import torch
+# --- Fix Streamlit config issue ---
+st.set_page_config(
+    page_title="Natural Reasoning Bot",
+    page_icon="🤖",
+    layout="centered"
+)
+st.title("🤖 Natural Reasoning Bot")
+st.markdown("Ask science questions and get answers from your fine-tuned model.")
+# --- Sidebar for parameters ---
+st.sidebar.header("⚙️ Generation Settings")
+temperature = st.sidebar.slider("Temperature", 0.0, 1.5, 1.0, 0.1)
+top_k = st.sidebar.slider("Top-k", 0, 100, 50, 5)
+top_p = st.sidebar.slider("Top-p", 0.0, 1.0, 0.95, 0.05)
+# --- Load model and tokenizer ---
+@st.cache_resource(show_spinner=False)
+def load_model():
+    model = AutoModelForCausalLM.from_pretrained("./my_bot_model")
+    tokenizer = AutoTokenizer.from_pretrained("./my_bot_model")
+    return model, tokenizer
+model, tokenizer = load_model()
+# --- Text Input ---
+question = st.text_area("🧠 Enter your science question:", height=100)
+generate_btn = st.button("🔍 Generate Answer")
+# --- Inference Logic ---
+if generate_btn and question:
+    input_text = f"### Question: {question}\n### Answer:"
+    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
+    model.eval()
+    with torch.no_grad():
+        output = model.generate(
+            **inputs,
+            max_length=256,
+            do_sample=True,
+            top_p=top_p,
+            top_k=top_k,
+            temperature=temperature,
+            pad_token_id=tokenizer.eos_token_id
+        )
+    response = tokenizer.decode(output[0], skip_special_tokens=True)
+    answer = response.replace(input_text, "").strip()
+    st.markdown("---")
+    st.subheader("📤 Model Answer")
+    st.success(answer)
+elif generate_btn:
+    st.warning("Please enter a question to get an answer.")

main.ipynb ADDED Viewed

	@@ -0,0 +1,232 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "5d81bb13",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from datasets import load_dataset\n",
+    "\n",
+    "dataset = load_dataset(\"facebook/natural_reasoning\")\n",
+    "train_data = dataset[\"train\"].select(range(5000))  # Start with 5k examples\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "5279c3c3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def format_for_training(example):\n",
+    "    return {\n",
+    "        \"prompt\": example[\"question\"],\n",
+    "        \"completion\": example[\"reference_answer\"]\n",
+    "    }\n",
+    "\n",
+    "train_data = train_data.map(format_for_training)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "d5f715b3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from transformers import AutoTokenizer\n",
+    "\n",
+    "model_checkpoint = \"distilgpt2\"\n",
+    "tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)\n",
+    "tokenizer.pad_token = tokenizer.eos_token\n",
+    "max_seq_length = 512\n",
+    "\n",
+    "def tokenize(example):\n",
+    "    input_text = f\"### Question: {example['prompt']}\\n### Answer: {example['completion']}{tokenizer.eos_token}\"\n",
+    "    tokenized = tokenizer(\n",
+    "        input_text,\n",
+    "        padding=\"max_length\",\n",
+    "        truncation=True,\n",
+    "        max_length=max_seq_length\n",
+    "    )\n",
+    "    tokenized[\"labels\"] = tokenized[\"input_ids\"].copy()\n",
+    "    return tokenized\n",
+    "\n",
+    "tokenized_data = train_data.map(tokenize)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "61cb619d",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "c:\\Users\\shukl\\anaconda3\\Lib\\site-packages\\transformers\\training_args.py:1611: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\shukl\\AppData\\Local\\Temp\\ipykernel_7600\\3538093026.py:16: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `Trainer.__init__`. Use `processing_class` instead.\n",
+      "  trainer = Trainer(\n",
+      "`loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "\n",
+       "    <div>\n",
+       "      \n",
+       "      <progress value='2500' max='2500' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
+       "      [2500/2500 3:07:00, Epoch 1/1]\n",
+       "    </div>\n",
+       "    <table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       " <tr style=\"text-align: left;\">\n",
+       "      <th>Step</th>\n",
+       "      <th>Training Loss</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <td>500</td>\n",
+       "      <td>0.836400</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>1000</td>\n",
+       "      <td>0.629200</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>1500</td>\n",
+       "      <td>0.631400</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>2000</td>\n",
+       "      <td>0.622300</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <td>2500</td>\n",
+       "      <td>0.631600</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table><p>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/plain": [
+       "('./my_bot_model\\\\tokenizer_config.json',\n",
+       " './my_bot_model\\\\special_tokens_map.json',\n",
+       " './my_bot_model\\\\vocab.json',\n",
+       " './my_bot_model\\\\merges.txt',\n",
+       " './my_bot_model\\\\added_tokens.json',\n",
+       " './my_bot_model\\\\tokenizer.json')"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from transformers import AutoModelForCausalLM, TrainingArguments, Trainer\n",
+    "\n",
+    "model = AutoModelForCausalLM.from_pretrained(model_checkpoint)\n",
+    "\n",
+    "training_args = TrainingArguments(\n",
+    "    output_dir=\"./my_bot_model\",\n",
+    "    evaluation_strategy=\"no\",\n",
+    "    learning_rate=2e-5,\n",
+    "    per_device_train_batch_size=2,\n",
+    "    num_train_epochs=1,\n",
+    "    save_strategy=\"epoch\",\n",
+    "    weight_decay=0.01,\n",
+    "    fp16=True  # You said you have 4GB GPU\n",
+    ")\n",
+    "\n",
+    "trainer = Trainer(\n",
+    "    model=model,\n",
+    "    args=training_args,\n",
+    "    train_dataset=tokenized_data,\n",
+    "    tokenizer=tokenizer\n",
+    ")\n",
+    "\n",
+    "trainer.train()\n",
+    "model.save_pretrained(\"./my_bot_model\")\n",
+    "tokenizer.save_pretrained(\"./my_bot_model\")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "57b71657",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "💬 Model Answer:\n",
+      " The total work done on an object when it is moved upwards against gravity is approximately 3.8x faster than the total work done on an object in a vacuum.\n"
+     ]
+    }
+   ],
+   "source": [
+    "import torch\n",
+    "\n",
+    "model = AutoModelForCausalLM.from_pretrained(\"./my_bot_model\")\n",
+    "tokenizer = AutoTokenizer.from_pretrained(\"./my_bot_model\")\n",
+    "\n",
+    "question = \"What is the total work done on an object when it is moved upwards against gravity?\"\n",
+    "input_text = f\"### Question: {question}\\n### Answer:\"\n",
+    "inputs = tokenizer(input_text, return_tensors=\"pt\").to(model.device)\n",
+    "\n",
+    "with torch.no_grad():\n",
+    "    output = model.generate(\n",
+    "        **inputs,\n",
+    "        max_length=256,\n",
+    "        do_sample=True,\n",
+    "        temperature=0.7,\n",
+    "        top_p=0.9,\n",
+    "        top_k=50,\n",
+    "        pad_token_id=tokenizer.eos_token_id\n",
+    "    )\n",
+    "\n",
+    "response = tokenizer.decode(output[0], skip_special_tokens=True)\n",
+    "answer = response.replace(input_text, \"\").strip()\n",
+    "\n",
+    "print(\"💬 Model Answer:\\n\", answer)\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "base",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

results/checkpoint-750/config.json ADDED Viewed

	@@ -0,0 +1,45 @@

+{
+  "_num_labels": 1,
+  "activation_function": "gelu_new",
+  "architectures": [
+    "GPT2LMHeadModel"
+  ],
+  "attn_pdrop": 0.1,
+  "bos_token_id": 50256,
+  "embd_pdrop": 0.1,
+  "eos_token_id": 50256,
+  "id2label": {
+    "0": "LABEL_0"
+  },
+  "initializer_range": 0.02,
+  "label2id": {
+    "LABEL_0": 0
+  },
+  "layer_norm_epsilon": 1e-05,
+  "model_type": "gpt2",
+  "n_ctx": 1024,
+  "n_embd": 768,
+  "n_head": 12,
+  "n_inner": null,
+  "n_layer": 6,
+  "n_positions": 1024,
+  "reorder_and_upcast_attn": false,
+  "resid_pdrop": 0.1,
+  "scale_attn_by_inverse_layer_idx": false,
+  "scale_attn_weights": true,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "task_specific_params": {
+    "text-generation": {
+      "do_sample": true,
+      "max_length": 50
+    }
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.50.3",
+  "use_cache": true,
+  "vocab_size": 50257
+}

results/checkpoint-750/generation_config.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 50256,
+  "eos_token_id": 50256,
+  "transformers_version": "4.50.3"
+}

results/checkpoint-750/merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

results/checkpoint-750/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bfbc9137f25bbd51376fae72c48f122ed62b2dae6d98c141264134d7427c33e3
+size 327657928

results/checkpoint-750/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2d3d83538ff9acd8bf815f0b64295eb49302ff9866c13b633b2d7708d5c4cecd
+size 655362362

results/checkpoint-750/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ce3140fdd7584bc4b4d20e2036b0351379fb91b849b1e1af57d80a0cad8a3d56
+size 13990

results/checkpoint-750/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c70636ba82956d9bcb1e9c3edb2fca8aa8e0bd7aa847ff3d6a8cbc20d70c912a
+size 1064

results/checkpoint-750/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "bos_token": "<|endoftext|>",
+  "eos_token": "<|endoftext|>",
+  "pad_token": "<|endoftext|>",
+  "unk_token": "<|endoftext|>"
+}

results/checkpoint-750/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

results/checkpoint-750/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,21 @@

+{
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "50256": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<|endoftext|>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|endoftext|>",
+  "extra_special_tokens": {},
+  "model_max_length": 1024,
+  "pad_token": "<|endoftext|>",
+  "tokenizer_class": "GPT2Tokenizer",
+  "unk_token": "<|endoftext|>"
+}

results/checkpoint-750/trainer_state.json ADDED Viewed

	@@ -0,0 +1,559 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 750,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.013333333333333334,
+      "grad_norm": 18.803022384643555,
+      "learning_rate": 1.9733333333333336e-05,
+      "loss": 4.8444,
+      "step": 10
+    },
+    {
+      "epoch": 0.02666666666666667,
+      "grad_norm": 4.537410259246826,
+      "learning_rate": 1.9466666666666668e-05,
+      "loss": 1.6348,
+      "step": 20
+    },
+    {
+      "epoch": 0.04,
+      "grad_norm": 4.052369117736816,
+      "learning_rate": 1.9200000000000003e-05,
+      "loss": 1.6249,
+      "step": 30
+    },
+    {
+      "epoch": 0.05333333333333334,
+      "grad_norm": 3.1292710304260254,
+      "learning_rate": 1.8933333333333334e-05,
+      "loss": 1.525,
+      "step": 40
+    },
+    {
+      "epoch": 0.06666666666666667,
+      "grad_norm": 3.1493794918060303,
+      "learning_rate": 1.866666666666667e-05,
+      "loss": 1.5233,
+      "step": 50
+    },
+    {
+      "epoch": 0.08,
+      "grad_norm": 3.0254428386688232,
+      "learning_rate": 1.8400000000000003e-05,
+      "loss": 1.5487,
+      "step": 60
+    },
+    {
+      "epoch": 0.09333333333333334,
+      "grad_norm": 3.110171318054199,
+      "learning_rate": 1.8133333333333335e-05,
+      "loss": 1.3042,
+      "step": 70
+    },
+    {
+      "epoch": 0.10666666666666667,
+      "grad_norm": 3.023773431777954,
+      "learning_rate": 1.7866666666666666e-05,
+      "loss": 1.4056,
+      "step": 80
+    },
+    {
+      "epoch": 0.12,
+      "grad_norm": 5.209704875946045,
+      "learning_rate": 1.76e-05,
+      "loss": 1.4335,
+      "step": 90
+    },
+    {
+      "epoch": 0.13333333333333333,
+      "grad_norm": 2.825587034225464,
+      "learning_rate": 1.7333333333333336e-05,
+      "loss": 1.5363,
+      "step": 100
+    },
+    {
+      "epoch": 0.14666666666666667,
+      "grad_norm": 3.2794153690338135,
+      "learning_rate": 1.706666666666667e-05,
+      "loss": 1.3614,
+      "step": 110
+    },
+    {
+      "epoch": 0.16,
+      "grad_norm": 3.4573426246643066,
+      "learning_rate": 1.6800000000000002e-05,
+      "loss": 1.3278,
+      "step": 120
+    },
+    {
+      "epoch": 0.17333333333333334,
+      "grad_norm": 3.3406155109405518,
+      "learning_rate": 1.6533333333333333e-05,
+      "loss": 1.2889,
+      "step": 130
+    },
+    {
+      "epoch": 0.18666666666666668,
+      "grad_norm": 4.201858997344971,
+      "learning_rate": 1.6266666666666668e-05,
+      "loss": 1.3344,
+      "step": 140
+    },
+    {
+      "epoch": 0.2,
+      "grad_norm": 2.768216848373413,
+      "learning_rate": 1.6000000000000003e-05,
+      "loss": 1.2979,
+      "step": 150
+    },
+    {
+      "epoch": 0.21333333333333335,
+      "grad_norm": 3.158536911010742,
+      "learning_rate": 1.5733333333333334e-05,
+      "loss": 1.3151,
+      "step": 160
+    },
+    {
+      "epoch": 0.22666666666666666,
+      "grad_norm": 2.6460344791412354,
+      "learning_rate": 1.546666666666667e-05,
+      "loss": 1.4425,
+      "step": 170
+    },
+    {
+      "epoch": 0.24,
+      "grad_norm": 3.4400217533111572,
+      "learning_rate": 1.5200000000000002e-05,
+      "loss": 1.4769,
+      "step": 180
+    },
+    {
+      "epoch": 0.25333333333333335,
+      "grad_norm": 3.023303985595703,
+      "learning_rate": 1.4933333333333335e-05,
+      "loss": 1.3712,
+      "step": 190
+    },
+    {
+      "epoch": 0.26666666666666666,
+      "grad_norm": 3.0173838138580322,
+      "learning_rate": 1.4666666666666666e-05,
+      "loss": 1.4047,
+      "step": 200
+    },
+    {
+      "epoch": 0.28,
+      "grad_norm": 2.7846803665161133,
+      "learning_rate": 1.4400000000000001e-05,
+      "loss": 1.264,
+      "step": 210
+    },
+    {
+      "epoch": 0.29333333333333333,
+      "grad_norm": 3.157430648803711,
+      "learning_rate": 1.4133333333333334e-05,
+      "loss": 1.2673,
+      "step": 220
+    },
+    {
+      "epoch": 0.30666666666666664,
+      "grad_norm": 3.6202878952026367,
+      "learning_rate": 1.3866666666666669e-05,
+      "loss": 1.2785,
+      "step": 230
+    },
+    {
+      "epoch": 0.32,
+      "grad_norm": 3.181349515914917,
+      "learning_rate": 1.3600000000000002e-05,
+      "loss": 1.3585,
+      "step": 240
+    },
+    {
+      "epoch": 0.3333333333333333,
+      "grad_norm": 3.8837106227874756,
+      "learning_rate": 1.3333333333333333e-05,
+      "loss": 1.4094,
+      "step": 250
+    },
+    {
+      "epoch": 0.3466666666666667,
+      "grad_norm": 2.497514247894287,
+      "learning_rate": 1.3066666666666668e-05,
+      "loss": 1.248,
+      "step": 260
+    },
+    {
+      "epoch": 0.36,
+      "grad_norm": 2.697995662689209,
+      "learning_rate": 1.2800000000000001e-05,
+      "loss": 1.2251,
+      "step": 270
+    },
+    {
+      "epoch": 0.37333333333333335,
+      "grad_norm": 4.157288074493408,
+      "learning_rate": 1.2533333333333336e-05,
+      "loss": 1.4987,
+      "step": 280
+    },
+    {
+      "epoch": 0.38666666666666666,
+      "grad_norm": 2.739837408065796,
+      "learning_rate": 1.2266666666666667e-05,
+      "loss": 1.4039,
+      "step": 290
+    },
+    {
+      "epoch": 0.4,
+      "grad_norm": 3.4221503734588623,
+      "learning_rate": 1.2e-05,
+      "loss": 1.33,
+      "step": 300
+    },
+    {
+      "epoch": 0.41333333333333333,
+      "grad_norm": 3.524386405944824,
+      "learning_rate": 1.1733333333333335e-05,
+      "loss": 1.4137,
+      "step": 310
+    },
+    {
+      "epoch": 0.4266666666666667,
+      "grad_norm": 3.046319007873535,
+      "learning_rate": 1.1466666666666668e-05,
+      "loss": 1.3476,
+      "step": 320
+    },
+    {
+      "epoch": 0.44,
+      "grad_norm": 2.374499559402466,
+      "learning_rate": 1.1200000000000001e-05,
+      "loss": 1.1561,
+      "step": 330
+    },
+    {
+      "epoch": 0.4533333333333333,
+      "grad_norm": 3.037949323654175,
+      "learning_rate": 1.0933333333333334e-05,
+      "loss": 1.3312,
+      "step": 340
+    },
+    {
+      "epoch": 0.4666666666666667,
+      "grad_norm": 3.216047525405884,
+      "learning_rate": 1.0666666666666667e-05,
+      "loss": 1.2017,
+      "step": 350
+    },
+    {
+      "epoch": 0.48,
+      "grad_norm": 3.007854461669922,
+      "learning_rate": 1.04e-05,
+      "loss": 1.2893,
+      "step": 360
+    },
+    {
+      "epoch": 0.49333333333333335,
+      "grad_norm": 3.648378849029541,
+      "learning_rate": 1.0133333333333335e-05,
+      "loss": 1.3693,
+      "step": 370
+    },
+    {
+      "epoch": 0.5066666666666667,
+      "grad_norm": 3.0441296100616455,
+      "learning_rate": 9.866666666666668e-06,
+      "loss": 1.2724,
+      "step": 380
+    },
+    {
+      "epoch": 0.52,
+      "grad_norm": 3.027775526046753,
+      "learning_rate": 9.600000000000001e-06,
+      "loss": 1.2175,
+      "step": 390
+    },
+    {
+      "epoch": 0.5333333333333333,
+      "grad_norm": 3.1908812522888184,
+      "learning_rate": 9.333333333333334e-06,
+      "loss": 1.3371,
+      "step": 400
+    },
+    {
+      "epoch": 0.5466666666666666,
+      "grad_norm": 3.432631731033325,
+      "learning_rate": 9.066666666666667e-06,
+      "loss": 1.2195,
+      "step": 410
+    },
+    {
+      "epoch": 0.56,
+      "grad_norm": 3.2034451961517334,
+      "learning_rate": 8.8e-06,
+      "loss": 1.2541,
+      "step": 420
+    },
+    {
+      "epoch": 0.5733333333333334,
+      "grad_norm": 3.1531622409820557,
+      "learning_rate": 8.533333333333335e-06,
+      "loss": 1.3046,
+      "step": 430
+    },
+    {
+      "epoch": 0.5866666666666667,
+      "grad_norm": 2.9354567527770996,
+      "learning_rate": 8.266666666666667e-06,
+      "loss": 1.2941,
+      "step": 440
+    },
+    {
+      "epoch": 0.6,
+      "grad_norm": 3.434643507003784,
+      "learning_rate": 8.000000000000001e-06,
+      "loss": 1.3223,
+      "step": 450
+    },
+    {
+      "epoch": 0.6133333333333333,
+      "grad_norm": 2.433544158935547,
+      "learning_rate": 7.733333333333334e-06,
+      "loss": 1.2449,
+      "step": 460
+    },
+    {
+      "epoch": 0.6266666666666667,
+      "grad_norm": 2.9112284183502197,
+      "learning_rate": 7.4666666666666675e-06,
+      "loss": 1.2879,
+      "step": 470
+    },
+    {
+      "epoch": 0.64,
+      "grad_norm": 3.082655668258667,
+      "learning_rate": 7.2000000000000005e-06,
+      "loss": 1.2657,
+      "step": 480
+    },
+    {
+      "epoch": 0.6533333333333333,
+      "grad_norm": 3.0584707260131836,
+      "learning_rate": 6.9333333333333344e-06,
+      "loss": 1.2012,
+      "step": 490
+    },
+    {
+      "epoch": 0.6666666666666666,
+      "grad_norm": 2.8814918994903564,
+      "learning_rate": 6.666666666666667e-06,
+      "loss": 1.35,
+      "step": 500
+    },
+    {
+      "epoch": 0.68,
+      "grad_norm": 3.5102522373199463,
+      "learning_rate": 6.4000000000000006e-06,
+      "loss": 1.2537,
+      "step": 510
+    },
+    {
+      "epoch": 0.6933333333333334,
+      "grad_norm": 3.4719033241271973,
+      "learning_rate": 6.133333333333334e-06,
+      "loss": 1.407,
+      "step": 520
+    },
+    {
+      "epoch": 0.7066666666666667,
+      "grad_norm": 3.265688896179199,
+      "learning_rate": 5.8666666666666675e-06,
+      "loss": 1.2373,
+      "step": 530
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 3.8343162536621094,
+      "learning_rate": 5.600000000000001e-06,
+      "loss": 1.3966,
+      "step": 540
+    },
+    {
+      "epoch": 0.7333333333333333,
+      "grad_norm": 3.5571675300598145,
+      "learning_rate": 5.333333333333334e-06,
+      "loss": 1.263,
+      "step": 550
+    },
+    {
+      "epoch": 0.7466666666666667,
+      "grad_norm": 3.040654182434082,
+      "learning_rate": 5.0666666666666676e-06,
+      "loss": 1.2993,
+      "step": 560
+    },
+    {
+      "epoch": 0.76,
+      "grad_norm": 3.5149991512298584,
+      "learning_rate": 4.800000000000001e-06,
+      "loss": 1.3882,
+      "step": 570
+    },
+    {
+      "epoch": 0.7733333333333333,
+      "grad_norm": 3.1839466094970703,
+      "learning_rate": 4.533333333333334e-06,
+      "loss": 1.4662,
+      "step": 580
+    },
+    {
+      "epoch": 0.7866666666666666,
+      "grad_norm": 3.4513893127441406,
+      "learning_rate": 4.266666666666668e-06,
+      "loss": 1.2339,
+      "step": 590
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 3.415241003036499,
+      "learning_rate": 4.000000000000001e-06,
+      "loss": 1.4015,
+      "step": 600
+    },
+    {
+      "epoch": 0.8133333333333334,
+      "grad_norm": 3.349771738052368,
+      "learning_rate": 3.7333333333333337e-06,
+      "loss": 1.3701,
+      "step": 610
+    },
+    {
+      "epoch": 0.8266666666666667,
+      "grad_norm": 3.491492748260498,
+      "learning_rate": 3.4666666666666672e-06,
+      "loss": 1.2594,
+      "step": 620
+    },
+    {
+      "epoch": 0.84,
+      "grad_norm": 2.971444606781006,
+      "learning_rate": 3.2000000000000003e-06,
+      "loss": 1.2519,
+      "step": 630
+    },
+    {
+      "epoch": 0.8533333333333334,
+      "grad_norm": 3.3128817081451416,
+      "learning_rate": 2.9333333333333338e-06,
+      "loss": 1.2691,
+      "step": 640
+    },
+    {
+      "epoch": 0.8666666666666667,
+      "grad_norm": 2.9615046977996826,
+      "learning_rate": 2.666666666666667e-06,
+      "loss": 1.1949,
+      "step": 650
+    },
+    {
+      "epoch": 0.88,
+      "grad_norm": 2.7413668632507324,
+      "learning_rate": 2.4000000000000003e-06,
+      "loss": 1.3438,
+      "step": 660
+    },
+    {
+      "epoch": 0.8933333333333333,
+      "grad_norm": 3.5874390602111816,
+      "learning_rate": 2.133333333333334e-06,
+      "loss": 1.2388,
+      "step": 670
+    },
+    {
+      "epoch": 0.9066666666666666,
+      "grad_norm": 3.203536033630371,
+      "learning_rate": 1.8666666666666669e-06,
+      "loss": 1.2355,
+      "step": 680
+    },
+    {
+      "epoch": 0.92,
+      "grad_norm": 3.3690247535705566,
+      "learning_rate": 1.6000000000000001e-06,
+      "loss": 1.3799,
+      "step": 690
+    },
+    {
+      "epoch": 0.9333333333333333,
+      "grad_norm": 3.1757168769836426,
+      "learning_rate": 1.3333333333333334e-06,
+      "loss": 1.2549,
+      "step": 700
+    },
+    {
+      "epoch": 0.9466666666666667,
+      "grad_norm": 3.5580122470855713,
+      "learning_rate": 1.066666666666667e-06,
+      "loss": 1.3206,
+      "step": 710
+    },
+    {
+      "epoch": 0.96,
+      "grad_norm": 2.4422216415405273,
+      "learning_rate": 8.000000000000001e-07,
+      "loss": 1.2652,
+      "step": 720
+    },
+    {
+      "epoch": 0.9733333333333334,
+      "grad_norm": 3.2295455932617188,
+      "learning_rate": 5.333333333333335e-07,
+      "loss": 1.2546,
+      "step": 730
+    },
+    {
+      "epoch": 0.9866666666666667,
+      "grad_norm": 3.25307297706604,
+      "learning_rate": 2.666666666666667e-07,
+      "loss": 1.3874,
+      "step": 740
+    },
+    {
+      "epoch": 1.0,
+      "grad_norm": 3.577993392944336,
+      "learning_rate": 0.0,
+      "loss": 1.3314,
+      "step": 750
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 750,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 195972562944000.0,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}

results/checkpoint-750/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6beed08edbb35ca6e9a80f4f0c2e64fd68d6b69087b1135dded06cfeaf727a05
+size 5304

results/checkpoint-750/vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff

results/runs/May21_21-01-39_Vardaan_Aspire/events.out.tfevents.1747841540.Vardaan_Aspire.6608.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:64bd6068596e88bb765fef2aa4dab5d0fef3a8c7c12416a30ae28b36a7904481
+size 5369

results/runs/May21_21-07-12_Vardaan_Aspire/events.out.tfevents.1747841837.Vardaan_Aspire.16648.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:887cccb86efad0f284642ed81eb6cfdbf395b51ac8eb267913660bec4bd9603b
+size 5371

results/runs/May21_21-14-05_Vardaan_Aspire/events.out.tfevents.1747842248.Vardaan_Aspire.7928.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e40e91bc782ac59098605977ae9b579d7870f1e69f6b114f43a11a8dd868b76c
+size 5371

results/runs/May21_21-20-51_Vardaan_Aspire/events.out.tfevents.1747842654.Vardaan_Aspire.7928.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6554f22d532b849aa4458c5bafcbb829456d20366ac4e2350880e775eed71bde
+size 5370

results/runs/May21_21-23-46_Vardaan_Aspire/events.out.tfevents.1747842827.Vardaan_Aspire.11864.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c7ecbf193794a8fbea8914cd99e240858642c50d86cd57e7719d1bcbf8ff2749
+size 5370

results/runs/May21_21-27-09_Vardaan_Aspire/events.out.tfevents.1747843029.Vardaan_Aspire.14288.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c093a8b1c5548b37f1c974deb08a86054a9578d4fe069f5b8fc745a0ba27d3c8
+size 5369

results/runs/May21_21-28-25_Vardaan_Aspire/events.out.tfevents.1747843114.Vardaan_Aspire.14288.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3da74765969de5fe6d9ebd0c52f00fd1b8c532a0a56631701c6c13123ca3faef
+size 5572

results/runs/May21_21-50-19_Vardaan_Aspire/events.out.tfevents.1747844420.Vardaan_Aspire.14288.2 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2579751bdbe481663b1ae2cdfc54d9e178b3cabdfb206b3095f49f00884446a9
+size 21496