LH-Tech-AI commited on
Commit
08834d1
·
verified ·
1 Parent(s): a4b2537

Upload train.ipynb

Browse files
Files changed (1) hide show
  1. train.ipynb +1 -0
train.ipynb ADDED
@@ -0,0 +1 @@
 
 
1
from transformers import AutoTokenizer, AutoConfig, AutoModelForSequenceClassification, Trainer, TrainingArguments
from datasets import load_dataset

# IMDB movie reviews: 25k labeled train, 25k labeled test, plus a 50k
# unlabeled "unsupervised" split that this notebook never uses.
dataset = load_dataset("imdb")

tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")

def tokenize_function(examples):
    """Tokenize a batch of examples to fixed-length input ids.

    padding="max_length" pads every sequence to 128 tokens so the Trainer
    needs no dynamic-padding collator; truncation drops anything past 128.
    """
    return tokenizer(examples["text"], padding="max_length", truncation=True, max_length=128)

# Fix: the original mapped over the whole DatasetDict, which also
# tokenized the 50,000-example "unsupervised" split (visible in the
# original cell's progress output) even though no later cell reads it.
# Tokenize only the splits the rest of the notebook actually uses;
# downstream access via tokenized_datasets["train"] / ["test"] is unchanged.
tokenized_datasets = {
    split: dataset[split].map(tokenize_function, batched=True)
    for split in ("train", "test")
}
Please set a HF_TOKEN to enable higher rate limits and faster downloads.\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"README.md: 0.00B [00:00, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"ce360612141e46aa92852b4f1fcb7048"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"plain_text/train-00000-of-00001.parquet: 0%| | 0.00/21.0M [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"765151dd892046f8aebe1e785b4fc06f"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"plain_text/test-00000-of-00001.parquet: 0%| | 0.00/20.5M [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"dc2ca1a3db994a8283d01028091dc394"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"plain_text/unsupervised-00000-of-00001.p(…): 0%| | 0.00/42.0M [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"e61248146a834bf0a91e4ef5bd6ce3b7"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Generating train split: 0%| | 0/25000 [00:00<?, ? examples/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"3bb5e50cbed449ccba8636e1e8844dc6"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Generating test split: 0%| | 0/25000 [00:00<?, ? examples/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"2ac462a0afe6420d8d11a50cc07bdd36"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Generating unsupervised split: 0%| | 0/50000 [00:00<?, ? 
examples/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"575a35a9cf98457d8d0b3775184d41e4"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"config.json: 0%| | 0.00/483 [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"2f35bdb993d24ebeae082f096bce9fcc"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"tokenizer_config.json: 0%| | 0.00/48.0 [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"c694607d81a14bc08f036a723d0b23b0"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"vocab.txt: 0.00B [00:00, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"2d74df0cf3b746beb48c3176efa410dc"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"tokenizer.json: 0.00B [00:00, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"20b49b59dacf4e8fa1f06bea3f4e994b"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Map: 0%| | 0/25000 [00:00<?, ? examples/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"20fb62fbeac24b05b0c547a347633f13"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Map: 0%| | 0/25000 [00:00<?, ? examples/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"0dd2c8f8f09f4dd68bfebc60a393fb89"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Map: 0%| | 0/50000 [00:00<?, ? 
import numpy as np

# Build a deliberately tiny DistilBERT (2 layers, 4 heads, dim 256) from
# scratch — from_config() gives RANDOM weights, no pretraining, which is
# why this model needs several epochs just to reach ~0.82 eval loss.
config = AutoConfig.from_pretrained(
    "distilbert-base-uncased",
    num_labels=2,     # binary sentiment: 0 = NEGATIVE, 1 = POSITIVE
    n_layers=2,
    n_heads=4,
    dim=256,
    hidden_dim=512
)

model = AutoModelForSequenceClassification.from_config(config)

print(f"Model size: {model.num_parameters():,} parameters")


def compute_metrics(eval_pred):
    """Report accuracy at each epoch-end evaluation.

    Fix: the original evaluated every epoch but logged only the loss,
    so classification quality was invisible during training.
    """
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return {"accuracy": float((predictions == labels).mean())}


# Fix: the original run's own output shows validation loss rising every
# epoch after epoch 2 (0.821 -> 0.873 -> 0.938 -> 0.980) while the LAST
# checkpoint was kept — i.e. the exported model was the most overfit one.
# load_best_model_at_end restores the lowest-eval-loss checkpoint instead.
# An explicit seed is also pinned for reproducibility.
training_args = TrainingArguments(
    output_dir="./sentiment_scratch",
    eval_strategy="epoch",
    save_strategy="epoch",     # must match eval_strategy for best-model tracking
    learning_rate=5e-5,
    per_device_train_batch_size=32,
    num_train_epochs=5,
    weight_decay=0.01,
    seed=42,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False,   # lower eval_loss is better
    report_to="none"
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"],
    compute_metrics=compute_metrics,
)

trainer.train()
import torch

def predict_sentiment(text):
    """Run the trained model on a single review.

    Returns (label, confidence) where label is "POSITIVE" / "NEGATIVE"
    and confidence is the softmax probability of the predicted class.
    """
    # 1. Tokenize and move the tensors onto the model's device.
    encoded = tokenizer(
        text, return_tensors="pt", padding=True, truncation=True, max_length=128
    ).to(model.device)

    # 2. Forward pass with gradients disabled (inference only).
    model.eval()
    with torch.no_grad():
        logits = model(**encoded).logits

    # 3. Logits -> probabilities; keep the top class and its probability.
    probabilities = torch.nn.functional.softmax(logits, dim=-1)
    top_prob, top_class = torch.max(probabilities, dim=-1)

    label = "POSITIVE" if top_class.item() == 1 else "NEGATIVE"
    return label, top_prob.item()

# --- THE TEST SUITE ---
test_reviews = [
    "This movie was an absolute masterpiece! The acting was incredible and I loved every second.",
    "Avoid this at all costs. A total waste of time and money. Terrible plot.",
    "The visuals were stunning, but the story was quite boring and the ending made no sense.",
    "I've seen worse, but I've definitely seen better. It was just... okay, I guess.",
    "Oh great, another superhero movie. Just what the world needed. I loved sitting through 3 hours of CGI vomit.",
    "The scenes were amazing! Just incredible realistic! Good work.",
    "Why did they even produce it?",
    "This was not as bad as I thought it would be, but still not good.",
]

print(f"--- 🤖 CritiqueCore v1: Inference Test ---")
for review in test_reviews:
    sentiment, confidence = predict_sentiment(review)
    print(f"\nReview: \"{review}\"")
    print(f"Result: {sentiment} ({confidence*100:.2f}% confidence)")
import os
import shutil

# 1. Persist the trained weights and tokenizer files side by side, so the
#    folder is directly loadable with from_pretrained().
save_dir = "./CritiqueCore_v1_HF"
os.makedirs(save_dir, exist_ok=True)

trainer.save_model(save_dir)
tokenizer.save_pretrained(save_dir)

# 2. Bundle the export folder into one ZIP archive for easy download.
shutil.make_archive("CritiqueCore_v1_Model", 'zip', save_dir)

print(f"✅ Model saved to {save_dir} and zipped as CritiqueCore_v1_Model.zip")
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

class CritiqueCoreInference:
    """Self-contained sentiment engine around an exported checkpoint folder."""

    def __init__(self, model_path):
        # Prefer GPU when one is visible; otherwise fall back to CPU.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
        self.model = AutoModelForSequenceClassification.from_pretrained(model_path).to(self.device)
        self.model.eval()  # inference mode

    def analyze(self, text):
        """Classify `text`; returns a dict with the text, label and confidence."""
        batch = self.tokenizer(
            text,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=128
        ).to(self.device)

        with torch.no_grad():
            scores = self.model(**batch).logits
            distribution = torch.nn.functional.softmax(scores, dim=-1)
            confidence, winner = torch.max(distribution, dim=-1)

        verdict = "POSITIVE" if winner.item() == 1 else "NEGATIVE"
        return {
            "text": text,
            "label": verdict,
            "confidence": f"{confidence.item() * 100:.2f}%"
        }

# Usage
if __name__ == "__main__":
    # Point this to your unzipped folder
    engine = CritiqueCoreInference("./CritiqueCore_v1_HF")

    sample = "The plot was a bit slow, but overall a great experience."
    prediction = engine.analyze(sample)
    print(f"Result: {prediction['label']} | Confidence: {prediction['confidence']}")