Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files- .gitattributes +2 -0
- .gradio/certificate.pem +31 -0
- .gradio/flagged/dataset1.csv +2 -0
- README.md +3 -9
- __pycache__/infer.cpython-313.pyc +0 -0
- __pycache__/train.cpython-313.pyc +0 -0
- __pycache__/utils.cpython-313.pyc +0 -0
- app.py +36 -0
- infer.py +105 -0
- main.py +74 -0
- model/config.json +24 -0
- model/model.safetensors +3 -0
- model/special_tokens_map.json +7 -0
- model/tokenizer.json +0 -0
- model/tokenizer_config.json +56 -0
- model/training_args.bin +3 -0
- model/vocab.txt +0 -0
- requirements.txt +7 -0
- train.py +143 -0
- utils.py +32 -0
- wandb/run-20250720_144411-9kwggmdj/files/config.yaml +493 -0
- wandb/run-20250720_144411-9kwggmdj/files/output.log +148 -0
- wandb/run-20250720_144411-9kwggmdj/files/requirements.txt +139 -0
- wandb/run-20250720_144411-9kwggmdj/files/wandb-metadata.json +36 -0
- wandb/run-20250720_144411-9kwggmdj/files/wandb-summary.json +1 -0
- wandb/run-20250720_144411-9kwggmdj/logs/debug-internal.log +12 -0
- wandb/run-20250720_144411-9kwggmdj/logs/debug.log +25 -0
- wandb/run-20250720_144411-9kwggmdj/run-9kwggmdj.wandb +3 -0
- wandb/run-20250720_154435-9xqrzjdo/files/config.yaml +493 -0
- wandb/run-20250720_154435-9xqrzjdo/files/output.log +39 -0
- wandb/run-20250720_154435-9xqrzjdo/files/requirements.txt +139 -0
- wandb/run-20250720_154435-9xqrzjdo/files/wandb-metadata.json +36 -0
- wandb/run-20250720_154435-9xqrzjdo/files/wandb-summary.json +1 -0
- wandb/run-20250720_154435-9xqrzjdo/logs/debug-internal.log +12 -0
- wandb/run-20250720_154435-9xqrzjdo/logs/debug.log +25 -0
- wandb/run-20250720_154435-9xqrzjdo/run-9xqrzjdo.wandb +0 -0
- wandb/run-20250720_155338-0h3fksuy/files/config.yaml +494 -0
- wandb/run-20250720_155338-0h3fksuy/files/output.log +398 -0
- wandb/run-20250720_155338-0h3fksuy/files/requirements.txt +139 -0
- wandb/run-20250720_155338-0h3fksuy/files/wandb-metadata.json +36 -0
- wandb/run-20250720_155338-0h3fksuy/files/wandb-summary.json +1 -0
- wandb/run-20250720_155338-0h3fksuy/logs/debug-internal.log +16 -0
- wandb/run-20250720_155338-0h3fksuy/logs/debug.log +25 -0
- wandb/run-20250720_155338-0h3fksuy/run-0h3fksuy.wandb +3 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
wandb/run-20250720_144411-9kwggmdj/run-9kwggmdj.wandb filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
wandb/run-20250720_155338-0h3fksuy/run-0h3fksuy.wandb filter=lfs diff=lfs merge=lfs -text
|
.gradio/certificate.pem
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
-----BEGIN CERTIFICATE-----
|
| 2 |
+
MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
|
| 3 |
+
TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
|
| 4 |
+
cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
|
| 5 |
+
WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
|
| 6 |
+
ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
|
| 7 |
+
MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
|
| 8 |
+
h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
|
| 9 |
+
0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
|
| 10 |
+
A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
|
| 11 |
+
T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
|
| 12 |
+
B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
|
| 13 |
+
B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
|
| 14 |
+
KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
|
| 15 |
+
OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
|
| 16 |
+
jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
|
| 17 |
+
qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
|
| 18 |
+
rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
|
| 19 |
+
HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
|
| 20 |
+
hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
|
| 21 |
+
ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
|
| 22 |
+
3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
|
| 23 |
+
NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
|
| 24 |
+
ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
|
| 25 |
+
TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
|
| 26 |
+
jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
|
| 27 |
+
oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
|
| 28 |
+
4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
|
| 29 |
+
mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
|
| 30 |
+
emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
|
| 31 |
+
-----END CERTIFICATE-----
|
.gradio/flagged/dataset1.csv
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Movie Review,Sentiment Prediction,timestamp
|
| 2 |
+
The movie gave me chills it was soo scary.,"{""label"": ""Negative"", ""confidences"": null}",2025-07-21 01:18:41.890282
|
README.md
CHANGED
|
@@ -1,12 +1,6 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji: 🌍
|
| 4 |
-
colorFrom: green
|
| 5 |
-
colorTo: green
|
| 6 |
-
sdk: gradio
|
| 7 |
-
sdk_version: 5.38.0
|
| 8 |
app_file: app.py
|
| 9 |
-
|
|
|
|
| 10 |
---
|
| 11 |
-
|
| 12 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
| 1 |
---
|
| 2 |
+
title: distilbert-sentiment
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
app_file: app.py
|
| 4 |
+
sdk: gradio
|
| 5 |
+
sdk_version: 5.37.0
|
| 6 |
---
|
|
|
|
|
|
__pycache__/infer.cpython-313.pyc
ADDED
|
Binary file (3.68 kB). View file
|
|
|
__pycache__/train.cpython-313.pyc
ADDED
|
Binary file (4.73 kB). View file
|
|
|
__pycache__/utils.cpython-313.pyc
ADDED
|
Binary file (975 Bytes). View file
|
|
|
app.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Gradio frontend for DistilBERT sentiment analysis
|
| 3 |
+
File: app.py
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import gradio as gr
|
| 7 |
+
from infer import predict
|
| 8 |
+
|
| 9 |
+
def sentiment_analyzer(text):
    """Classify a movie review for the Gradio interface.

    Args:
        text: Review text from the input textbox; may be ``None`` or blank.

    Returns:
        ``"Please enter some text"`` when the input is missing or only
        whitespace; otherwise the string returned by ``predict`` with its
        first character upper-cased.
    """
    # Guard against None as well as blank input: None has no .strip().
    if text is None or not text.strip():
        return "Please enter some text"

    result = predict(text)

    # Upper-case only the first character. str.capitalize() would also
    # lower-case the rest of the string, which mangles any longer message
    # (for example an error string containing a file path).
    return result[:1].upper() + result[1:]
|
| 16 |
+
|
| 17 |
+
# Create Gradio interface
|
| 18 |
+
# Input widget: a three-line textbox for the review.
_review_input = gr.Textbox(
    label="Movie Review",
    placeholder="Enter your movie review here...",
    lines=3,
)

# Output widget: a label component showing the predicted sentiment.
_sentiment_output = gr.Label(label="Sentiment Prediction")

# Sample reviews offered as clickable examples in the UI.
_example_reviews = [
    "This movie was absolutely fantastic! Great acting and storyline.",
    "Terrible film, worst movie I've ever seen. Complete waste of time.",
    "The movie was okay, not great but not terrible either.",
]

interface = gr.Interface(
    fn=sentiment_analyzer,
    inputs=_review_input,
    outputs=_sentiment_output,
    title="🎬 Movie Review Sentiment Analysis",
    description="Fine-tuned DistilBERT model for movie review sentiment classification",
    examples=_example_reviews,
)

# Launch only when executed as a script, not when imported.
if __name__ == "__main__":
    interface.launch(share=True)
|
infer.py
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Inference pipeline for DistilBERT sentiment analysis
|
| 3 |
+
File: infer.py (improved version)
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import torch
|
| 7 |
+
import os
|
| 8 |
+
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
| 9 |
+
|
| 10 |
+
# Global variables to cache model and tokenizer
|
| 11 |
+
_model = None
_tokenizer = None
_model_path = None  # directory the cached model/tokenizer were loaded from


def load_trained_model(model_path="./model"):
    """Load the saved model and tokenizer, caching them per ``model_path``.

    The most recently loaded pair is kept in module-level globals and reused
    while the same ``model_path`` is requested. Asking for a different path
    reloads from disk instead of returning the previously cached model.

    Args:
        model_path: Directory containing the saved model and tokenizer.

    Returns:
        Tuple of (model, tokenizer).

    Raises:
        FileNotFoundError: If ``model_path`` does not exist.
    """
    global _model, _tokenizer, _model_path

    # Check if model exists
    if not os.path.exists(model_path):
        raise FileNotFoundError(f"No model found at {model_path}. Please train the model first.")

    # Reuse the cache only when it is fully populated AND was loaded from
    # the path being requested now.
    cache_is_valid = (
        _model is not None
        and _tokenizer is not None
        and _model_path == model_path
    )
    if cache_is_valid:
        return _model, _tokenizer

    print(f"Loading model from {model_path}...")

    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model = AutoModelForSequenceClassification.from_pretrained(model_path)

    # Publish to the cache only after both loads succeeded, so a failure
    # cannot leave a tokenizer cached without its model.
    _tokenizer, _model, _model_path = tokenizer, model, model_path

    print("Model loaded successfully!")
    return _model, _tokenizer
|
| 33 |
+
|
| 34 |
+
def predict_sentiment(text, model, tokenizer, max_length=256):
    """Predict the sentiment of a single text.

    Args:
        text: Input text string.
        model: Loaded sequence-classification model.
        tokenizer: Loaded tokenizer matching ``model``.
        max_length: Maximum sequence length; longer inputs are truncated and
            shorter ones are padded up to this length.

    Returns:
        Tuple of (predicted_label, confidence_score), where the label is
        "Positive" for class index 1 and "Negative" otherwise, and the
        confidence is the softmax probability of the predicted class.
    """
    # Tokenize input
    encoded = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding="max_length",
        max_length=max_length,
    )

    # Run the inputs on the same device as the model; otherwise a model that
    # lives on an accelerator fails with a device-mismatch error. Objects
    # without a `device` attribute are left untouched.
    device = getattr(model, "device", None)
    if device is not None:
        encoded = {name: tensor.to(device) for name, tensor in encoded.items()}

    # Get prediction
    model.eval()
    with torch.no_grad():
        logits = model(**encoded).logits
        probabilities = torch.nn.functional.softmax(logits, dim=-1)
        predicted_class = torch.argmax(probabilities, dim=-1).item()
        confidence = probabilities[0][predicted_class].item()

    # Convert to readable format
    label = "Positive" if predicted_class == 1 else "Negative"

    return label, confidence
|
| 68 |
+
|
| 69 |
+
def predict(text, model_path="./model", max_length=256):
    """Predict the sentiment of new text using the cached saved model.

    Delegates tokenization and the forward pass to ``predict_sentiment`` so
    the inference logic lives in one place.

    Args:
        text: Input text string.
        model_path: Path to saved model.
        max_length: Max sequence length.

    Returns:
        String: "positive" or "negative" on success. This function does not
        raise: on failure it returns a message starting with "Error: " (model
        directory missing) or "Prediction error: " (any other exception).
    """
    try:
        # Load model and tokenizer (cached)
        model, tokenizer = load_trained_model(model_path)

        label, _confidence = predict_sentiment(
            text, model, tokenizer, max_length=max_length
        )
        return label.lower()

    except FileNotFoundError as e:
        return f"Error: {str(e)}"
    except Exception as e:
        # Deliberate catch-all: callers expect a string, never an exception.
        return f"Prediction error: {str(e)}"
|
main.py
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Main pipeline for DistilBERT sentiment analysis project
|
| 3 |
+
File: main.py
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import os
|
| 7 |
+
import argparse
|
| 8 |
+
from train import (
|
| 9 |
+
load_imdb_data,
|
| 10 |
+
preprocess_data,
|
| 11 |
+
load_model,
|
| 12 |
+
setup_trainer,
|
| 13 |
+
train_model,
|
| 14 |
+
evaluate_model,
|
| 15 |
+
save_model
|
| 16 |
+
)
|
| 17 |
+
# The Gradio app (app.py) is launched separately, so it is not imported here
|
| 18 |
+
|
| 19 |
+
def train_pipeline(subset_size=None):
    """Run the full workflow: load, tokenize, train, evaluate, save.

    Args:
        subset_size: Optional number of training examples to use; passed
            through to ``load_imdb_data``.

    Returns:
        The evaluation results returned by ``evaluate_model``.
    """
    print("=== Starting Training Pipeline ===")

    # Data: raw dataset -> tokenized dataset (plus the tokenizer used).
    raw_data = load_imdb_data(subset_size=subset_size)
    encoded_data, tok = preprocess_data(raw_data)

    # Model and trainer wiring.
    classifier = load_model()
    trainer = setup_trainer(
        classifier,
        tok,
        encoded_data["train"],
        encoded_data["test"],
    )

    # Fit, score, then persist the model and tokenizer.
    train_model(trainer)
    metrics = evaluate_model(trainer)
    save_model(trainer, tok)

    print("=== Training Pipeline Completed ===")
    return metrics
|
| 51 |
+
|
| 52 |
+
def main():
    """Parse command-line options and run training.

    If a saved model (``./model/config.json``) already exists, the user is
    asked whether to retrain. Any answer other than "y"/"yes" (ignoring case
    and surrounding whitespace) skips training. A closed or missing stdin is
    treated as declining.
    """
    parser = argparse.ArgumentParser(description="DistilBERT Sentiment Analysis - Training Only")
    parser.add_argument("--subset", type=int, default=None,
                        help="Use subset of data for training (for testing)")

    args = parser.parse_args()

    # Check if model already exists. config.json can only exist when its
    # parent directory does, so one check is enough.
    if os.path.exists("./model/config.json"):
        try:
            response = input("Model already exists. Retrain? (y/n): ")
        except EOFError:
            # No interactive stdin: do not retrain over an existing model.
            response = ""
        if response.strip().lower() not in ("y", "yes"):
            print("Skipping training...")
            print("To run the app: python app.py")
            return

    # Train the model
    train_pipeline(subset_size=args.subset)

    print("\n🎉 Training completed!")
    print("To run the app: python app.py")
|
| 72 |
+
|
| 73 |
+
if __name__ == "__main__":
|
| 74 |
+
main()
|
model/config.json
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"activation": "gelu",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"DistilBertForSequenceClassification"
|
| 5 |
+
],
|
| 6 |
+
"attention_dropout": 0.1,
|
| 7 |
+
"dim": 768,
|
| 8 |
+
"dropout": 0.1,
|
| 9 |
+
"hidden_dim": 3072,
|
| 10 |
+
"initializer_range": 0.02,
|
| 11 |
+
"max_position_embeddings": 512,
|
| 12 |
+
"model_type": "distilbert",
|
| 13 |
+
"n_heads": 12,
|
| 14 |
+
"n_layers": 6,
|
| 15 |
+
"pad_token_id": 0,
|
| 16 |
+
"problem_type": "single_label_classification",
|
| 17 |
+
"qa_dropout": 0.1,
|
| 18 |
+
"seq_classif_dropout": 0.2,
|
| 19 |
+
"sinusoidal_pos_embds": false,
|
| 20 |
+
"tie_weights_": true,
|
| 21 |
+
"torch_dtype": "float32",
|
| 22 |
+
"transformers_version": "4.53.2",
|
| 23 |
+
"vocab_size": 30522
|
| 24 |
+
}
|
model/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:65e5980bd38406f43fad7a937fbfd69641552cd0bbcf0ba62ca73f7318eb3f9f
|
| 3 |
+
size 267832560
|
model/special_tokens_map.json
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cls_token": "[CLS]",
|
| 3 |
+
"mask_token": "[MASK]",
|
| 4 |
+
"pad_token": "[PAD]",
|
| 5 |
+
"sep_token": "[SEP]",
|
| 6 |
+
"unk_token": "[UNK]"
|
| 7 |
+
}
|
model/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
model/tokenizer_config.json
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"added_tokens_decoder": {
|
| 3 |
+
"0": {
|
| 4 |
+
"content": "[PAD]",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": true
|
| 10 |
+
},
|
| 11 |
+
"100": {
|
| 12 |
+
"content": "[UNK]",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false,
|
| 17 |
+
"special": true
|
| 18 |
+
},
|
| 19 |
+
"101": {
|
| 20 |
+
"content": "[CLS]",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false,
|
| 25 |
+
"special": true
|
| 26 |
+
},
|
| 27 |
+
"102": {
|
| 28 |
+
"content": "[SEP]",
|
| 29 |
+
"lstrip": false,
|
| 30 |
+
"normalized": false,
|
| 31 |
+
"rstrip": false,
|
| 32 |
+
"single_word": false,
|
| 33 |
+
"special": true
|
| 34 |
+
},
|
| 35 |
+
"103": {
|
| 36 |
+
"content": "[MASK]",
|
| 37 |
+
"lstrip": false,
|
| 38 |
+
"normalized": false,
|
| 39 |
+
"rstrip": false,
|
| 40 |
+
"single_word": false,
|
| 41 |
+
"special": true
|
| 42 |
+
}
|
| 43 |
+
},
|
| 44 |
+
"clean_up_tokenization_spaces": false,
|
| 45 |
+
"cls_token": "[CLS]",
|
| 46 |
+
"do_lower_case": true,
|
| 47 |
+
"extra_special_tokens": {},
|
| 48 |
+
"mask_token": "[MASK]",
|
| 49 |
+
"model_max_length": 512,
|
| 50 |
+
"pad_token": "[PAD]",
|
| 51 |
+
"sep_token": "[SEP]",
|
| 52 |
+
"strip_accents": null,
|
| 53 |
+
"tokenize_chinese_chars": true,
|
| 54 |
+
"tokenizer_class": "DistilBertTokenizer",
|
| 55 |
+
"unk_token": "[UNK]"
|
| 56 |
+
}
|
model/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:01f6ab2453c3b34039132e185e58b0fa0c07ed65cf292dae165c993dcdab7683
|
| 3 |
+
size 5713
|
model/vocab.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
requirements.txt
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
torch>=2.1.0,<3.0.0
|
| 3 |
+
transformers>=4.45.0,<5.0.0
|
| 4 |
+
datasets>=2.21.0,<3.0.0
|
| 5 |
+
gradio>=5.0.0,<6.0.0
|
| 6 |
+
scikit-learn>=1.5.0,<2.0.0
|
| 7 |
+
numpy>=1.24.0,<2.0.0
|
train.py
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Training and evaluation logic for DistilBERT sentiment analysis
|
| 3 |
+
File: train.py
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
# Hugging Face imports
|
| 7 |
+
from transformers import (
|
| 8 |
+
AutoTokenizer,
|
| 9 |
+
AutoModelForSequenceClassification,
|
| 10 |
+
Trainer,
|
| 11 |
+
TrainingArguments,
|
| 12 |
+
logging
|
| 13 |
+
)
|
| 14 |
+
|
| 15 |
+
# Local helper and Hugging Face datasets imports
|
| 16 |
+
from utils import compute_metrics
|
| 17 |
+
from datasets import load_dataset
|
| 18 |
+
|
| 19 |
+
# Third-party numerical libraries
|
| 20 |
+
import torch
|
| 21 |
+
import numpy as np
|
| 22 |
+
import pandas as pd
|
| 23 |
+
|
| 24 |
+
# Sklearn metrics
|
| 25 |
+
from sklearn.metrics import (
|
| 26 |
+
precision_recall_fscore_support,
|
| 27 |
+
accuracy_score,
|
| 28 |
+
confusion_matrix
|
| 29 |
+
)
|
| 30 |
+
|
| 31 |
+
# Suppress HF log spam
|
| 32 |
+
logging.set_verbosity_error()
|
| 33 |
+
|
| 34 |
+
# ===== DATASET LOADING =====
|
| 35 |
+
|
| 36 |
+
def load_imdb_data(subset_size=None):
    """Load the IMDB dataset, optionally subsampled.

    Args:
        subset_size: If truthy, keep at most this many training examples and
            at most ``subset_size // 4`` test examples. ``None`` or ``0``
            keeps the full dataset.

    Returns:
        The dataset dictionary with "train" and "test" splits.
    """
    dataset = load_dataset("imdb")

    # Optional subsetting for memory constraints
    if subset_size:
        # Clamp to the split sizes so an oversized request cannot index past
        # the end of a split.
        train_count = min(subset_size, len(dataset["train"]))
        test_count = min(subset_size // 4, len(dataset["test"]))

        # Shuffle (with a fixed seed, for reproducibility) before taking the
        # first N rows. NOTE(review): the hub copy of IMDB appears to store
        # rows grouped by label, so an unshuffled prefix would be dominated
        # by a single class; confirm against the dataset card.
        dataset["train"] = dataset["train"].shuffle(seed=42).select(range(train_count))
        dataset["test"] = dataset["test"].shuffle(seed=42).select(range(test_count))

    print(f"Dataset loaded - Train: {len(dataset['train'])}, Test: {len(dataset['test'])}")
    return dataset
|
| 47 |
+
|
| 48 |
+
# ===== PREPROCESSING =====
|
| 49 |
+
|
| 50 |
+
def preprocess_data(dataset, max_length=256):
    """Tokenize the dataset and expose it as torch tensors.

    Args:
        dataset: Dataset with "text" and "label" columns.
        max_length: Length every example is padded or truncated to.

    Returns:
        Tuple of (tokenized_dataset, tokenizer).
    """
    tok = AutoTokenizer.from_pretrained("distilbert-base-uncased")

    def _encode(batch):
        # Pad/truncate every example to exactly max_length tokens.
        return tok(
            batch["text"],
            padding="max_length",
            truncation=True,
            max_length=max_length,
        )

    # Tokenize all splits, then rename the target column to "labels".
    encoded = dataset.map(_encode, batched=True).rename_column("label", "labels")

    # Expose only the listed columns, as torch tensors.
    encoded.set_format("torch", columns=["input_ids", "attention_mask", "labels"])

    return encoded, tok
|
| 70 |
+
|
| 71 |
+
# ===== MODEL LOADING =====
|
| 72 |
+
|
| 73 |
+
def load_model():
    """Return the pre-trained DistilBERT checkpoint with a two-label classification head."""
    return AutoModelForSequenceClassification.from_pretrained(
        "distilbert-base-uncased",
        num_labels=2,
        return_dict=True,
    )
|
| 81 |
+
|
| 82 |
+
# ===== TRAINING SETUP =====
|
| 83 |
+
|
| 84 |
+
def get_training_args():
    """Build the ``TrainingArguments`` used for fine-tuning."""
    settings = dict(
        # Where checkpoints are written.
        output_dir="./model",
        # Batching: per-device batches of 2, accumulated over 2 steps.
        per_device_train_batch_size=2,
        per_device_eval_batch_size=4,
        gradient_accumulation_steps=2,
        # Schedule: evaluate and checkpoint once per epoch.
        num_train_epochs=3,
        eval_strategy="epoch",  # current name of the former evaluation_strategy argument
        save_strategy="epoch",
        # Logging.
        logging_dir="./logs",
        logging_steps=50,
        # Model selection: reload the checkpoint with the highest F1 at the end.
        load_best_model_at_end=True,
        metric_for_best_model="f1",
        greater_is_better=True,
        seed=42,
    )
    return TrainingArguments(**settings)
|
| 101 |
+
|
| 102 |
+
def setup_trainer(model, tokenizer, train_dataset, eval_dataset):
    """Create a ``Trainer`` wired to the model, datasets and metric function.

    Args:
        model: Model to fine-tune.
        tokenizer: Tokenizer handed to the trainer.
        train_dataset: Tokenized training split.
        eval_dataset: Tokenized evaluation split.

    Returns:
        The configured ``Trainer``.
    """
    return Trainer(
        model=model,
        args=get_training_args(),
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        tokenizer=tokenizer,
        compute_metrics=compute_metrics,
    )
|
| 116 |
+
|
| 117 |
+
# ===== TRAIN & EVALUATE =====
|
| 118 |
+
|
| 119 |
+
def train_model(trainer):
    """Run ``trainer.train()``, printing a message before and after.

    Args:
        trainer: Configured trainer whose ``train`` method is invoked.
    """
    print("Starting training...")

    trainer.train()

    print("Training completed!")
|
| 124 |
+
|
| 125 |
+
def evaluate_model(trainer):
    """Evaluate the trained model and print each metric.

    Args:
        trainer: Trainer whose ``evaluate`` method returns a metrics mapping.

    Returns:
        The mapping returned by ``trainer.evaluate()``, unchanged.
    """
    print("Evaluating model...")
    results = trainer.evaluate()

    print("=== Evaluation Results ===")
    for key, value in results.items():
        try:
            rendered = f"{value:.4f}"
        except (TypeError, ValueError):
            # Non-numeric metric value (e.g. a string or None): print it as
            # is instead of aborting the whole report.
            rendered = str(value)
        print(f"{key}: {rendered}")

    return results
|
| 135 |
+
|
| 136 |
+
# ===== SAVE MODEL =====
|
| 137 |
+
|
| 138 |
+
def save_model(trainer, tokenizer, save_path="./model"):
    """Write the trained model and its tokenizer to ``save_path``.

    Args:
        trainer: Trainer whose ``save_model`` method writes the model.
        tokenizer: Tokenizer saved with ``save_pretrained``.
        save_path: Destination directory for both.
    """
    print(f"Saving model to {save_path}...")

    # Model first, then tokenizer, both into the same directory.
    trainer.save_model(save_path)
    tokenizer.save_pretrained(save_path)

    print("Model saved successfully!")
|
utils.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Utility functions for DistilBERT sentiment analysis
|
| 3 |
+
File: utils.py
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import numpy as np
|
| 7 |
+
from sklearn.metrics import precision_recall_fscore_support, accuracy_score
|
| 8 |
+
|
| 9 |
+
def compute_metrics(eval_pred):
    """Compute evaluation metrics for binary classification.

    Args:
        eval_pred: Tuple of (predictions, labels). ``predictions`` holds
            per-class scores; if it is itself a tuple, its first element is
            used as the scores.

    Returns:
        Dict with accuracy, f1, precision, recall.
    """
    predictions, labels = eval_pred

    # Some models return extra outputs alongside the scores; keep only the
    # first entry in that case.
    if isinstance(predictions, tuple):
        predictions = predictions[0]
    predictions = np.argmax(predictions, axis=1)

    # zero_division=0 keeps the value scikit-learn already reports for an
    # undefined precision/recall (0.0) but without emitting a warning each
    # time the positive class is never predicted.
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels, predictions, average='binary', zero_division=0
    )
    accuracy = accuracy_score(labels, predictions)

    return {
        'accuracy': accuracy,
        'f1': f1,
        'precision': precision,
        'recall': recall
    }
|
wandb/run-20250720_144411-9kwggmdj/files/config.yaml
ADDED
|
@@ -0,0 +1,493 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
_name_or_path:
|
| 2 |
+
value: distilbert-base-uncased
|
| 3 |
+
_wandb:
|
| 4 |
+
value:
|
| 5 |
+
cli_version: 0.21.0
|
| 6 |
+
e:
|
| 7 |
+
qd7dze61nxdy0n83hyx7lap6a5tql6xc:
|
| 8 |
+
codePath: main.py
|
| 9 |
+
codePathLocal: main.py
|
| 10 |
+
cpu_count: 4
|
| 11 |
+
cpu_count_logical: 8
|
| 12 |
+
cudaVersion: "12.7"
|
| 13 |
+
disk:
|
| 14 |
+
/:
|
| 15 |
+
total: "255230791680"
|
| 16 |
+
used: "208595525632"
|
| 17 |
+
email: shreshthkapai@gmail.com
|
| 18 |
+
executable: C:\Users\Legion\Miniconda3\envs\ML\python.exe
|
| 19 |
+
gpu: NVIDIA GeForce GTX 1650
|
| 20 |
+
gpu_count: 1
|
| 21 |
+
gpu_nvidia:
|
| 22 |
+
- architecture: Turing
|
| 23 |
+
cudaCores: 1024
|
| 24 |
+
memoryTotal: "4294967296"
|
| 25 |
+
name: NVIDIA GeForce GTX 1650
|
| 26 |
+
uuid: GPU-fbcd7647-fb67-66f5-b8c7-1a4198b7e4fa
|
| 27 |
+
host: DESKTOP-EIHJJJL
|
| 28 |
+
memory:
|
| 29 |
+
total: "8506298368"
|
| 30 |
+
os: Windows-11-10.0.26100-SP0
|
| 31 |
+
program: C:\Users\Legion\desktop\distilbert-sentiment\main.py
|
| 32 |
+
python: CPython 3.13.5
|
| 33 |
+
root: C:\Users\Legion\desktop\distilbert-sentiment
|
| 34 |
+
startedAt: "2025-07-20T09:14:11.312224Z"
|
| 35 |
+
writerId: qd7dze61nxdy0n83hyx7lap6a5tql6xc
|
| 36 |
+
m:
|
| 37 |
+
- "1": train/global_step
|
| 38 |
+
"6":
|
| 39 |
+
- 3
|
| 40 |
+
"7": []
|
| 41 |
+
- "2": '*'
|
| 42 |
+
"5": 1
|
| 43 |
+
"6":
|
| 44 |
+
- 1
|
| 45 |
+
"7": []
|
| 46 |
+
python_version: 3.13.5
|
| 47 |
+
t:
|
| 48 |
+
"1":
|
| 49 |
+
- 1
|
| 50 |
+
- 5
|
| 51 |
+
- 11
|
| 52 |
+
- 41
|
| 53 |
+
- 49
|
| 54 |
+
- 51
|
| 55 |
+
- 53
|
| 56 |
+
- 71
|
| 57 |
+
- 105
|
| 58 |
+
"2":
|
| 59 |
+
- 1
|
| 60 |
+
- 5
|
| 61 |
+
- 11
|
| 62 |
+
- 41
|
| 63 |
+
- 49
|
| 64 |
+
- 51
|
| 65 |
+
- 53
|
| 66 |
+
- 71
|
| 67 |
+
- 105
|
| 68 |
+
"3":
|
| 69 |
+
- 7
|
| 70 |
+
- 13
|
| 71 |
+
- 19
|
| 72 |
+
- 66
|
| 73 |
+
"4": 3.13.5
|
| 74 |
+
"5": 0.21.0
|
| 75 |
+
"6": 4.53.2
|
| 76 |
+
"8":
|
| 77 |
+
- 3
|
| 78 |
+
"9":
|
| 79 |
+
"1": transformers_trainer
|
| 80 |
+
"12": 0.21.0
|
| 81 |
+
"13": windows-amd64
|
| 82 |
+
accelerator_config:
|
| 83 |
+
value:
|
| 84 |
+
dispatch_batches: null
|
| 85 |
+
even_batches: true
|
| 86 |
+
gradient_accumulation_kwargs: null
|
| 87 |
+
non_blocking: false
|
| 88 |
+
split_batches: false
|
| 89 |
+
use_seedable_sampler: true
|
| 90 |
+
activation:
|
| 91 |
+
value: gelu
|
| 92 |
+
adafactor:
|
| 93 |
+
value: false
|
| 94 |
+
adam_beta1:
|
| 95 |
+
value: 0.9
|
| 96 |
+
adam_beta2:
|
| 97 |
+
value: 0.999
|
| 98 |
+
adam_epsilon:
|
| 99 |
+
value: 1e-08
|
| 100 |
+
add_cross_attention:
|
| 101 |
+
value: false
|
| 102 |
+
architectures:
|
| 103 |
+
value:
|
| 104 |
+
- DistilBertForMaskedLM
|
| 105 |
+
attention_dropout:
|
| 106 |
+
value: 0.1
|
| 107 |
+
auto_find_batch_size:
|
| 108 |
+
value: false
|
| 109 |
+
average_tokens_across_devices:
|
| 110 |
+
value: false
|
| 111 |
+
bad_words_ids:
|
| 112 |
+
value: null
|
| 113 |
+
batch_eval_metrics:
|
| 114 |
+
value: false
|
| 115 |
+
begin_suppress_tokens:
|
| 116 |
+
value: null
|
| 117 |
+
bf16:
|
| 118 |
+
value: false
|
| 119 |
+
bf16_full_eval:
|
| 120 |
+
value: false
|
| 121 |
+
bos_token_id:
|
| 122 |
+
value: null
|
| 123 |
+
chunk_size_feed_forward:
|
| 124 |
+
value: 0
|
| 125 |
+
cross_attention_hidden_size:
|
| 126 |
+
value: null
|
| 127 |
+
data_seed:
|
| 128 |
+
value: null
|
| 129 |
+
dataloader_drop_last:
|
| 130 |
+
value: false
|
| 131 |
+
dataloader_num_workers:
|
| 132 |
+
value: 0
|
| 133 |
+
dataloader_persistent_workers:
|
| 134 |
+
value: false
|
| 135 |
+
dataloader_pin_memory:
|
| 136 |
+
value: true
|
| 137 |
+
dataloader_prefetch_factor:
|
| 138 |
+
value: null
|
| 139 |
+
ddp_backend:
|
| 140 |
+
value: null
|
| 141 |
+
ddp_broadcast_buffers:
|
| 142 |
+
value: null
|
| 143 |
+
ddp_bucket_cap_mb:
|
| 144 |
+
value: null
|
| 145 |
+
ddp_find_unused_parameters:
|
| 146 |
+
value: null
|
| 147 |
+
ddp_timeout:
|
| 148 |
+
value: 1800
|
| 149 |
+
debug:
|
| 150 |
+
value: []
|
| 151 |
+
decoder_start_token_id:
|
| 152 |
+
value: null
|
| 153 |
+
deepspeed:
|
| 154 |
+
value: null
|
| 155 |
+
dim:
|
| 156 |
+
value: 768
|
| 157 |
+
disable_tqdm:
|
| 158 |
+
value: true
|
| 159 |
+
diversity_penalty:
|
| 160 |
+
value: 0
|
| 161 |
+
do_eval:
|
| 162 |
+
value: true
|
| 163 |
+
do_predict:
|
| 164 |
+
value: false
|
| 165 |
+
do_sample:
|
| 166 |
+
value: false
|
| 167 |
+
do_train:
|
| 168 |
+
value: false
|
| 169 |
+
dropout:
|
| 170 |
+
value: 0.1
|
| 171 |
+
early_stopping:
|
| 172 |
+
value: false
|
| 173 |
+
encoder_no_repeat_ngram_size:
|
| 174 |
+
value: 0
|
| 175 |
+
eos_token_id:
|
| 176 |
+
value: null
|
| 177 |
+
eval_accumulation_steps:
|
| 178 |
+
value: null
|
| 179 |
+
eval_delay:
|
| 180 |
+
value: 0
|
| 181 |
+
eval_do_concat_batches:
|
| 182 |
+
value: true
|
| 183 |
+
eval_on_start:
|
| 184 |
+
value: false
|
| 185 |
+
eval_steps:
|
| 186 |
+
value: null
|
| 187 |
+
eval_strategy:
|
| 188 |
+
value: epoch
|
| 189 |
+
eval_use_gather_object:
|
| 190 |
+
value: false
|
| 191 |
+
exponential_decay_length_penalty:
|
| 192 |
+
value: null
|
| 193 |
+
finetuning_task:
|
| 194 |
+
value: null
|
| 195 |
+
forced_bos_token_id:
|
| 196 |
+
value: null
|
| 197 |
+
forced_eos_token_id:
|
| 198 |
+
value: null
|
| 199 |
+
fp16:
|
| 200 |
+
value: false
|
| 201 |
+
fp16_backend:
|
| 202 |
+
value: auto
|
| 203 |
+
fp16_full_eval:
|
| 204 |
+
value: false
|
| 205 |
+
fp16_opt_level:
|
| 206 |
+
value: O1
|
| 207 |
+
fsdp:
|
| 208 |
+
value: []
|
| 209 |
+
fsdp_config:
|
| 210 |
+
value:
|
| 211 |
+
min_num_params: 0
|
| 212 |
+
xla: false
|
| 213 |
+
xla_fsdp_grad_ckpt: false
|
| 214 |
+
xla_fsdp_v2: false
|
| 215 |
+
fsdp_min_num_params:
|
| 216 |
+
value: 0
|
| 217 |
+
fsdp_transformer_layer_cls_to_wrap:
|
| 218 |
+
value: null
|
| 219 |
+
full_determinism:
|
| 220 |
+
value: false
|
| 221 |
+
gradient_accumulation_steps:
|
| 222 |
+
value: 1
|
| 223 |
+
gradient_checkpointing:
|
| 224 |
+
value: false
|
| 225 |
+
gradient_checkpointing_kwargs:
|
| 226 |
+
value: null
|
| 227 |
+
greater_is_better:
|
| 228 |
+
value: true
|
| 229 |
+
group_by_length:
|
| 230 |
+
value: false
|
| 231 |
+
half_precision_backend:
|
| 232 |
+
value: auto
|
| 233 |
+
hidden_dim:
|
| 234 |
+
value: 3072
|
| 235 |
+
hub_always_push:
|
| 236 |
+
value: false
|
| 237 |
+
hub_model_id:
|
| 238 |
+
value: null
|
| 239 |
+
hub_private_repo:
|
| 240 |
+
value: null
|
| 241 |
+
hub_revision:
|
| 242 |
+
value: null
|
| 243 |
+
hub_strategy:
|
| 244 |
+
value: every_save
|
| 245 |
+
hub_token:
|
| 246 |
+
value: <HUB_TOKEN>
|
| 247 |
+
id2label:
|
| 248 |
+
value:
|
| 249 |
+
"0": LABEL_0
|
| 250 |
+
"1": LABEL_1
|
| 251 |
+
ignore_data_skip:
|
| 252 |
+
value: false
|
| 253 |
+
include_for_metrics:
|
| 254 |
+
value: []
|
| 255 |
+
include_inputs_for_metrics:
|
| 256 |
+
value: false
|
| 257 |
+
include_num_input_tokens_seen:
|
| 258 |
+
value: false
|
| 259 |
+
include_tokens_per_second:
|
| 260 |
+
value: false
|
| 261 |
+
initializer_range:
|
| 262 |
+
value: 0.02
|
| 263 |
+
is_decoder:
|
| 264 |
+
value: false
|
| 265 |
+
is_encoder_decoder:
|
| 266 |
+
value: false
|
| 267 |
+
jit_mode_eval:
|
| 268 |
+
value: false
|
| 269 |
+
label_names:
|
| 270 |
+
value: null
|
| 271 |
+
label_smoothing_factor:
|
| 272 |
+
value: 0
|
| 273 |
+
label2id:
|
| 274 |
+
value:
|
| 275 |
+
LABEL_0: 0
|
| 276 |
+
LABEL_1: 1
|
| 277 |
+
learning_rate:
|
| 278 |
+
value: 5e-05
|
| 279 |
+
length_column_name:
|
| 280 |
+
value: length
|
| 281 |
+
length_penalty:
|
| 282 |
+
value: 1
|
| 283 |
+
liger_kernel_config:
|
| 284 |
+
value: null
|
| 285 |
+
load_best_model_at_end:
|
| 286 |
+
value: true
|
| 287 |
+
local_rank:
|
| 288 |
+
value: 0
|
| 289 |
+
log_level:
|
| 290 |
+
value: passive
|
| 291 |
+
log_level_replica:
|
| 292 |
+
value: warning
|
| 293 |
+
log_on_each_node:
|
| 294 |
+
value: true
|
| 295 |
+
logging_dir:
|
| 296 |
+
value: ./logs
|
| 297 |
+
logging_first_step:
|
| 298 |
+
value: false
|
| 299 |
+
logging_nan_inf_filter:
|
| 300 |
+
value: true
|
| 301 |
+
logging_steps:
|
| 302 |
+
value: 50
|
| 303 |
+
logging_strategy:
|
| 304 |
+
value: steps
|
| 305 |
+
lr_scheduler_type:
|
| 306 |
+
value: linear
|
| 307 |
+
max_grad_norm:
|
| 308 |
+
value: 1
|
| 309 |
+
max_length:
|
| 310 |
+
value: 20
|
| 311 |
+
max_position_embeddings:
|
| 312 |
+
value: 512
|
| 313 |
+
max_steps:
|
| 314 |
+
value: -1
|
| 315 |
+
metric_for_best_model:
|
| 316 |
+
value: f1
|
| 317 |
+
min_length:
|
| 318 |
+
value: 0
|
| 319 |
+
model/num_parameters:
|
| 320 |
+
value: 66955010
|
| 321 |
+
model_type:
|
| 322 |
+
value: distilbert
|
| 323 |
+
mp_parameters:
|
| 324 |
+
value: ""
|
| 325 |
+
n_heads:
|
| 326 |
+
value: 12
|
| 327 |
+
n_layers:
|
| 328 |
+
value: 6
|
| 329 |
+
neftune_noise_alpha:
|
| 330 |
+
value: null
|
| 331 |
+
no_cuda:
|
| 332 |
+
value: false
|
| 333 |
+
no_repeat_ngram_size:
|
| 334 |
+
value: 0
|
| 335 |
+
num_beam_groups:
|
| 336 |
+
value: 1
|
| 337 |
+
num_beams:
|
| 338 |
+
value: 1
|
| 339 |
+
num_return_sequences:
|
| 340 |
+
value: 1
|
| 341 |
+
num_train_epochs:
|
| 342 |
+
value: 3
|
| 343 |
+
optim:
|
| 344 |
+
value: adamw_torch
|
| 345 |
+
optim_args:
|
| 346 |
+
value: null
|
| 347 |
+
optim_target_modules:
|
| 348 |
+
value: null
|
| 349 |
+
output_attentions:
|
| 350 |
+
value: false
|
| 351 |
+
output_dir:
|
| 352 |
+
value: ./model
|
| 353 |
+
output_hidden_states:
|
| 354 |
+
value: false
|
| 355 |
+
output_scores:
|
| 356 |
+
value: false
|
| 357 |
+
overwrite_output_dir:
|
| 358 |
+
value: false
|
| 359 |
+
pad_token_id:
|
| 360 |
+
value: 0
|
| 361 |
+
past_index:
|
| 362 |
+
value: -1
|
| 363 |
+
per_device_eval_batch_size:
|
| 364 |
+
value: 16
|
| 365 |
+
per_device_train_batch_size:
|
| 366 |
+
value: 8
|
| 367 |
+
per_gpu_eval_batch_size:
|
| 368 |
+
value: null
|
| 369 |
+
per_gpu_train_batch_size:
|
| 370 |
+
value: null
|
| 371 |
+
prediction_loss_only:
|
| 372 |
+
value: false
|
| 373 |
+
prefix:
|
| 374 |
+
value: null
|
| 375 |
+
problem_type:
|
| 376 |
+
value: null
|
| 377 |
+
push_to_hub:
|
| 378 |
+
value: false
|
| 379 |
+
push_to_hub_model_id:
|
| 380 |
+
value: null
|
| 381 |
+
push_to_hub_organization:
|
| 382 |
+
value: null
|
| 383 |
+
push_to_hub_token:
|
| 384 |
+
value: <PUSH_TO_HUB_TOKEN>
|
| 385 |
+
qa_dropout:
|
| 386 |
+
value: 0.1
|
| 387 |
+
ray_scope:
|
| 388 |
+
value: last
|
| 389 |
+
remove_invalid_values:
|
| 390 |
+
value: false
|
| 391 |
+
remove_unused_columns:
|
| 392 |
+
value: true
|
| 393 |
+
repetition_penalty:
|
| 394 |
+
value: 1
|
| 395 |
+
report_to:
|
| 396 |
+
value:
|
| 397 |
+
- wandb
|
| 398 |
+
restore_callback_states_from_checkpoint:
|
| 399 |
+
value: false
|
| 400 |
+
resume_from_checkpoint:
|
| 401 |
+
value: null
|
| 402 |
+
return_dict:
|
| 403 |
+
value: true
|
| 404 |
+
return_dict_in_generate:
|
| 405 |
+
value: false
|
| 406 |
+
run_name:
|
| 407 |
+
value: ./model
|
| 408 |
+
save_on_each_node:
|
| 409 |
+
value: false
|
| 410 |
+
save_only_model:
|
| 411 |
+
value: false
|
| 412 |
+
save_safetensors:
|
| 413 |
+
value: true
|
| 414 |
+
save_steps:
|
| 415 |
+
value: 500
|
| 416 |
+
save_strategy:
|
| 417 |
+
value: epoch
|
| 418 |
+
save_total_limit:
|
| 419 |
+
value: null
|
| 420 |
+
seed:
|
| 421 |
+
value: 42
|
| 422 |
+
sep_token_id:
|
| 423 |
+
value: null
|
| 424 |
+
seq_classif_dropout:
|
| 425 |
+
value: 0.2
|
| 426 |
+
sinusoidal_pos_embds:
|
| 427 |
+
value: false
|
| 428 |
+
skip_memory_metrics:
|
| 429 |
+
value: true
|
| 430 |
+
suppress_tokens:
|
| 431 |
+
value: null
|
| 432 |
+
task_specific_params:
|
| 433 |
+
value: null
|
| 434 |
+
temperature:
|
| 435 |
+
value: 1
|
| 436 |
+
tf_legacy_loss:
|
| 437 |
+
value: false
|
| 438 |
+
tf32:
|
| 439 |
+
value: null
|
| 440 |
+
tie_encoder_decoder:
|
| 441 |
+
value: false
|
| 442 |
+
tie_weights_:
|
| 443 |
+
value: true
|
| 444 |
+
tie_word_embeddings:
|
| 445 |
+
value: true
|
| 446 |
+
tokenizer_class:
|
| 447 |
+
value: null
|
| 448 |
+
top_k:
|
| 449 |
+
value: 50
|
| 450 |
+
top_p:
|
| 451 |
+
value: 1
|
| 452 |
+
torch_compile:
|
| 453 |
+
value: false
|
| 454 |
+
torch_compile_backend:
|
| 455 |
+
value: null
|
| 456 |
+
torch_compile_mode:
|
| 457 |
+
value: null
|
| 458 |
+
torch_dtype:
|
| 459 |
+
value: float32
|
| 460 |
+
torch_empty_cache_steps:
|
| 461 |
+
value: null
|
| 462 |
+
torchdynamo:
|
| 463 |
+
value: null
|
| 464 |
+
torchscript:
|
| 465 |
+
value: false
|
| 466 |
+
tpu_metrics_debug:
|
| 467 |
+
value: false
|
| 468 |
+
tpu_num_cores:
|
| 469 |
+
value: null
|
| 470 |
+
transformers_version:
|
| 471 |
+
value: 4.53.2
|
| 472 |
+
typical_p:
|
| 473 |
+
value: 1
|
| 474 |
+
use_bfloat16:
|
| 475 |
+
value: false
|
| 476 |
+
use_cpu:
|
| 477 |
+
value: false
|
| 478 |
+
use_ipex:
|
| 479 |
+
value: false
|
| 480 |
+
use_legacy_prediction_loop:
|
| 481 |
+
value: false
|
| 482 |
+
use_liger_kernel:
|
| 483 |
+
value: false
|
| 484 |
+
use_mps_device:
|
| 485 |
+
value: false
|
| 486 |
+
vocab_size:
|
| 487 |
+
value: 30522
|
| 488 |
+
warmup_ratio:
|
| 489 |
+
value: 0
|
| 490 |
+
warmup_steps:
|
| 491 |
+
value: 0
|
| 492 |
+
weight_decay:
|
| 493 |
+
value: 0
|
wandb/run-20250720_144411-9kwggmdj/files/output.log
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{'loss': 0.5564, 'grad_norm': 3.3684804439544678, 'learning_rate': 4.9738666666666665e-05, 'epoch': 0.016}
|
| 2 |
+
{'loss': 0.527, 'grad_norm': 12.206518173217773, 'learning_rate': 4.9472e-05, 'epoch': 0.032}
|
| 3 |
+
{'loss': 0.4263, 'grad_norm': 23.95633316040039, 'learning_rate': 4.9205333333333335e-05, 'epoch': 0.048}
|
| 4 |
+
{'loss': 0.3658, 'grad_norm': 10.685762405395508, 'learning_rate': 4.893866666666667e-05, 'epoch': 0.064}
|
| 5 |
+
{'loss': 0.3694, 'grad_norm': 18.01938819885254, 'learning_rate': 4.8672000000000004e-05, 'epoch': 0.08}
|
| 6 |
+
{'loss': 0.3841, 'grad_norm': 6.812848091125488, 'learning_rate': 4.8405333333333336e-05, 'epoch': 0.096}
|
| 7 |
+
{'loss': 0.3934, 'grad_norm': 4.043306827545166, 'learning_rate': 4.8138666666666674e-05, 'epoch': 0.112}
|
| 8 |
+
{'loss': 0.3622, 'grad_norm': 21.34123992919922, 'learning_rate': 4.7872e-05, 'epoch': 0.128}
|
| 9 |
+
{'loss': 0.4146, 'grad_norm': 27.191320419311523, 'learning_rate': 4.7605333333333337e-05, 'epoch': 0.144}
|
| 10 |
+
{'loss': 0.4305, 'grad_norm': 16.240070343017578, 'learning_rate': 4.733866666666667e-05, 'epoch': 0.16}
|
| 11 |
+
{'loss': 0.403, 'grad_norm': 26.02972984313965, 'learning_rate': 4.7072000000000006e-05, 'epoch': 0.176}
|
| 12 |
+
{'loss': 0.3811, 'grad_norm': 11.078995704650879, 'learning_rate': 4.680533333333334e-05, 'epoch': 0.192}
|
| 13 |
+
{'loss': 0.3766, 'grad_norm': 9.874316215515137, 'learning_rate': 4.653866666666667e-05, 'epoch': 0.208}
|
| 14 |
+
{'loss': 0.3032, 'grad_norm': 18.219112396240234, 'learning_rate': 4.627200000000001e-05, 'epoch': 0.224}
|
| 15 |
+
{'loss': 0.3812, 'grad_norm': 14.96966552734375, 'learning_rate': 4.600533333333333e-05, 'epoch': 0.24}
|
| 16 |
+
{'loss': 0.3765, 'grad_norm': 25.871795654296875, 'learning_rate': 4.573866666666667e-05, 'epoch': 0.256}
|
| 17 |
+
{'loss': 0.3693, 'grad_norm': 3.639224052429199, 'learning_rate': 4.5472e-05, 'epoch': 0.272}
|
| 18 |
+
{'loss': 0.2971, 'grad_norm': 15.468314170837402, 'learning_rate': 4.520533333333333e-05, 'epoch': 0.288}
|
| 19 |
+
{'loss': 0.3572, 'grad_norm': 3.6710922718048096, 'learning_rate': 4.493866666666667e-05, 'epoch': 0.304}
|
| 20 |
+
{'loss': 0.3409, 'grad_norm': 7.864389896392822, 'learning_rate': 4.4672e-05, 'epoch': 0.32}
|
| 21 |
+
{'loss': 0.3285, 'grad_norm': 10.038674354553223, 'learning_rate': 4.440533333333334e-05, 'epoch': 0.336}
|
| 22 |
+
{'loss': 0.3317, 'grad_norm': 13.171808242797852, 'learning_rate': 4.4138666666666665e-05, 'epoch': 0.352}
|
| 23 |
+
{'loss': 0.3674, 'grad_norm': 4.481576919555664, 'learning_rate': 4.3872e-05, 'epoch': 0.368}
|
| 24 |
+
{'loss': 0.3642, 'grad_norm': 6.312211513519287, 'learning_rate': 4.3605333333333334e-05, 'epoch': 0.384}
|
| 25 |
+
{'loss': 0.3386, 'grad_norm': 4.072713851928711, 'learning_rate': 4.3338666666666666e-05, 'epoch': 0.4}
|
| 26 |
+
{'loss': 0.3776, 'grad_norm': 4.920267581939697, 'learning_rate': 4.3072000000000004e-05, 'epoch': 0.416}
|
| 27 |
+
{'loss': 0.3519, 'grad_norm': 13.408978462219238, 'learning_rate': 4.2805333333333335e-05, 'epoch': 0.432}
|
| 28 |
+
{'loss': 0.343, 'grad_norm': 8.910168647766113, 'learning_rate': 4.253866666666667e-05, 'epoch': 0.448}
|
| 29 |
+
{'loss': 0.345, 'grad_norm': 6.50616979598999, 'learning_rate': 4.2272e-05, 'epoch': 0.464}
|
| 30 |
+
{'loss': 0.2931, 'grad_norm': 6.88561487197876, 'learning_rate': 4.2005333333333336e-05, 'epoch': 0.48}
|
| 31 |
+
{'loss': 0.3541, 'grad_norm': 2.813678026199341, 'learning_rate': 4.173866666666667e-05, 'epoch': 0.496}
|
| 32 |
+
{'loss': 0.3005, 'grad_norm': 18.764328002929688, 'learning_rate': 4.1472e-05, 'epoch': 0.512}
|
| 33 |
+
{'loss': 0.3404, 'grad_norm': 13.757184028625488, 'learning_rate': 4.120533333333334e-05, 'epoch': 0.528}
|
| 34 |
+
{'loss': 0.3112, 'grad_norm': 11.426987648010254, 'learning_rate': 4.093866666666667e-05, 'epoch': 0.544}
|
| 35 |
+
{'loss': 0.285, 'grad_norm': 0.7347564697265625, 'learning_rate': 4.0672000000000006e-05, 'epoch': 0.56}
|
| 36 |
+
{'loss': 0.2978, 'grad_norm': 3.315498113632202, 'learning_rate': 4.040533333333333e-05, 'epoch': 0.576}
|
| 37 |
+
{'loss': 0.3928, 'grad_norm': 4.304668426513672, 'learning_rate': 4.013866666666667e-05, 'epoch': 0.592}
|
| 38 |
+
{'loss': 0.2773, 'grad_norm': 0.5143654942512512, 'learning_rate': 3.9872e-05, 'epoch': 0.608}
|
| 39 |
+
{'loss': 0.3937, 'grad_norm': 10.765504837036133, 'learning_rate': 3.960533333333333e-05, 'epoch': 0.624}
|
| 40 |
+
{'loss': 0.2931, 'grad_norm': 3.0576841831207275, 'learning_rate': 3.933866666666667e-05, 'epoch': 0.64}
|
| 41 |
+
{'loss': 0.2899, 'grad_norm': 1.09218430519104, 'learning_rate': 3.9072e-05, 'epoch': 0.656}
|
| 42 |
+
{'loss': 0.3039, 'grad_norm': 9.193467140197754, 'learning_rate': 3.880533333333333e-05, 'epoch': 0.672}
|
| 43 |
+
{'loss': 0.3191, 'grad_norm': 5.1164469718933105, 'learning_rate': 3.8538666666666664e-05, 'epoch': 0.688}
|
| 44 |
+
{'loss': 0.3206, 'grad_norm': 10.537883758544922, 'learning_rate': 3.8272e-05, 'epoch': 0.704}
|
| 45 |
+
{'loss': 0.3196, 'grad_norm': 10.457417488098145, 'learning_rate': 3.800533333333334e-05, 'epoch': 0.72}
|
| 46 |
+
{'loss': 0.3056, 'grad_norm': 2.776677370071411, 'learning_rate': 3.7738666666666665e-05, 'epoch': 0.736}
|
| 47 |
+
{'loss': 0.3273, 'grad_norm': 3.808607578277588, 'learning_rate': 3.7472e-05, 'epoch': 0.752}
|
| 48 |
+
{'loss': 0.3754, 'grad_norm': 8.255670547485352, 'learning_rate': 3.7205333333333334e-05, 'epoch': 0.768}
|
| 49 |
+
{'loss': 0.2756, 'grad_norm': 8.847413063049316, 'learning_rate': 3.6938666666666666e-05, 'epoch': 0.784}
|
| 50 |
+
{'loss': 0.2828, 'grad_norm': 9.775912284851074, 'learning_rate': 3.6672000000000004e-05, 'epoch': 0.8}
|
| 51 |
+
{'loss': 0.363, 'grad_norm': 3.9166083335876465, 'learning_rate': 3.6405333333333335e-05, 'epoch': 0.816}
|
| 52 |
+
{'loss': 0.295, 'grad_norm': 0.43537598848342896, 'learning_rate': 3.6138666666666673e-05, 'epoch': 0.832}
|
| 53 |
+
{'loss': 0.2519, 'grad_norm': 4.3010735511779785, 'learning_rate': 3.5872e-05, 'epoch': 0.848}
|
| 54 |
+
{'loss': 0.3011, 'grad_norm': 3.7882602214813232, 'learning_rate': 3.5605333333333336e-05, 'epoch': 0.864}
|
| 55 |
+
{'loss': 0.3489, 'grad_norm': 5.9410400390625, 'learning_rate': 3.533866666666667e-05, 'epoch': 0.88}
|
| 56 |
+
{'loss': 0.2948, 'grad_norm': 6.711633205413818, 'learning_rate': 3.5072e-05, 'epoch': 0.896}
|
| 57 |
+
{'loss': 0.3465, 'grad_norm': 12.11922836303711, 'learning_rate': 3.480533333333334e-05, 'epoch': 0.912}
|
| 58 |
+
{'loss': 0.3492, 'grad_norm': 5.701395511627197, 'learning_rate': 3.453866666666667e-05, 'epoch': 0.928}
|
| 59 |
+
{'loss': 0.2607, 'grad_norm': 15.726317405700684, 'learning_rate': 3.427200000000001e-05, 'epoch': 0.944}
|
| 60 |
+
{'loss': 0.2862, 'grad_norm': 11.121344566345215, 'learning_rate': 3.400533333333333e-05, 'epoch': 0.96}
|
| 61 |
+
{'loss': 0.2981, 'grad_norm': 4.980706214904785, 'learning_rate': 3.373866666666667e-05, 'epoch': 0.976}
|
| 62 |
+
{'loss': 0.284, 'grad_norm': 6.423090934753418, 'learning_rate': 3.3472e-05, 'epoch': 0.992}
|
| 63 |
+
{'eval_loss': 0.2969476878643036, 'eval_accuracy': 0.89448, 'eval_f1': 0.8900558472951571, 'eval_precision': 0.9290064381416391, 'eval_recall': 0.85424, 'eval_runtime': 511.6142, 'eval_samples_per_second': 48.865, 'eval_steps_per_second': 3.055, 'epoch': 1.0}
|
| 64 |
+
{'loss': 0.2406, 'grad_norm': 3.1205193996429443, 'learning_rate': 3.320533333333333e-05, 'epoch': 1.008}
|
| 65 |
+
{'loss': 0.2386, 'grad_norm': 11.420886039733887, 'learning_rate': 3.293866666666667e-05, 'epoch': 1.024}
|
| 66 |
+
{'loss': 0.2133, 'grad_norm': 0.3266797661781311, 'learning_rate': 3.2672e-05, 'epoch': 1.04}
|
| 67 |
+
{'loss': 0.2388, 'grad_norm': 20.907642364501953, 'learning_rate': 3.240533333333334e-05, 'epoch': 1.056}
|
| 68 |
+
{'loss': 0.2207, 'grad_norm': 34.85378646850586, 'learning_rate': 3.2138666666666664e-05, 'epoch': 1.072}
|
| 69 |
+
{'loss': 0.1863, 'grad_norm': 0.08423929661512375, 'learning_rate': 3.1872e-05, 'epoch': 1.088}
|
| 70 |
+
{'loss': 0.2122, 'grad_norm': 2.1192731857299805, 'learning_rate': 3.1605333333333334e-05, 'epoch': 1.104}
|
| 71 |
+
{'loss': 0.2274, 'grad_norm': 1.2625190019607544, 'learning_rate': 3.1338666666666665e-05, 'epoch': 1.12}
|
| 72 |
+
{'loss': 0.146, 'grad_norm': 0.3231733441352844, 'learning_rate': 3.1072e-05, 'epoch': 1.1360000000000001}
|
| 73 |
+
{'loss': 0.2008, 'grad_norm': 0.6839350461959839, 'learning_rate': 3.0805333333333335e-05, 'epoch': 1.152}
|
| 74 |
+
{'loss': 0.2068, 'grad_norm': 3.0773186683654785, 'learning_rate': 3.0538666666666666e-05, 'epoch': 1.168}
|
| 75 |
+
{'loss': 0.2084, 'grad_norm': 0.05034258961677551, 'learning_rate': 3.0272e-05, 'epoch': 1.184}
|
| 76 |
+
{'loss': 0.2462, 'grad_norm': 11.455129623413086, 'learning_rate': 3.0005333333333336e-05, 'epoch': 1.2}
|
| 77 |
+
{'loss': 0.1906, 'grad_norm': 0.09377483278512955, 'learning_rate': 2.973866666666667e-05, 'epoch': 1.216}
|
| 78 |
+
{'loss': 0.2032, 'grad_norm': 17.590801239013672, 'learning_rate': 2.9472e-05, 'epoch': 1.232}
|
| 79 |
+
{'loss': 0.24, 'grad_norm': 28.78790855407715, 'learning_rate': 2.9205333333333333e-05, 'epoch': 1.248}
|
| 80 |
+
{'loss': 0.1331, 'grad_norm': 1.1610554456710815, 'learning_rate': 2.8938666666666668e-05, 'epoch': 1.264}
|
| 81 |
+
{'loss': 0.2127, 'grad_norm': 0.30296802520751953, 'learning_rate': 2.8672e-05, 'epoch': 1.28}
|
| 82 |
+
{'loss': 0.1867, 'grad_norm': 0.15345898270606995, 'learning_rate': 2.8405333333333334e-05, 'epoch': 1.296}
|
| 83 |
+
{'loss': 0.24, 'grad_norm': 8.489642143249512, 'learning_rate': 2.813866666666667e-05, 'epoch': 1.312}
|
| 84 |
+
{'loss': 0.1471, 'grad_norm': 0.7609522342681885, 'learning_rate': 2.7872000000000004e-05, 'epoch': 1.328}
|
| 85 |
+
{'loss': 0.1787, 'grad_norm': 0.15069647133350372, 'learning_rate': 2.760533333333333e-05, 'epoch': 1.3439999999999999}
|
| 86 |
+
{'loss': 0.2256, 'grad_norm': 0.13076968491077423, 'learning_rate': 2.733866666666667e-05, 'epoch': 1.3599999999999999}
|
| 87 |
+
{'loss': 0.198, 'grad_norm': 0.29645389318466187, 'learning_rate': 2.7072000000000004e-05, 'epoch': 1.376}
|
| 88 |
+
{'loss': 0.2099, 'grad_norm': 9.831048011779785, 'learning_rate': 2.6805333333333332e-05, 'epoch': 1.392}
|
| 89 |
+
{'loss': 0.2126, 'grad_norm': 0.037026241421699524, 'learning_rate': 2.6538666666666667e-05, 'epoch': 1.408}
|
| 90 |
+
{'loss': 0.1393, 'grad_norm': 0.2884507179260254, 'learning_rate': 2.6272000000000002e-05, 'epoch': 1.424}
|
| 91 |
+
{'loss': 0.1837, 'grad_norm': 0.05694892257452011, 'learning_rate': 2.6005333333333337e-05, 'epoch': 1.44}
|
| 92 |
+
{'loss': 0.2323, 'grad_norm': 46.10319137573242, 'learning_rate': 2.5738666666666668e-05, 'epoch': 1.456}
|
| 93 |
+
{'loss': 0.1858, 'grad_norm': 26.698631286621094, 'learning_rate': 2.5472000000000003e-05, 'epoch': 1.472}
|
| 94 |
+
Traceback (most recent call last):
|
| 95 |
+
File "C:\Users\Legion\desktop\distilbert-sentiment\main.py", line 74, in <module>
|
| 96 |
+
main()
|
| 97 |
+
~~~~^^
|
| 98 |
+
File "C:\Users\Legion\desktop\distilbert-sentiment\main.py", line 68, in main
|
| 99 |
+
train_pipeline(subset_size=args.subset)
|
| 100 |
+
~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 101 |
+
File "C:\Users\Legion\desktop\distilbert-sentiment\main.py", line 41, in train_pipeline
|
| 102 |
+
train_model(trainer)
|
| 103 |
+
~~~~~~~~~~~^^^^^^^^^
|
| 104 |
+
File "C:\Users\Legion\desktop\distilbert-sentiment\train.py", line 121, in train_model
|
| 105 |
+
trainer.train()
|
| 106 |
+
~~~~~~~~~~~~~^^
|
| 107 |
+
File "C:\Users\Legion\Miniconda3\envs\ML\Lib\site-packages\transformers\trainer.py", line 2206, in train
|
| 108 |
+
return inner_training_loop(
|
| 109 |
+
args=args,
|
| 110 |
+
...<2 lines>...
|
| 111 |
+
ignore_keys_for_eval=ignore_keys_for_eval,
|
| 112 |
+
)
|
| 113 |
+
File "C:\Users\Legion\Miniconda3\envs\ML\Lib\site-packages\transformers\trainer.py", line 2548, in _inner_training_loop
|
| 114 |
+
tr_loss_step = self.training_step(model, inputs, num_items_in_batch)
|
| 115 |
+
File "C:\Users\Legion\Miniconda3\envs\ML\Lib\site-packages\transformers\trainer.py", line 3797, in training_step
|
| 116 |
+
self.accelerator.backward(loss, **kwargs)
|
| 117 |
+
~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^
|
| 118 |
+
File "C:\Users\Legion\Miniconda3\envs\ML\Lib\site-packages\accelerate\accelerator.py", line 2578, in backward
|
| 119 |
+
loss.backward(**kwargs)
|
| 120 |
+
~~~~~~~~~~~~~^^^^^^^^^^
|
| 121 |
+
File "C:\Users\Legion\Miniconda3\envs\ML\Lib\site-packages\torch\_tensor.py", line 648, in backward
|
| 122 |
+
torch.autograd.backward(
|
| 123 |
+
~~~~~~~~~~~~~~~~~~~~~~~^
|
| 124 |
+
self, gradient, retain_graph, create_graph, inputs=inputs
|
| 125 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 126 |
+
)
|
| 127 |
+
^
|
| 128 |
+
File "C:\Users\Legion\Miniconda3\envs\ML\Lib\site-packages\torch\autograd\__init__.py", line 353, in backward
|
| 129 |
+
_engine_run_backward(
|
| 130 |
+
~~~~~~~~~~~~~~~~~~~~^
|
| 131 |
+
tensors,
|
| 132 |
+
^^^^^^^^
|
| 133 |
+
...<5 lines>...
|
| 134 |
+
accumulate_grad=True,
|
| 135 |
+
^^^^^^^^^^^^^^^^^^^^^
|
| 136 |
+
)
|
| 137 |
+
^
|
| 138 |
+
File "C:\Users\Legion\Miniconda3\envs\ML\Lib\site-packages\torch\autograd\graph.py", line 824, in _engine_run_backward
|
| 139 |
+
return Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
|
| 140 |
+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 141 |
+
t_outputs, *args, **kwargs
|
| 142 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 143 |
+
) # Calls into the C++ engine to run the backward pass
|
| 144 |
+
^
|
| 145 |
+
RuntimeError: CUDA error: out of memory
|
| 146 |
+
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
|
| 147 |
+
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
|
| 148 |
+
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
|
wandb/run-20250720_144411-9kwggmdj/files/requirements.txt
ADDED
|
@@ -0,0 +1,139 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
accelerate==1.9.0
|
| 2 |
+
aiofiles==24.1.0
|
| 3 |
+
aiohappyeyeballs==2.6.1
|
| 4 |
+
aiohttp==3.12.14
|
| 5 |
+
aiosignal==1.4.0
|
| 6 |
+
alembic==1.16.2
|
| 7 |
+
altair==5.5.0
|
| 8 |
+
annotated-types==0.7.0
|
| 9 |
+
anyio==4.9.0
|
| 10 |
+
attrs==25.3.0
|
| 11 |
+
audioop-lts==0.2.1
|
| 12 |
+
blinker==1.9.0
|
| 13 |
+
Bottleneck==1.4.2
|
| 14 |
+
Brotli==1.1.0
|
| 15 |
+
cachetools==6.1.0
|
| 16 |
+
certifi==2025.6.15
|
| 17 |
+
charset-normalizer==3.4.2
|
| 18 |
+
click==8.2.1
|
| 19 |
+
cloudpickle==3.1.1
|
| 20 |
+
colorama==0.4.6
|
| 21 |
+
colorlog==6.9.0
|
| 22 |
+
contourpy==1.3.1
|
| 23 |
+
cycler==0.11.0
|
| 24 |
+
datasets==4.0.0
|
| 25 |
+
dill==0.3.8
|
| 26 |
+
fastapi==0.116.1
|
| 27 |
+
ffmpy==0.6.0
|
| 28 |
+
filelock==3.18.0
|
| 29 |
+
fonttools==4.55.3
|
| 30 |
+
frozenlist==1.7.0
|
| 31 |
+
fsspec==2025.3.0
|
| 32 |
+
gitdb==4.0.12
|
| 33 |
+
GitPython==3.1.44
|
| 34 |
+
gradio==5.37.0
|
| 35 |
+
gradio_client==1.10.4
|
| 36 |
+
greenlet==3.2.3
|
| 37 |
+
groovy==0.1.2
|
| 38 |
+
h11==0.16.0
|
| 39 |
+
httpcore==1.0.9
|
| 40 |
+
httpx==0.28.1
|
| 41 |
+
huggingface-hub==0.33.4
|
| 42 |
+
idna==3.10
|
| 43 |
+
imbalanced-learn==0.13.0
|
| 44 |
+
imblearn==0.0
|
| 45 |
+
Jinja2==3.1.6
|
| 46 |
+
joblib==1.4.2
|
| 47 |
+
jsonschema==4.24.0
|
| 48 |
+
jsonschema-specifications==2025.4.1
|
| 49 |
+
kiwisolver==1.4.8
|
| 50 |
+
llvmlite==0.44.0
|
| 51 |
+
Mako==1.3.10
|
| 52 |
+
markdown-it-py==3.0.0
|
| 53 |
+
MarkupSafe==3.0.2
|
| 54 |
+
matplotlib==3.9.2
|
| 55 |
+
mdurl==0.1.2
|
| 56 |
+
mpmath==1.3.0
|
| 57 |
+
multidict==6.6.3
|
| 58 |
+
multiprocess==0.70.16
|
| 59 |
+
narwhals==1.44.0
|
| 60 |
+
networkx==3.5
|
| 61 |
+
ninja==1.11.1.4
|
| 62 |
+
numba==0.61.2
|
| 63 |
+
numexpr==2.10.2
|
| 64 |
+
numpy==2.1.1
|
| 65 |
+
optuna==4.4.0
|
| 66 |
+
orjson==3.11.0
|
| 67 |
+
packaging==24.2
|
| 68 |
+
pandas==2.2.3
|
| 69 |
+
pillow==11.1.0
|
| 70 |
+
pip==25.1
|
| 71 |
+
platformdirs==4.3.8
|
| 72 |
+
plotly==6.2.0
|
| 73 |
+
propcache==0.3.2
|
| 74 |
+
protobuf==6.31.1
|
| 75 |
+
psutil==7.0.0
|
| 76 |
+
pyarrow==20.0.0
|
| 77 |
+
pybind11==3.0.0
|
| 78 |
+
pydantic==2.11.7
|
| 79 |
+
pydantic_core==2.33.2
|
| 80 |
+
pydeck==0.9.1
|
| 81 |
+
pydub==0.25.1
|
| 82 |
+
Pygments==2.19.2
|
| 83 |
+
pyparsing==3.2.0
|
| 84 |
+
PyQt6==6.7.1
|
| 85 |
+
PyQt6_sip==13.9.1
|
| 86 |
+
python-dateutil==2.9.0.post0
|
| 87 |
+
python-multipart==0.0.20
|
| 88 |
+
pytz==2024.1
|
| 89 |
+
PyYAML==6.0.2
|
| 90 |
+
referencing==0.36.2
|
| 91 |
+
regex==2024.11.6
|
| 92 |
+
requests==2.32.4
|
| 93 |
+
rich==14.0.0
|
| 94 |
+
rpds-py==0.26.0
|
| 95 |
+
ruff==0.12.3
|
| 96 |
+
safehttpx==0.1.6
|
| 97 |
+
safetensors==0.5.3
|
| 98 |
+
scikit-learn==1.5.2
|
| 99 |
+
scipy==1.15.2
|
| 100 |
+
seaborn==0.13.2
|
| 101 |
+
semantic-version==2.10.0
|
| 102 |
+
sentry-sdk==2.33.0
|
| 103 |
+
setuptools==78.1.1
|
| 104 |
+
shap==0.48.0
|
| 105 |
+
shellingham==1.5.4
|
| 106 |
+
sip==6.10.0
|
| 107 |
+
six==1.17.0
|
| 108 |
+
sklearn-compat==0.1.3
|
| 109 |
+
slicer==0.0.8
|
| 110 |
+
smmap==5.0.2
|
| 111 |
+
sniffio==1.3.1
|
| 112 |
+
SQLAlchemy==2.0.41
|
| 113 |
+
starlette==0.47.1
|
| 114 |
+
streamlit==1.46.1
|
| 115 |
+
sympy==1.14.0
|
| 116 |
+
tenacity==9.1.2
|
| 117 |
+
threadpoolctl==3.5.0
|
| 118 |
+
tokenizers==0.21.2
|
| 119 |
+
toml==0.10.2
|
| 120 |
+
tomlkit==0.13.3
|
| 121 |
+
torch==2.7.1+cu118
|
| 122 |
+
torchaudio==2.7.1+cu118
|
| 123 |
+
torchvision==0.22.1
|
| 124 |
+
tornado==6.5.1
|
| 125 |
+
tqdm==4.67.1
|
| 126 |
+
transformers==4.53.2
|
| 127 |
+
typer==0.16.0
|
| 128 |
+
typing_extensions==4.14.0
|
| 129 |
+
typing-inspection==0.4.1
|
| 130 |
+
tzdata==2025.2
|
| 131 |
+
urllib3==2.5.0
|
| 132 |
+
uvicorn==0.35.0
|
| 133 |
+
wandb==0.21.0
|
| 134 |
+
watchdog==6.0.0
|
| 135 |
+
websockets==15.0.1
|
| 136 |
+
wheel==0.45.1
|
| 137 |
+
xgboost==3.0.2
|
| 138 |
+
xxhash==3.5.0
|
| 139 |
+
yarl==1.20.1
|
wandb/run-20250720_144411-9kwggmdj/files/wandb-metadata.json
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"os": "Windows-11-10.0.26100-SP0",
|
| 3 |
+
"python": "CPython 3.13.5",
|
| 4 |
+
"startedAt": "2025-07-20T09:14:11.312224Z",
|
| 5 |
+
"program": "C:\\Users\\Legion\\desktop\\distilbert-sentiment\\main.py",
|
| 6 |
+
"codePath": "main.py",
|
| 7 |
+
"codePathLocal": "main.py",
|
| 8 |
+
"email": "shreshthkapai@gmail.com",
|
| 9 |
+
"root": "C:\\Users\\Legion\\desktop\\distilbert-sentiment",
|
| 10 |
+
"host": "DESKTOP-EIHJJJL",
|
| 11 |
+
"executable": "C:\\Users\\Legion\\Miniconda3\\envs\\ML\\python.exe",
|
| 12 |
+
"cpu_count": 4,
|
| 13 |
+
"cpu_count_logical": 8,
|
| 14 |
+
"gpu": "NVIDIA GeForce GTX 1650",
|
| 15 |
+
"gpu_count": 1,
|
| 16 |
+
"disk": {
|
| 17 |
+
"/": {
|
| 18 |
+
"total": "255230791680",
|
| 19 |
+
"used": "208595525632"
|
| 20 |
+
}
|
| 21 |
+
},
|
| 22 |
+
"memory": {
|
| 23 |
+
"total": "8506298368"
|
| 24 |
+
},
|
| 25 |
+
"gpu_nvidia": [
|
| 26 |
+
{
|
| 27 |
+
"name": "NVIDIA GeForce GTX 1650",
|
| 28 |
+
"memoryTotal": "4294967296",
|
| 29 |
+
"cudaCores": 1024,
|
| 30 |
+
"architecture": "Turing",
|
| 31 |
+
"uuid": "GPU-fbcd7647-fb67-66f5-b8c7-1a4198b7e4fa"
|
| 32 |
+
}
|
| 33 |
+
],
|
| 34 |
+
"cudaVersion": "12.7",
|
| 35 |
+
"writerId": "qd7dze61nxdy0n83hyx7lap6a5tql6xc"
|
| 36 |
+
}
|
wandb/run-20250720_144411-9kwggmdj/files/wandb-summary.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"train/grad_norm":26.698631286621094,"train/learning_rate":2.5472000000000003e-05,"eval/samples_per_second":48.865,"_runtime":2637,"eval/runtime":511.6142,"eval/f1":0.8900558472951571,"eval/accuracy":0.89448,"eval/steps_per_second":3.055,"_timestamp":1.7530054751176744e+09,"train/loss":0.1858,"train/global_step":4600,"train/epoch":1.472,"eval/loss":0.2969476878643036,"eval/recall":0.85424,"_wandb":{"runtime":2637},"_step":92,"eval/precision":0.9290064381416391}
|
wandb/run-20250720_144411-9kwggmdj/logs/debug-internal.log
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2025-07-20T14:44:13.6837247+05:30","level":"INFO","msg":"stream: starting","core version":"0.21.0"}
|
| 2 |
+
{"time":"2025-07-20T14:44:14.9093201+05:30","level":"INFO","msg":"stream: created new stream","id":"9kwggmdj"}
|
| 3 |
+
{"time":"2025-07-20T14:44:14.9093201+05:30","level":"INFO","msg":"stream: started","id":"9kwggmdj"}
|
| 4 |
+
{"time":"2025-07-20T14:44:14.9093201+05:30","level":"INFO","msg":"handler: started","stream_id":"9kwggmdj"}
|
| 5 |
+
{"time":"2025-07-20T14:44:14.9093201+05:30","level":"INFO","msg":"sender: started","stream_id":"9kwggmdj"}
|
| 6 |
+
{"time":"2025-07-20T14:44:14.9093201+05:30","level":"INFO","msg":"writer: Do: started","stream_id":"9kwggmdj"}
|
| 7 |
+
{"time":"2025-07-20T15:28:13.4157038+05:30","level":"INFO","msg":"stream: closing","id":"9kwggmdj"}
|
| 8 |
+
{"time":"2025-07-20T15:28:16.7459113+05:30","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
|
| 9 |
+
{"time":"2025-07-20T15:28:17.5720383+05:30","level":"INFO","msg":"sender: closed","stream_id":"9kwggmdj"}
|
| 10 |
+
{"time":"2025-07-20T15:28:17.5720383+05:30","level":"INFO","msg":"handler: closed","stream_id":"9kwggmdj"}
|
| 11 |
+
{"time":"2025-07-20T15:28:17.5720383+05:30","level":"INFO","msg":"writer: Close: closed","stream_id":"9kwggmdj"}
|
| 12 |
+
{"time":"2025-07-20T15:28:17.5820507+05:30","level":"INFO","msg":"stream: closed","id":"9kwggmdj"}
|
wandb/run-20250720_144411-9kwggmdj/logs/debug.log
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-07-20 14:44:11,319 INFO MainThread:4228 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0
|
| 2 |
+
2025-07-20 14:44:11,319 INFO MainThread:4228 [wandb_setup.py:_flush():80] Configure stats pid to 4228
|
| 3 |
+
2025-07-20 14:44:11,320 INFO MainThread:4228 [wandb_setup.py:_flush():80] Loading settings from C:\Users\Legion\.config\wandb\settings
|
| 4 |
+
2025-07-20 14:44:11,320 INFO MainThread:4228 [wandb_setup.py:_flush():80] Loading settings from C:\Users\Legion\desktop\distilbert-sentiment\wandb\settings
|
| 5 |
+
2025-07-20 14:44:11,320 INFO MainThread:4228 [wandb_setup.py:_flush():80] Loading settings from environment variables
|
| 6 |
+
2025-07-20 14:44:11,320 INFO MainThread:4228 [wandb_init.py:setup_run_log_directory():703] Logging user logs to C:\Users\Legion\desktop\distilbert-sentiment\wandb\run-20250720_144411-9kwggmdj\logs\debug.log
|
| 7 |
+
2025-07-20 14:44:11,321 INFO MainThread:4228 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to C:\Users\Legion\desktop\distilbert-sentiment\wandb\run-20250720_144411-9kwggmdj\logs\debug-internal.log
|
| 8 |
+
2025-07-20 14:44:11,321 INFO MainThread:4228 [wandb_init.py:init():830] calling init triggers
|
| 9 |
+
2025-07-20 14:44:11,321 INFO MainThread:4228 [wandb_init.py:init():835] wandb.init called with sweep_config: {}
|
| 10 |
+
config: {'_wandb': {}}
|
| 11 |
+
2025-07-20 14:44:11,321 INFO MainThread:4228 [wandb_init.py:init():871] starting backend
|
| 12 |
+
2025-07-20 14:44:12,739 INFO MainThread:4228 [wandb_init.py:init():874] sending inform_init request
|
| 13 |
+
2025-07-20 14:44:13,671 INFO MainThread:4228 [wandb_init.py:init():882] backend started and connected
|
| 14 |
+
2025-07-20 14:44:13,676 INFO MainThread:4228 [wandb_init.py:init():953] updated telemetry
|
| 15 |
+
2025-07-20 14:44:13,680 INFO MainThread:4228 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout
|
| 16 |
+
2025-07-20 14:44:15,444 INFO MainThread:4228 [wandb_init.py:init():1029] starting run threads in backend
|
| 17 |
+
2025-07-20 14:44:15,878 INFO MainThread:4228 [wandb_run.py:_console_start():2458] atexit reg
|
| 18 |
+
2025-07-20 14:44:15,879 INFO MainThread:4228 [wandb_run.py:_redirect():2306] redirect: wrap_raw
|
| 19 |
+
2025-07-20 14:44:15,879 INFO MainThread:4228 [wandb_run.py:_redirect():2375] Wrapping output streams.
|
| 20 |
+
2025-07-20 14:44:15,879 INFO MainThread:4228 [wandb_run.py:_redirect():2398] Redirects installed.
|
| 21 |
+
2025-07-20 14:44:15,885 INFO MainThread:4228 [wandb_init.py:init():1075] run started, returning control to user process
|
| 22 |
+
2025-07-20 14:44:15,889 INFO MainThread:4228 [wandb_run.py:_config_callback():1363] config_cb None None {'vocab_size': 30522, 'max_position_embeddings': 512, 'sinusoidal_pos_embds': False, 'n_layers': 6, 'n_heads': 12, 'dim': 768, 'hidden_dim': 3072, 'dropout': 0.1, 'attention_dropout': 0.1, 'activation': 'gelu', 'initializer_range': 0.02, 'qa_dropout': 0.1, 'seq_classif_dropout': 0.2, 'return_dict': True, 'output_hidden_states': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['DistilBertForMaskedLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'distilbert-base-uncased', 'transformers_version': '4.53.2', 'model_type': 'distilbert', 'tie_weights_': True, 'output_attentions': False, 'output_dir': './model', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 
'do_predict': False, 'eval_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './logs', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 50, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': False, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': './model', 'disable_tqdm': True, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'f1', 'greater_is_better': True, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 
'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'hub_revision': None, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'liger_kernel_config': None, 'eval_use_gather_object': False, 'average_tokens_across_devices': False}
|
| 23 |
+
2025-07-20 14:44:15,894 INFO MainThread:4228 [wandb_config.py:__setitem__():154] [no run ID] config set model/num_parameters = 66955010 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x0000015891724590>>
|
| 24 |
+
2025-07-20 14:44:15,894 INFO MainThread:4228 [wandb_run.py:_config_callback():1363] config_cb model/num_parameters 66955010 None
|
| 25 |
+
2025-07-20 15:28:12,363 INFO MsgRouterThr:4228 [mailbox.py:close():129] [no run ID] Closing mailbox, abandoning 1 handles.
|
wandb/run-20250720_144411-9kwggmdj/run-9kwggmdj.wandb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:de1b57c0d7b948fcdacf3f80d9fa12fd8f80b6888eea1c8acc8593a8aa7b62d1
|
| 3 |
+
size 231840
|
wandb/run-20250720_154435-9xqrzjdo/files/config.yaml
ADDED
|
@@ -0,0 +1,493 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
_name_or_path:
|
| 2 |
+
value: distilbert-base-uncased
|
| 3 |
+
_wandb:
|
| 4 |
+
value:
|
| 5 |
+
cli_version: 0.21.0
|
| 6 |
+
e:
|
| 7 |
+
0ygkgjf4tjw3nzhcstc0bi4ropv1pahk:
|
| 8 |
+
codePath: main.py
|
| 9 |
+
codePathLocal: main.py
|
| 10 |
+
cpu_count: 4
|
| 11 |
+
cpu_count_logical: 8
|
| 12 |
+
cudaVersion: "12.7"
|
| 13 |
+
disk:
|
| 14 |
+
/:
|
| 15 |
+
total: "255230791680"
|
| 16 |
+
used: "225197662208"
|
| 17 |
+
email: shreshthkapai@gmail.com
|
| 18 |
+
executable: C:\Users\Legion\Miniconda3\envs\ML\python.exe
|
| 19 |
+
gpu: NVIDIA GeForce GTX 1650
|
| 20 |
+
gpu_count: 1
|
| 21 |
+
gpu_nvidia:
|
| 22 |
+
- architecture: Turing
|
| 23 |
+
cudaCores: 1024
|
| 24 |
+
memoryTotal: "4294967296"
|
| 25 |
+
name: NVIDIA GeForce GTX 1650
|
| 26 |
+
uuid: GPU-fbcd7647-fb67-66f5-b8c7-1a4198b7e4fa
|
| 27 |
+
host: DESKTOP-EIHJJJL
|
| 28 |
+
memory:
|
| 29 |
+
total: "8506298368"
|
| 30 |
+
os: Windows-11-10.0.26100-SP0
|
| 31 |
+
program: C:\Users\Legion\desktop\distilbert-sentiment\main.py
|
| 32 |
+
python: CPython 3.13.5
|
| 33 |
+
root: C:\Users\Legion\desktop\distilbert-sentiment
|
| 34 |
+
startedAt: "2025-07-20T10:14:35.345095Z"
|
| 35 |
+
writerId: 0ygkgjf4tjw3nzhcstc0bi4ropv1pahk
|
| 36 |
+
m:
|
| 37 |
+
- "1": train/global_step
|
| 38 |
+
"6":
|
| 39 |
+
- 3
|
| 40 |
+
"7": []
|
| 41 |
+
- "2": '*'
|
| 42 |
+
"5": 1
|
| 43 |
+
"6":
|
| 44 |
+
- 1
|
| 45 |
+
"7": []
|
| 46 |
+
python_version: 3.13.5
|
| 47 |
+
t:
|
| 48 |
+
"1":
|
| 49 |
+
- 1
|
| 50 |
+
- 5
|
| 51 |
+
- 11
|
| 52 |
+
- 41
|
| 53 |
+
- 49
|
| 54 |
+
- 51
|
| 55 |
+
- 53
|
| 56 |
+
- 71
|
| 57 |
+
- 105
|
| 58 |
+
"2":
|
| 59 |
+
- 1
|
| 60 |
+
- 5
|
| 61 |
+
- 11
|
| 62 |
+
- 41
|
| 63 |
+
- 49
|
| 64 |
+
- 51
|
| 65 |
+
- 53
|
| 66 |
+
- 71
|
| 67 |
+
- 105
|
| 68 |
+
"3":
|
| 69 |
+
- 7
|
| 70 |
+
- 13
|
| 71 |
+
- 19
|
| 72 |
+
- 66
|
| 73 |
+
"4": 3.13.5
|
| 74 |
+
"5": 0.21.0
|
| 75 |
+
"6": 4.53.2
|
| 76 |
+
"8":
|
| 77 |
+
- 3
|
| 78 |
+
"9":
|
| 79 |
+
"1": transformers_trainer
|
| 80 |
+
"12": 0.21.0
|
| 81 |
+
"13": windows-amd64
|
| 82 |
+
accelerator_config:
|
| 83 |
+
value:
|
| 84 |
+
dispatch_batches: null
|
| 85 |
+
even_batches: true
|
| 86 |
+
gradient_accumulation_kwargs: null
|
| 87 |
+
non_blocking: false
|
| 88 |
+
split_batches: false
|
| 89 |
+
use_seedable_sampler: true
|
| 90 |
+
activation:
|
| 91 |
+
value: gelu
|
| 92 |
+
adafactor:
|
| 93 |
+
value: false
|
| 94 |
+
adam_beta1:
|
| 95 |
+
value: 0.9
|
| 96 |
+
adam_beta2:
|
| 97 |
+
value: 0.999
|
| 98 |
+
adam_epsilon:
|
| 99 |
+
value: 1e-08
|
| 100 |
+
add_cross_attention:
|
| 101 |
+
value: false
|
| 102 |
+
architectures:
|
| 103 |
+
value:
|
| 104 |
+
- DistilBertForMaskedLM
|
| 105 |
+
attention_dropout:
|
| 106 |
+
value: 0.1
|
| 107 |
+
auto_find_batch_size:
|
| 108 |
+
value: false
|
| 109 |
+
average_tokens_across_devices:
|
| 110 |
+
value: false
|
| 111 |
+
bad_words_ids:
|
| 112 |
+
value: null
|
| 113 |
+
batch_eval_metrics:
|
| 114 |
+
value: false
|
| 115 |
+
begin_suppress_tokens:
|
| 116 |
+
value: null
|
| 117 |
+
bf16:
|
| 118 |
+
value: false
|
| 119 |
+
bf16_full_eval:
|
| 120 |
+
value: false
|
| 121 |
+
bos_token_id:
|
| 122 |
+
value: null
|
| 123 |
+
chunk_size_feed_forward:
|
| 124 |
+
value: 0
|
| 125 |
+
cross_attention_hidden_size:
|
| 126 |
+
value: null
|
| 127 |
+
data_seed:
|
| 128 |
+
value: null
|
| 129 |
+
dataloader_drop_last:
|
| 130 |
+
value: false
|
| 131 |
+
dataloader_num_workers:
|
| 132 |
+
value: 0
|
| 133 |
+
dataloader_persistent_workers:
|
| 134 |
+
value: false
|
| 135 |
+
dataloader_pin_memory:
|
| 136 |
+
value: true
|
| 137 |
+
dataloader_prefetch_factor:
|
| 138 |
+
value: null
|
| 139 |
+
ddp_backend:
|
| 140 |
+
value: null
|
| 141 |
+
ddp_broadcast_buffers:
|
| 142 |
+
value: null
|
| 143 |
+
ddp_bucket_cap_mb:
|
| 144 |
+
value: null
|
| 145 |
+
ddp_find_unused_parameters:
|
| 146 |
+
value: null
|
| 147 |
+
ddp_timeout:
|
| 148 |
+
value: 1800
|
| 149 |
+
debug:
|
| 150 |
+
value: []
|
| 151 |
+
decoder_start_token_id:
|
| 152 |
+
value: null
|
| 153 |
+
deepspeed:
|
| 154 |
+
value: null
|
| 155 |
+
dim:
|
| 156 |
+
value: 768
|
| 157 |
+
disable_tqdm:
|
| 158 |
+
value: true
|
| 159 |
+
diversity_penalty:
|
| 160 |
+
value: 0
|
| 161 |
+
do_eval:
|
| 162 |
+
value: true
|
| 163 |
+
do_predict:
|
| 164 |
+
value: false
|
| 165 |
+
do_sample:
|
| 166 |
+
value: false
|
| 167 |
+
do_train:
|
| 168 |
+
value: false
|
| 169 |
+
dropout:
|
| 170 |
+
value: 0.1
|
| 171 |
+
early_stopping:
|
| 172 |
+
value: false
|
| 173 |
+
encoder_no_repeat_ngram_size:
|
| 174 |
+
value: 0
|
| 175 |
+
eos_token_id:
|
| 176 |
+
value: null
|
| 177 |
+
eval_accumulation_steps:
|
| 178 |
+
value: null
|
| 179 |
+
eval_delay:
|
| 180 |
+
value: 0
|
| 181 |
+
eval_do_concat_batches:
|
| 182 |
+
value: true
|
| 183 |
+
eval_on_start:
|
| 184 |
+
value: false
|
| 185 |
+
eval_steps:
|
| 186 |
+
value: null
|
| 187 |
+
eval_strategy:
|
| 188 |
+
value: epoch
|
| 189 |
+
eval_use_gather_object:
|
| 190 |
+
value: false
|
| 191 |
+
exponential_decay_length_penalty:
|
| 192 |
+
value: null
|
| 193 |
+
finetuning_task:
|
| 194 |
+
value: null
|
| 195 |
+
forced_bos_token_id:
|
| 196 |
+
value: null
|
| 197 |
+
forced_eos_token_id:
|
| 198 |
+
value: null
|
| 199 |
+
fp16:
|
| 200 |
+
value: false
|
| 201 |
+
fp16_backend:
|
| 202 |
+
value: auto
|
| 203 |
+
fp16_full_eval:
|
| 204 |
+
value: false
|
| 205 |
+
fp16_opt_level:
|
| 206 |
+
value: O1
|
| 207 |
+
fsdp:
|
| 208 |
+
value: []
|
| 209 |
+
fsdp_config:
|
| 210 |
+
value:
|
| 211 |
+
min_num_params: 0
|
| 212 |
+
xla: false
|
| 213 |
+
xla_fsdp_grad_ckpt: false
|
| 214 |
+
xla_fsdp_v2: false
|
| 215 |
+
fsdp_min_num_params:
|
| 216 |
+
value: 0
|
| 217 |
+
fsdp_transformer_layer_cls_to_wrap:
|
| 218 |
+
value: null
|
| 219 |
+
full_determinism:
|
| 220 |
+
value: false
|
| 221 |
+
gradient_accumulation_steps:
|
| 222 |
+
value: 1
|
| 223 |
+
gradient_checkpointing:
|
| 224 |
+
value: false
|
| 225 |
+
gradient_checkpointing_kwargs:
|
| 226 |
+
value: null
|
| 227 |
+
greater_is_better:
|
| 228 |
+
value: true
|
| 229 |
+
group_by_length:
|
| 230 |
+
value: false
|
| 231 |
+
half_precision_backend:
|
| 232 |
+
value: auto
|
| 233 |
+
hidden_dim:
|
| 234 |
+
value: 3072
|
| 235 |
+
hub_always_push:
|
| 236 |
+
value: false
|
| 237 |
+
hub_model_id:
|
| 238 |
+
value: null
|
| 239 |
+
hub_private_repo:
|
| 240 |
+
value: null
|
| 241 |
+
hub_revision:
|
| 242 |
+
value: null
|
| 243 |
+
hub_strategy:
|
| 244 |
+
value: every_save
|
| 245 |
+
hub_token:
|
| 246 |
+
value: <HUB_TOKEN>
|
| 247 |
+
id2label:
|
| 248 |
+
value:
|
| 249 |
+
"0": LABEL_0
|
| 250 |
+
"1": LABEL_1
|
| 251 |
+
ignore_data_skip:
|
| 252 |
+
value: false
|
| 253 |
+
include_for_metrics:
|
| 254 |
+
value: []
|
| 255 |
+
include_inputs_for_metrics:
|
| 256 |
+
value: false
|
| 257 |
+
include_num_input_tokens_seen:
|
| 258 |
+
value: false
|
| 259 |
+
include_tokens_per_second:
|
| 260 |
+
value: false
|
| 261 |
+
initializer_range:
|
| 262 |
+
value: 0.02
|
| 263 |
+
is_decoder:
|
| 264 |
+
value: false
|
| 265 |
+
is_encoder_decoder:
|
| 266 |
+
value: false
|
| 267 |
+
jit_mode_eval:
|
| 268 |
+
value: false
|
| 269 |
+
label_names:
|
| 270 |
+
value: null
|
| 271 |
+
label_smoothing_factor:
|
| 272 |
+
value: 0
|
| 273 |
+
label2id:
|
| 274 |
+
value:
|
| 275 |
+
LABEL_0: 0
|
| 276 |
+
LABEL_1: 1
|
| 277 |
+
learning_rate:
|
| 278 |
+
value: 5e-05
|
| 279 |
+
length_column_name:
|
| 280 |
+
value: length
|
| 281 |
+
length_penalty:
|
| 282 |
+
value: 1
|
| 283 |
+
liger_kernel_config:
|
| 284 |
+
value: null
|
| 285 |
+
load_best_model_at_end:
|
| 286 |
+
value: true
|
| 287 |
+
local_rank:
|
| 288 |
+
value: 0
|
| 289 |
+
log_level:
|
| 290 |
+
value: passive
|
| 291 |
+
log_level_replica:
|
| 292 |
+
value: warning
|
| 293 |
+
log_on_each_node:
|
| 294 |
+
value: true
|
| 295 |
+
logging_dir:
|
| 296 |
+
value: ./logs
|
| 297 |
+
logging_first_step:
|
| 298 |
+
value: false
|
| 299 |
+
logging_nan_inf_filter:
|
| 300 |
+
value: true
|
| 301 |
+
logging_steps:
|
| 302 |
+
value: 50
|
| 303 |
+
logging_strategy:
|
| 304 |
+
value: steps
|
| 305 |
+
lr_scheduler_type:
|
| 306 |
+
value: linear
|
| 307 |
+
max_grad_norm:
|
| 308 |
+
value: 1
|
| 309 |
+
max_length:
|
| 310 |
+
value: 20
|
| 311 |
+
max_position_embeddings:
|
| 312 |
+
value: 512
|
| 313 |
+
max_steps:
|
| 314 |
+
value: -1
|
| 315 |
+
metric_for_best_model:
|
| 316 |
+
value: f1
|
| 317 |
+
min_length:
|
| 318 |
+
value: 0
|
| 319 |
+
model/num_parameters:
|
| 320 |
+
value: 66955010
|
| 321 |
+
model_type:
|
| 322 |
+
value: distilbert
|
| 323 |
+
mp_parameters:
|
| 324 |
+
value: ""
|
| 325 |
+
n_heads:
|
| 326 |
+
value: 12
|
| 327 |
+
n_layers:
|
| 328 |
+
value: 6
|
| 329 |
+
neftune_noise_alpha:
|
| 330 |
+
value: null
|
| 331 |
+
no_cuda:
|
| 332 |
+
value: false
|
| 333 |
+
no_repeat_ngram_size:
|
| 334 |
+
value: 0
|
| 335 |
+
num_beam_groups:
|
| 336 |
+
value: 1
|
| 337 |
+
num_beams:
|
| 338 |
+
value: 1
|
| 339 |
+
num_return_sequences:
|
| 340 |
+
value: 1
|
| 341 |
+
num_train_epochs:
|
| 342 |
+
value: 3
|
| 343 |
+
optim:
|
| 344 |
+
value: adamw_torch
|
| 345 |
+
optim_args:
|
| 346 |
+
value: null
|
| 347 |
+
optim_target_modules:
|
| 348 |
+
value: null
|
| 349 |
+
output_attentions:
|
| 350 |
+
value: false
|
| 351 |
+
output_dir:
|
| 352 |
+
value: ./model
|
| 353 |
+
output_hidden_states:
|
| 354 |
+
value: false
|
| 355 |
+
output_scores:
|
| 356 |
+
value: false
|
| 357 |
+
overwrite_output_dir:
|
| 358 |
+
value: false
|
| 359 |
+
pad_token_id:
|
| 360 |
+
value: 0
|
| 361 |
+
past_index:
|
| 362 |
+
value: -1
|
| 363 |
+
per_device_eval_batch_size:
|
| 364 |
+
value: 16
|
| 365 |
+
per_device_train_batch_size:
|
| 366 |
+
value: 8
|
| 367 |
+
per_gpu_eval_batch_size:
|
| 368 |
+
value: null
|
| 369 |
+
per_gpu_train_batch_size:
|
| 370 |
+
value: null
|
| 371 |
+
prediction_loss_only:
|
| 372 |
+
value: false
|
| 373 |
+
prefix:
|
| 374 |
+
value: null
|
| 375 |
+
problem_type:
|
| 376 |
+
value: null
|
| 377 |
+
push_to_hub:
|
| 378 |
+
value: false
|
| 379 |
+
push_to_hub_model_id:
|
| 380 |
+
value: null
|
| 381 |
+
push_to_hub_organization:
|
| 382 |
+
value: null
|
| 383 |
+
push_to_hub_token:
|
| 384 |
+
value: <PUSH_TO_HUB_TOKEN>
|
| 385 |
+
qa_dropout:
|
| 386 |
+
value: 0.1
|
| 387 |
+
ray_scope:
|
| 388 |
+
value: last
|
| 389 |
+
remove_invalid_values:
|
| 390 |
+
value: false
|
| 391 |
+
remove_unused_columns:
|
| 392 |
+
value: true
|
| 393 |
+
repetition_penalty:
|
| 394 |
+
value: 1
|
| 395 |
+
report_to:
|
| 396 |
+
value:
|
| 397 |
+
- wandb
|
| 398 |
+
restore_callback_states_from_checkpoint:
|
| 399 |
+
value: false
|
| 400 |
+
resume_from_checkpoint:
|
| 401 |
+
value: null
|
| 402 |
+
return_dict:
|
| 403 |
+
value: true
|
| 404 |
+
return_dict_in_generate:
|
| 405 |
+
value: false
|
| 406 |
+
run_name:
|
| 407 |
+
value: ./model
|
| 408 |
+
save_on_each_node:
|
| 409 |
+
value: false
|
| 410 |
+
save_only_model:
|
| 411 |
+
value: false
|
| 412 |
+
save_safetensors:
|
| 413 |
+
value: true
|
| 414 |
+
save_steps:
|
| 415 |
+
value: 500
|
| 416 |
+
save_strategy:
|
| 417 |
+
value: epoch
|
| 418 |
+
save_total_limit:
|
| 419 |
+
value: null
|
| 420 |
+
seed:
|
| 421 |
+
value: 42
|
| 422 |
+
sep_token_id:
|
| 423 |
+
value: null
|
| 424 |
+
seq_classif_dropout:
|
| 425 |
+
value: 0.2
|
| 426 |
+
sinusoidal_pos_embds:
|
| 427 |
+
value: false
|
| 428 |
+
skip_memory_metrics:
|
| 429 |
+
value: true
|
| 430 |
+
suppress_tokens:
|
| 431 |
+
value: null
|
| 432 |
+
task_specific_params:
|
| 433 |
+
value: null
|
| 434 |
+
temperature:
|
| 435 |
+
value: 1
|
| 436 |
+
tf_legacy_loss:
|
| 437 |
+
value: false
|
| 438 |
+
tf32:
|
| 439 |
+
value: null
|
| 440 |
+
tie_encoder_decoder:
|
| 441 |
+
value: false
|
| 442 |
+
tie_weights_:
|
| 443 |
+
value: true
|
| 444 |
+
tie_word_embeddings:
|
| 445 |
+
value: true
|
| 446 |
+
tokenizer_class:
|
| 447 |
+
value: null
|
| 448 |
+
top_k:
|
| 449 |
+
value: 50
|
| 450 |
+
top_p:
|
| 451 |
+
value: 1
|
| 452 |
+
torch_compile:
|
| 453 |
+
value: false
|
| 454 |
+
torch_compile_backend:
|
| 455 |
+
value: null
|
| 456 |
+
torch_compile_mode:
|
| 457 |
+
value: null
|
| 458 |
+
torch_dtype:
|
| 459 |
+
value: float32
|
| 460 |
+
torch_empty_cache_steps:
|
| 461 |
+
value: null
|
| 462 |
+
torchdynamo:
|
| 463 |
+
value: null
|
| 464 |
+
torchscript:
|
| 465 |
+
value: false
|
| 466 |
+
tpu_metrics_debug:
|
| 467 |
+
value: false
|
| 468 |
+
tpu_num_cores:
|
| 469 |
+
value: null
|
| 470 |
+
transformers_version:
|
| 471 |
+
value: 4.53.2
|
| 472 |
+
typical_p:
|
| 473 |
+
value: 1
|
| 474 |
+
use_bfloat16:
|
| 475 |
+
value: false
|
| 476 |
+
use_cpu:
|
| 477 |
+
value: false
|
| 478 |
+
use_ipex:
|
| 479 |
+
value: false
|
| 480 |
+
use_legacy_prediction_loop:
|
| 481 |
+
value: false
|
| 482 |
+
use_liger_kernel:
|
| 483 |
+
value: false
|
| 484 |
+
use_mps_device:
|
| 485 |
+
value: false
|
| 486 |
+
vocab_size:
|
| 487 |
+
value: 30522
|
| 488 |
+
warmup_ratio:
|
| 489 |
+
value: 0
|
| 490 |
+
warmup_steps:
|
| 491 |
+
value: 0
|
| 492 |
+
weight_decay:
|
| 493 |
+
value: 0
|
wandb/run-20250720_154435-9xqrzjdo/files/output.log
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{'loss': 0.6215, 'grad_norm': 4.55625057220459, 'learning_rate': 4.9738666666666665e-05, 'epoch': 0.016}
|
| 2 |
+
{'loss': 0.5086, 'grad_norm': 13.619754791259766, 'learning_rate': 4.9472e-05, 'epoch': 0.032}
|
| 3 |
+
{'loss': 0.4128, 'grad_norm': 10.843639373779297, 'learning_rate': 4.9205333333333335e-05, 'epoch': 0.048}
|
| 4 |
+
{'loss': 0.3603, 'grad_norm': 7.094396114349365, 'learning_rate': 4.893866666666667e-05, 'epoch': 0.064}
|
| 5 |
+
{'loss': 0.3572, 'grad_norm': 32.03938674926758, 'learning_rate': 4.8672000000000004e-05, 'epoch': 0.08}
|
| 6 |
+
{'loss': 0.4255, 'grad_norm': 2.2694833278656006, 'learning_rate': 4.8405333333333336e-05, 'epoch': 0.096}
|
| 7 |
+
{'loss': 0.3592, 'grad_norm': 1.1852556467056274, 'learning_rate': 4.8138666666666674e-05, 'epoch': 0.112}
|
| 8 |
+
{'loss': 0.3759, 'grad_norm': 8.895912170410156, 'learning_rate': 4.7872e-05, 'epoch': 0.128}
|
| 9 |
+
{'loss': 0.4246, 'grad_norm': 16.175556182861328, 'learning_rate': 4.7605333333333337e-05, 'epoch': 0.144}
|
| 10 |
+
{'loss': 0.3949, 'grad_norm': 13.036661148071289, 'learning_rate': 4.733866666666667e-05, 'epoch': 0.16}
|
| 11 |
+
{'loss': 0.3442, 'grad_norm': 8.27635669708252, 'learning_rate': 4.7072000000000006e-05, 'epoch': 0.176}
|
| 12 |
+
{'loss': 0.4416, 'grad_norm': 19.103059768676758, 'learning_rate': 4.680533333333334e-05, 'epoch': 0.192}
|
| 13 |
+
{'loss': 0.3638, 'grad_norm': 22.78896713256836, 'learning_rate': 4.653866666666667e-05, 'epoch': 0.208}
|
| 14 |
+
{'loss': 0.2995, 'grad_norm': 21.197683334350586, 'learning_rate': 4.627200000000001e-05, 'epoch': 0.224}
|
| 15 |
+
{'loss': 0.3702, 'grad_norm': 6.810858726501465, 'learning_rate': 4.600533333333333e-05, 'epoch': 0.24}
|
| 16 |
+
{'loss': 0.3149, 'grad_norm': 16.848161697387695, 'learning_rate': 4.573866666666667e-05, 'epoch': 0.256}
|
| 17 |
+
Traceback (most recent call last):
|
| 18 |
+
File "C:\Users\Legion\desktop\distilbert-sentiment\main.py", line 74, in <module>
|
| 19 |
+
main()
|
| 20 |
+
~~~~^^
|
| 21 |
+
File "C:\Users\Legion\desktop\distilbert-sentiment\main.py", line 68, in main
|
| 22 |
+
train_pipeline(subset_size=args.subset)
|
| 23 |
+
~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 24 |
+
File "C:\Users\Legion\desktop\distilbert-sentiment\main.py", line 41, in train_pipeline
|
| 25 |
+
train_model(trainer)
|
| 26 |
+
~~~~~~~~~~~^^^^^^^^^
|
| 27 |
+
File "C:\Users\Legion\desktop\distilbert-sentiment\train.py", line 121, in train_model
|
| 28 |
+
print("Starting training...")
|
| 29 |
+
^^^^^^^^^^^^^^^
|
| 30 |
+
File "C:\Users\Legion\Miniconda3\envs\ML\Lib\site-packages\transformers\trainer.py", line 2206, in train
|
| 31 |
+
return inner_training_loop(
|
| 32 |
+
args=args,
|
| 33 |
+
...<2 lines>...
|
| 34 |
+
ignore_keys_for_eval=ignore_keys_for_eval,
|
| 35 |
+
)
|
| 36 |
+
File "C:\Users\Legion\Miniconda3\envs\ML\Lib\site-packages\transformers\trainer.py", line 2553, in _inner_training_loop
|
| 37 |
+
and (torch.isnan(tr_loss_step) or torch.isinf(tr_loss_step))
|
| 38 |
+
~~~~~~~~~~~^^^^^^^^^^^^^^
|
| 39 |
+
KeyboardInterrupt
|
wandb/run-20250720_154435-9xqrzjdo/files/requirements.txt
ADDED
|
@@ -0,0 +1,139 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
accelerate==1.9.0
|
| 2 |
+
aiofiles==24.1.0
|
| 3 |
+
aiohappyeyeballs==2.6.1
|
| 4 |
+
aiohttp==3.12.14
|
| 5 |
+
aiosignal==1.4.0
|
| 6 |
+
alembic==1.16.2
|
| 7 |
+
altair==5.5.0
|
| 8 |
+
annotated-types==0.7.0
|
| 9 |
+
anyio==4.9.0
|
| 10 |
+
attrs==25.3.0
|
| 11 |
+
audioop-lts==0.2.1
|
| 12 |
+
blinker==1.9.0
|
| 13 |
+
Bottleneck==1.4.2
|
| 14 |
+
Brotli==1.1.0
|
| 15 |
+
cachetools==6.1.0
|
| 16 |
+
certifi==2025.6.15
|
| 17 |
+
charset-normalizer==3.4.2
|
| 18 |
+
click==8.2.1
|
| 19 |
+
cloudpickle==3.1.1
|
| 20 |
+
colorama==0.4.6
|
| 21 |
+
colorlog==6.9.0
|
| 22 |
+
contourpy==1.3.1
|
| 23 |
+
cycler==0.11.0
|
| 24 |
+
datasets==4.0.0
|
| 25 |
+
dill==0.3.8
|
| 26 |
+
fastapi==0.116.1
|
| 27 |
+
ffmpy==0.6.0
|
| 28 |
+
filelock==3.18.0
|
| 29 |
+
fonttools==4.55.3
|
| 30 |
+
frozenlist==1.7.0
|
| 31 |
+
fsspec==2025.3.0
|
| 32 |
+
gitdb==4.0.12
|
| 33 |
+
GitPython==3.1.44
|
| 34 |
+
gradio==5.37.0
|
| 35 |
+
gradio_client==1.10.4
|
| 36 |
+
greenlet==3.2.3
|
| 37 |
+
groovy==0.1.2
|
| 38 |
+
h11==0.16.0
|
| 39 |
+
httpcore==1.0.9
|
| 40 |
+
httpx==0.28.1
|
| 41 |
+
huggingface-hub==0.33.4
|
| 42 |
+
idna==3.10
|
| 43 |
+
imbalanced-learn==0.13.0
|
| 44 |
+
imblearn==0.0
|
| 45 |
+
Jinja2==3.1.6
|
| 46 |
+
joblib==1.4.2
|
| 47 |
+
jsonschema==4.24.0
|
| 48 |
+
jsonschema-specifications==2025.4.1
|
| 49 |
+
kiwisolver==1.4.8
|
| 50 |
+
llvmlite==0.44.0
|
| 51 |
+
Mako==1.3.10
|
| 52 |
+
markdown-it-py==3.0.0
|
| 53 |
+
MarkupSafe==3.0.2
|
| 54 |
+
matplotlib==3.9.2
|
| 55 |
+
mdurl==0.1.2
|
| 56 |
+
mpmath==1.3.0
|
| 57 |
+
multidict==6.6.3
|
| 58 |
+
multiprocess==0.70.16
|
| 59 |
+
narwhals==1.44.0
|
| 60 |
+
networkx==3.5
|
| 61 |
+
ninja==1.11.1.4
|
| 62 |
+
numba==0.61.2
|
| 63 |
+
numexpr==2.10.2
|
| 64 |
+
numpy==2.1.1
|
| 65 |
+
optuna==4.4.0
|
| 66 |
+
orjson==3.11.0
|
| 67 |
+
packaging==24.2
|
| 68 |
+
pandas==2.2.3
|
| 69 |
+
pillow==11.1.0
|
| 70 |
+
pip==25.1
|
| 71 |
+
platformdirs==4.3.8
|
| 72 |
+
plotly==6.2.0
|
| 73 |
+
propcache==0.3.2
|
| 74 |
+
protobuf==6.31.1
|
| 75 |
+
psutil==7.0.0
|
| 76 |
+
pyarrow==20.0.0
|
| 77 |
+
pybind11==3.0.0
|
| 78 |
+
pydantic==2.11.7
|
| 79 |
+
pydantic_core==2.33.2
|
| 80 |
+
pydeck==0.9.1
|
| 81 |
+
pydub==0.25.1
|
| 82 |
+
Pygments==2.19.2
|
| 83 |
+
pyparsing==3.2.0
|
| 84 |
+
PyQt6==6.7.1
|
| 85 |
+
PyQt6_sip==13.9.1
|
| 86 |
+
python-dateutil==2.9.0.post0
|
| 87 |
+
python-multipart==0.0.20
|
| 88 |
+
pytz==2024.1
|
| 89 |
+
PyYAML==6.0.2
|
| 90 |
+
referencing==0.36.2
|
| 91 |
+
regex==2024.11.6
|
| 92 |
+
requests==2.32.4
|
| 93 |
+
rich==14.0.0
|
| 94 |
+
rpds-py==0.26.0
|
| 95 |
+
ruff==0.12.3
|
| 96 |
+
safehttpx==0.1.6
|
| 97 |
+
safetensors==0.5.3
|
| 98 |
+
scikit-learn==1.5.2
|
| 99 |
+
scipy==1.15.2
|
| 100 |
+
seaborn==0.13.2
|
| 101 |
+
semantic-version==2.10.0
|
| 102 |
+
sentry-sdk==2.33.0
|
| 103 |
+
setuptools==78.1.1
|
| 104 |
+
shap==0.48.0
|
| 105 |
+
shellingham==1.5.4
|
| 106 |
+
sip==6.10.0
|
| 107 |
+
six==1.17.0
|
| 108 |
+
sklearn-compat==0.1.3
|
| 109 |
+
slicer==0.0.8
|
| 110 |
+
smmap==5.0.2
|
| 111 |
+
sniffio==1.3.1
|
| 112 |
+
SQLAlchemy==2.0.41
|
| 113 |
+
starlette==0.47.1
|
| 114 |
+
streamlit==1.46.1
|
| 115 |
+
sympy==1.14.0
|
| 116 |
+
tenacity==9.1.2
|
| 117 |
+
threadpoolctl==3.5.0
|
| 118 |
+
tokenizers==0.21.2
|
| 119 |
+
toml==0.10.2
|
| 120 |
+
tomlkit==0.13.3
|
| 121 |
+
torch==2.7.1+cu118
|
| 122 |
+
torchaudio==2.7.1+cu118
|
| 123 |
+
torchvision==0.22.1
|
| 124 |
+
tornado==6.5.1
|
| 125 |
+
tqdm==4.67.1
|
| 126 |
+
transformers==4.53.2
|
| 127 |
+
typer==0.16.0
|
| 128 |
+
typing_extensions==4.14.0
|
| 129 |
+
typing-inspection==0.4.1
|
| 130 |
+
tzdata==2025.2
|
| 131 |
+
urllib3==2.5.0
|
| 132 |
+
uvicorn==0.35.0
|
| 133 |
+
wandb==0.21.0
|
| 134 |
+
watchdog==6.0.0
|
| 135 |
+
websockets==15.0.1
|
| 136 |
+
wheel==0.45.1
|
| 137 |
+
xgboost==3.0.2
|
| 138 |
+
xxhash==3.5.0
|
| 139 |
+
yarl==1.20.1
|
wandb/run-20250720_154435-9xqrzjdo/files/wandb-metadata.json
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"os": "Windows-11-10.0.26100-SP0",
|
| 3 |
+
"python": "CPython 3.13.5",
|
| 4 |
+
"startedAt": "2025-07-20T10:14:35.345095Z",
|
| 5 |
+
"program": "C:\\Users\\Legion\\desktop\\distilbert-sentiment\\main.py",
|
| 6 |
+
"codePath": "main.py",
|
| 7 |
+
"codePathLocal": "main.py",
|
| 8 |
+
"email": "shreshthkapai@gmail.com",
|
| 9 |
+
"root": "C:\\Users\\Legion\\desktop\\distilbert-sentiment",
|
| 10 |
+
"host": "DESKTOP-EIHJJJL",
|
| 11 |
+
"executable": "C:\\Users\\Legion\\Miniconda3\\envs\\ML\\python.exe",
|
| 12 |
+
"cpu_count": 4,
|
| 13 |
+
"cpu_count_logical": 8,
|
| 14 |
+
"gpu": "NVIDIA GeForce GTX 1650",
|
| 15 |
+
"gpu_count": 1,
|
| 16 |
+
"disk": {
|
| 17 |
+
"/": {
|
| 18 |
+
"total": "255230791680",
|
| 19 |
+
"used": "225197662208"
|
| 20 |
+
}
|
| 21 |
+
},
|
| 22 |
+
"memory": {
|
| 23 |
+
"total": "8506298368"
|
| 24 |
+
},
|
| 25 |
+
"gpu_nvidia": [
|
| 26 |
+
{
|
| 27 |
+
"name": "NVIDIA GeForce GTX 1650",
|
| 28 |
+
"memoryTotal": "4294967296",
|
| 29 |
+
"cudaCores": 1024,
|
| 30 |
+
"architecture": "Turing",
|
| 31 |
+
"uuid": "GPU-fbcd7647-fb67-66f5-b8c7-1a4198b7e4fa"
|
| 32 |
+
}
|
| 33 |
+
],
|
| 34 |
+
"cudaVersion": "12.7",
|
| 35 |
+
"writerId": "0ygkgjf4tjw3nzhcstc0bi4ropv1pahk"
|
| 36 |
+
}
|
wandb/run-20250720_154435-9xqrzjdo/files/wandb-summary.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"train/grad_norm":16.848161697387695,"_runtime":388,"_wandb":{"runtime":388},"train/epoch":0.256,"train/global_step":800,"_timestamp":1.753006850924072e+09,"train/learning_rate":4.573866666666667e-05,"_step":15,"train/loss":0.3149}
|
wandb/run-20250720_154435-9xqrzjdo/logs/debug-internal.log
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2025-07-20T15:44:35.9771205+05:30","level":"INFO","msg":"stream: starting","core version":"0.21.0"}
|
| 2 |
+
{"time":"2025-07-20T15:44:36.9333877+05:30","level":"INFO","msg":"stream: created new stream","id":"9xqrzjdo"}
|
| 3 |
+
{"time":"2025-07-20T15:44:36.9333877+05:30","level":"INFO","msg":"stream: started","id":"9xqrzjdo"}
|
| 4 |
+
{"time":"2025-07-20T15:44:36.9333877+05:30","level":"INFO","msg":"handler: started","stream_id":"9xqrzjdo"}
|
| 5 |
+
{"time":"2025-07-20T15:44:36.9333877+05:30","level":"INFO","msg":"sender: started","stream_id":"9xqrzjdo"}
|
| 6 |
+
{"time":"2025-07-20T15:44:36.9333877+05:30","level":"INFO","msg":"writer: Do: started","stream_id":"9xqrzjdo"}
|
| 7 |
+
{"time":"2025-07-20T15:51:06.1959407+05:30","level":"INFO","msg":"stream: closing","id":"9xqrzjdo"}
|
| 8 |
+
{"time":"2025-07-20T15:51:08.7071239+05:30","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
|
| 9 |
+
{"time":"2025-07-20T15:51:09.1729659+05:30","level":"INFO","msg":"sender: closed","stream_id":"9xqrzjdo"}
|
| 10 |
+
{"time":"2025-07-20T15:51:09.1735011+05:30","level":"INFO","msg":"handler: closed","stream_id":"9xqrzjdo"}
|
| 11 |
+
{"time":"2025-07-20T15:51:09.1735011+05:30","level":"INFO","msg":"writer: Close: closed","stream_id":"9xqrzjdo"}
|
| 12 |
+
{"time":"2025-07-20T15:51:09.1740459+05:30","level":"INFO","msg":"stream: closed","id":"9xqrzjdo"}
|
wandb/run-20250720_154435-9xqrzjdo/logs/debug.log
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-07-20 15:44:35,349 INFO MainThread:2896 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0
|
| 2 |
+
2025-07-20 15:44:35,350 INFO MainThread:2896 [wandb_setup.py:_flush():80] Configure stats pid to 2896
|
| 3 |
+
2025-07-20 15:44:35,350 INFO MainThread:2896 [wandb_setup.py:_flush():80] Loading settings from C:\Users\Legion\.config\wandb\settings
|
| 4 |
+
2025-07-20 15:44:35,350 INFO MainThread:2896 [wandb_setup.py:_flush():80] Loading settings from C:\Users\Legion\desktop\distilbert-sentiment\wandb\settings
|
| 5 |
+
2025-07-20 15:44:35,350 INFO MainThread:2896 [wandb_setup.py:_flush():80] Loading settings from environment variables
|
| 6 |
+
2025-07-20 15:44:35,350 INFO MainThread:2896 [wandb_init.py:setup_run_log_directory():703] Logging user logs to C:\Users\Legion\desktop\distilbert-sentiment\wandb\run-20250720_154435-9xqrzjdo\logs\debug.log
|
| 7 |
+
2025-07-20 15:44:35,351 INFO MainThread:2896 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to C:\Users\Legion\desktop\distilbert-sentiment\wandb\run-20250720_154435-9xqrzjdo\logs\debug-internal.log
|
| 8 |
+
2025-07-20 15:44:35,351 INFO MainThread:2896 [wandb_init.py:init():830] calling init triggers
|
| 9 |
+
2025-07-20 15:44:35,351 INFO MainThread:2896 [wandb_init.py:init():835] wandb.init called with sweep_config: {}
|
| 10 |
+
config: {'_wandb': {}}
|
| 11 |
+
2025-07-20 15:44:35,351 INFO MainThread:2896 [wandb_init.py:init():871] starting backend
|
| 12 |
+
2025-07-20 15:44:35,877 INFO MainThread:2896 [wandb_init.py:init():874] sending inform_init request
|
| 13 |
+
2025-07-20 15:44:35,966 INFO MainThread:2896 [wandb_init.py:init():882] backend started and connected
|
| 14 |
+
2025-07-20 15:44:35,969 INFO MainThread:2896 [wandb_init.py:init():953] updated telemetry
|
| 15 |
+
2025-07-20 15:44:35,973 INFO MainThread:2896 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout
|
| 16 |
+
2025-07-20 15:44:37,312 INFO MainThread:2896 [wandb_init.py:init():1029] starting run threads in backend
|
| 17 |
+
2025-07-20 15:44:37,744 INFO MainThread:2896 [wandb_run.py:_console_start():2458] atexit reg
|
| 18 |
+
2025-07-20 15:44:37,744 INFO MainThread:2896 [wandb_run.py:_redirect():2306] redirect: wrap_raw
|
| 19 |
+
2025-07-20 15:44:37,744 INFO MainThread:2896 [wandb_run.py:_redirect():2375] Wrapping output streams.
|
| 20 |
+
2025-07-20 15:44:37,744 INFO MainThread:2896 [wandb_run.py:_redirect():2398] Redirects installed.
|
| 21 |
+
2025-07-20 15:44:37,750 INFO MainThread:2896 [wandb_init.py:init():1075] run started, returning control to user process
|
| 22 |
+
2025-07-20 15:44:37,752 INFO MainThread:2896 [wandb_run.py:_config_callback():1363] config_cb None None {'vocab_size': 30522, 'max_position_embeddings': 512, 'sinusoidal_pos_embds': False, 'n_layers': 6, 'n_heads': 12, 'dim': 768, 'hidden_dim': 3072, 'dropout': 0.1, 'attention_dropout': 0.1, 'activation': 'gelu', 'initializer_range': 0.02, 'qa_dropout': 0.1, 'seq_classif_dropout': 0.2, 'return_dict': True, 'output_hidden_states': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['DistilBertForMaskedLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'distilbert-base-uncased', 'transformers_version': '4.53.2', 'model_type': 'distilbert', 'tie_weights_': True, 'output_attentions': False, 'output_dir': './model', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 
'do_predict': False, 'eval_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 16, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './logs', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 50, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': False, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': './model', 'disable_tqdm': True, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'f1', 'greater_is_better': True, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 
'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'hub_revision': None, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'liger_kernel_config': None, 'eval_use_gather_object': False, 'average_tokens_across_devices': False}
|
| 23 |
+
2025-07-20 15:44:37,756 INFO MainThread:2896 [wandb_config.py:__setitem__():154] [no run ID] config set model/num_parameters = 66955010 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x0000017E91750440>>
|
| 24 |
+
2025-07-20 15:44:37,756 INFO MainThread:2896 [wandb_run.py:_config_callback():1363] config_cb model/num_parameters 66955010 None
|
| 25 |
+
2025-07-20 15:51:06,119 INFO MsgRouterThr:2896 [mailbox.py:close():129] [no run ID] Closing mailbox, abandoning 1 handles.
|
wandb/run-20250720_154435-9xqrzjdo/run-9xqrzjdo.wandb
ADDED
|
Binary file (42.9 kB). View file
|
|
|
wandb/run-20250720_155338-0h3fksuy/files/config.yaml
ADDED
|
@@ -0,0 +1,494 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
_name_or_path:
|
| 2 |
+
value: distilbert-base-uncased
|
| 3 |
+
_wandb:
|
| 4 |
+
value:
|
| 5 |
+
cli_version: 0.21.0
|
| 6 |
+
e:
|
| 7 |
+
fshn6fq4d357dfamunx9x96y44pdzcc6:
|
| 8 |
+
codePath: main.py
|
| 9 |
+
codePathLocal: main.py
|
| 10 |
+
cpu_count: 4
|
| 11 |
+
cpu_count_logical: 8
|
| 12 |
+
cudaVersion: "12.7"
|
| 13 |
+
disk:
|
| 14 |
+
/:
|
| 15 |
+
total: "255230791680"
|
| 16 |
+
used: "233129451520"
|
| 17 |
+
email: shreshthkapai@gmail.com
|
| 18 |
+
executable: C:\Users\Legion\Miniconda3\envs\ML\python.exe
|
| 19 |
+
gpu: NVIDIA GeForce GTX 1650
|
| 20 |
+
gpu_count: 1
|
| 21 |
+
gpu_nvidia:
|
| 22 |
+
- architecture: Turing
|
| 23 |
+
cudaCores: 1024
|
| 24 |
+
memoryTotal: "4294967296"
|
| 25 |
+
name: NVIDIA GeForce GTX 1650
|
| 26 |
+
uuid: GPU-fbcd7647-fb67-66f5-b8c7-1a4198b7e4fa
|
| 27 |
+
host: DESKTOP-EIHJJJL
|
| 28 |
+
memory:
|
| 29 |
+
total: "8506298368"
|
| 30 |
+
os: Windows-11-10.0.26100-SP0
|
| 31 |
+
program: C:\Users\Legion\desktop\distilbert-sentiment\main.py
|
| 32 |
+
python: CPython 3.13.5
|
| 33 |
+
root: C:\Users\Legion\desktop\distilbert-sentiment
|
| 34 |
+
startedAt: "2025-07-20T10:23:38.923772Z"
|
| 35 |
+
writerId: fshn6fq4d357dfamunx9x96y44pdzcc6
|
| 36 |
+
m:
|
| 37 |
+
- "1": train/global_step
|
| 38 |
+
"6":
|
| 39 |
+
- 3
|
| 40 |
+
"7": []
|
| 41 |
+
- "2": '*'
|
| 42 |
+
"5": 1
|
| 43 |
+
"6":
|
| 44 |
+
- 1
|
| 45 |
+
"7": []
|
| 46 |
+
python_version: 3.13.5
|
| 47 |
+
t:
|
| 48 |
+
"1":
|
| 49 |
+
- 1
|
| 50 |
+
- 5
|
| 51 |
+
- 11
|
| 52 |
+
- 41
|
| 53 |
+
- 49
|
| 54 |
+
- 51
|
| 55 |
+
- 53
|
| 56 |
+
- 71
|
| 57 |
+
- 105
|
| 58 |
+
"2":
|
| 59 |
+
- 1
|
| 60 |
+
- 5
|
| 61 |
+
- 11
|
| 62 |
+
- 41
|
| 63 |
+
- 49
|
| 64 |
+
- 51
|
| 65 |
+
- 53
|
| 66 |
+
- 71
|
| 67 |
+
- 105
|
| 68 |
+
"3":
|
| 69 |
+
- 7
|
| 70 |
+
- 13
|
| 71 |
+
- 19
|
| 72 |
+
- 62
|
| 73 |
+
- 66
|
| 74 |
+
"4": 3.13.5
|
| 75 |
+
"5": 0.21.0
|
| 76 |
+
"6": 4.53.2
|
| 77 |
+
"8":
|
| 78 |
+
- 3
|
| 79 |
+
"9":
|
| 80 |
+
"1": transformers_trainer
|
| 81 |
+
"12": 0.21.0
|
| 82 |
+
"13": windows-amd64
|
| 83 |
+
accelerator_config:
|
| 84 |
+
value:
|
| 85 |
+
dispatch_batches: null
|
| 86 |
+
even_batches: true
|
| 87 |
+
gradient_accumulation_kwargs: null
|
| 88 |
+
non_blocking: false
|
| 89 |
+
split_batches: false
|
| 90 |
+
use_seedable_sampler: true
|
| 91 |
+
activation:
|
| 92 |
+
value: gelu
|
| 93 |
+
adafactor:
|
| 94 |
+
value: false
|
| 95 |
+
adam_beta1:
|
| 96 |
+
value: 0.9
|
| 97 |
+
adam_beta2:
|
| 98 |
+
value: 0.999
|
| 99 |
+
adam_epsilon:
|
| 100 |
+
value: 1e-08
|
| 101 |
+
add_cross_attention:
|
| 102 |
+
value: false
|
| 103 |
+
architectures:
|
| 104 |
+
value:
|
| 105 |
+
- DistilBertForMaskedLM
|
| 106 |
+
attention_dropout:
|
| 107 |
+
value: 0.1
|
| 108 |
+
auto_find_batch_size:
|
| 109 |
+
value: false
|
| 110 |
+
average_tokens_across_devices:
|
| 111 |
+
value: false
|
| 112 |
+
bad_words_ids:
|
| 113 |
+
value: null
|
| 114 |
+
batch_eval_metrics:
|
| 115 |
+
value: false
|
| 116 |
+
begin_suppress_tokens:
|
| 117 |
+
value: null
|
| 118 |
+
bf16:
|
| 119 |
+
value: false
|
| 120 |
+
bf16_full_eval:
|
| 121 |
+
value: false
|
| 122 |
+
bos_token_id:
|
| 123 |
+
value: null
|
| 124 |
+
chunk_size_feed_forward:
|
| 125 |
+
value: 0
|
| 126 |
+
cross_attention_hidden_size:
|
| 127 |
+
value: null
|
| 128 |
+
data_seed:
|
| 129 |
+
value: null
|
| 130 |
+
dataloader_drop_last:
|
| 131 |
+
value: false
|
| 132 |
+
dataloader_num_workers:
|
| 133 |
+
value: 0
|
| 134 |
+
dataloader_persistent_workers:
|
| 135 |
+
value: false
|
| 136 |
+
dataloader_pin_memory:
|
| 137 |
+
value: true
|
| 138 |
+
dataloader_prefetch_factor:
|
| 139 |
+
value: null
|
| 140 |
+
ddp_backend:
|
| 141 |
+
value: null
|
| 142 |
+
ddp_broadcast_buffers:
|
| 143 |
+
value: null
|
| 144 |
+
ddp_bucket_cap_mb:
|
| 145 |
+
value: null
|
| 146 |
+
ddp_find_unused_parameters:
|
| 147 |
+
value: null
|
| 148 |
+
ddp_timeout:
|
| 149 |
+
value: 1800
|
| 150 |
+
debug:
|
| 151 |
+
value: []
|
| 152 |
+
decoder_start_token_id:
|
| 153 |
+
value: null
|
| 154 |
+
deepspeed:
|
| 155 |
+
value: null
|
| 156 |
+
dim:
|
| 157 |
+
value: 768
|
| 158 |
+
disable_tqdm:
|
| 159 |
+
value: true
|
| 160 |
+
diversity_penalty:
|
| 161 |
+
value: 0
|
| 162 |
+
do_eval:
|
| 163 |
+
value: true
|
| 164 |
+
do_predict:
|
| 165 |
+
value: false
|
| 166 |
+
do_sample:
|
| 167 |
+
value: false
|
| 168 |
+
do_train:
|
| 169 |
+
value: false
|
| 170 |
+
dropout:
|
| 171 |
+
value: 0.1
|
| 172 |
+
early_stopping:
|
| 173 |
+
value: false
|
| 174 |
+
encoder_no_repeat_ngram_size:
|
| 175 |
+
value: 0
|
| 176 |
+
eos_token_id:
|
| 177 |
+
value: null
|
| 178 |
+
eval_accumulation_steps:
|
| 179 |
+
value: null
|
| 180 |
+
eval_delay:
|
| 181 |
+
value: 0
|
| 182 |
+
eval_do_concat_batches:
|
| 183 |
+
value: true
|
| 184 |
+
eval_on_start:
|
| 185 |
+
value: false
|
| 186 |
+
eval_steps:
|
| 187 |
+
value: null
|
| 188 |
+
eval_strategy:
|
| 189 |
+
value: epoch
|
| 190 |
+
eval_use_gather_object:
|
| 191 |
+
value: false
|
| 192 |
+
exponential_decay_length_penalty:
|
| 193 |
+
value: null
|
| 194 |
+
finetuning_task:
|
| 195 |
+
value: null
|
| 196 |
+
forced_bos_token_id:
|
| 197 |
+
value: null
|
| 198 |
+
forced_eos_token_id:
|
| 199 |
+
value: null
|
| 200 |
+
fp16:
|
| 201 |
+
value: false
|
| 202 |
+
fp16_backend:
|
| 203 |
+
value: auto
|
| 204 |
+
fp16_full_eval:
|
| 205 |
+
value: false
|
| 206 |
+
fp16_opt_level:
|
| 207 |
+
value: O1
|
| 208 |
+
fsdp:
|
| 209 |
+
value: []
|
| 210 |
+
fsdp_config:
|
| 211 |
+
value:
|
| 212 |
+
min_num_params: 0
|
| 213 |
+
xla: false
|
| 214 |
+
xla_fsdp_grad_ckpt: false
|
| 215 |
+
xla_fsdp_v2: false
|
| 216 |
+
fsdp_min_num_params:
|
| 217 |
+
value: 0
|
| 218 |
+
fsdp_transformer_layer_cls_to_wrap:
|
| 219 |
+
value: null
|
| 220 |
+
full_determinism:
|
| 221 |
+
value: false
|
| 222 |
+
gradient_accumulation_steps:
|
| 223 |
+
value: 2
|
| 224 |
+
gradient_checkpointing:
|
| 225 |
+
value: false
|
| 226 |
+
gradient_checkpointing_kwargs:
|
| 227 |
+
value: null
|
| 228 |
+
greater_is_better:
|
| 229 |
+
value: true
|
| 230 |
+
group_by_length:
|
| 231 |
+
value: false
|
| 232 |
+
half_precision_backend:
|
| 233 |
+
value: auto
|
| 234 |
+
hidden_dim:
|
| 235 |
+
value: 3072
|
| 236 |
+
hub_always_push:
|
| 237 |
+
value: false
|
| 238 |
+
hub_model_id:
|
| 239 |
+
value: null
|
| 240 |
+
hub_private_repo:
|
| 241 |
+
value: null
|
| 242 |
+
hub_revision:
|
| 243 |
+
value: null
|
| 244 |
+
hub_strategy:
|
| 245 |
+
value: every_save
|
| 246 |
+
hub_token:
|
| 247 |
+
value: <HUB_TOKEN>
|
| 248 |
+
id2label:
|
| 249 |
+
value:
|
| 250 |
+
"0": LABEL_0
|
| 251 |
+
"1": LABEL_1
|
| 252 |
+
ignore_data_skip:
|
| 253 |
+
value: false
|
| 254 |
+
include_for_metrics:
|
| 255 |
+
value: []
|
| 256 |
+
include_inputs_for_metrics:
|
| 257 |
+
value: false
|
| 258 |
+
include_num_input_tokens_seen:
|
| 259 |
+
value: false
|
| 260 |
+
include_tokens_per_second:
|
| 261 |
+
value: false
|
| 262 |
+
initializer_range:
|
| 263 |
+
value: 0.02
|
| 264 |
+
is_decoder:
|
| 265 |
+
value: false
|
| 266 |
+
is_encoder_decoder:
|
| 267 |
+
value: false
|
| 268 |
+
jit_mode_eval:
|
| 269 |
+
value: false
|
| 270 |
+
label_names:
|
| 271 |
+
value: null
|
| 272 |
+
label_smoothing_factor:
|
| 273 |
+
value: 0
|
| 274 |
+
label2id:
|
| 275 |
+
value:
|
| 276 |
+
LABEL_0: 0
|
| 277 |
+
LABEL_1: 1
|
| 278 |
+
learning_rate:
|
| 279 |
+
value: 5e-05
|
| 280 |
+
length_column_name:
|
| 281 |
+
value: length
|
| 282 |
+
length_penalty:
|
| 283 |
+
value: 1
|
| 284 |
+
liger_kernel_config:
|
| 285 |
+
value: null
|
| 286 |
+
load_best_model_at_end:
|
| 287 |
+
value: true
|
| 288 |
+
local_rank:
|
| 289 |
+
value: 0
|
| 290 |
+
log_level:
|
| 291 |
+
value: passive
|
| 292 |
+
log_level_replica:
|
| 293 |
+
value: warning
|
| 294 |
+
log_on_each_node:
|
| 295 |
+
value: true
|
| 296 |
+
logging_dir:
|
| 297 |
+
value: ./logs
|
| 298 |
+
logging_first_step:
|
| 299 |
+
value: false
|
| 300 |
+
logging_nan_inf_filter:
|
| 301 |
+
value: true
|
| 302 |
+
logging_steps:
|
| 303 |
+
value: 50
|
| 304 |
+
logging_strategy:
|
| 305 |
+
value: steps
|
| 306 |
+
lr_scheduler_type:
|
| 307 |
+
value: linear
|
| 308 |
+
max_grad_norm:
|
| 309 |
+
value: 1
|
| 310 |
+
max_length:
|
| 311 |
+
value: 20
|
| 312 |
+
max_position_embeddings:
|
| 313 |
+
value: 512
|
| 314 |
+
max_steps:
|
| 315 |
+
value: -1
|
| 316 |
+
metric_for_best_model:
|
| 317 |
+
value: f1
|
| 318 |
+
min_length:
|
| 319 |
+
value: 0
|
| 320 |
+
model/num_parameters:
|
| 321 |
+
value: 66955010
|
| 322 |
+
model_type:
|
| 323 |
+
value: distilbert
|
| 324 |
+
mp_parameters:
|
| 325 |
+
value: ""
|
| 326 |
+
n_heads:
|
| 327 |
+
value: 12
|
| 328 |
+
n_layers:
|
| 329 |
+
value: 6
|
| 330 |
+
neftune_noise_alpha:
|
| 331 |
+
value: null
|
| 332 |
+
no_cuda:
|
| 333 |
+
value: false
|
| 334 |
+
no_repeat_ngram_size:
|
| 335 |
+
value: 0
|
| 336 |
+
num_beam_groups:
|
| 337 |
+
value: 1
|
| 338 |
+
num_beams:
|
| 339 |
+
value: 1
|
| 340 |
+
num_return_sequences:
|
| 341 |
+
value: 1
|
| 342 |
+
num_train_epochs:
|
| 343 |
+
value: 3
|
| 344 |
+
optim:
|
| 345 |
+
value: adamw_torch
|
| 346 |
+
optim_args:
|
| 347 |
+
value: null
|
| 348 |
+
optim_target_modules:
|
| 349 |
+
value: null
|
| 350 |
+
output_attentions:
|
| 351 |
+
value: false
|
| 352 |
+
output_dir:
|
| 353 |
+
value: ./model
|
| 354 |
+
output_hidden_states:
|
| 355 |
+
value: false
|
| 356 |
+
output_scores:
|
| 357 |
+
value: false
|
| 358 |
+
overwrite_output_dir:
|
| 359 |
+
value: false
|
| 360 |
+
pad_token_id:
|
| 361 |
+
value: 0
|
| 362 |
+
past_index:
|
| 363 |
+
value: -1
|
| 364 |
+
per_device_eval_batch_size:
|
| 365 |
+
value: 4
|
| 366 |
+
per_device_train_batch_size:
|
| 367 |
+
value: 2
|
| 368 |
+
per_gpu_eval_batch_size:
|
| 369 |
+
value: null
|
| 370 |
+
per_gpu_train_batch_size:
|
| 371 |
+
value: null
|
| 372 |
+
prediction_loss_only:
|
| 373 |
+
value: false
|
| 374 |
+
prefix:
|
| 375 |
+
value: null
|
| 376 |
+
problem_type:
|
| 377 |
+
value: null
|
| 378 |
+
push_to_hub:
|
| 379 |
+
value: false
|
| 380 |
+
push_to_hub_model_id:
|
| 381 |
+
value: null
|
| 382 |
+
push_to_hub_organization:
|
| 383 |
+
value: null
|
| 384 |
+
push_to_hub_token:
|
| 385 |
+
value: <PUSH_TO_HUB_TOKEN>
|
| 386 |
+
qa_dropout:
|
| 387 |
+
value: 0.1
|
| 388 |
+
ray_scope:
|
| 389 |
+
value: last
|
| 390 |
+
remove_invalid_values:
|
| 391 |
+
value: false
|
| 392 |
+
remove_unused_columns:
|
| 393 |
+
value: true
|
| 394 |
+
repetition_penalty:
|
| 395 |
+
value: 1
|
| 396 |
+
report_to:
|
| 397 |
+
value:
|
| 398 |
+
- wandb
|
| 399 |
+
restore_callback_states_from_checkpoint:
|
| 400 |
+
value: false
|
| 401 |
+
resume_from_checkpoint:
|
| 402 |
+
value: null
|
| 403 |
+
return_dict:
|
| 404 |
+
value: true
|
| 405 |
+
return_dict_in_generate:
|
| 406 |
+
value: false
|
| 407 |
+
run_name:
|
| 408 |
+
value: ./model
|
| 409 |
+
save_on_each_node:
|
| 410 |
+
value: false
|
| 411 |
+
save_only_model:
|
| 412 |
+
value: false
|
| 413 |
+
save_safetensors:
|
| 414 |
+
value: true
|
| 415 |
+
save_steps:
|
| 416 |
+
value: 500
|
| 417 |
+
save_strategy:
|
| 418 |
+
value: epoch
|
| 419 |
+
save_total_limit:
|
| 420 |
+
value: null
|
| 421 |
+
seed:
|
| 422 |
+
value: 42
|
| 423 |
+
sep_token_id:
|
| 424 |
+
value: null
|
| 425 |
+
seq_classif_dropout:
|
| 426 |
+
value: 0.2
|
| 427 |
+
sinusoidal_pos_embds:
|
| 428 |
+
value: false
|
| 429 |
+
skip_memory_metrics:
|
| 430 |
+
value: true
|
| 431 |
+
suppress_tokens:
|
| 432 |
+
value: null
|
| 433 |
+
task_specific_params:
|
| 434 |
+
value: null
|
| 435 |
+
temperature:
|
| 436 |
+
value: 1
|
| 437 |
+
tf_legacy_loss:
|
| 438 |
+
value: false
|
| 439 |
+
tf32:
|
| 440 |
+
value: null
|
| 441 |
+
tie_encoder_decoder:
|
| 442 |
+
value: false
|
| 443 |
+
tie_weights_:
|
| 444 |
+
value: true
|
| 445 |
+
tie_word_embeddings:
|
| 446 |
+
value: true
|
| 447 |
+
tokenizer_class:
|
| 448 |
+
value: null
|
| 449 |
+
top_k:
|
| 450 |
+
value: 50
|
| 451 |
+
top_p:
|
| 452 |
+
value: 1
|
| 453 |
+
torch_compile:
|
| 454 |
+
value: false
|
| 455 |
+
torch_compile_backend:
|
| 456 |
+
value: null
|
| 457 |
+
torch_compile_mode:
|
| 458 |
+
value: null
|
| 459 |
+
torch_dtype:
|
| 460 |
+
value: float32
|
| 461 |
+
torch_empty_cache_steps:
|
| 462 |
+
value: null
|
| 463 |
+
torchdynamo:
|
| 464 |
+
value: null
|
| 465 |
+
torchscript:
|
| 466 |
+
value: false
|
| 467 |
+
tpu_metrics_debug:
|
| 468 |
+
value: false
|
| 469 |
+
tpu_num_cores:
|
| 470 |
+
value: null
|
| 471 |
+
transformers_version:
|
| 472 |
+
value: 4.53.2
|
| 473 |
+
typical_p:
|
| 474 |
+
value: 1
|
| 475 |
+
use_bfloat16:
|
| 476 |
+
value: false
|
| 477 |
+
use_cpu:
|
| 478 |
+
value: false
|
| 479 |
+
use_ipex:
|
| 480 |
+
value: false
|
| 481 |
+
use_legacy_prediction_loop:
|
| 482 |
+
value: false
|
| 483 |
+
use_liger_kernel:
|
| 484 |
+
value: false
|
| 485 |
+
use_mps_device:
|
| 486 |
+
value: false
|
| 487 |
+
vocab_size:
|
| 488 |
+
value: 30522
|
| 489 |
+
warmup_ratio:
|
| 490 |
+
value: 0
|
| 491 |
+
warmup_steps:
|
| 492 |
+
value: 0
|
| 493 |
+
weight_decay:
|
| 494 |
+
value: 0
|
wandb/run-20250720_155338-0h3fksuy/files/output.log
ADDED
|
@@ -0,0 +1,398 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{'loss': 0.6669, 'grad_norm': 2.2732439041137695, 'learning_rate': 4.9869333333333334e-05, 'epoch': 0.008}
|
| 2 |
+
{'loss': 0.5064, 'grad_norm': 1.3875774145126343, 'learning_rate': 4.9736000000000006e-05, 'epoch': 0.016}
|
| 3 |
+
{'loss': 0.5648, 'grad_norm': 8.71848201751709, 'learning_rate': 4.960266666666667e-05, 'epoch': 0.024}
|
| 4 |
+
{'loss': 0.5271, 'grad_norm': 12.96942138671875, 'learning_rate': 4.946933333333333e-05, 'epoch': 0.032}
|
| 5 |
+
{'loss': 0.5786, 'grad_norm': 10.856029510498047, 'learning_rate': 4.9336e-05, 'epoch': 0.04}
|
| 6 |
+
{'loss': 0.5966, 'grad_norm': 27.62953758239746, 'learning_rate': 4.920266666666667e-05, 'epoch': 0.048}
|
| 7 |
+
{'loss': 0.4653, 'grad_norm': 23.872642517089844, 'learning_rate': 4.9069333333333335e-05, 'epoch': 0.056}
|
| 8 |
+
{'loss': 0.4805, 'grad_norm': 0.23732933402061462, 'learning_rate': 4.893600000000001e-05, 'epoch': 0.064}
|
| 9 |
+
{'loss': 0.5354, 'grad_norm': 20.000877380371094, 'learning_rate': 4.8802666666666666e-05, 'epoch': 0.072}
|
| 10 |
+
{'loss': 0.5105, 'grad_norm': 53.1633415222168, 'learning_rate': 4.866933333333333e-05, 'epoch': 0.08}
|
| 11 |
+
{'loss': 0.6534, 'grad_norm': 13.990724563598633, 'learning_rate': 4.8536000000000004e-05, 'epoch': 0.088}
|
| 12 |
+
{'loss': 0.4999, 'grad_norm': 4.82359504699707, 'learning_rate': 4.840266666666667e-05, 'epoch': 0.096}
|
| 13 |
+
{'loss': 0.5484, 'grad_norm': 0.24129296839237213, 'learning_rate': 4.8269333333333336e-05, 'epoch': 0.104}
|
| 14 |
+
{'loss': 0.4312, 'grad_norm': 0.23947849869728088, 'learning_rate': 4.8136e-05, 'epoch': 0.112}
|
| 15 |
+
{'loss': 0.6295, 'grad_norm': 9.900238037109375, 'learning_rate': 4.800266666666667e-05, 'epoch': 0.12}
|
| 16 |
+
{'loss': 0.5561, 'grad_norm': 52.11263656616211, 'learning_rate': 4.786933333333334e-05, 'epoch': 0.128}
|
| 17 |
+
{'loss': 0.5402, 'grad_norm': 0.6357279419898987, 'learning_rate': 4.7736000000000005e-05, 'epoch': 0.136}
|
| 18 |
+
{'loss': 0.4374, 'grad_norm': 0.25493714213371277, 'learning_rate': 4.760266666666667e-05, 'epoch': 0.144}
|
| 19 |
+
{'loss': 0.693, 'grad_norm': 0.580426812171936, 'learning_rate': 4.7469333333333336e-05, 'epoch': 0.152}
|
| 20 |
+
{'loss': 0.4743, 'grad_norm': 26.836181640625, 'learning_rate': 4.7336e-05, 'epoch': 0.16}
|
| 21 |
+
{'loss': 0.5578, 'grad_norm': 66.35722351074219, 'learning_rate': 4.720266666666667e-05, 'epoch': 0.168}
|
| 22 |
+
{'loss': 0.4348, 'grad_norm': 29.878002166748047, 'learning_rate': 4.706933333333334e-05, 'epoch': 0.176}
|
| 23 |
+
{'loss': 0.5593, 'grad_norm': 5.273430824279785, 'learning_rate': 4.6936e-05, 'epoch': 0.184}
|
| 24 |
+
{'loss': 0.446, 'grad_norm': 113.90290069580078, 'learning_rate': 4.6802666666666665e-05, 'epoch': 0.192}
|
| 25 |
+
{'loss': 0.481, 'grad_norm': 6.638715744018555, 'learning_rate': 4.666933333333334e-05, 'epoch': 0.2}
|
| 26 |
+
{'loss': 0.5115, 'grad_norm': 46.855735778808594, 'learning_rate': 4.6536e-05, 'epoch': 0.208}
|
| 27 |
+
{'loss': 0.5586, 'grad_norm': 18.6956729888916, 'learning_rate': 4.640266666666667e-05, 'epoch': 0.216}
|
| 28 |
+
{'loss': 0.3987, 'grad_norm': 7.595647811889648, 'learning_rate': 4.6269333333333334e-05, 'epoch': 0.224}
|
| 29 |
+
{'loss': 0.482, 'grad_norm': 8.240407943725586, 'learning_rate': 4.6136e-05, 'epoch': 0.232}
|
| 30 |
+
{'loss': 0.3789, 'grad_norm': 0.290462851524353, 'learning_rate': 4.6002666666666666e-05, 'epoch': 0.24}
|
| 31 |
+
{'loss': 0.5113, 'grad_norm': 11.75820541381836, 'learning_rate': 4.586933333333334e-05, 'epoch': 0.248}
|
| 32 |
+
{'loss': 0.4607, 'grad_norm': 11.622576713562012, 'learning_rate': 4.5736000000000004e-05, 'epoch': 0.256}
|
| 33 |
+
{'loss': 0.4786, 'grad_norm': 9.230450630187988, 'learning_rate': 4.560266666666667e-05, 'epoch': 0.264}
|
| 34 |
+
{'loss': 0.4143, 'grad_norm': 0.15386007726192474, 'learning_rate': 4.5469333333333335e-05, 'epoch': 0.272}
|
| 35 |
+
{'loss': 0.4111, 'grad_norm': 5.873915672302246, 'learning_rate': 4.5336e-05, 'epoch': 0.28}
|
| 36 |
+
{'loss': 0.3835, 'grad_norm': 1.5295137166976929, 'learning_rate': 4.5202666666666673e-05, 'epoch': 0.288}
|
| 37 |
+
{'loss': 0.4735, 'grad_norm': 3.8919050693511963, 'learning_rate': 4.506933333333333e-05, 'epoch': 0.296}
|
| 38 |
+
{'loss': 0.4442, 'grad_norm': 1.7330166101455688, 'learning_rate': 4.4936e-05, 'epoch': 0.304}
|
| 39 |
+
{'loss': 0.3383, 'grad_norm': 4.891812324523926, 'learning_rate': 4.480266666666667e-05, 'epoch': 0.312}
|
| 40 |
+
{'loss': 0.5515, 'grad_norm': 91.70783233642578, 'learning_rate': 4.4669333333333336e-05, 'epoch': 0.32}
|
| 41 |
+
{'loss': 0.498, 'grad_norm': 19.019271850585938, 'learning_rate': 4.4536e-05, 'epoch': 0.328}
|
| 42 |
+
{'loss': 0.4775, 'grad_norm': 1.9273958206176758, 'learning_rate': 4.440266666666667e-05, 'epoch': 0.336}
|
| 43 |
+
{'loss': 0.5587, 'grad_norm': 0.28679159283638, 'learning_rate': 4.426933333333333e-05, 'epoch': 0.344}
|
| 44 |
+
{'loss': 0.2665, 'grad_norm': 30.908130645751953, 'learning_rate': 4.4136e-05, 'epoch': 0.352}
|
| 45 |
+
{'loss': 0.3657, 'grad_norm': 28.822193145751953, 'learning_rate': 4.400266666666667e-05, 'epoch': 0.36}
|
| 46 |
+
{'loss': 0.5237, 'grad_norm': 17.60547637939453, 'learning_rate': 4.386933333333334e-05, 'epoch': 0.368}
|
| 47 |
+
{'loss': 0.5005, 'grad_norm': 67.82170104980469, 'learning_rate': 4.3736e-05, 'epoch': 0.376}
|
| 48 |
+
{'loss': 0.5195, 'grad_norm': 0.14335760474205017, 'learning_rate': 4.360266666666667e-05, 'epoch': 0.384}
|
| 49 |
+
{'loss': 0.3884, 'grad_norm': 0.36686429381370544, 'learning_rate': 4.3469333333333334e-05, 'epoch': 0.392}
|
| 50 |
+
{'loss': 0.4424, 'grad_norm': 5.366738796234131, 'learning_rate': 4.3336000000000007e-05, 'epoch': 0.4}
|
| 51 |
+
{'loss': 0.5222, 'grad_norm': 20.56273651123047, 'learning_rate': 4.320266666666667e-05, 'epoch': 0.408}
|
| 52 |
+
{'loss': 0.6078, 'grad_norm': 5.502252578735352, 'learning_rate': 4.306933333333333e-05, 'epoch': 0.416}
|
| 53 |
+
{'loss': 0.5062, 'grad_norm': 16.406768798828125, 'learning_rate': 4.2936000000000004e-05, 'epoch': 0.424}
|
| 54 |
+
{'loss': 0.3554, 'grad_norm': 0.1537816971540451, 'learning_rate': 4.280266666666667e-05, 'epoch': 0.432}
|
| 55 |
+
{'loss': 0.4329, 'grad_norm': 35.78837966918945, 'learning_rate': 4.2669333333333335e-05, 'epoch': 0.44}
|
| 56 |
+
{'loss': 0.4557, 'grad_norm': 8.288016319274902, 'learning_rate': 4.2536e-05, 'epoch': 0.448}
|
| 57 |
+
{'loss': 0.4099, 'grad_norm': 0.16398730874061584, 'learning_rate': 4.2402666666666666e-05, 'epoch': 0.456}
|
| 58 |
+
{'loss': 0.5485, 'grad_norm': 1.4127204418182373, 'learning_rate': 4.226933333333333e-05, 'epoch': 0.464}
|
| 59 |
+
{'loss': 0.4307, 'grad_norm': 0.23355980217456818, 'learning_rate': 4.2136000000000005e-05, 'epoch': 0.472}
|
| 60 |
+
{'loss': 0.422, 'grad_norm': 22.04464340209961, 'learning_rate': 4.200266666666667e-05, 'epoch': 0.48}
|
| 61 |
+
{'loss': 0.3782, 'grad_norm': 0.1723032295703888, 'learning_rate': 4.1869333333333336e-05, 'epoch': 0.488}
|
| 62 |
+
{'loss': 0.5829, 'grad_norm': 8.341532707214355, 'learning_rate': 4.1736e-05, 'epoch': 0.496}
|
| 63 |
+
{'loss': 0.3045, 'grad_norm': 8.966949462890625, 'learning_rate': 4.160266666666667e-05, 'epoch': 0.504}
|
| 64 |
+
{'loss': 0.5763, 'grad_norm': 0.5718376636505127, 'learning_rate': 4.146933333333334e-05, 'epoch': 0.512}
|
| 65 |
+
{'loss': 0.4403, 'grad_norm': 26.9694881439209, 'learning_rate': 4.1336000000000005e-05, 'epoch': 0.52}
|
| 66 |
+
{'loss': 0.4884, 'grad_norm': 49.227210998535156, 'learning_rate': 4.1202666666666664e-05, 'epoch': 0.528}
|
| 67 |
+
{'loss': 0.4022, 'grad_norm': 11.67745304107666, 'learning_rate': 4.106933333333334e-05, 'epoch': 0.536}
|
| 68 |
+
{'loss': 0.5127, 'grad_norm': 35.11846160888672, 'learning_rate': 4.0936e-05, 'epoch': 0.544}
|
| 69 |
+
{'loss': 0.3214, 'grad_norm': 0.11048085242509842, 'learning_rate': 4.080266666666667e-05, 'epoch': 0.552}
|
| 70 |
+
{'loss': 0.4918, 'grad_norm': 0.13820067048072815, 'learning_rate': 4.0669333333333334e-05, 'epoch': 0.56}
|
| 71 |
+
{'loss': 0.4171, 'grad_norm': 0.25762712955474854, 'learning_rate': 4.0536e-05, 'epoch': 0.568}
|
| 72 |
+
{'loss': 0.3955, 'grad_norm': 7.20747709274292, 'learning_rate': 4.0402666666666665e-05, 'epoch': 0.576}
|
| 73 |
+
{'loss': 0.4939, 'grad_norm': 9.815940856933594, 'learning_rate': 4.026933333333334e-05, 'epoch': 0.584}
|
| 74 |
+
{'loss': 0.4533, 'grad_norm': 1.6333082914352417, 'learning_rate': 4.0136e-05, 'epoch': 0.592}
|
| 75 |
+
{'loss': 0.5392, 'grad_norm': 17.70346450805664, 'learning_rate': 4.000266666666667e-05, 'epoch': 0.6}
|
| 76 |
+
{'loss': 0.3454, 'grad_norm': 0.13321377336978912, 'learning_rate': 3.9869333333333335e-05, 'epoch': 0.608}
|
| 77 |
+
{'loss': 0.5585, 'grad_norm': 14.663485527038574, 'learning_rate': 3.9736e-05, 'epoch': 0.616}
|
| 78 |
+
{'loss': 0.4151, 'grad_norm': 19.313182830810547, 'learning_rate': 3.960266666666667e-05, 'epoch': 0.624}
|
| 79 |
+
{'loss': 0.4268, 'grad_norm': 1.0081754922866821, 'learning_rate': 3.946933333333334e-05, 'epoch': 0.632}
|
| 80 |
+
{'loss': 0.4791, 'grad_norm': 16.18073081970215, 'learning_rate': 3.9336e-05, 'epoch': 0.64}
|
| 81 |
+
{'loss': 0.3551, 'grad_norm': 13.099615097045898, 'learning_rate': 3.920266666666667e-05, 'epoch': 0.648}
|
| 82 |
+
{'loss': 0.4028, 'grad_norm': 0.2873060703277588, 'learning_rate': 3.9069333333333336e-05, 'epoch': 0.656}
|
| 83 |
+
{'loss': 0.4578, 'grad_norm': 6.123228073120117, 'learning_rate': 3.8936e-05, 'epoch': 0.664}
|
| 84 |
+
{'loss': 0.3384, 'grad_norm': 15.485557556152344, 'learning_rate': 3.8802666666666674e-05, 'epoch': 0.672}
|
| 85 |
+
{'loss': 0.4599, 'grad_norm': 0.3142613470554352, 'learning_rate': 3.866933333333333e-05, 'epoch': 0.68}
|
| 86 |
+
{'loss': 0.5153, 'grad_norm': 0.08679840713739395, 'learning_rate': 3.8536e-05, 'epoch': 0.688}
|
| 87 |
+
{'loss': 0.4358, 'grad_norm': 4.982065677642822, 'learning_rate': 3.840266666666667e-05, 'epoch': 0.696}
|
| 88 |
+
{'loss': 0.3591, 'grad_norm': 0.03635261580348015, 'learning_rate': 3.8269333333333336e-05, 'epoch': 0.704}
|
| 89 |
+
{'loss': 0.3954, 'grad_norm': 60.536617279052734, 'learning_rate': 3.8136e-05, 'epoch': 0.712}
|
| 90 |
+
{'loss': 0.5115, 'grad_norm': 8.195839881896973, 'learning_rate': 3.800266666666667e-05, 'epoch': 0.72}
|
| 91 |
+
{'loss': 0.5059, 'grad_norm': 6.579557418823242, 'learning_rate': 3.7869333333333334e-05, 'epoch': 0.728}
|
| 92 |
+
{'loss': 0.4229, 'grad_norm': 0.11510950326919556, 'learning_rate': 3.7736e-05, 'epoch': 0.736}
|
| 93 |
+
{'loss': 0.3961, 'grad_norm': 77.99903869628906, 'learning_rate': 3.760266666666667e-05, 'epoch': 0.744}
|
| 94 |
+
{'loss': 0.4849, 'grad_norm': 0.16879667341709137, 'learning_rate': 3.746933333333334e-05, 'epoch': 0.752}
|
| 95 |
+
{'loss': 0.5883, 'grad_norm': 0.2887319326400757, 'learning_rate': 3.7336e-05, 'epoch': 0.76}
|
| 96 |
+
{'loss': 0.5342, 'grad_norm': 7.673031330108643, 'learning_rate': 3.720266666666667e-05, 'epoch': 0.768}
|
| 97 |
+
{'loss': 0.3816, 'grad_norm': 0.26005497574806213, 'learning_rate': 3.7069333333333334e-05, 'epoch': 0.776}
|
| 98 |
+
{'loss': 0.4011, 'grad_norm': 0.15825381875038147, 'learning_rate': 3.693600000000001e-05, 'epoch': 0.784}
|
| 99 |
+
{'loss': 0.4041, 'grad_norm': 9.251302719116211, 'learning_rate': 3.6802666666666666e-05, 'epoch': 0.792}
|
| 100 |
+
{'loss': 0.4039, 'grad_norm': 0.3029502332210541, 'learning_rate': 3.666933333333333e-05, 'epoch': 0.8}
|
| 101 |
+
{'loss': 0.4208, 'grad_norm': 12.424095153808594, 'learning_rate': 3.6536000000000004e-05, 'epoch': 0.808}
|
| 102 |
+
{'loss': 0.3997, 'grad_norm': 0.30793556571006775, 'learning_rate': 3.640266666666667e-05, 'epoch': 0.816}
|
| 103 |
+
{'loss': 0.3738, 'grad_norm': 35.160221099853516, 'learning_rate': 3.6269333333333335e-05, 'epoch': 0.824}
|
| 104 |
+
{'loss': 0.3587, 'grad_norm': 7.636879920959473, 'learning_rate': 3.6136e-05, 'epoch': 0.832}
|
| 105 |
+
{'loss': 0.3555, 'grad_norm': 25.151350021362305, 'learning_rate': 3.600266666666667e-05, 'epoch': 0.84}
|
| 106 |
+
{'loss': 0.3044, 'grad_norm': 0.08254121243953705, 'learning_rate': 3.586933333333333e-05, 'epoch': 0.848}
|
| 107 |
+
{'loss': 0.4428, 'grad_norm': 0.3885553777217865, 'learning_rate': 3.5736000000000005e-05, 'epoch': 0.856}
|
| 108 |
+
{'loss': 0.4558, 'grad_norm': 7.154929161071777, 'learning_rate': 3.560266666666667e-05, 'epoch': 0.864}
|
| 109 |
+
{'loss': 0.4543, 'grad_norm': 21.201276779174805, 'learning_rate': 3.5469333333333336e-05, 'epoch': 0.872}
|
| 110 |
+
{'loss': 0.4821, 'grad_norm': 24.018428802490234, 'learning_rate': 3.5336e-05, 'epoch': 0.88}
|
| 111 |
+
{'loss': 0.2944, 'grad_norm': 0.04061014950275421, 'learning_rate': 3.520266666666667e-05, 'epoch': 0.888}
|
| 112 |
+
{'loss': 0.5307, 'grad_norm': 0.3313358426094055, 'learning_rate': 3.506933333333334e-05, 'epoch': 0.896}
|
| 113 |
+
{'loss': 0.4825, 'grad_norm': 0.49799197912216187, 'learning_rate': 3.4936e-05, 'epoch': 0.904}
|
| 114 |
+
{'loss': 0.3783, 'grad_norm': 7.0047712326049805, 'learning_rate': 3.4802666666666665e-05, 'epoch': 0.912}
|
| 115 |
+
{'loss': 0.3574, 'grad_norm': 0.4024333953857422, 'learning_rate': 3.466933333333334e-05, 'epoch': 0.92}
|
| 116 |
+
{'loss': 0.4618, 'grad_norm': 0.1282418966293335, 'learning_rate': 3.4536e-05, 'epoch': 0.928}
|
| 117 |
+
{'loss': 0.2572, 'grad_norm': 7.137022018432617, 'learning_rate': 3.440266666666667e-05, 'epoch': 0.936}
|
| 118 |
+
{'loss': 0.3708, 'grad_norm': 14.057695388793945, 'learning_rate': 3.4269333333333334e-05, 'epoch': 0.944}
|
| 119 |
+
{'loss': 0.3674, 'grad_norm': 0.06444621086120605, 'learning_rate': 3.4136e-05, 'epoch': 0.952}
|
| 120 |
+
{'loss': 0.3021, 'grad_norm': 6.360774993896484, 'learning_rate': 3.4002666666666665e-05, 'epoch': 0.96}
|
| 121 |
+
{'loss': 0.5396, 'grad_norm': 6.526275157928467, 'learning_rate': 3.386933333333334e-05, 'epoch': 0.968}
|
| 122 |
+
{'loss': 0.3749, 'grad_norm': 1.6904962062835693, 'learning_rate': 3.3736000000000004e-05, 'epoch': 0.976}
|
| 123 |
+
{'loss': 0.4539, 'grad_norm': 0.31892621517181396, 'learning_rate': 3.360266666666666e-05, 'epoch': 0.984}
|
| 124 |
+
{'loss': 0.4297, 'grad_norm': 0.15581363439559937, 'learning_rate': 3.3469333333333335e-05, 'epoch': 0.992}
|
| 125 |
+
{'loss': 0.347, 'grad_norm': 23.677379608154297, 'learning_rate': 3.3336e-05, 'epoch': 1.0}
|
| 126 |
+
{'eval_loss': 0.4047098457813263, 'eval_accuracy': 0.8872, 'eval_f1': 0.8824412206103052, 'eval_precision': 0.9213091922005571, 'eval_recall': 0.84672, 'eval_runtime': 387.6575, 'eval_samples_per_second': 64.49, 'eval_steps_per_second': 16.122, 'epoch': 1.0}
|
| 127 |
+
{'loss': 0.2086, 'grad_norm': 6.972434043884277, 'learning_rate': 3.320266666666667e-05, 'epoch': 1.008}
|
| 128 |
+
{'loss': 0.2871, 'grad_norm': 19.224170684814453, 'learning_rate': 3.306933333333334e-05, 'epoch': 1.016}
|
| 129 |
+
{'loss': 0.3594, 'grad_norm': 18.384265899658203, 'learning_rate': 3.2936e-05, 'epoch': 1.024}
|
| 130 |
+
{'loss': 0.2588, 'grad_norm': 0.04717381298542023, 'learning_rate': 3.280266666666667e-05, 'epoch': 1.032}
|
| 131 |
+
{'loss': 0.2305, 'grad_norm': 0.06328094005584717, 'learning_rate': 3.2669333333333336e-05, 'epoch': 1.04}
|
| 132 |
+
{'loss': 0.2759, 'grad_norm': 19.435848236083984, 'learning_rate': 3.2536e-05, 'epoch': 1.048}
|
| 133 |
+
{'loss': 0.31, 'grad_norm': 23.59215545654297, 'learning_rate': 3.240266666666667e-05, 'epoch': 1.056}
|
| 134 |
+
{'loss': 0.2949, 'grad_norm': 0.08341117948293686, 'learning_rate': 3.226933333333333e-05, 'epoch': 1.064}
|
| 135 |
+
{'loss': 0.3302, 'grad_norm': 6.222505569458008, 'learning_rate': 3.2136e-05, 'epoch': 1.072}
|
| 136 |
+
{'loss': 0.2555, 'grad_norm': 0.12312066555023193, 'learning_rate': 3.200266666666667e-05, 'epoch': 1.08}
|
| 137 |
+
{'loss': 0.2143, 'grad_norm': 0.026080487295985222, 'learning_rate': 3.186933333333334e-05, 'epoch': 1.088}
|
| 138 |
+
{'loss': 0.2562, 'grad_norm': 0.078591488301754, 'learning_rate': 3.1736e-05, 'epoch': 1.096}
|
| 139 |
+
{'loss': 0.2386, 'grad_norm': 4.485652923583984, 'learning_rate': 3.160266666666667e-05, 'epoch': 1.104}
|
| 140 |
+
{'loss': 0.2672, 'grad_norm': 0.03333387151360512, 'learning_rate': 3.1469333333333334e-05, 'epoch': 1.112}
|
| 141 |
+
{'loss': 0.3072, 'grad_norm': 6.5600905418396, 'learning_rate': 3.1336000000000006e-05, 'epoch': 1.12}
|
| 142 |
+
{'loss': 0.1519, 'grad_norm': 0.8171893954277039, 'learning_rate': 3.120266666666667e-05, 'epoch': 1.1280000000000001}
|
| 143 |
+
{'loss': 0.201, 'grad_norm': 0.036586660891771317, 'learning_rate': 3.106933333333333e-05, 'epoch': 1.1360000000000001}
|
| 144 |
+
{'loss': 0.1602, 'grad_norm': 0.021485593169927597, 'learning_rate': 3.0936e-05, 'epoch': 1.144}
|
| 145 |
+
{'loss': 0.3508, 'grad_norm': 0.039208535104990005, 'learning_rate': 3.080266666666667e-05, 'epoch': 1.152}
|
| 146 |
+
{'loss': 0.2754, 'grad_norm': 0.05171596258878708, 'learning_rate': 3.0669333333333335e-05, 'epoch': 1.16}
|
| 147 |
+
{'loss': 0.2923, 'grad_norm': 0.48278653621673584, 'learning_rate': 3.0536e-05, 'epoch': 1.168}
|
| 148 |
+
{'loss': 0.2007, 'grad_norm': 0.03526414930820465, 'learning_rate': 3.040266666666667e-05, 'epoch': 1.176}
|
| 149 |
+
{'loss': 0.2515, 'grad_norm': 0.06429073214530945, 'learning_rate': 3.0269333333333332e-05, 'epoch': 1.184}
|
| 150 |
+
{'loss': 0.4943, 'grad_norm': 0.37491822242736816, 'learning_rate': 3.0136000000000004e-05, 'epoch': 1.192}
|
| 151 |
+
{'loss': 0.3227, 'grad_norm': 0.2309693992137909, 'learning_rate': 3.0002666666666666e-05, 'epoch': 1.2}
|
| 152 |
+
{'loss': 0.2738, 'grad_norm': 0.12286447733640671, 'learning_rate': 2.9869333333333332e-05, 'epoch': 1.208}
|
| 153 |
+
{'loss': 0.2563, 'grad_norm': 0.04936458542943001, 'learning_rate': 2.9736e-05, 'epoch': 1.216}
|
| 154 |
+
{'loss': 0.3207, 'grad_norm': 0.4713517725467682, 'learning_rate': 2.9602666666666667e-05, 'epoch': 1.224}
|
| 155 |
+
{'loss': 0.2795, 'grad_norm': 5.336559295654297, 'learning_rate': 2.9469333333333333e-05, 'epoch': 1.232}
|
| 156 |
+
{'loss': 0.3249, 'grad_norm': 1.1011492013931274, 'learning_rate': 2.9336000000000002e-05, 'epoch': 1.24}
|
| 157 |
+
{'loss': 0.3201, 'grad_norm': 8.649012565612793, 'learning_rate': 2.9202666666666667e-05, 'epoch': 1.248}
|
| 158 |
+
{'loss': 0.2676, 'grad_norm': 0.059582602232694626, 'learning_rate': 2.9069333333333336e-05, 'epoch': 1.256}
|
| 159 |
+
{'loss': 0.1626, 'grad_norm': 0.08991962671279907, 'learning_rate': 2.8936000000000002e-05, 'epoch': 1.264}
|
| 160 |
+
{'loss': 0.1377, 'grad_norm': 0.03133632242679596, 'learning_rate': 2.8802666666666668e-05, 'epoch': 1.272}
|
| 161 |
+
{'loss': 0.3352, 'grad_norm': 0.08367053419351578, 'learning_rate': 2.8669333333333337e-05, 'epoch': 1.28}
|
| 162 |
+
{'loss': 0.2317, 'grad_norm': 0.022324958816170692, 'learning_rate': 2.8536000000000003e-05, 'epoch': 1.288}
|
| 163 |
+
{'loss': 0.2879, 'grad_norm': 0.0481320321559906, 'learning_rate': 2.8402666666666665e-05, 'epoch': 1.296}
|
| 164 |
+
{'loss': 0.3309, 'grad_norm': 0.05768590420484543, 'learning_rate': 2.8269333333333337e-05, 'epoch': 1.304}
|
| 165 |
+
{'loss': 0.3335, 'grad_norm': 0.3378739356994629, 'learning_rate': 2.8136e-05, 'epoch': 1.312}
|
| 166 |
+
{'loss': 0.1505, 'grad_norm': 0.04841599985957146, 'learning_rate': 2.8002666666666665e-05, 'epoch': 1.32}
|
| 167 |
+
{'loss': 0.3089, 'grad_norm': 0.0761469379067421, 'learning_rate': 2.7869333333333338e-05, 'epoch': 1.328}
|
| 168 |
+
{'loss': 0.3643, 'grad_norm': 0.7006823420524597, 'learning_rate': 2.7736e-05, 'epoch': 1.336}
|
| 169 |
+
{'loss': 0.2356, 'grad_norm': 12.694981575012207, 'learning_rate': 2.7602666666666666e-05, 'epoch': 1.3439999999999999}
|
| 170 |
+
{'loss': 0.3755, 'grad_norm': 8.449514389038086, 'learning_rate': 2.7469333333333335e-05, 'epoch': 1.3519999999999999}
|
| 171 |
+
{'loss': 0.1797, 'grad_norm': 0.21266134083271027, 'learning_rate': 2.7336e-05, 'epoch': 1.3599999999999999}
|
| 172 |
+
{'loss': 0.2732, 'grad_norm': 0.2098928540945053, 'learning_rate': 2.720266666666667e-05, 'epoch': 1.3679999999999999}
|
| 173 |
+
{'loss': 0.2037, 'grad_norm': 0.09150354564189911, 'learning_rate': 2.7069333333333335e-05, 'epoch': 1.376}
|
| 174 |
+
{'loss': 0.2829, 'grad_norm': 0.0541178435087204, 'learning_rate': 2.6936e-05, 'epoch': 1.384}
|
| 175 |
+
{'loss': 0.3053, 'grad_norm': 0.387103408575058, 'learning_rate': 2.680266666666667e-05, 'epoch': 1.392}
|
| 176 |
+
{'loss': 0.3136, 'grad_norm': 0.058676812797784805, 'learning_rate': 2.6669333333333336e-05, 'epoch': 1.4}
|
| 177 |
+
{'loss': 0.2385, 'grad_norm': 0.05689304694533348, 'learning_rate': 2.6536e-05, 'epoch': 1.408}
|
| 178 |
+
{'loss': 0.2023, 'grad_norm': 0.05833113566040993, 'learning_rate': 2.640266666666667e-05, 'epoch': 1.416}
|
| 179 |
+
{'loss': 0.1863, 'grad_norm': 0.5199909806251526, 'learning_rate': 2.6269333333333336e-05, 'epoch': 1.424}
|
| 180 |
+
{'loss': 0.1431, 'grad_norm': 0.0910777747631073, 'learning_rate': 2.6136e-05, 'epoch': 1.432}
|
| 181 |
+
{'loss': 0.2813, 'grad_norm': 0.19901159405708313, 'learning_rate': 2.600266666666667e-05, 'epoch': 1.44}
|
| 182 |
+
{'loss': 0.2673, 'grad_norm': 20.136131286621094, 'learning_rate': 2.5869333333333333e-05, 'epoch': 1.448}
|
| 183 |
+
{'loss': 0.2245, 'grad_norm': 78.39840698242188, 'learning_rate': 2.5736e-05, 'epoch': 1.456}
|
| 184 |
+
{'loss': 0.3301, 'grad_norm': 27.54892921447754, 'learning_rate': 2.5602666666666668e-05, 'epoch': 1.464}
|
| 185 |
+
{'loss': 0.255, 'grad_norm': 0.4987935423851013, 'learning_rate': 2.5469333333333334e-05, 'epoch': 1.472}
|
| 186 |
+
{'loss': 0.2217, 'grad_norm': 0.08215348422527313, 'learning_rate': 2.5336e-05, 'epoch': 1.48}
|
| 187 |
+
{'loss': 0.3774, 'grad_norm': 6.633873462677002, 'learning_rate': 2.520266666666667e-05, 'epoch': 1.488}
|
| 188 |
+
{'loss': 0.2809, 'grad_norm': 0.473483145236969, 'learning_rate': 2.5069333333333334e-05, 'epoch': 1.496}
|
| 189 |
+
{'loss': 0.1802, 'grad_norm': 0.1088651642203331, 'learning_rate': 2.4936e-05, 'epoch': 1.504}
|
| 190 |
+
{'loss': 0.3397, 'grad_norm': 0.15446412563323975, 'learning_rate': 2.480266666666667e-05, 'epoch': 1.512}
|
| 191 |
+
{'loss': 0.2506, 'grad_norm': 0.13606055080890656, 'learning_rate': 2.4669333333333335e-05, 'epoch': 1.52}
|
| 192 |
+
{'loss': 0.2989, 'grad_norm': 0.12229656428098679, 'learning_rate': 2.4536e-05, 'epoch': 1.528}
|
| 193 |
+
{'loss': 0.175, 'grad_norm': 0.09148360043764114, 'learning_rate': 2.440266666666667e-05, 'epoch': 1.536}
|
| 194 |
+
{'loss': 0.3552, 'grad_norm': 0.07437633723020554, 'learning_rate': 2.4269333333333335e-05, 'epoch': 1.544}
|
| 195 |
+
{'loss': 0.2242, 'grad_norm': 98.57760620117188, 'learning_rate': 2.4136e-05, 'epoch': 1.552}
|
| 196 |
+
{'loss': 0.2344, 'grad_norm': 0.24384742975234985, 'learning_rate': 2.4002666666666666e-05, 'epoch': 1.56}
|
| 197 |
+
{'loss': 0.2868, 'grad_norm': 0.06279865652322769, 'learning_rate': 2.3869333333333335e-05, 'epoch': 1.568}
|
| 198 |
+
{'loss': 0.2874, 'grad_norm': 0.1516159474849701, 'learning_rate': 2.3736e-05, 'epoch': 1.576}
|
| 199 |
+
{'loss': 0.1706, 'grad_norm': 0.02717330865561962, 'learning_rate': 2.3602666666666667e-05, 'epoch': 1.584}
|
| 200 |
+
{'loss': 0.3318, 'grad_norm': 2.2730720043182373, 'learning_rate': 2.3469333333333336e-05, 'epoch': 1.592}
|
| 201 |
+
{'loss': 0.2772, 'grad_norm': 0.027159368619322777, 'learning_rate': 2.3336e-05, 'epoch': 1.6}
|
| 202 |
+
{'loss': 0.2545, 'grad_norm': 0.44568705558776855, 'learning_rate': 2.3202666666666667e-05, 'epoch': 1.608}
|
| 203 |
+
{'loss': 0.3444, 'grad_norm': 17.193021774291992, 'learning_rate': 2.3069333333333333e-05, 'epoch': 1.616}
|
| 204 |
+
{'loss': 0.1768, 'grad_norm': 0.15403099358081818, 'learning_rate': 2.2936000000000002e-05, 'epoch': 1.624}
|
| 205 |
+
{'loss': 0.1226, 'grad_norm': 251.0621337890625, 'learning_rate': 2.2802666666666668e-05, 'epoch': 1.6320000000000001}
|
| 206 |
+
{'loss': 0.2795, 'grad_norm': 25.017301559448242, 'learning_rate': 2.2669333333333333e-05, 'epoch': 1.6400000000000001}
|
| 207 |
+
{'loss': 0.3253, 'grad_norm': 49.36235427856445, 'learning_rate': 2.2536000000000002e-05, 'epoch': 1.6480000000000001}
|
| 208 |
+
{'loss': 0.3206, 'grad_norm': 0.045104943215847015, 'learning_rate': 2.2402666666666668e-05, 'epoch': 1.6560000000000001}
|
| 209 |
+
{'loss': 0.2719, 'grad_norm': 0.21639679372310638, 'learning_rate': 2.2269333333333334e-05, 'epoch': 1.6640000000000001}
|
| 210 |
+
{'loss': 0.3777, 'grad_norm': 0.08187518268823624, 'learning_rate': 2.2136000000000003e-05, 'epoch': 1.6720000000000002}
|
| 211 |
+
{'loss': 0.2435, 'grad_norm': 0.08419207483530045, 'learning_rate': 2.200266666666667e-05, 'epoch': 1.6800000000000002}
|
| 212 |
+
{'loss': 0.2798, 'grad_norm': 32.25635528564453, 'learning_rate': 2.1869333333333334e-05, 'epoch': 1.688}
|
| 213 |
+
{'loss': 0.2435, 'grad_norm': 0.03352827951312065, 'learning_rate': 2.1736e-05, 'epoch': 1.696}
|
| 214 |
+
{'loss': 0.2896, 'grad_norm': 0.11488524079322815, 'learning_rate': 2.160266666666667e-05, 'epoch': 1.704}
|
| 215 |
+
{'loss': 0.137, 'grad_norm': 0.9820640087127686, 'learning_rate': 2.1469333333333335e-05, 'epoch': 1.712}
|
| 216 |
+
{'loss': 0.2503, 'grad_norm': 0.0872233659029007, 'learning_rate': 2.1336e-05, 'epoch': 1.72}
|
| 217 |
+
{'loss': 0.331, 'grad_norm': 0.07821047306060791, 'learning_rate': 2.120266666666667e-05, 'epoch': 1.728}
|
| 218 |
+
{'loss': 0.2292, 'grad_norm': 13.81276798248291, 'learning_rate': 2.1069333333333335e-05, 'epoch': 1.736}
|
| 219 |
+
{'loss': 0.2239, 'grad_norm': 14.37901782989502, 'learning_rate': 2.0936e-05, 'epoch': 1.744}
|
| 220 |
+
{'loss': 0.2351, 'grad_norm': 0.09311486035585403, 'learning_rate': 2.0802666666666666e-05, 'epoch': 1.752}
|
| 221 |
+
{'loss': 0.2493, 'grad_norm': 0.04642907530069351, 'learning_rate': 2.0669333333333336e-05, 'epoch': 1.76}
|
| 222 |
+
{'loss': 0.2468, 'grad_norm': 187.65907287597656, 'learning_rate': 2.0536e-05, 'epoch': 1.768}
|
| 223 |
+
{'loss': 0.2195, 'grad_norm': 0.3666624128818512, 'learning_rate': 2.0402666666666667e-05, 'epoch': 1.776}
|
| 224 |
+
{'loss': 0.2567, 'grad_norm': 28.931724548339844, 'learning_rate': 2.0269333333333336e-05, 'epoch': 1.784}
|
| 225 |
+
{'loss': 0.2707, 'grad_norm': 110.09719848632812, 'learning_rate': 2.0136e-05, 'epoch': 1.792}
|
| 226 |
+
{'loss': 0.2216, 'grad_norm': 0.025822747498750687, 'learning_rate': 2.0002666666666667e-05, 'epoch': 1.8}
|
| 227 |
+
{'loss': 0.165, 'grad_norm': 21.93601417541504, 'learning_rate': 1.9869333333333333e-05, 'epoch': 1.808}
|
| 228 |
+
{'loss': 0.2316, 'grad_norm': 0.23445022106170654, 'learning_rate': 1.9736000000000002e-05, 'epoch': 1.8159999999999998}
|
| 229 |
+
{'loss': 0.3018, 'grad_norm': 24.560941696166992, 'learning_rate': 1.9602666666666668e-05, 'epoch': 1.8239999999999998}
|
| 230 |
+
{'loss': 0.1176, 'grad_norm': 0.01924316957592964, 'learning_rate': 1.9469333333333333e-05, 'epoch': 1.8319999999999999}
|
| 231 |
+
{'loss': 0.3031, 'grad_norm': 0.3726535439491272, 'learning_rate': 1.9336000000000003e-05, 'epoch': 1.8399999999999999}
|
| 232 |
+
{'loss': 0.2523, 'grad_norm': 0.10653215646743774, 'learning_rate': 1.9202666666666668e-05, 'epoch': 1.8479999999999999}
|
| 233 |
+
{'loss': 0.243, 'grad_norm': 0.07101219147443771, 'learning_rate': 1.9069333333333334e-05, 'epoch': 1.8559999999999999}
|
| 234 |
+
{'loss': 0.2008, 'grad_norm': 0.12322711199522018, 'learning_rate': 1.8936e-05, 'epoch': 1.8639999999999999}
|
| 235 |
+
{'loss': 0.2249, 'grad_norm': 0.09139817208051682, 'learning_rate': 1.880266666666667e-05, 'epoch': 1.8719999999999999}
|
| 236 |
+
{'loss': 0.2285, 'grad_norm': 31.605588912963867, 'learning_rate': 1.8669333333333334e-05, 'epoch': 1.88}
|
| 237 |
+
{'loss': 0.308, 'grad_norm': 0.2888055145740509, 'learning_rate': 1.8536e-05, 'epoch': 1.888}
|
| 238 |
+
{'loss': 0.2119, 'grad_norm': 0.16984781622886658, 'learning_rate': 1.840266666666667e-05, 'epoch': 1.896}
|
| 239 |
+
{'loss': 0.1807, 'grad_norm': 0.018442299216985703, 'learning_rate': 1.8269333333333335e-05, 'epoch': 1.904}
|
| 240 |
+
{'loss': 0.2367, 'grad_norm': 0.05777069553732872, 'learning_rate': 1.8136e-05, 'epoch': 1.912}
|
| 241 |
+
{'loss': 0.1747, 'grad_norm': 0.06527545303106308, 'learning_rate': 1.8002666666666666e-05, 'epoch': 1.92}
|
| 242 |
+
{'loss': 0.3092, 'grad_norm': 0.0599406436085701, 'learning_rate': 1.7869333333333335e-05, 'epoch': 1.928}
|
| 243 |
+
{'loss': 0.3103, 'grad_norm': 113.66268157958984, 'learning_rate': 1.7736e-05, 'epoch': 1.936}
|
| 244 |
+
{'loss': 0.2114, 'grad_norm': 0.2484273612499237, 'learning_rate': 1.7602666666666667e-05, 'epoch': 1.944}
|
| 245 |
+
{'loss': 0.2138, 'grad_norm': 0.0685097873210907, 'learning_rate': 1.7469333333333336e-05, 'epoch': 1.952}
|
| 246 |
+
{'loss': 0.178, 'grad_norm': 0.08626335859298706, 'learning_rate': 1.7335999999999998e-05, 'epoch': 1.96}
|
| 247 |
+
{'loss': 0.3075, 'grad_norm': 0.18472443521022797, 'learning_rate': 1.7202666666666667e-05, 'epoch': 1.968}
|
| 248 |
+
{'loss': 0.2595, 'grad_norm': 0.09902197122573853, 'learning_rate': 1.7069333333333333e-05, 'epoch': 1.976}
|
| 249 |
+
{'loss': 0.3426, 'grad_norm': 0.10281559079885483, 'learning_rate': 1.6936000000000002e-05, 'epoch': 1.984}
|
| 250 |
+
{'loss': 0.3031, 'grad_norm': 90.46196746826172, 'learning_rate': 1.6802666666666668e-05, 'epoch': 1.992}
|
| 251 |
+
{'loss': 0.2931, 'grad_norm': 0.13644857704639435, 'learning_rate': 1.6669333333333333e-05, 'epoch': 2.0}
|
| 252 |
+
{'eval_loss': 0.4506886303424835, 'eval_accuracy': 0.89908, 'eval_f1': 0.8966195451751691, 'eval_precision': 0.9190256194876103, 'eval_recall': 0.87528, 'eval_runtime': 393.988, 'eval_samples_per_second': 63.454, 'eval_steps_per_second': 15.863, 'epoch': 2.0}
|
| 253 |
+
{'loss': 0.1133, 'grad_norm': 0.19643454253673553, 'learning_rate': 1.6536000000000002e-05, 'epoch': 2.008}
|
| 254 |
+
{'loss': 0.0441, 'grad_norm': 0.020006030797958374, 'learning_rate': 1.6402666666666665e-05, 'epoch': 2.016}
|
| 255 |
+
{'loss': 0.0669, 'grad_norm': 0.017264680936932564, 'learning_rate': 1.6269333333333334e-05, 'epoch': 2.024}
|
| 256 |
+
{'loss': 0.0532, 'grad_norm': 0.061523064970970154, 'learning_rate': 1.6136000000000003e-05, 'epoch': 2.032}
|
| 257 |
+
{'loss': 0.0882, 'grad_norm': 0.009066939353942871, 'learning_rate': 1.600266666666667e-05, 'epoch': 2.04}
|
| 258 |
+
{'loss': 0.1001, 'grad_norm': 0.03136083111166954, 'learning_rate': 1.5869333333333334e-05, 'epoch': 2.048}
|
| 259 |
+
{'loss': 0.138, 'grad_norm': 0.008202377706766129, 'learning_rate': 1.5736e-05, 'epoch': 2.056}
|
| 260 |
+
{'loss': 0.0569, 'grad_norm': 0.07132015377283096, 'learning_rate': 1.560266666666667e-05, 'epoch': 2.064}
|
| 261 |
+
{'loss': 0.1, 'grad_norm': 0.18235626816749573, 'learning_rate': 1.5469333333333335e-05, 'epoch': 2.072}
|
| 262 |
+
{'loss': 0.0579, 'grad_norm': 0.008501987904310226, 'learning_rate': 1.5336e-05, 'epoch': 2.08}
|
| 263 |
+
{'loss': 0.1893, 'grad_norm': 0.017202647402882576, 'learning_rate': 1.5202666666666668e-05, 'epoch': 2.088}
|
| 264 |
+
{'loss': 0.1071, 'grad_norm': 0.04670681431889534, 'learning_rate': 1.5069333333333335e-05, 'epoch': 2.096}
|
| 265 |
+
{'loss': 0.0846, 'grad_norm': 0.013939165510237217, 'learning_rate': 1.4936e-05, 'epoch': 2.104}
|
| 266 |
+
{'loss': 0.0508, 'grad_norm': 4.487010478973389, 'learning_rate': 1.4802666666666668e-05, 'epoch': 2.112}
|
| 267 |
+
{'loss': 0.166, 'grad_norm': 0.014982378110289574, 'learning_rate': 1.4669333333333335e-05, 'epoch': 2.12}
|
| 268 |
+
{'loss': 0.0941, 'grad_norm': 0.03977168723940849, 'learning_rate': 1.4536e-05, 'epoch': 2.128}
|
| 269 |
+
{'loss': 0.138, 'grad_norm': 0.01852828450500965, 'learning_rate': 1.4402666666666667e-05, 'epoch': 2.136}
|
| 270 |
+
{'loss': 0.0893, 'grad_norm': 0.018985146656632423, 'learning_rate': 1.4269333333333334e-05, 'epoch': 2.144}
|
| 271 |
+
{'loss': 0.0016, 'grad_norm': 0.010966133326292038, 'learning_rate': 1.4136000000000002e-05, 'epoch': 2.152}
|
| 272 |
+
{'loss': 0.026, 'grad_norm': 0.02055787853896618, 'learning_rate': 1.4002666666666667e-05, 'epoch': 2.16}
|
| 273 |
+
{'loss': 0.1055, 'grad_norm': 0.021019885316491127, 'learning_rate': 1.3869333333333335e-05, 'epoch': 2.168}
|
| 274 |
+
{'loss': 0.1479, 'grad_norm': 0.06946071982383728, 'learning_rate': 1.3736000000000002e-05, 'epoch': 2.176}
|
| 275 |
+
{'loss': 0.0808, 'grad_norm': 0.014382677152752876, 'learning_rate': 1.3602666666666666e-05, 'epoch': 2.184}
|
| 276 |
+
{'loss': 0.1624, 'grad_norm': 0.02976427599787712, 'learning_rate': 1.3469333333333333e-05, 'epoch': 2.192}
|
| 277 |
+
{'loss': 0.1299, 'grad_norm': 0.11172953248023987, 'learning_rate': 1.3336e-05, 'epoch': 2.2}
|
| 278 |
+
{'loss': 0.0482, 'grad_norm': 0.08020364493131638, 'learning_rate': 1.3202666666666666e-05, 'epoch': 2.208}
|
| 279 |
+
{'loss': 0.0694, 'grad_norm': 0.013661920092999935, 'learning_rate': 1.3069333333333334e-05, 'epoch': 2.216}
|
| 280 |
+
{'loss': 0.1619, 'grad_norm': 0.02413208782672882, 'learning_rate': 1.2936000000000001e-05, 'epoch': 2.224}
|
| 281 |
+
{'loss': 0.1237, 'grad_norm': 0.007472939323633909, 'learning_rate': 1.2802666666666669e-05, 'epoch': 2.232}
|
| 282 |
+
{'loss': 0.0676, 'grad_norm': 0.02983078546822071, 'learning_rate': 1.2669333333333333e-05, 'epoch': 2.24}
|
| 283 |
+
{'loss': 0.0983, 'grad_norm': 0.04998508095741272, 'learning_rate': 1.2536e-05, 'epoch': 2.248}
|
| 284 |
+
{'loss': 0.1647, 'grad_norm': 13.296645164489746, 'learning_rate': 1.2402666666666667e-05, 'epoch': 2.2560000000000002}
|
| 285 |
+
{'loss': 0.0834, 'grad_norm': 0.016014471650123596, 'learning_rate': 1.2269333333333335e-05, 'epoch': 2.2640000000000002}
|
| 286 |
+
{'loss': 0.1467, 'grad_norm': 0.14326101541519165, 'learning_rate': 1.2136e-05, 'epoch': 2.2720000000000002}
|
| 287 |
+
{'loss': 0.0136, 'grad_norm': 0.014358256943523884, 'learning_rate': 1.2002666666666668e-05, 'epoch': 2.2800000000000002}
|
| 288 |
+
{'loss': 0.2312, 'grad_norm': 0.03325853496789932, 'learning_rate': 1.1869333333333333e-05, 'epoch': 2.288}
|
| 289 |
+
{'loss': 0.0823, 'grad_norm': 0.054809004068374634, 'learning_rate': 1.1736e-05, 'epoch': 2.296}
|
| 290 |
+
{'loss': 0.2533, 'grad_norm': 0.02338593825697899, 'learning_rate': 1.1602666666666666e-05, 'epoch': 2.304}
|
| 291 |
+
{'loss': 0.0905, 'grad_norm': 0.024055376648902893, 'learning_rate': 1.1469333333333334e-05, 'epoch': 2.312}
|
| 292 |
+
{'loss': 0.1688, 'grad_norm': 26.65433120727539, 'learning_rate': 1.1336000000000001e-05, 'epoch': 2.32}
|
| 293 |
+
{'loss': 0.1274, 'grad_norm': 0.05946606397628784, 'learning_rate': 1.1202666666666667e-05, 'epoch': 2.328}
|
| 294 |
+
{'loss': 0.0922, 'grad_norm': 0.018317028880119324, 'learning_rate': 1.1069333333333334e-05, 'epoch': 2.336}
|
| 295 |
+
{'loss': 0.1224, 'grad_norm': 0.014432383701205254, 'learning_rate': 1.0936e-05, 'epoch': 2.344}
|
| 296 |
+
{'loss': 0.0685, 'grad_norm': 0.013095813803374767, 'learning_rate': 1.0802666666666666e-05, 'epoch': 2.352}
|
| 297 |
+
{'loss': 0.0257, 'grad_norm': 0.028074130415916443, 'learning_rate': 1.0669333333333335e-05, 'epoch': 2.36}
|
| 298 |
+
{'loss': 0.1292, 'grad_norm': 0.02423202060163021, 'learning_rate': 1.0536e-05, 'epoch': 2.368}
|
| 299 |
+
{'loss': 0.1137, 'grad_norm': 0.013635743409395218, 'learning_rate': 1.0402666666666668e-05, 'epoch': 2.376}
|
| 300 |
+
{'loss': 0.1745, 'grad_norm': 0.016421562060713768, 'learning_rate': 1.0269333333333333e-05, 'epoch': 2.384}
|
| 301 |
+
{'loss': 0.1689, 'grad_norm': 0.01975177228450775, 'learning_rate': 1.0136000000000001e-05, 'epoch': 2.392}
|
| 302 |
+
{'loss': 0.1267, 'grad_norm': 0.05990523472428322, 'learning_rate': 1.0002666666666667e-05, 'epoch': 2.4}
|
| 303 |
+
{'loss': 0.0714, 'grad_norm': 0.023030275478959084, 'learning_rate': 9.869333333333334e-06, 'epoch': 2.408}
|
| 304 |
+
{'loss': 0.0303, 'grad_norm': 0.17459280788898468, 'learning_rate': 9.736000000000001e-06, 'epoch': 2.416}
|
| 305 |
+
{'loss': 0.0207, 'grad_norm': 0.024825584143400192, 'learning_rate': 9.602666666666667e-06, 'epoch': 2.424}
|
| 306 |
+
{'loss': 0.1338, 'grad_norm': 0.00718740513548255, 'learning_rate': 9.469333333333334e-06, 'epoch': 2.432}
|
| 307 |
+
{'loss': 0.001, 'grad_norm': 0.006329901050776243, 'learning_rate': 9.336e-06, 'epoch': 2.44}
|
| 308 |
+
{'loss': 0.1752, 'grad_norm': 0.016103368252515793, 'learning_rate': 9.202666666666667e-06, 'epoch': 2.448}
|
| 309 |
+
{'loss': 0.1168, 'grad_norm': 0.11804729700088501, 'learning_rate': 9.069333333333333e-06, 'epoch': 2.456}
|
| 310 |
+
{'loss': 0.2117, 'grad_norm': 35.67884826660156, 'learning_rate': 8.936e-06, 'epoch': 2.464}
|
| 311 |
+
{'loss': 0.1755, 'grad_norm': 0.016014249995350838, 'learning_rate': 8.802666666666668e-06, 'epoch': 2.472}
|
| 312 |
+
{'loss': 0.1497, 'grad_norm': 0.22153107821941376, 'learning_rate': 8.669333333333334e-06, 'epoch': 2.48}
|
| 313 |
+
{'loss': 0.1113, 'grad_norm': 0.01318784523755312, 'learning_rate': 8.536000000000001e-06, 'epoch': 2.488}
|
| 314 |
+
{'loss': 0.1143, 'grad_norm': 0.1176510900259018, 'learning_rate': 8.402666666666667e-06, 'epoch': 2.496}
|
| 315 |
+
{'loss': 0.1492, 'grad_norm': 0.06879352778196335, 'learning_rate': 8.269333333333332e-06, 'epoch': 2.504}
|
| 316 |
+
{'loss': 0.1984, 'grad_norm': 0.021879026666283607, 'learning_rate': 8.136000000000001e-06, 'epoch': 2.512}
|
| 317 |
+
{'loss': 0.0812, 'grad_norm': 0.03925799950957298, 'learning_rate': 8.002666666666667e-06, 'epoch': 2.52}
|
| 318 |
+
{'loss': 0.1615, 'grad_norm': 0.0319889560341835, 'learning_rate': 7.869333333333334e-06, 'epoch': 2.528}
|
| 319 |
+
{'loss': 0.0291, 'grad_norm': 0.015960585325956345, 'learning_rate': 7.736e-06, 'epoch': 2.536}
|
| 320 |
+
{'loss': 0.135, 'grad_norm': 0.020564408972859383, 'learning_rate': 7.6026666666666675e-06, 'epoch': 2.544}
|
| 321 |
+
{'loss': 0.1479, 'grad_norm': 0.03615148738026619, 'learning_rate': 7.469333333333334e-06, 'epoch': 2.552}
|
| 322 |
+
{'loss': 0.0368, 'grad_norm': 0.016910186037421227, 'learning_rate': 7.336e-06, 'epoch': 2.56}
|
| 323 |
+
{'loss': 0.2437, 'grad_norm': 8.867321968078613, 'learning_rate': 7.202666666666667e-06, 'epoch': 2.568}
|
| 324 |
+
{'loss': 0.049, 'grad_norm': 10.037091255187988, 'learning_rate': 7.069333333333334e-06, 'epoch': 2.576}
|
| 325 |
+
{'loss': 0.0398, 'grad_norm': 0.25611355900764465, 'learning_rate': 6.936000000000001e-06, 'epoch': 2.584}
|
| 326 |
+
{'loss': 0.0257, 'grad_norm': 0.05507563799619675, 'learning_rate': 6.802666666666667e-06, 'epoch': 2.592}
|
| 327 |
+
{'loss': 0.1173, 'grad_norm': 0.09031017869710922, 'learning_rate': 6.669333333333333e-06, 'epoch': 2.6}
|
| 328 |
+
{'loss': 0.1151, 'grad_norm': 0.013525927439332008, 'learning_rate': 6.536000000000001e-06, 'epoch': 2.608}
|
| 329 |
+
{'loss': 0.0917, 'grad_norm': 0.031039560213685036, 'learning_rate': 6.402666666666666e-06, 'epoch': 2.616}
|
| 330 |
+
{'loss': 0.1611, 'grad_norm': 0.02152109332382679, 'learning_rate': 6.269333333333334e-06, 'epoch': 2.624}
|
| 331 |
+
{'loss': 0.1082, 'grad_norm': 0.02339756488800049, 'learning_rate': 6.136e-06, 'epoch': 2.632}
|
| 332 |
+
{'loss': 0.0367, 'grad_norm': 0.012301336042582989, 'learning_rate': 6.002666666666667e-06, 'epoch': 2.64}
|
| 333 |
+
{'loss': 0.0914, 'grad_norm': 215.3618621826172, 'learning_rate': 5.869333333333333e-06, 'epoch': 2.648}
|
| 334 |
+
{'loss': 0.1905, 'grad_norm': 20.581954956054688, 'learning_rate': 5.736000000000001e-06, 'epoch': 2.656}
|
| 335 |
+
{'loss': 0.0908, 'grad_norm': 0.013410776853561401, 'learning_rate': 5.602666666666667e-06, 'epoch': 2.664}
|
| 336 |
+
{'loss': 0.0603, 'grad_norm': 0.13063132762908936, 'learning_rate': 5.469333333333333e-06, 'epoch': 2.672}
|
| 337 |
+
{'loss': 0.1159, 'grad_norm': 0.05968919396400452, 'learning_rate': 5.336e-06, 'epoch': 2.68}
|
| 338 |
+
{'loss': 0.178, 'grad_norm': 0.07835003733634949, 'learning_rate': 5.202666666666667e-06, 'epoch': 2.6879999999999997}
|
| 339 |
+
{'loss': 0.1182, 'grad_norm': 38.63554000854492, 'learning_rate': 5.069333333333333e-06, 'epoch': 2.6959999999999997}
|
| 340 |
+
{'loss': 0.065, 'grad_norm': 1.430072546005249, 'learning_rate': 4.936000000000001e-06, 'epoch': 2.7039999999999997}
|
| 341 |
+
{'loss': 0.0336, 'grad_norm': 0.009959719143807888, 'learning_rate': 4.802666666666667e-06, 'epoch': 2.7119999999999997}
|
| 342 |
+
{'loss': 0.0495, 'grad_norm': 0.6715738773345947, 'learning_rate': 4.669333333333334e-06, 'epoch': 2.7199999999999998}
|
| 343 |
+
{'loss': 0.0314, 'grad_norm': 0.010251459665596485, 'learning_rate': 4.536e-06, 'epoch': 2.7279999999999998}
|
| 344 |
+
{'loss': 0.0264, 'grad_norm': 0.12389620393514633, 'learning_rate': 4.402666666666667e-06, 'epoch': 2.7359999999999998}
|
| 345 |
+
{'loss': 0.0462, 'grad_norm': 0.008583267219364643, 'learning_rate': 4.269333333333333e-06, 'epoch': 2.7439999999999998}
|
| 346 |
+
{'loss': 0.0661, 'grad_norm': 0.007292643189430237, 'learning_rate': 4.136e-06, 'epoch': 2.752}
|
| 347 |
+
{'loss': 0.1706, 'grad_norm': 0.047004811465740204, 'learning_rate': 4.002666666666667e-06, 'epoch': 2.76}
|
| 348 |
+
{'loss': 0.0777, 'grad_norm': 0.020715517923235893, 'learning_rate': 3.869333333333334e-06, 'epoch': 2.768}
|
| 349 |
+
{'loss': 0.031, 'grad_norm': 0.008295822888612747, 'learning_rate': 3.736e-06, 'epoch': 2.776}
|
| 350 |
+
{'loss': 0.1627, 'grad_norm': 0.015728944912552834, 'learning_rate': 3.602666666666667e-06, 'epoch': 2.784}
|
| 351 |
+
{'loss': 0.0297, 'grad_norm': 0.1496945321559906, 'learning_rate': 3.4693333333333334e-06, 'epoch': 2.792}
|
| 352 |
+
{'loss': 0.1484, 'grad_norm': 0.02585836499929428, 'learning_rate': 3.3360000000000003e-06, 'epoch': 2.8}
|
| 353 |
+
{'loss': 0.0878, 'grad_norm': 0.0086339320987463, 'learning_rate': 3.202666666666667e-06, 'epoch': 2.808}
|
| 354 |
+
{'loss': 0.2442, 'grad_norm': 8.712865829467773, 'learning_rate': 3.0693333333333334e-06, 'epoch': 2.816}
|
| 355 |
+
{'loss': 0.0921, 'grad_norm': 0.02131008356809616, 'learning_rate': 2.9360000000000003e-06, 'epoch': 2.824}
|
| 356 |
+
{'loss': 0.2405, 'grad_norm': 0.00918051227927208, 'learning_rate': 2.8026666666666665e-06, 'epoch': 2.832}
|
| 357 |
+
{'loss': 0.0815, 'grad_norm': 0.020189447328448296, 'learning_rate': 2.6693333333333334e-06, 'epoch': 2.84}
|
| 358 |
+
{'loss': 0.0638, 'grad_norm': 0.18166711926460266, 'learning_rate': 2.5360000000000004e-06, 'epoch': 2.848}
|
| 359 |
+
{'loss': 0.0522, 'grad_norm': 0.00875813141465187, 'learning_rate': 2.402666666666667e-06, 'epoch': 2.856}
|
| 360 |
+
{'loss': 0.0009, 'grad_norm': 0.0431634895503521, 'learning_rate': 2.2693333333333334e-06, 'epoch': 2.864}
|
| 361 |
+
{'loss': 0.1156, 'grad_norm': 0.023334724828600883, 'learning_rate': 2.136e-06, 'epoch': 2.872}
|
| 362 |
+
{'loss': 0.1775, 'grad_norm': 36.20563507080078, 'learning_rate': 2.002666666666667e-06, 'epoch': 2.88}
|
| 363 |
+
{'loss': 0.2143, 'grad_norm': 25.47490882873535, 'learning_rate': 1.8693333333333334e-06, 'epoch': 2.888}
|
| 364 |
+
{'loss': 0.0035, 'grad_norm': 0.013057650066912174, 'learning_rate': 1.7360000000000002e-06, 'epoch': 2.896}
|
| 365 |
+
{'loss': 0.0009, 'grad_norm': 0.01746312901377678, 'learning_rate': 1.602666666666667e-06, 'epoch': 2.904}
|
| 366 |
+
{'loss': 0.1258, 'grad_norm': 0.012785250321030617, 'learning_rate': 1.4693333333333333e-06, 'epoch': 2.912}
|
| 367 |
+
{'loss': 0.1393, 'grad_norm': 0.026742149144411087, 'learning_rate': 1.336e-06, 'epoch': 2.92}
|
| 368 |
+
{'loss': 0.1293, 'grad_norm': 31.66493797302246, 'learning_rate': 1.2026666666666667e-06, 'epoch': 2.928}
|
| 369 |
+
{'loss': 0.0443, 'grad_norm': 0.12351831048727036, 'learning_rate': 1.0693333333333333e-06, 'epoch': 2.936}
|
| 370 |
+
{'loss': 0.0358, 'grad_norm': 0.01323748379945755, 'learning_rate': 9.360000000000001e-07, 'epoch': 2.944}
|
| 371 |
+
{'loss': 0.0679, 'grad_norm': 0.010095755569636822, 'learning_rate': 8.026666666666667e-07, 'epoch': 2.952}
|
| 372 |
+
{'loss': 0.075, 'grad_norm': 0.09313926100730896, 'learning_rate': 6.693333333333334e-07, 'epoch': 2.96}
|
| 373 |
+
{'loss': 0.1417, 'grad_norm': 0.014779884368181229, 'learning_rate': 5.36e-07, 'epoch': 2.968}
|
| 374 |
+
{'loss': 0.0665, 'grad_norm': 0.011904980055987835, 'learning_rate': 4.026666666666666e-07, 'epoch': 2.976}
|
| 375 |
+
{'loss': 0.1183, 'grad_norm': 0.03782917186617851, 'learning_rate': 2.693333333333333e-07, 'epoch': 2.984}
|
| 376 |
+
{'loss': 0.167, 'grad_norm': 8.537976264953613, 'learning_rate': 1.3600000000000003e-07, 'epoch': 2.992}
|
| 377 |
+
{'loss': 0.033, 'grad_norm': 0.010841709561645985, 'learning_rate': 2.666666666666667e-09, 'epoch': 3.0}
|
| 378 |
+
{'eval_loss': 0.5613933801651001, 'eval_accuracy': 0.90404, 'eval_f1': 0.903332393117621, 'eval_precision': 0.9100430299585938, 'eval_recall': 0.89672, 'eval_runtime': 403.0149, 'eval_samples_per_second': 62.032, 'eval_steps_per_second': 15.508, 'epoch': 3.0}
|
| 379 |
+
{'train_runtime': 17654.2392, 'train_samples_per_second': 4.248, 'train_steps_per_second': 1.062, 'train_loss': 0.2737119940789541, 'epoch': 3.0}
|
| 380 |
+
Training completed!
|
| 381 |
+
Evaluating model...
|
| 382 |
+
{'eval_loss': 0.5613933801651001, 'eval_accuracy': 0.90404, 'eval_f1': 0.903332393117621, 'eval_precision': 0.9100430299585938, 'eval_recall': 0.89672, 'eval_runtime': 383.8561, 'eval_samples_per_second': 65.129, 'eval_steps_per_second': 16.282, 'epoch': 3.0}
|
| 383 |
+
=== Evaluation Results ===
|
| 384 |
+
eval_loss: 0.5614
|
| 385 |
+
eval_accuracy: 0.9040
|
| 386 |
+
eval_f1: 0.9033
|
| 387 |
+
eval_precision: 0.9100
|
| 388 |
+
eval_recall: 0.8967
|
| 389 |
+
eval_runtime: 383.8561
|
| 390 |
+
eval_samples_per_second: 65.1290
|
| 391 |
+
eval_steps_per_second: 16.2820
|
| 392 |
+
epoch: 3.0000
|
| 393 |
+
Saving model to ./model...
|
| 394 |
+
Model saved successfully!
|
| 395 |
+
=== Training Pipeline Completed ===
|
| 396 |
+
|
| 397 |
+
🎉 Training completed!
|
| 398 |
+
To run the app: python app.py
|
wandb/run-20250720_155338-0h3fksuy/files/requirements.txt
ADDED
|
@@ -0,0 +1,139 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
accelerate==1.9.0
|
| 2 |
+
aiofiles==24.1.0
|
| 3 |
+
aiohappyeyeballs==2.6.1
|
| 4 |
+
aiohttp==3.12.14
|
| 5 |
+
aiosignal==1.4.0
|
| 6 |
+
alembic==1.16.2
|
| 7 |
+
altair==5.5.0
|
| 8 |
+
annotated-types==0.7.0
|
| 9 |
+
anyio==4.9.0
|
| 10 |
+
attrs==25.3.0
|
| 11 |
+
audioop-lts==0.2.1
|
| 12 |
+
blinker==1.9.0
|
| 13 |
+
Bottleneck==1.4.2
|
| 14 |
+
Brotli==1.1.0
|
| 15 |
+
cachetools==6.1.0
|
| 16 |
+
certifi==2025.6.15
|
| 17 |
+
charset-normalizer==3.4.2
|
| 18 |
+
click==8.2.1
|
| 19 |
+
cloudpickle==3.1.1
|
| 20 |
+
colorama==0.4.6
|
| 21 |
+
colorlog==6.9.0
|
| 22 |
+
contourpy==1.3.1
|
| 23 |
+
cycler==0.11.0
|
| 24 |
+
datasets==4.0.0
|
| 25 |
+
dill==0.3.8
|
| 26 |
+
fastapi==0.116.1
|
| 27 |
+
ffmpy==0.6.0
|
| 28 |
+
filelock==3.18.0
|
| 29 |
+
fonttools==4.55.3
|
| 30 |
+
frozenlist==1.7.0
|
| 31 |
+
fsspec==2025.3.0
|
| 32 |
+
gitdb==4.0.12
|
| 33 |
+
GitPython==3.1.44
|
| 34 |
+
gradio==5.37.0
|
| 35 |
+
gradio_client==1.10.4
|
| 36 |
+
greenlet==3.2.3
|
| 37 |
+
groovy==0.1.2
|
| 38 |
+
h11==0.16.0
|
| 39 |
+
httpcore==1.0.9
|
| 40 |
+
httpx==0.28.1
|
| 41 |
+
huggingface-hub==0.33.4
|
| 42 |
+
idna==3.10
|
| 43 |
+
imbalanced-learn==0.13.0
|
| 44 |
+
imblearn==0.0
|
| 45 |
+
Jinja2==3.1.6
|
| 46 |
+
joblib==1.4.2
|
| 47 |
+
jsonschema==4.24.0
|
| 48 |
+
jsonschema-specifications==2025.4.1
|
| 49 |
+
kiwisolver==1.4.8
|
| 50 |
+
llvmlite==0.44.0
|
| 51 |
+
Mako==1.3.10
|
| 52 |
+
markdown-it-py==3.0.0
|
| 53 |
+
MarkupSafe==3.0.2
|
| 54 |
+
matplotlib==3.9.2
|
| 55 |
+
mdurl==0.1.2
|
| 56 |
+
mpmath==1.3.0
|
| 57 |
+
multidict==6.6.3
|
| 58 |
+
multiprocess==0.70.16
|
| 59 |
+
narwhals==1.44.0
|
| 60 |
+
networkx==3.5
|
| 61 |
+
ninja==1.11.1.4
|
| 62 |
+
numba==0.61.2
|
| 63 |
+
numexpr==2.10.2
|
| 64 |
+
numpy==2.1.1
|
| 65 |
+
optuna==4.4.0
|
| 66 |
+
orjson==3.11.0
|
| 67 |
+
packaging==24.2
|
| 68 |
+
pandas==2.2.3
|
| 69 |
+
pillow==11.1.0
|
| 70 |
+
pip==25.1
|
| 71 |
+
platformdirs==4.3.8
|
| 72 |
+
plotly==6.2.0
|
| 73 |
+
propcache==0.3.2
|
| 74 |
+
protobuf==6.31.1
|
| 75 |
+
psutil==7.0.0
|
| 76 |
+
pyarrow==20.0.0
|
| 77 |
+
pybind11==3.0.0
|
| 78 |
+
pydantic==2.11.7
|
| 79 |
+
pydantic_core==2.33.2
|
| 80 |
+
pydeck==0.9.1
|
| 81 |
+
pydub==0.25.1
|
| 82 |
+
Pygments==2.19.2
|
| 83 |
+
pyparsing==3.2.0
|
| 84 |
+
PyQt6==6.7.1
|
| 85 |
+
PyQt6_sip==13.9.1
|
| 86 |
+
python-dateutil==2.9.0.post0
|
| 87 |
+
python-multipart==0.0.20
|
| 88 |
+
pytz==2024.1
|
| 89 |
+
PyYAML==6.0.2
|
| 90 |
+
referencing==0.36.2
|
| 91 |
+
regex==2024.11.6
|
| 92 |
+
requests==2.32.4
|
| 93 |
+
rich==14.0.0
|
| 94 |
+
rpds-py==0.26.0
|
| 95 |
+
ruff==0.12.3
|
| 96 |
+
safehttpx==0.1.6
|
| 97 |
+
safetensors==0.5.3
|
| 98 |
+
scikit-learn==1.5.2
|
| 99 |
+
scipy==1.15.2
|
| 100 |
+
seaborn==0.13.2
|
| 101 |
+
semantic-version==2.10.0
|
| 102 |
+
sentry-sdk==2.33.0
|
| 103 |
+
setuptools==78.1.1
|
| 104 |
+
shap==0.48.0
|
| 105 |
+
shellingham==1.5.4
|
| 106 |
+
sip==6.10.0
|
| 107 |
+
six==1.17.0
|
| 108 |
+
sklearn-compat==0.1.3
|
| 109 |
+
slicer==0.0.8
|
| 110 |
+
smmap==5.0.2
|
| 111 |
+
sniffio==1.3.1
|
| 112 |
+
SQLAlchemy==2.0.41
|
| 113 |
+
starlette==0.47.1
|
| 114 |
+
streamlit==1.46.1
|
| 115 |
+
sympy==1.14.0
|
| 116 |
+
tenacity==9.1.2
|
| 117 |
+
threadpoolctl==3.5.0
|
| 118 |
+
tokenizers==0.21.2
|
| 119 |
+
toml==0.10.2
|
| 120 |
+
tomlkit==0.13.3
|
| 121 |
+
torch==2.7.1+cu118
|
| 122 |
+
torchaudio==2.7.1+cu118
|
| 123 |
+
torchvision==0.22.1
|
| 124 |
+
tornado==6.5.1
|
| 125 |
+
tqdm==4.67.1
|
| 126 |
+
transformers==4.53.2
|
| 127 |
+
typer==0.16.0
|
| 128 |
+
typing_extensions==4.14.0
|
| 129 |
+
typing-inspection==0.4.1
|
| 130 |
+
tzdata==2025.2
|
| 131 |
+
urllib3==2.5.0
|
| 132 |
+
uvicorn==0.35.0
|
| 133 |
+
wandb==0.21.0
|
| 134 |
+
watchdog==6.0.0
|
| 135 |
+
websockets==15.0.1
|
| 136 |
+
wheel==0.45.1
|
| 137 |
+
xgboost==3.0.2
|
| 138 |
+
xxhash==3.5.0
|
| 139 |
+
yarl==1.20.1
|
wandb/run-20250720_155338-0h3fksuy/files/wandb-metadata.json
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"os": "Windows-11-10.0.26100-SP0",
|
| 3 |
+
"python": "CPython 3.13.5",
|
| 4 |
+
"startedAt": "2025-07-20T10:23:38.923772Z",
|
| 5 |
+
"program": "C:\\Users\\Legion\\desktop\\distilbert-sentiment\\main.py",
|
| 6 |
+
"codePath": "main.py",
|
| 7 |
+
"codePathLocal": "main.py",
|
| 8 |
+
"email": "shreshthkapai@gmail.com",
|
| 9 |
+
"root": "C:\\Users\\Legion\\desktop\\distilbert-sentiment",
|
| 10 |
+
"host": "DESKTOP-EIHJJJL",
|
| 11 |
+
"executable": "C:\\Users\\Legion\\Miniconda3\\envs\\ML\\python.exe",
|
| 12 |
+
"cpu_count": 4,
|
| 13 |
+
"cpu_count_logical": 8,
|
| 14 |
+
"gpu": "NVIDIA GeForce GTX 1650",
|
| 15 |
+
"gpu_count": 1,
|
| 16 |
+
"disk": {
|
| 17 |
+
"/": {
|
| 18 |
+
"total": "255230791680",
|
| 19 |
+
"used": "233129451520"
|
| 20 |
+
}
|
| 21 |
+
},
|
| 22 |
+
"memory": {
|
| 23 |
+
"total": "8506298368"
|
| 24 |
+
},
|
| 25 |
+
"gpu_nvidia": [
|
| 26 |
+
{
|
| 27 |
+
"name": "NVIDIA GeForce GTX 1650",
|
| 28 |
+
"memoryTotal": "4294967296",
|
| 29 |
+
"cudaCores": 1024,
|
| 30 |
+
"architecture": "Turing",
|
| 31 |
+
"uuid": "GPU-fbcd7647-fb67-66f5-b8c7-1a4198b7e4fa"
|
| 32 |
+
}
|
| 33 |
+
],
|
| 34 |
+
"cudaVersion": "12.7",
|
| 35 |
+
"writerId": "fshn6fq4d357dfamunx9x96y44pdzcc6"
|
| 36 |
+
}
|
wandb/run-20250720_155338-0h3fksuy/files/wandb-summary.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"train/global_step":18750,"train/epoch":3,"_timestamp":1.75302505616767e+09,"eval/steps_per_second":16.282,"train_samples_per_second":4.248,"eval/recall":0.89672,"train_loss":0.2737119940789541,"train_runtime":17654.2392,"total_flos":4.9675274496e+15,"eval/loss":0.5613933801651001,"_wandb":{"runtime":18036},"eval/runtime":383.8561,"train_steps_per_second":1.062,"eval/accuracy":0.90404,"eval/samples_per_second":65.129,"train/loss":0.033,"train/learning_rate":2.666666666666667e-09,"_step":379,"_runtime":18036,"train/grad_norm":0.010841709561645985,"eval/precision":0.9100430299585938,"eval/f1":0.903332393117621}
|
wandb/run-20250720_155338-0h3fksuy/logs/debug-internal.log
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2025-07-20T15:53:39.5114812+05:30","level":"INFO","msg":"stream: starting","core version":"0.21.0"}
|
| 2 |
+
{"time":"2025-07-20T15:53:41.0961508+05:30","level":"INFO","msg":"stream: created new stream","id":"0h3fksuy"}
|
| 3 |
+
{"time":"2025-07-20T15:53:41.0967346+05:30","level":"INFO","msg":"stream: started","id":"0h3fksuy"}
|
| 4 |
+
{"time":"2025-07-20T15:53:41.0967346+05:30","level":"INFO","msg":"handler: started","stream_id":"0h3fksuy"}
|
| 5 |
+
{"time":"2025-07-20T15:53:41.0967346+05:30","level":"INFO","msg":"sender: started","stream_id":"0h3fksuy"}
|
| 6 |
+
{"time":"2025-07-20T15:53:41.0967346+05:30","level":"INFO","msg":"writer: Do: started","stream_id":"0h3fksuy"}
|
| 7 |
+
{"time":"2025-07-20T20:27:20.3681207+05:30","level":"WARN","msg":"sender: taking a long time","seconds":11118.8689693,"work":"WorkRecord(*service_go_proto.Request_StopStatus); Control(local:true mailbox_slot:\"qx9z56z7vy8w\" connection_id:\"1(127.0.0.1:59166)\")"}
|
| 8 |
+
{"time":"2025-07-20T20:27:20.6988531+05:30","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
| 9 |
+
{"time":"2025-07-20T20:27:20.856851+05:30","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/shreshth/huggingface/0h3fksuy/file_stream\": net/http: request canceled while waiting for connection (Client.Timeout exceeded while awaiting headers)"}
|
| 10 |
+
{"time":"2025-07-20T20:27:24.1042083+05:30","level":"INFO","msg":"sender: succeeded after taking longer than expected","seconds":11124.0731302,"work":"WorkRecord(*service_go_proto.Request_StopStatus); Control(local:true mailbox_slot:\"qx9z56z7vy8w\" connection_id:\"1(127.0.0.1:59166)\")"}
|
| 11 |
+
{"time":"2025-07-20T20:54:18.4689135+05:30","level":"INFO","msg":"stream: closing","id":"0h3fksuy"}
|
| 12 |
+
{"time":"2025-07-20T20:54:19.7177233+05:30","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
|
| 13 |
+
{"time":"2025-07-20T20:54:20.2321569+05:30","level":"INFO","msg":"handler: closed","stream_id":"0h3fksuy"}
|
| 14 |
+
{"time":"2025-07-20T20:54:20.2321569+05:30","level":"INFO","msg":"sender: closed","stream_id":"0h3fksuy"}
|
| 15 |
+
{"time":"2025-07-20T20:54:20.2321569+05:30","level":"INFO","msg":"writer: Close: closed","stream_id":"0h3fksuy"}
|
| 16 |
+
{"time":"2025-07-20T20:54:20.2327206+05:30","level":"INFO","msg":"stream: closed","id":"0h3fksuy"}
|
wandb/run-20250720_155338-0h3fksuy/logs/debug.log
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-07-20 15:53:38,929 INFO MainThread:1648 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0
|
| 2 |
+
2025-07-20 15:53:38,930 INFO MainThread:1648 [wandb_setup.py:_flush():80] Configure stats pid to 1648
|
| 3 |
+
2025-07-20 15:53:38,930 INFO MainThread:1648 [wandb_setup.py:_flush():80] Loading settings from C:\Users\Legion\.config\wandb\settings
|
| 4 |
+
2025-07-20 15:53:38,930 INFO MainThread:1648 [wandb_setup.py:_flush():80] Loading settings from C:\Users\Legion\desktop\distilbert-sentiment\wandb\settings
|
| 5 |
+
2025-07-20 15:53:38,930 INFO MainThread:1648 [wandb_setup.py:_flush():80] Loading settings from environment variables
|
| 6 |
+
2025-07-20 15:53:38,930 INFO MainThread:1648 [wandb_init.py:setup_run_log_directory():703] Logging user logs to C:\Users\Legion\desktop\distilbert-sentiment\wandb\run-20250720_155338-0h3fksuy\logs\debug.log
|
| 7 |
+
2025-07-20 15:53:38,931 INFO MainThread:1648 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to C:\Users\Legion\desktop\distilbert-sentiment\wandb\run-20250720_155338-0h3fksuy\logs\debug-internal.log
|
| 8 |
+
2025-07-20 15:53:38,931 INFO MainThread:1648 [wandb_init.py:init():830] calling init triggers
|
| 9 |
+
2025-07-20 15:53:38,931 INFO MainThread:1648 [wandb_init.py:init():835] wandb.init called with sweep_config: {}
|
| 10 |
+
config: {'_wandb': {}}
|
| 11 |
+
2025-07-20 15:53:38,931 INFO MainThread:1648 [wandb_init.py:init():871] starting backend
|
| 12 |
+
2025-07-20 15:53:39,433 INFO MainThread:1648 [wandb_init.py:init():874] sending inform_init request
|
| 13 |
+
2025-07-20 15:53:39,505 INFO MainThread:1648 [wandb_init.py:init():882] backend started and connected
|
| 14 |
+
2025-07-20 15:53:39,507 INFO MainThread:1648 [wandb_init.py:init():953] updated telemetry
|
| 15 |
+
2025-07-20 15:53:39,511 INFO MainThread:1648 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout
|
| 16 |
+
2025-07-20 15:53:41,579 INFO MainThread:1648 [wandb_init.py:init():1029] starting run threads in backend
|
| 17 |
+
2025-07-20 15:53:42,183 INFO MainThread:1648 [wandb_run.py:_console_start():2458] atexit reg
|
| 18 |
+
2025-07-20 15:53:42,183 INFO MainThread:1648 [wandb_run.py:_redirect():2306] redirect: wrap_raw
|
| 19 |
+
2025-07-20 15:53:42,184 INFO MainThread:1648 [wandb_run.py:_redirect():2375] Wrapping output streams.
|
| 20 |
+
2025-07-20 15:53:42,184 INFO MainThread:1648 [wandb_run.py:_redirect():2398] Redirects installed.
|
| 21 |
+
2025-07-20 15:53:42,191 INFO MainThread:1648 [wandb_init.py:init():1075] run started, returning control to user process
|
| 22 |
+
2025-07-20 15:53:42,195 INFO MainThread:1648 [wandb_run.py:_config_callback():1363] config_cb None None {'vocab_size': 30522, 'max_position_embeddings': 512, 'sinusoidal_pos_embds': False, 'n_layers': 6, 'n_heads': 12, 'dim': 768, 'hidden_dim': 3072, 'dropout': 0.1, 'attention_dropout': 0.1, 'activation': 'gelu', 'initializer_range': 0.02, 'qa_dropout': 0.1, 'seq_classif_dropout': 0.2, 'return_dict': True, 'output_hidden_states': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['DistilBertForMaskedLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'distilbert-base-uncased', 'transformers_version': '4.53.2', 'model_type': 'distilbert', 'tie_weights_': True, 'output_attentions': False, 'output_dir': './model', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 
'do_predict': False, 'eval_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 2, 'per_device_eval_batch_size': 4, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 2, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './logs', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 50, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': False, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': './model', 'disable_tqdm': True, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'f1', 'greater_is_better': True, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 
'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': False, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'hub_revision': None, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'liger_kernel_config': None, 'eval_use_gather_object': False, 'average_tokens_across_devices': False}
|
| 23 |
+
2025-07-20 15:53:42,202 INFO MainThread:1648 [wandb_config.py:__setitem__():154] [no run ID] config set model/num_parameters = 66955010 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x0000024ABE770590>>
|
| 24 |
+
2025-07-20 15:53:42,202 INFO MainThread:1648 [wandb_run.py:_config_callback():1363] config_cb model/num_parameters 66955010 None
|
| 25 |
+
2025-07-20 20:54:18,327 INFO MsgRouterThr:1648 [mailbox.py:close():129] [no run ID] Closing mailbox, abandoning 1 handles.
|
wandb/run-20250720_155338-0h3fksuy/run-0h3fksuy.wandb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8ee10c6e19f6dde4c416e8ee5e2f7791dacbb667462dbe42de372ff1eaca5b68
|
| 3 |
+
size 703284
|