Add dataset metadata parameter to GrievanceClassifier for improved logging
Browse files
- model_pipeline.py +4 -0
model_pipeline.py
CHANGED
|
@@ -563,6 +563,7 @@ class GrievanceClassifier:
|
|
| 563 |
train_dataset,
|
| 564 |
eval_dataset,
|
| 565 |
test_dataset,
|
|
|
|
| 566 |
space_repo_id: str | None = None,
|
| 567 |
hf_training_args: dict | None = None,
|
| 568 |
api_endpoint: str | None = None,
|
|
@@ -578,6 +579,7 @@ class GrievanceClassifier:
|
|
| 578 |
train_dataset: Hugging Face Dataset for training.
|
| 579 |
eval_dataset: Hugging Face Dataset for validation.
|
| 580 |
test_dataset: Hugging Face Dataset for testing.
|
|
|
|
| 581 |
hf_training_args (dict, optional): Hugging Face TrainingArguments overrides.
|
| 582 |
api_endpoint (str, optional): Endpoint of deployed model to compare F1.
|
| 583 |
space_repo_id (str): HF Space Repo Id.
|
|
@@ -589,6 +591,7 @@ class GrievanceClassifier:
|
|
| 589 |
dict: Contains evaluation metrics, decision, and deployed F1 (if applicable).
|
| 590 |
"""
|
| 591 |
self.space_repo_id= space_repo_id
|
|
|
|
| 592 |
|
| 593 |
# 1. Initialize W&B run
|
| 594 |
wandb.init(
|
|
@@ -597,6 +600,7 @@ class GrievanceClassifier:
|
|
| 597 |
config={
|
| 598 |
"model_checkpoint": self.model_checkpoint,
|
| 599 |
"num_labels": self.num_labels,
|
|
|
|
| 600 |
}
|
| 601 |
)
|
| 602 |
|
|
|
|
| 563 |
train_dataset,
|
| 564 |
eval_dataset,
|
| 565 |
test_dataset,
|
| 566 |
+
dataset_metadata: dict,
|
| 567 |
space_repo_id: str | None = None,
|
| 568 |
hf_training_args: dict | None = None,
|
| 569 |
api_endpoint: str | None = None,
|
|
|
|
| 579 |
train_dataset: Hugging Face Dataset for training.
|
| 580 |
eval_dataset: Hugging Face Dataset for validation.
|
| 581 |
test_dataset: Hugging Face Dataset for testing.
|
| 582 |
+
dataset_metadata (dict): Metadata about the datasets, logged in the W&B run config.
|
| 583 |
hf_training_args (dict, optional): Hugging Face TrainingArguments overrides.
|
| 584 |
api_endpoint (str, optional): Endpoint of deployed model to compare F1.
|
| 585 |
space_repo_id (str): HF Space Repo Id.
|
|
|
|
| 591 |
dict: Contains evaluation metrics, decision, and deployed F1 (if applicable).
|
| 592 |
"""
|
| 593 |
self.space_repo_id= space_repo_id
|
| 594 |
+
self.dataset_metadata = dataset_metadata
|
| 595 |
|
| 596 |
# 1. Initialize W&B run
|
| 597 |
wandb.init(
|
|
|
|
| 600 |
config={
|
| 601 |
"model_checkpoint": self.model_checkpoint,
|
| 602 |
"num_labels": self.num_labels,
|
| 603 |
+
"dataset_metadata": self.dataset_metadata
|
| 604 |
}
|
| 605 |
)
|
| 606 |
|