File size: 1,098 Bytes
d6ab423 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 | ---
# Model-card metadata: licensing, language, base model, pipeline tag and
# search tags, followed by task / training / evaluation details.
# NOTE(review): the nesting below was reconstructed from key semantics because
# the captured text carried no indentation — confirm against the original file.
license: mit
language:
  - en
base_model:
  - google-bert/bert-base-uncased
pipeline_tag: text-classification
tags:
  - multilabel-classification
  - food-safety
  - product-category
  - hazard-category
  - bert
  - data-augmentation
  - optuna
  - interpretability
  - low-resource
  - imbalance-handling
model_type: bert

# Shared task this model was built for.
task:
  # Quoted: the value contains ": ", which a plain (unquoted) scalar cannot
  # hold without being parsed as a nested mapping.
  name: "SemEval 2025 Task 9: The Food Hazard Detection Challenge - Multilabel Text Classification"
  type: text-classification
  link: https://food-hazard-detection-semeval-2025.github.io/
dataset:
  - custom

# Training setup: input columns, target labels, augmentation and optimisation.
training:
  input_features: ["title", "text"]
  label_names: ["product-category", "hazard-category", "product", "hazard"]
  augmentation:
    # Each entry maps an augmentation family to the techniques used from it.
    methods:
      - lexical: [synonym-replacement, random-swap, word-deletion]
      - embedding: [contextual-substitution, insertion]
      - llm: [gpt-4-paraphrasing]
    strategy: "quantile-based underrepresented class boosting (q=0.99)"
  # NOTE(review): presumably these three belong to `training` rather than to
  # `augmentation` — verify against the original indentation.
  optimizer: AdamW
  scheduler: cosine_with_restarts
  hyperparameter_search: optuna

evaluation:
  metrics: [f1-score]
limitations:
  - Augmentation focused on titles only; text augmentation could further help.
|