---
# Descriptive metadata for the model. The key set looks like Hugging Face
# model-card front matter — NOTE(review): confirm the consumer before
# renaming or reordering any key.
license: mit
language:
- en
# Checkpoint(s) this model is based on.
base_model:
- google-bert/bert-base-uncased
pipeline_tag: text-classification
# Free-form tags describing the task, the domain, and the techniques used.
tags:
- multilabel-classification
- food-safety
- product-category
- hazard-category
- bert
- data-augmentation
- optuna
- interpretability
- low-resource
- imbalance-handling
model_type: bert
# Shared task this model was built for.
task:
  # Quoted on purpose: the title contains ": " ("Task 9: The ..."), which a
  # YAML parser would otherwise read as a nested mapping and reject.
  name: 'SemEval 2025 Task 9: The Food Hazard Detection Challenge - Multilabel Text Classification'
  type: text-classification
  link: https://food-hazard-detection-semeval-2025.github.io/
# Training data source; `custom` is the only entry listed.
# NOTE(review): if this file is Hugging Face model-card front matter, the Hub
# reads the plural key `datasets` — confirm whether this singular key is
# intentional before renaming it.
dataset:
- custom
# Training setup: model inputs, predicted labels, augmentation, optimisation.
training:
  # Input fields fed to the model.
  input_features:
  - title
  - text
  # Label columns listed for this model.
  label_names:
  - product-category
  - hazard-category
  - product
  - hazard
  augmentation:
    # Augmentation techniques, grouped by family.
    methods:
    - lexical:
      - synonym-replacement
      - random-swap
      - word-deletion
    - embedding:
      - contextual-substitution
      - insertion
    - llm:
      - gpt-4-paraphrasing
    # Free-text description of how classes are selected for augmentation.
    strategy: 'quantile-based underrepresented class boosting (q=0.99)'
  optimizer: AdamW
  scheduler: cosine_with_restarts
  hyperparameter_search: optuna
# Metrics reported for the model.
evaluation:
  metrics:
  - f1-score
# Known limitations, recorded as free-text notes (one list item per note).
limitations:
- Augmentation focused on titles only; text augmentation could further help.