# Emotion / upload_to_hf.py
# (Provenance from the HF Space listing: YureiYuri, commit 05986e9 "add app and req.txt")
# ============================================================
# Upload Emotion Classifier to HuggingFace Hub
# ============================================================
# Run this AFTER kaggle_train.py finishes.
# You can run it in the same Kaggle notebook or locally.
#
# Prerequisites:
# pip install huggingface_hub transformers
#
# Steps:
# 1. Create a free account at https://huggingface.co
# 2. Go to Settings β†’ Access Tokens β†’ New token (write permission)
# 3. Replace YOUR_HF_TOKEN and YOUR_HF_USERNAME below
# 4. Run this script
# ============================================================
import os
from pathlib import Path
from transformers import DistilBertForSequenceClassification, DistilBertTokenizerFast
from huggingface_hub import HfApi, login, create_repo

# ── Config ───────────────────────────────────────────────────────────────────
# Prefer the HF_TOKEN environment variable (e.g. a Kaggle secret) so the write
# token never needs to be committed to source; the literal below is only a
# placeholder for interactive use.
HF_TOKEN = os.environ.get("HF_TOKEN", "hf_xxxxxxxxxxxxxxxxxxxx")  # ← paste your HF write token
HF_USERNAME = "your-hf-username"  # ← your HF username
REPO_NAME = "emotion-classifier-distilbert"  # ← repo name (will be created)
MODEL_DIR = "/kaggle/working/emotion_model/final"  # path from training script
FULL_REPO_ID = f"{HF_USERNAME}/{REPO_NAME}"

# Fail fast with a clear message if training output is missing — otherwise
# from_pretrained raises a much less obvious error later on.
if not Path(MODEL_DIR).is_dir():
    raise FileNotFoundError(
        f"Model directory not found: {MODEL_DIR} — run kaggle_train.py first."
    )

# ── Login ─────────────────────────────────────────────────────────────────────
login(token=HF_TOKEN)
api = HfApi()

# ── Create repo (public) ──────────────────────────────────────────────────────
# exist_ok=True makes the script idempotent: re-running after a partial upload
# does not fail on the already-created repo.
print(f"Creating repo: {FULL_REPO_ID}")
create_repo(FULL_REPO_ID, repo_type="model", exist_ok=True, private=False)

# ── Load model & tokenizer from local dir ────────────────────────────────────
model = DistilBertForSequenceClassification.from_pretrained(MODEL_DIR)
tokenizer = DistilBertTokenizerFast.from_pretrained(MODEL_DIR)

# ── Push model & tokenizer ────────────────────────────────────────────────────
print("Pushing model to HuggingFace Hub …")
model.push_to_hub(FULL_REPO_ID, token=HF_TOKEN)
tokenizer.push_to_hub(FULL_REPO_ID, token=HF_TOKEN)

# ── Write a model card (README.md) ────────────────────────────────────────────
# NOTE: f-string — only {FULL_REPO_ID} is interpolated; the YAML front matter
# and markdown are emitted verbatim.
MODEL_CARD = f"""---
language: en
tags:
- text-classification
- emotion-detection
- distilbert
license: apache-2.0
datasets:
- custom-emotion-dataset
metrics:
- accuracy
- f1
pipeline_tag: text-classification
---
# Emotion Classifier — DistilBERT
A fine-tuned [DistilBERT](https://huggingface.co/distilbert-base-uncased) model
for **6-class emotion detection** from English text.
## Labels
| ID | Emotion |
|----|----------|
| 0 | sadness |
| 1 | anger |
| 2 | love |
| 3 | surprise |
| 4 | fear |
| 5 | joy |
## Training details
- Base model: `distilbert-base-uncased`
- Epochs: 10
- Batch size: 32
- Max sequence length: 128
- Optimizer: AdamW (lr=2e-5, weight_decay=0.01)
- Scheduler: Linear with warmup (10 %)
- Train / Val / Test split: 70 / 15 / 15
## Quick usage
```python
from transformers import pipeline
classifier = pipeline(
"text-classification",
model="{FULL_REPO_ID}",
top_k=None,
)
result = classifier("I feel so happy today!")
print(result)
```
## Citation
If you use this model, please credit the training dataset and this repo.
"""

# Upload the card directly from memory (bytes), no temp file needed.
api.upload_file(
path_or_fileobj=MODEL_CARD.encode(),
path_in_repo="README.md",
repo_id=FULL_REPO_ID,
repo_type="model",
token=HF_TOKEN,
)
print(f"\n✅ Done! Model live at: https://huggingface.co/{FULL_REPO_ID}")