# Emotion / upload_to_hf.py
# (Provenance from the HF Space listing: YureiYuri, commit 05986e9 "add app and req.txt")
# ============================================================
# Upload Emotion Classifier to HuggingFace Hub
# ============================================================
# Run this AFTER kaggle_train.py finishes.
# You can run it in the same Kaggle notebook or locally.
#
# Prerequisites:
# pip install huggingface_hub transformers
#
# Steps:
# 1. Create a free account at https://huggingface.co
# 2. Go to Settings β†’ Access Tokens β†’ New token (write permission)
# 3. Replace YOUR_HF_TOKEN and YOUR_HF_USERNAME below
# 4. Run this script
# ============================================================
import os
from pathlib import Path
from transformers import DistilBertForSequenceClassification, DistilBertTokenizerFast
from huggingface_hub import HfApi, login, create_repo

# ── Config ───────────────────────────────────────────────────────────────────
# Prefer the HF_TOKEN environment variable (e.g. a Kaggle secret) so the write
# token never needs to be committed to source; the literal below is only a
# placeholder for interactive use.
HF_TOKEN = os.environ.get("HF_TOKEN", "hf_xxxxxxxxxxxxxxxxxxxx")  # ← paste your HF write token
HF_USERNAME = "your-hf-username"  # ← your HF username
REPO_NAME = "emotion-classifier-distilbert"  # ← repo name (will be created)
MODEL_DIR = "/kaggle/working/emotion_model/final"  # path from training script
FULL_REPO_ID = f"{HF_USERNAME}/{REPO_NAME}"

# Fail fast with a clear message if training output is missing — otherwise
# from_pretrained raises a much less obvious error later on.
if not Path(MODEL_DIR).is_dir():
    raise FileNotFoundError(
        f"Model directory not found: {MODEL_DIR} — run kaggle_train.py first."
    )

# ── Login ─────────────────────────────────────────────────────────────────────
login(token=HF_TOKEN)
api = HfApi()

# ── Create repo (public) ──────────────────────────────────────────────────────
# exist_ok=True makes the script idempotent: re-running after a partial upload
# does not fail on the already-created repo.
print(f"Creating repo: {FULL_REPO_ID}")
create_repo(FULL_REPO_ID, repo_type="model", exist_ok=True, private=False)

# ── Load model & tokenizer from local dir ────────────────────────────────────
model = DistilBertForSequenceClassification.from_pretrained(MODEL_DIR)
tokenizer = DistilBertTokenizerFast.from_pretrained(MODEL_DIR)

# ── Push model & tokenizer ────────────────────────────────────────────────────
print("Pushing model to HuggingFace Hub …")
model.push_to_hub(FULL_REPO_ID, token=HF_TOKEN)
tokenizer.push_to_hub(FULL_REPO_ID, token=HF_TOKEN)

# ── Write a model card (README.md) ────────────────────────────────────────────
# NOTE: f-string — only {FULL_REPO_ID} is interpolated; the YAML front matter
# and markdown are emitted verbatim.
MODEL_CARD = f"""---
language: en
tags:
- text-classification
- emotion-detection
- distilbert
license: apache-2.0
datasets:
- custom-emotion-dataset
metrics:
- accuracy
- f1
pipeline_tag: text-classification
---
# Emotion Classifier — DistilBERT
A fine-tuned [DistilBERT](https://huggingface.co/distilbert-base-uncased) model
for **6-class emotion detection** from English text.
## Labels
| ID | Emotion |
|----|----------|
| 0 | sadness |
| 1 | anger |
| 2 | love |
| 3 | surprise |
| 4 | fear |
| 5 | joy |
## Training details
- Base model: `distilbert-base-uncased`
- Epochs: 10
- Batch size: 32
- Max sequence length: 128
- Optimizer: AdamW (lr=2e-5, weight_decay=0.01)
- Scheduler: Linear with warmup (10 %)
- Train / Val / Test split: 70 / 15 / 15
## Quick usage
```python
from transformers import pipeline
classifier = pipeline(
"text-classification",
model="{FULL_REPO_ID}",
top_k=None,
)
result = classifier("I feel so happy today!")
print(result)
```
## Citation
If you use this model, please credit the training dataset and this repo.
"""

# Upload the card directly from memory (bytes), no temp file needed.
api.upload_file(
path_or_fileobj=MODEL_CARD.encode(),
path_in_repo="README.md",
repo_id=FULL_REPO_ID,
repo_type="model",
token=HF_TOKEN,
)
print(f"\n✅ Done! Model live at: https://huggingface.co/{FULL_REPO_ID}")