# File: MicroHS/.github/workflows/sync_to_hf_space.yml
# Maintained by: github-actions[bot]
# Last sync commit: 79f9b3a — "Sync from GitHub 38cd8d69dc858672e22cd1448f7768fef87468b1"
# Mirrors the repository to a Hugging Face Space on every push to main
# (or manually via workflow_dispatch). Strips deploy-only artifacts,
# injects HF Space front matter into README.md, and force-pushes a
# fresh single-commit history to the Space's git remote.
name: Sync GitHub to Hugging Face Space

on:
  push:
    branches:
      - main
  workflow_dispatch:

jobs:
  sync-to-hf:
    runs-on: ubuntu-latest
    steps:
      # Full history + real LFS contents so the export below ships
      # actual files, not LFS pointer stubs.
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          lfs: true

      - name: Pull LFS files
        run: git lfs pull

      - name: Push to Hugging Face Space
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          # Fail fast on errors and unset variables, and propagate failures
          # through pipelines — without pipefail the `tar | tar` export below
          # would report success even if the producing tar failed.
          set -euo pipefail

          if [ -z "${HF_TOKEN}" ]; then
            echo "HF_TOKEN secret is not set."
            exit 1
          fi

          DEPLOY_DIR="/tmp/hf-deploy"
          rm -rf "${DEPLOY_DIR}"
          mkdir -p "${DEPLOY_DIR}"

          # Export the current tree (with real LFS file contents, not pointers).
          tar --exclude=.git -cf - . | (cd "${DEPLOY_DIR}" && tar -xf -)
          cd "${DEPLOY_DIR}"

          # Keep GitHub README clean; inject Space front matter only for HF deploy.
          if [ -f README.md ]; then
            # Strip any existing leading YAML front matter (--- ... ---) so the
            # deployed README never carries two front matter blocks.
            awk '
              NR == 1 && $0 == "---" {in_yaml=1; next}
              in_yaml && $0 == "---" {in_yaml=0; next}
              !in_yaml {print}
            ' README.md > README.clean.md
            printf '%s\n' \
              '---' \
              'title: HS Code Classifier Micro' \
              'emoji: ⚡' \
              'colorFrom: pink' \
              'colorTo: blue' \
              'sdk: docker' \
              'app_port: 7860' \
              '---' \
              > README.frontmatter.md
            cat README.frontmatter.md README.clean.md > README.md
            rm -f README.frontmatter.md README.clean.md
          fi

          # Remove files not needed at runtime to stay under the HF Space 1GB
          # limit. Done BEFORE setting up LFS tracking so we only track files
          # that actually ship. Large artifacts (sentence model, embeddings,
          # classifier, training data) are hosted on HF Hub and downloaded at
          # startup.
          rm -rf scripts/
          rm -rf models/sentence_model/
          rm -f models/embeddings.npy models/knn_classifier.pkl models/label_encoder.pkl models/metadata.json models/umap_data.json
          touch models/.gitkeep
          rm -f data/training_data.csv data/training_data_indexed.csv
          rm -f data/hts/hts_*.csv
          rm -f data/cargo_descriptions.csv
          rm -f data/training_data.json
          rm -f data/hf_real_data.csv

          # Build a fresh single-commit repo for the Space (history is not
          # synced; the push below is forced).
          git lfs install
          git init
          git checkout -b main
          git config user.name "github-actions[bot]"
          git config user.email "41898282+github-actions[bot]@users.noreply.github.com"

          # HF rejects files >10MB without Git LFS. models/umap_data.json was
          # previously tracked here too, but it is deleted above, so tracking
          # it was a no-op; only the bundled data/*.json files need LFS.
          git lfs track "data/*.json"
          git add .gitattributes

          git add -A
          git commit -m "Sync from GitHub ${GITHUB_SHA}"
          git remote add hf "https://oauth2:${HF_TOKEN}@huggingface.co/spaces/Mead0w1ark/MicroHS"
          git push --force hf main