Spaces:
Sleeping
Sleeping
# Mirrors the main branch of this repository to a Hugging Face Space.
# Triggered on every push to main, or manually via workflow_dispatch.
name: Sync GitHub to Hugging Face Space

on:
  push:
    branches:
      - main
  workflow_dispatch:

jobs:
  sync-to-hf:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0  # full history; shallow clones can break LFS resolution
          lfs: true

      - name: Pull LFS files
        run: git lfs pull

      - name: Push to Hugging Face Space
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          # Fail fast if the deploy token was never configured for this repo.
          if [ -z "${HF_TOKEN}" ]; then
            echo "HF_TOKEN secret is not set."
            exit 1
          fi
          DEPLOY_DIR="/tmp/hf-deploy"
          rm -rf "${DEPLOY_DIR}"
          mkdir -p "${DEPLOY_DIR}"
          # Export the current tree (with real LFS file contents, not pointers).
          tar --exclude=.git -cf - . | (cd "${DEPLOY_DIR}" && tar -xf -)
          cd "${DEPLOY_DIR}"
          # Keep GitHub README clean; inject Space front matter only for HF deploy.
          if [ -f README.md ]; then
            # Strip any existing leading YAML front matter so we don't stack two.
            awk '
              NR == 1 && $0 == "---" {in_yaml=1; next}
              in_yaml && $0 == "---" {in_yaml=0; next}
              !in_yaml {print}
            ' README.md > README.clean.md
            printf '%s\n' \
              '---' \
              'title: HS Code Classifier Micro' \
              'emoji: ⚡' \
              'colorFrom: pink' \
              'colorTo: blue' \
              'sdk: docker' \
              'app_port: 7860' \
              '---' \
              > README.frontmatter.md
            cat README.frontmatter.md README.clean.md > README.md
            rm -f README.frontmatter.md README.clean.md
          fi
          # HF rejects files >10MB without Git LFS.
          git lfs install
          git init
          git checkout -b main
          git config user.name "github-actions[bot]"
          git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
          # Track large files with LFS for HF. Only data/*.json is still bundled;
          # models/umap_data.json is deleted below, so tracking it would be dead weight.
          git lfs track "data/*.json"
          git add .gitattributes
          # Remove files not needed at runtime to stay under the HF Space 1GB limit.
          # Large artifacts (sentence model, embeddings, classifier, training data)
          # are hosted on the HF Hub and downloaded by the app at startup.
          rm -rf scripts/
          rm -rf models/sentence_model/
          rm -f models/embeddings.npy models/knn_classifier.pkl models/label_encoder.pkl models/metadata.json models/umap_data.json
          touch models/.gitkeep
          rm -f data/training_data.csv data/training_data_indexed.csv
          rm -f data/hts/hts_*.csv
          rm -f data/cargo_descriptions.csv
          rm -f data/training_data.json
          rm -f data/hf_real_data.csv
          git add -A
          git commit -m "Sync from GitHub ${GITHUB_SHA}"
          # Token is embedded in the remote URL; GitHub Actions masks secrets in logs.
          git remote add hf "https://oauth2:${HF_TOKEN}@huggingface.co/spaces/Mead0w1ark/MicroHS"
          git push --force hf main