# Mirrors the `main` branch of this repository to a Hugging Face Space.
#
# The job exports the checked-out tree (with real LFS contents) to a scratch
# directory, rewrites README.md so it carries the Space front matter, removes
# artifacts that are not bundled with the Space, and force-pushes the result
# as a fresh single-commit history to the Space's `main` branch.
#
# Requires the repository secret HF_TOKEN.
name: Sync GitHub to Hugging Face Space

on:
  push:
    branches:
      - main
  workflow_dispatch:

# The job only reads this repository; the push to Hugging Face is
# authenticated with HF_TOKEN, not with GITHUB_TOKEN.
permissions:
  contents: read

# Serialise runs so that an older run's force-push can never land after a
# newer one and leave the Space on a stale commit.
concurrency:
  group: sync-to-hf
  cancel-in-progress: false

jobs:
  sync-to-hf:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          lfs: true

      - name: Pull LFS files
        run: git lfs pull

      - name: Push to Hugging Face Space
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          # The default step shell is `bash -e`, which does not enable
          # pipefail; without it a failure of the first `tar` in the export
          # pipeline below would go unnoticed and a partial tree would be
          # force-pushed.
          set -euo pipefail

          if [ -z "${HF_TOKEN:-}" ]; then
            echo "HF_TOKEN secret is not set."
            exit 1
          fi

          DEPLOY_DIR="/tmp/hf-deploy"
          rm -rf "${DEPLOY_DIR}"
          mkdir -p "${DEPLOY_DIR}"

          # Export the current tree (with real LFS file contents, not pointers)
          tar --exclude=.git -cf - . | (cd "${DEPLOY_DIR}" && tar -xf -)

          cd "${DEPLOY_DIR}"

          # Keep GitHub README clean; inject Space front matter only for HF deploy.
          # The awk program drops an existing leading `---` ... `---` block
          # (only when `---` is the very first line) and prints everything else.
          if [ -f README.md ]; then
            awk '
              NR == 1 && $0 == "---" {in_yaml=1; next}
              in_yaml && $0 == "---" {in_yaml=0; next}
              !in_yaml {print}
            ' README.md > README.clean.md

            printf '%s\n' \
              '---' \
              'title: HS Code Classifier Micro' \
              'emoji: ⚡' \
              'colorFrom: pink' \
              'colorTo: blue' \
              'sdk: docker' \
              'app_port: 7860' \
              '---' \
              > README.frontmatter.md

            cat README.frontmatter.md README.clean.md > README.md
            rm -f README.frontmatter.md README.clean.md
          fi

          # HF rejects files >10MB without Git LFS.
          git lfs install
          # Create the repository directly on `main` so the result does not
          # depend on the runner's init.defaultBranch setting.
          git init --initial-branch=main
          git config user.name "github-actions[bot]"
          git config user.email "41898282+github-actions[bot]@users.noreply.github.com"

          # Track large files with LFS for HF (only files still bundled).
          # models/umap_data.json is deleted below, so it is not tracked here.
          git lfs track "data/*.json"
          git add .gitattributes

          # Remove files not needed at runtime to stay under HF Space 1GB limit.
          # Large artifacts (sentence model, embeddings, classifier, training data)
          # are hosted on HF Hub at $SENTENCE_MODEL_NAME and downloaded at startup.
          rm -rf scripts/
          rm -rf models/sentence_model/
          rm -f models/embeddings.npy models/knn_classifier.pkl models/label_encoder.pkl models/metadata.json models/umap_data.json
          # Keep the (now possibly empty) models/ directory in the commit.
          touch models/.gitkeep
          rm -f data/training_data.csv data/training_data_indexed.csv
          rm -f data/hts/hts_*.csv
          rm -f data/cargo_descriptions.csv
          rm -f data/training_data.json
          rm -f data/hf_real_data.csv

          git add -A
          git commit -m "Sync from GitHub ${GITHUB_SHA}"

          # The Space history is replaced on every sync, hence --force.
          git remote add hf "https://oauth2:${HF_TOKEN}@huggingface.co/spaces/Mead0w1ark/MicroHS"
          git push --force hf main