musaw
sync(hf): snapshot origin main after resource audit cycle
194828a
name: Resource Sync

# Triggers: a scheduled run plus a manual run with optional overrides.
on:
  schedule:
    # Once a day at minute 0 of hour 4.
    - cron: "0 4 * * *"
  workflow_dispatch:
    inputs:
      limit:
        description: "Candidate fetch limit per source"
        required: false
        type: string
        default: "20"
      max_promotions:
        # Left empty, the promotion step runs without a cap.
        description: "Optional max number of candidate promotions"
        required: false
        type: string
        default: ""
      enforce_pashto_relevance:
        description: "Also remove existing entries without Pashto evidence"
        required: false
        # Declared as a boolean so the dispatch form offers a checkbox rather
        # than free text; the workflow only acts on the exact value "true",
        # so a typo such as "True" or "yes" used to disable enforcement
        # silently. `github.event.inputs` still delivers the value as the
        # string "true" or "false".
        type: boolean
        default: true
# Scopes granted to the workflow token.
permissions:
  # The "Create review PR" step commits to the bot/resource-sync branch.
  contents: write
  # The "Create review PR" step opens and labels the pull request.
  pull-requests: write
# Workflow-wide settings consumed by the shell steps. When a dispatch input is
# absent or empty (for example on scheduled runs), each expression falls back
# to the literal after `||`.
env:
  RESOURCE_LIMIT: "${{ github.event.inputs.limit || '20' }}"
  MAX_PROMOTIONS: "${{ github.event.inputs.max_promotions || '' }}"
  ENFORCE_PASHTO_RELEVANCE: "${{ github.event.inputs.enforce_pashto_relevance || 'true' }}"
jobs:
  # Single job: review the existing catalog, sync new candidates, promote the
  # valid ones, validate and regenerate derived files, then open a review PR.
  sync:
    runs-on: ubuntu-latest
    # Every run writes to the same fixed PR branch (bot/resource-sync), so a
    # manual dispatch overlapping a scheduled run would race on that branch.
    # Runs are serialized instead; an in-flight sync is allowed to finish.
    concurrency:
      group: resource-sync
      cancel-in-progress: false
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          # Quoted so the version stays a string.
          python-version: "3.11"

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          python -m pip install -e ".[dev]"

      # ENFORCE_PASHTO_RELEVANCE, RESOURCE_LIMIT and MAX_PROMOTIONS come from
      # the workflow-level `env` mapping and are only ever expanded as quoted
      # shell variables, never interpolated into the script text.
      - name: Review existing resources for stale or low-value entries
        run: |
          if [ "${ENFORCE_PASHTO_RELEVANCE}" = "true" ]; then
            python scripts/review_existing_resources.py --enforce-pashto-relevance
          else
            python scripts/review_existing_resources.py
          fi

      - name: Sync candidate resources
        run: python scripts/sync_resources.py --limit "${RESOURCE_LIMIT}"

      # The cap is passed only when MAX_PROMOTIONS is non-empty.
      - name: Auto-promote valid candidates
        run: |
          if [ -n "${MAX_PROMOTIONS}" ]; then
            python scripts/promote_candidates.py --max-promotions "${MAX_PROMOTIONS}"
          else
            python scripts/promote_candidates.py
          fi

      - name: Validate catalog
        run: python scripts/validate_resource_catalog.py

      - name: Generate resource views
        run: python scripts/generate_resource_views.py

      # Looks up each label and creates it only on a 404; any other API error
      # is rethrown and fails the step.
      - name: Ensure labels exist
        uses: actions/github-script@v7
        with:
          script: |
            const labels = [
              { name: "resource-update", color: "0e8a16", description: "Automated resource catalog updates" },
              { name: "needs-review", color: "fbca04", description: "Needs maintainer review before merge" }
            ];
            for (const label of labels) {
              try {
                await github.rest.issues.getLabel({
                  owner: context.repo.owner,
                  repo: context.repo.repo,
                  name: label.name
                });
              } catch (error) {
                if (error.status === 404) {
                  await github.rest.issues.createLabel({
                    owner: context.repo.owner,
                    repo: context.repo.repo,
                    name: label.name,
                    color: label.color,
                    description: label.description
                  });
                } else {
                  throw error;
                }
              }
            }

      # Only the files listed under `add-paths` are committed to the PR branch.
      - name: Create review PR
        uses: peter-evans/create-pull-request@v6
        with:
          branch: bot/resource-sync
          delete-branch: true
          commit-message: "chore(resources): sync candidate feed and auto-promote valid entries"
          title: "chore(resources): sync and auto-promote Pashto resources"
          body: |
            Automated daily resource sync.
            Scope:
            - Reviews existing catalog entries and removes stale ones only with strong logged reasons
            - Updates `resources/catalog/pending_candidates.json`
            - Auto-promotes valid non-duplicate candidates into `resources/catalog/resources.json`
            - Regenerates resource indexes and search payload
          labels: |
            resource-update
            needs-review
          add-paths: |
            resources/catalog/pending_candidates.json
            resources/catalog/resources.json
            resources/catalog/removal_log.json
            resources/README.md
            resources/datasets/README.md
            resources/models/README.md
            resources/benchmarks/README.md
            resources/tools/README.md
            resources/papers/README.md
            resources/projects/README.md
            resources/codes/README.md
            docs/search/resources.json