---
# GitHub Actions workflow: daily job-board crawl.
# Flow: restore the SQLite DB from a Hugging Face Dataset (so data accumulates
# across runs), run the crawlers, then push the updated DB back so the
# HF Space automatically serves the latest data.
name: Job Crawl

on:
  schedule:
    - cron: "0 1 * * *"  # daily at 10:00 KST (UTC+9 -> 01:00 UTC)
  workflow_dispatch:  # allow manual runs

jobs:
  crawl:
    runs-on: ubuntu-latest
    timeout-minutes: 30
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"
          cache: pip

      - name: Install dependencies
        run: pip install -r requirements.txt huggingface_hub

      # Restore the previous DB from the HF Dataset so crawled data is
      # preserved between runs instead of starting from an empty database.
      - name: Restore DB from HF Dataset
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
          HF_DATASET_REPO: ${{ secrets.HF_DATASET_REPO }}
        run: |
          python - <<'EOF'
          import os, shutil
          from huggingface_hub import hf_hub_download
          repo = os.environ["HF_DATASET_REPO"]
          token = os.environ["HF_TOKEN"]
          try:
              path = hf_hub_download(
                  repo_id=repo, filename="jobsonar.db",
                  repo_type="dataset", token=token,
              )
              os.makedirs("data", exist_ok=True)
              shutil.copy(path, "data/jobsonar.db")
              print("DB restored")
          except Exception as e:
              # A missing file is expected on the very first run.
              print(f"No existing DB (normal on first run): {e}")
          EOF

      - name: Run crawlers
        run: python -m crawler.run --source all --max-pages 15
        env:
          PYTHONPATH: ${{ github.workspace }}

      # Push the updated DB back to the HF Dataset -> the Space picks up
      # the latest data automatically.
      - name: Push DB to HF Dataset
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
          HF_DATASET_REPO: ${{ secrets.HF_DATASET_REPO }}
        run: |
          python - <<'EOF'
          import os
          from huggingface_hub import HfApi
          api = HfApi(token=os.environ["HF_TOKEN"])
          api.upload_file(
              path_or_fileobj="data/jobsonar.db",
              path_in_repo="jobsonar.db",
              repo_id=os.environ["HF_DATASET_REPO"],
              repo_type="dataset",
              # was "weekly" — corrected to match the daily cron schedule
              commit_message="chore: daily DB update",
          )
          print("DB uploaded to HF Dataset")
          EOF