# Workflow that runs the news scraping script and the Neo4j graph loader,
# then commits any newly produced Excel files back to the repository.
name: Daily GraphRAG Update Pipeline

on:
  # The daily cron schedule is intentionally commented out (disabled), so the
  # workflow never starts on its own.
  # NOTE(review): presumably disabled to avoid recurring API usage costs
  # (the job consumes OPENAI_API_KEY) -- confirm before re-enabling.
  # schedule:
  #   # Every day at 01:00 KST = 16:00 UTC
  #   - cron: '0 16 * * *'
  # Manual runs only: trigger from the GitHub Actions web UI when needed.
  workflow_dispatch:

# Write access to repository contents is required by the final step, which
# pushes a commit.
permissions:
  contents: write

jobs:
  update-pipeline:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout Source Code
        uses: actions/checkout@v4
        with:
          # 0 = fetch the full history instead of a shallow clone.
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          # Quoted so YAML keeps it the string "3.10" rather than the float 3.1.
          python-version: '3.10'
          cache: 'pip'

      - name: Install Dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt

      - name: Run Scrapping & Neo4j Incremental Load
        # Credentials come from repository secrets and are exposed to this
        # step only, as environment variables.
        env:
          NEO4J_URI: ${{ secrets.NEO4J_URI }}
          NEO4J_CLIENT_ID: ${{ secrets.NEO4J_CLIENT_ID }}
          NEO4J_CLIENT_SECRET: ${{ secrets.NEO4J_CLIENT_SECRET }}
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
        # The scraper runs first, then the graph loader.
        # NOTE(review): the loader presumably consumes the scraper's output,
        # so the order matters -- confirm against the scripts.
        run: |
          python3 src/graphBuilder/scrapping/finScrapping.py
          python3 src/graphBuilder/neo4j/finGraph.py

      - name: Commit and Push New Excel Data
        run: |
          git config --global user.name "github-actions[bot]"
          git config --global user.email "github-actions[bot]@users.noreply.github.com"
          # Stage the Excel files present in the scraping directory. With
          # nullglob an unmatched pattern expands to nothing, so a run that
          # produced no files skips `git add` instead of aborting the step
          # with "pathspec did not match any files".
          shopt -s nullglob
          article_files=(src/graphBuilder/scrapping/Articles_*.xlsx)
          shopt -u nullglob
          if [ "${#article_files[@]}" -gt 0 ]; then
            git add -- "${article_files[@]}"
          fi
          # Commit and push only when something was actually staged.
          if git diff --cached --quiet; then
            echo "No new news articles found to update today."
          else
            git commit -m "chore: auto-update crawled news articles $(date +'%Y-%m-%d')"
            # NOTE(review): this always pushes the local `main` ref; a manual
            # run dispatched from another branch will fail here -- confirm
            # that dispatching from main only is the intended usage.
            git push origin main
          fi