# Workflow that runs the data-collection, tokenizer-training, model-training
# and evaluation scripts in sequence, then uploads the contents of
# outputs/models/ as a build artifact named "model-weights".
name: Train Bengali-Code LLM Model

on:
  schedule:
    # Every day at 00:00 (GitHub evaluates scheduled crons in UTC).
    - cron: '0 0 * * *'
  # Also allow the workflow to be started manually.
  workflow_dispatch:

jobs:
  train:
    # NOTE(review): this is a standard GitHub-hosted runner; confirm that
    # scripts/model_trainer.py fits within its resources and job time limit.
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          # Quoted so YAML keeps the string "3.10" instead of the float 3.1.
          python-version: '3.10'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install transformers datasets sentencepiece accelerate torch wandb

      - name: Data Collection
        run: python scripts/data_collector.py
        env:
          # Exposed to this step only; the value comes from repository secrets.
          HUGGINGFACE_TOKEN: ${{ secrets.HUGGINGFACE_TOKEN }}

      - name: Train Tokenizer
        run: python scripts/tokenizer_trainer.py

      - name: Train Model
        run: python scripts/model_trainer.py
        env:
          # Exposed to this step only; the value comes from repository secrets.
          WANDB_API_KEY: ${{ secrets.WANDB_API_KEY }}

      - name: Evaluate Model
        run: python scripts/model_evaluator.py

      - name: Upload Model Artifacts
        # v3 of upload-artifact has been retired by GitHub and no longer runs
        # on github.com; v4 is the supported major and accepts the same
        # `name` / `path` inputs used here.
        uses: actions/upload-artifact@v4
        with:
          name: model-weights
          path: outputs/models/