insureos-models / scripts /train_all.sh
piyushptiwari's picture
Upload folder using huggingface_hub
2cc32a5 verified
#!/usr/bin/env bash
# -E makes the ERR trap below fire inside functions as well.
set -Eeuo pipefail
# ============================================================
# InsureOS Models — Train All Models Sequentially
# Designed for 16GB GPU — runs one model at a time
#
# Usage: scripts/train_all.sh   (takes no arguments)
#
# Runs every phase in order from the repository root (the parent
# of this script's directory) inside the .venv virtualenv found
# there. Any failing phase aborts the whole pipeline.
# ============================================================

# Report where the pipeline stopped; a silent exit after a long run
# is otherwise hard to diagnose.
trap 'printf "train_all.sh: command failed (exit %d) at line %d: %s\n" "$?" "$LINENO" "$BASH_COMMAND" >&2' ERR

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
ROOT_DIR="$(dirname "$SCRIPT_DIR")"
readonly SCRIPT_DIR ROOT_DIR

# Horizontal rule used by the opening and closing banners.
readonly RULE="============================================"

# Print an error message to stderr and abort.
die() {
  printf 'train_all.sh: %s\n' "$*" >&2
  exit 1
}

# Print a blank line followed by a "=== <title> ===" phase header.
# Arguments: $1 - phase title
phase_banner() {
  printf '\n=== %s ===\n' "$1"
}

# Activate the project's virtualenv (ROOT_DIR/.venv).
activate_venv() {
  local activate_script="${ROOT_DIR}/.venv/bin/activate"
  [[ -f "$activate_script" ]] \
    || die "virtualenv not found: ${activate_script} (create .venv in ${ROOT_DIR} first)"
  # Activation scripts may read variables that are unset in a
  # non-interactive shell; relax nounset only while sourcing.
  set +u
  # shellcheck source=/dev/null
  source "$activate_script"
  set -u
}

main() {
  cd "$ROOT_DIR"
  activate_venv

  printf '%s\n' "$RULE"
  printf '%s\n' " InsureOS Models — Full Training Pipeline"
  printf ' %s\n' "$(date)"
  printf '%s\n' "$RULE"

  # Phase 0: Generate data
  phase_banner "PHASE 0: Generating synthetic training data"
  python -m data.generate_all

  # Phase 1: QLoRA fine-tuning (InsureLLM-8B)
  phase_banner "PHASE 1: QLoRA Fine-Tuning — Qwen3-8B"
  python -m training.qlora_finetune \
    --base-model Qwen/Qwen3-8B \
    --dataset data/output/insurance_sft_10k.jsonl \
    --output-dir outputs/insurellm-8b \
    --epochs 3 \
    --batch-size 2 \
    --gradient-accumulation 8 \
    --learning-rate 2e-4 \
    --lora-rank 64 \
    --lora-alpha 128

  # Phase 2: DPO reinforcement learning
  # --adapter-path is the output directory written by phase 1.
  phase_banner "PHASE 2: DPO Reinforcement Learning"
  python -m training.dpo_train \
    --base-model Qwen/Qwen3-8B \
    --adapter-path outputs/insurellm-8b \
    --dataset data/output/insurance_dpo_5k.jsonl \
    --output-dir outputs/insurellm-8b-dpo \
    --epochs 1 \
    --batch-size 1 \
    --gradient-accumulation 16 \
    --learning-rate 5e-5 \
    --beta 0.1

  # Phase 3: Distillation to smaller model
  # --teacher-adapter is the output directory written by phase 2.
  phase_banner "PHASE 3: Knowledge Distillation — Qwen3-4B"
  python -m training.distill \
    --teacher-model Qwen/Qwen3-8B \
    --teacher-adapter outputs/insurellm-8b-dpo \
    --student-model Qwen/Qwen3-4B \
    --dataset data/output/insurance_sft_10k.jsonl \
    --output-dir outputs/insurellm-4b \
    --epochs 2 \
    --batch-size 2 \
    --gradient-accumulation 8

  # Phase 4: Fraud detection model
  phase_banner "PHASE 4: Fraud Detection Model"
  python -m training.fraud_model \
    --dataset data/output/claims_tabular_50k.csv \
    --output-dir outputs/fraudnet

  # Phase 5: Pricing GLM (same tabular CSV as phase 4)
  phase_banner "PHASE 5: Pricing GLM"
  python -m training.pricing_glm \
    --dataset data/output/claims_tabular_50k.csv \
    --output-dir outputs/pricing-glm

  # Phase 6: Document classifier
  phase_banner "PHASE 6: Document Classifier"
  python -m training.doc_classifier \
    --dataset data/output/documents_10k.jsonl \
    --output-dir outputs/doc-classifier \
    --epochs 5 \
    --batch-size 16

  # Phase 7: Insurance NER
  phase_banner "PHASE 7: Insurance NER"
  python -m training.ner_model \
    --dataset data/output/entities_8k.jsonl \
    --output-dir outputs/insure-ner \
    --epochs 5 \
    --batch-size 16

  # Phase 8: Evaluation
  phase_banner "PHASE 8: Running Evaluation Suite"
  python -m evaluation.run_eval --all

  printf '\n%s\n' "$RULE"
  printf ' Training Complete! %s\n' "$(date)"
  printf '%s\n' " Models saved in outputs/"
  printf '%s\n' "$RULE"
  ls -la outputs/
}

main "$@"