Text Generation
PEFT
Safetensors
English
code
gis
geospatial
geopandas
shapely
rasterio
osmnx
folium
lora
trl
sft
conversational
Instructions to use RhodWeo/GIS-Coder-7B with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use RhodWeo/GIS-Coder-7B with PEFT:
from peft import PeftModel
from transformers import AutoModelForCausalLM

base_model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-Coder-0.5B-Instruct")
model = PeftModel.from_pretrained(base_model, "RhodWeo/GIS-Coder-7B")
- Notebooks
- Google Colab
- Kaggle
File size: 9,005 Bytes
"""
GIS-Coder 7B: Production QLoRA SFT Training Script
====================================================
Fine-tunes Qwen2.5-Coder-7B-Instruct for GIS code generation.
Hardware requirements:
- Minimum: 1x A10G (24GB) or 1x RTX 4090 (24GB)
- Recommended: 1x A100 (80GB) for faster training + larger batch
- Also works on: H100, L40S, RTX 3090
Training recipe based on:
- CFD fine-tuning (arxiv:2504.09602): QLoRA, r=16, 88.7% accuracy on domain tasks
- MapCoder-Lite (arxiv:2509.17489): Qwen2.5-Coder-7B as best backbone for code LoRA
- LoRA Without Regret: target all-linear layers, lr=2e-4 for LoRA
Usage:
# Single GPU
python train_7b.py
# Multi-GPU with accelerate
accelerate launch --num_processes 2 train_7b.py
# With custom settings
python train_7b.py --epochs 5 --lr 1e-4 --lora_r 32 --max_length 4096
"""
import os
import argparse
import torch
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import LoraConfig, prepare_model_for_kbit_training
from trl import SFTConfig, SFTTrainer
def parse_args(argv=None):
    """Parse the command-line options for a GIS-Coder 7B training run.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) the process command line (``sys.argv[1:]``) is
            used, which is what the script entry point relies on.

    Returns:
        An ``argparse.Namespace`` holding the model/dataset identifiers,
        training hyperparameters, LoRA settings, quantization/attention
        switches and Trackio tracking options.
    """
    parser = argparse.ArgumentParser(description="Train GIS-Coder 7B")

    # Model, data and output locations
    parser.add_argument("--model_id", type=str, default="Qwen/Qwen2.5-Coder-7B-Instruct")
    parser.add_argument("--dataset_id", type=str, default="RhodWeo/gis-code-instructions")
    parser.add_argument("--hub_model_id", type=str, default="RhodWeo/GIS-Coder-7B")
    parser.add_argument("--output_dir", type=str, default="./gis-coder-7b-output")

    # Training hyperparameters
    parser.add_argument("--epochs", type=int, default=3)
    parser.add_argument("--lr", type=float, default=2e-4, help="Learning rate (2e-4 for LoRA)")
    parser.add_argument("--batch_size", type=int, default=2, help="Per-device batch size")
    parser.add_argument("--grad_accum", type=int, default=8, help="Gradient accumulation steps")
    parser.add_argument("--max_length", type=int, default=4096, help="Max sequence length")
    parser.add_argument("--warmup_ratio", type=float, default=0.1)
    parser.add_argument("--weight_decay", type=float, default=0.01)
    parser.add_argument("--scheduler", type=str, default="cosine")

    # LoRA hyperparameters
    parser.add_argument("--lora_r", type=int, default=32, help="LoRA rank")
    parser.add_argument("--lora_alpha", type=int, default=16, help="LoRA alpha")
    parser.add_argument("--lora_dropout", type=float, default=0.05)
    parser.add_argument(
        "--target_modules",
        type=str,
        default="all-linear",
        help="Target modules (all-linear or comma-separated list)",
    )

    # Quantization / attention backend.
    # The non-quantized path loads the weights in bfloat16, so the help text
    # says bf16 rather than fp16.
    parser.add_argument(
        "--no_quantize",
        action="store_true",
        help="Disable 4-bit quantization (load weights in bf16)",
    )
    parser.add_argument("--use_flash_attn", action="store_true", help="Use Flash Attention 2")

    # Tracking
    parser.add_argument("--use_trackio", action="store_true", help="Enable Trackio monitoring")
    parser.add_argument("--trackio_project", type=str, default="gis-coder-7b")

    return parser.parse_args(argv)
def main():
    """Run the LoRA/QLoRA supervised fine-tuning pipeline end to end.

    Steps, in order:
      1. Parse CLI options and, if requested, start a Trackio run.
      2. Load the training split from ``data/train.jsonl`` of the dataset repo.
      3. Load the base model (4-bit NF4 unless ``--no_quantize``) and tokenizer.
      4. Build the LoRA config and the ``SFTConfig``.
      5. Train with ``SFTTrainer``, save to ``<output_dir>/final`` and push
         the result to the Hub.

    The function takes no arguments and returns ``None``; all configuration
    comes from the command line via ``parse_args()``.
    """
    args = parse_args()

    # Number of training processes (set by `accelerate launch` / torchrun).
    world_size = int(os.environ.get("WORLD_SIZE", "1"))

    # --- Trackio (optional) ------------------------------------------------
    if args.use_trackio:
        import trackio

        trackio.init(
            project=args.trackio_project,
            config=vars(args),
        )

    # --- Dataset -----------------------------------------------------------
    print(f"Loading dataset: {args.dataset_id}")
    dataset = load_dataset(args.dataset_id, data_files="data/train.jsonl", split="train")
    print(f" {len(dataset)} examples, columns: {dataset.column_names}")

    # --- Model -------------------------------------------------------------
    print(f"Loading model: {args.model_id}")
    model_kwargs = {
        "trust_remote_code": True,
        "attn_implementation": "flash_attention_2" if args.use_flash_attn else "eager",
        # Both the quantized and the non-quantized path use bf16.
        "dtype": torch.bfloat16,
    }
    if not args.no_quantize:
        model_kwargs["quantization_config"] = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_use_double_quant=True,
            bnb_4bit_compute_dtype=torch.bfloat16,
        )

    # With several data-parallel processes, each rank must hold a full copy of
    # the model on its own GPU; "auto" would instead spread one copy across
    # every visible device in every process. Single-process runs keep "auto".
    if world_size > 1:
        device_map = {"": int(os.environ.get("LOCAL_RANK", "0"))}
    else:
        device_map = "auto"

    model = AutoModelForCausalLM.from_pretrained(
        args.model_id,
        device_map=device_map,
        **model_kwargs,
    )

    tokenizer = AutoTokenizer.from_pretrained(args.model_id, trust_remote_code=True)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    # Keep the model's pad id in sync with whatever the tokenizer pads with
    # (equals the EOS id when the fallback above was taken).
    model.config.pad_token_id = tokenizer.pad_token_id

    if not args.no_quantize:
        model = prepare_model_for_kbit_training(model)
    print(f" Parameters: {model.num_parameters()/1e9:.2f}B")

    # --- LoRA --------------------------------------------------------------
    target = args.target_modules
    if target != "all-linear":
        # Comma-separated module names -> list expected by LoraConfig.
        target = target.split(",")
    peft_config = LoraConfig(
        r=args.lora_r,
        lora_alpha=args.lora_alpha,
        target_modules=target,
        lora_dropout=args.lora_dropout,
        bias="none",
        task_type="CAUSAL_LM",
    )
    print(f" LoRA: r={args.lora_r}, alpha={args.lora_alpha}, targets={target}")

    # --- Training config ---------------------------------------------------
    training_args = SFTConfig(
        output_dir=args.output_dir,
        num_train_epochs=args.epochs,
        per_device_train_batch_size=args.batch_size,
        gradient_accumulation_steps=args.grad_accum,
        learning_rate=args.lr,
        lr_scheduler_type=args.scheduler,
        warmup_ratio=args.warmup_ratio,
        weight_decay=args.weight_decay,
        gradient_checkpointing=True,
        bf16=True,
        max_length=args.max_length,
        logging_steps=1,
        logging_first_step=True,
        logging_strategy="steps",
        disable_tqdm=True,
        report_to="trackio" if args.use_trackio else "none",
        save_strategy="epoch",
        save_total_limit=3,
        push_to_hub=True,
        hub_model_id=args.hub_model_id,
        hub_strategy="every_save",
        dataloader_num_workers=4,
        seed=42,
    )

    # --- Trainer -----------------------------------------------------------
    trainer = SFTTrainer(
        model=model,
        processing_class=tokenizer,
        args=training_args,
        train_dataset=dataset,
        peft_config=peft_config,
    )
    # Count on the trainer's model: that is the one carrying the LoRA adapters.
    trainable = sum(p.numel() for p in trainer.model.parameters() if p.requires_grad)
    total = sum(p.numel() for p in trainer.model.parameters())
    print(f" Trainable: {trainable:,} ({trainable/total*100:.2f}%)")

    # --- Train -------------------------------------------------------------
    # Effective batch = per-device batch x accumulation steps x processes.
    eff_bs = args.batch_size * args.grad_accum * world_size
    print(f"\n{'='*60}")
    print(f"TRAINING: {args.model_id}")
    print(f" Dataset: {len(dataset)} examples")
    print(f" Method: {'QLoRA' if not args.no_quantize else 'LoRA'} (r={args.lora_r})")
    print(f" LR: {args.lr}, Epochs: {args.epochs}, Eff. batch: {eff_bs}")
    print(f" Max length: {args.max_length}")
    print(f" Push to: {args.hub_model_id}")
    print(f"{'='*60}\n")
    result = trainer.train()

    # --- Save --------------------------------------------------------------
    print("\nSaving final model...")
    trainer.save_model(os.path.join(args.output_dir, "final"))
    trainer.push_to_hub(commit_message="GIS-Coder 7B — final after training")

    m = result.metrics
    print(f"\nDone! Loss: {m.get('train_loss','?')}, Time: {m.get('train_runtime',0):.0f}s")
    print(f"Model: https://huggingface.co/{args.hub_model_id}")

    if args.use_trackio:
        import trackio

        trackio.log({"final_loss": m.get("train_loss", 0), "runtime": m.get("train_runtime", 0)})
        trackio.finish()
# Script entry point: start training only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|