medical-gemma-3n
Collection
Medical Gemma-3N is a specialized version of Google's Gemma-3N-4B model, fine-tuned specifically for emergency medical assistance and offline healthcare. • 4 items • Updated
• 1
Efficient LoRA adapters for fine-tuning Gemma-3N-4B into a specialized emergency medical assistant. These lightweight adapters (76.9MB) transform the base model into a medical expert while maintaining the original model's general capabilities.
pip install torch transformers peft accelerate unsloth
# Load the base Gemma-3N model and attach the medical LoRA adapters on top of it.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import torch

# Load base model
base_model_name = "unsloth/gemma-3n-E4B-it"
adapter_name = "ericrisco/medical-gemma-3n-lora"

tokenizer = AutoTokenizer.from_pretrained(base_model_name)
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    torch_dtype=torch.float16,  # half precision to reduce memory footprint
    device_map="auto",          # spread layers across available devices automatically
    load_in_4bit=True  # Optional: for memory efficiency
    # NOTE(review): passing load_in_4bit directly requires bitsandbytes; newer
    # transformers versions prefer a BitsAndBytesConfig -- confirm against the
    # installed transformers version.
)

# Load LoRA adapters (wraps the base model; forward passes now include the adapters)
model = PeftModel.from_pretrained(base_model, adapter_name)
# Medical consultation
def medical_consultation(question):
    """Generate a medical answer for *question* with the LoRA-adapted model.

    Builds a Gemma chat-format prompt, samples up to 512 new tokens, and
    returns only the newly generated text (the model's answer).

    Relies on the module-level ``model`` and ``tokenizer``.
    """
    prompt = f"<start_of_turn>user\n{question}<end_of_turn>\n<start_of_turn>model\n"
    inputs = tokenizer.encode(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            inputs,
            max_new_tokens=512,
            temperature=0.7,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id
        )
    # BUG FIX: decoding with skip_special_tokens=True strips the
    # <start_of_turn>/<end_of_turn> markers, so splitting the decoded string on
    # those markers never matched and the whole transcript (prompt included)
    # was returned. Slice off the prompt tokens and decode only the
    # generated continuation instead.
    generated_tokens = outputs[0][inputs.shape[-1]:]
    return tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()
# Example usage: one-off consultation printed to stdout.
question = "What are the signs of a heart attack and what should I do?"
answer = medical_consultation(question)
print(answer)
from unsloth import FastModel
from unsloth.chat_templates import get_chat_template

# Load model with Unsloth optimization.
# FastModel loads the adapter repo directly; Unsloth resolves the base model.
model, tokenizer = FastModel.from_pretrained(
    model_name="ericrisco/medical-gemma-3n-lora",
    max_seq_length=1024,  # matches the sequence length used during training
    dtype=None,           # None = let the library auto-select -- confirm per hardware
    load_in_4bit=True,
)

# Apply chat template so apply_chat_template emits Gemma-3 turn markers.
tokenizer = get_chat_template(
    tokenizer,
    chat_template="gemma-3",
)
# Faster inference with Unsloth
def fast_medical_response(question):
    """Answer *question* using the Unsloth-loaded model via its chat template.

    Formats the question as a single user turn, generates up to 512 tokens,
    and returns only the text produced after the prompt.
    """
    chat = [{"role": "user", "content": question}]
    input_ids = tokenizer.apply_chat_template(
        chat,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(model.device)

    generated = model.generate(
        input_ids=input_ids,
        max_new_tokens=512,
        temperature=0.7,
        do_sample=True,
    )

    # Skip the prompt tokens; decode only the model's continuation.
    prompt_length = input_ids.shape[-1]
    return tokenizer.decode(generated[0][prompt_length:], skip_special_tokens=True)
# Example usage: single question through the Unsloth fast path.
response = fast_medical_response("How do I treat severe bleeding?")
print(response)
# Training configuration used
# FIX: the peft class is spelled `LoraConfig`; `LoRAConfig` raises NameError.
LoraConfig(
    r=8,                    # Rank: balance between efficiency and capacity
    lora_alpha=8,           # Scaling parameter
    lora_dropout=0.0,       # No dropout for stability
    bias="none",            # No bias adaptation
    task_type="CAUSAL_LM",  # Causal language modeling
    target_modules=[        # Adapted components
        "q_proj", "k_proj", "v_proj", "o_proj",  # Attention
        "gate_proj", "up_proj", "down_proj",     # MLP
    ],
)
# Representative consultations across emergency-medicine topics; each call
# overwrites `question`/`response`, so only the final pair remains bound.
for question in [
    "Someone is choking, what should I do immediately?",                   # First aid guidance
    "What are the steps for CPR on an adult?",                             # Cardiac emergency
    "How do I assess if someone has a spinal injury?",                     # Trauma assessment
    "What could cause chest pain and shortness of breath?",                # Symptom analysis
    "What are the contraindications for aspirin?",                         # Medication information
    "When should someone seek immediate medical attention for headaches?", # Diagnostic procedures
]:
    response = medical_consultation(question)
Evaluated on lextale/FirstAidInstructionsDataset:
| Model | Accuracy | Parameters | Size |
|---|---|---|---|
| Base Gemma-3N | 36.15% | 7.8B | 15GB |
| + Medical LoRA | 71.54% | +19.2M | +76.9MB |
| Improvement | +35.39% | +0.24% | +0.5% |
# FIX: this snippet used AutoModelForCausalLM without importing it.
from transformers import AutoModelForCausalLM
from peft import PeftModel

# Load base model once; adapters share it.
base_model = AutoModelForCausalLM.from_pretrained("unsloth/gemma-3n-E4B-it")

# Switch between different adapters
medical_model = PeftModel.from_pretrained(base_model, "ericrisco/medical-gemma-3n-lora")

# You can load other task-specific adapters on the same base
# legal_model = PeftModel.from_pretrained(base_model, "other/legal-adapters")
# code_model = PeftModel.from_pretrained(base_model, "other/coding-adapters")
# FIX: this snippet used AutoModelForCausalLM and `tokenizer` without
# importing/loading them; made the snippet self-contained.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# Load and merge for standalone deployment
base_model = AutoModelForCausalLM.from_pretrained("unsloth/gemma-3n-E4B-it")
tokenizer = AutoTokenizer.from_pretrained("unsloth/gemma-3n-E4B-it")
model = PeftModel.from_pretrained(base_model, "ericrisco/medical-gemma-3n-lora")

# Merge adapters into base model so deployment no longer needs peft at load time
merged_model = model.merge_and_unload()

# Save merged model (weights and tokenizer side by side)
merged_model.save_pretrained("./medical-gemma-merged")
tokenizer.save_pretrained("./medical-gemma-merged")

# Push to hub
merged_model.push_to_hub("your-username/medical-gemma-merged")
# Training parameters used (trl SFTConfig for the supervised fine-tuning run).
SFTConfig(
    output_dir="./medical-gemma-lora",
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,  # effective batch size of 16 per device
    warmup_steps=5,
    num_train_epochs=1,
    learning_rate=2e-5,
    logging_steps=10,
    save_steps=500,
    optim="adamw_8bit",             # 8-bit optimizer states to save memory
    weight_decay=0.01,
    lr_scheduler_type="linear",
    max_seq_length=1024,
)
@misc{medical_gemma_lora,
title={Medical Gemma-3N LoRA Adapters: Efficient Medical AI Fine-tuning},
author={Eric Risco},
year={2025},
url={https://huggingface.co/ericrisco/medical-gemma-3n-lora},
note={LoRA adapters for emergency medical assistance}
}
This model is released under the Gemma License. See LICENSE file for details.