|
|
""" |
|
|
π§ LoRA Merger Space |
|
|
FSDP 체ν¬ν¬μΈνΈλ₯Ό λ€μ΄λ°μ λ² μ΄μ€ λͺ¨λΈκ³Ό λ³ν© ν Hubμ μ
λ‘λν©λλ€. |
|
|
""" |
|
|
|
|
|
import logging
import os
import traceback

import gradio as gr
import torch
from huggingface_hub import snapshot_download, HfApi, login
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer
|
|
|
|
|
|
|
|
# Timestamped INFO-level logging so every merge step shows up in the process logs.
logging.basicConfig(level=logging.INFO, format='%(asctime)s | %(message)s')
logger = logging.getLogger(__name__)


# Hub repo the LoRA adapter checkpoint is downloaded from.
SOURCE_REPO = "ongilLabs/IB-Math-Ontology-7B"
# Base model the adapter is merged into.
BASE_MODEL = "Qwen/Qwen2.5-Math-7B-Instruct"
# Hub repo the merged model is uploaded to (currently the same repo as SOURCE_REPO).
OUTPUT_REPO = "ongilLabs/IB-Math-Ontology-7B"
|
|
|
|
|
# Model card written as README.md next to the merged weights before upload.
# NOTE(review): the emoji here and in the log messages below were restored from
# a mis-encoded copy of this file; a few are best-effort reconstructions.
_MODEL_CARD = """---
license: apache-2.0
base_model: Qwen/Qwen2.5-Math-7B-Instruct
tags:
- math
- ib-mathematics
- qwen2
- fine-tuned
- education
- ontology
- chain-of-thought
language:
- en
pipeline_tag: text-generation
---

# IB-Math-Ontology-7B

Fine-tuned Qwen2.5-Math-7B-Instruct for IB Mathematics AA with ontology-based Chain-of-Thought reasoning.

## Features
- 🎯 **IB Math AA Specialized**: Trained on 1,332 ontology-based examples
- 💭 **Chain-of-Thought**: Uses `<think>` tags for step-by-step reasoning
- 📚 **Curriculum-Aligned**: Covers all 5 IB Math AA topics
- ⚠️ **Pitfall Awareness**: Warns about common student mistakes

## Usage

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

model = AutoModelForCausalLM.from_pretrained("ongilLabs/IB-Math-Ontology-7B", torch_dtype="auto", device_map="auto")
tokenizer = AutoTokenizer.from_pretrained("ongilLabs/IB-Math-Ontology-7B")

prompt = "Find the derivative of f(x) = x³ - 2x² + 5x [6 marks]"
messages = [
    {"role": "system", "content": "You are an expert IB Mathematics AA tutor. Think step-by-step and explain concepts clearly."},
    {"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
inputs = tokenizer(text, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=512)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```

## Training Details
- **Base Model**: Qwen2.5-Math-7B-Instruct
- **Method**: LoRA (r=64, alpha=128)
- **Dataset**: 1,332 IB Math Ontology examples with CoT
- **Hardware**: NVIDIA A100 (80GB)
- **Epochs**: 3
- **Precision**: BF16
"""


def _find_adapter_dir(checkpoint_dir):
    """Return the first directory containing ``adapter_config.json``, or None.

    ``<checkpoint_dir>/last-checkpoint`` is tried before ``checkpoint_dir``
    itself.
    """
    candidates = (os.path.join(checkpoint_dir, "last-checkpoint"), checkpoint_dir)
    for candidate in candidates:
        if os.path.exists(os.path.join(candidate, "adapter_config.json")):
            return candidate
    return None


def merge_model(progress=gr.Progress()):
    """Merge the LoRA adapter into the base model and upload the result.

    Steps: download ``SOURCE_REPO`` to ``/tmp/checkpoint``, locate the adapter
    directory, load ``BASE_MODEL`` and its tokenizer in bfloat16, attach and
    merge the adapter, save the merged weights plus a model card under
    ``/tmp/merged_model``, and upload that folder to ``OUTPUT_REPO``.

    Args:
        progress: Gradio progress tracker; it is called with a completion
            fraction and a description at every step.

    Returns:
        The newline-joined log of the run. Any ``Exception`` raised along the
        way is caught, and its message and traceback are appended to that log
        instead of propagating to the caller.
    """
    logs = []

    def log(msg):
        # Mirror every message to the process log and to the text shown in the UI.
        logger.info(msg)
        logs.append(msg)

    try:
        # Read the token once; it is used for both the download and the upload.
        hf_token = os.getenv("HF_TOKEN")
        if not hf_token:
            log("⚠️ HF_TOKEN is not set; Hub access will rely on any locally cached credentials.")

        progress(0.1, desc="📥 Downloading checkpoint...")
        log("📥 Downloading checkpoint from Hub...")
        local_dir = snapshot_download(
            repo_id=SOURCE_REPO,
            local_dir="/tmp/checkpoint",
            token=hf_token,
        )
        log(f" Downloaded to: {local_dir}")

        progress(0.2, desc="🔍 Finding adapter...")
        adapter_path = _find_adapter_dir(local_dir)
        if adapter_path:
            log(f"✅ Found adapter at: {adapter_path}")
        else:
            # List everything that was downloaded so the user can see where
            # the adapter actually lives.
            log("❌ adapter_config.json not found!")
            log("📁 Available files:")
            for root, _dirs, files in os.walk(local_dir):
                for filename in files:
                    rel_path = os.path.relpath(os.path.join(root, filename), local_dir)
                    log(f" - {rel_path}")
            return "\n".join(logs) + "\n\n❌ FAILED: No adapter found"

        progress(0.3, desc="📦 Loading base model...")
        log(f"📦 Loading base model: {BASE_MODEL}")
        log(" This may take 3-5 minutes...")
        base_model = AutoModelForCausalLM.from_pretrained(
            BASE_MODEL,
            torch_dtype=torch.bfloat16,
            device_map="auto",
            trust_remote_code=True,
        )
        log(" ✅ Base model loaded!")

        progress(0.4, desc="📝 Loading tokenizer...")
        log("📝 Loading tokenizer...")
        tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
        log(" ✅ Tokenizer loaded!")

        progress(0.5, desc="🔗 Loading LoRA adapter...")
        log(f"🔗 Loading LoRA adapter from: {adapter_path}")
        model = PeftModel.from_pretrained(
            base_model,
            adapter_path,
            torch_dtype=torch.bfloat16,
        )
        log(" ✅ LoRA adapter loaded!")

        progress(0.6, desc="🔧 Merging LoRA with base model...")
        log("🔧 Merging LoRA weights with base model...")
        model = model.merge_and_unload()
        log(" ✅ Merge complete!")

        progress(0.7, desc="💾 Saving merged model...")
        output_dir = "/tmp/merged_model"
        log(f"💾 Saving merged model to: {output_dir}")
        os.makedirs(output_dir, exist_ok=True)
        model.save_pretrained(output_dir, safe_serialization=True, max_shard_size="5GB")
        tokenizer.save_pretrained(output_dir)

        log(" 📁 Saved files:")
        for filename in sorted(os.listdir(output_dir)):
            size_mb = os.path.getsize(os.path.join(output_dir, filename)) / (1024 * 1024)
            log(f" - {filename}: {size_mb:.1f} MB")

        progress(0.8, desc="📝 Creating model card...")
        log("📝 Creating model card...")
        # The card contains emoji, so the encoding must not depend on the locale.
        with open(os.path.join(output_dir, "README.md"), "w", encoding="utf-8") as readme:
            readme.write(_MODEL_CARD)
        log(" ✅ Model card created!")

        progress(0.9, desc="🚀 Uploading to Hub...")
        log(f"🚀 Uploading to Hub: {OUTPUT_REPO}")
        # NOTE(review): OUTPUT_REPO is the same repo as SOURCE_REPO, so the
        # adapter files stay next to the merged weights after this upload —
        # confirm downstream loaders pick up the merged weights.
        api = HfApi(token=hf_token)
        api.upload_folder(
            folder_path=output_dir,
            repo_id=OUTPUT_REPO,
            commit_message="✨ Merged LoRA with base model - Production ready",
        )
        log(f" ✅ Uploaded to: https://huggingface.co/{OUTPUT_REPO}")

        progress(1.0, desc="🎉 Complete!")
        log("")
        log("=" * 50)
        log("🎉 SUCCESS! Model merged and uploaded!")
        log("=" * 50)
        log(f"🔗 Model URL: https://huggingface.co/{OUTPUT_REPO}")

        return "\n".join(logs)

    except Exception as e:
        # UI boundary: report the failure in the returned log rather than
        # letting the click handler raise.
        log(f"\n❌ ERROR: {e}")
        log(traceback.format_exc())
        return "\n".join(logs)
|
|
|
|
|
|
|
|
def create_ui():
    """Build the Gradio Blocks UI for the merger.

    The page shows a description of the merge, a start button wired to
    ``merge_model``, and a textbox that receives the log text it returns.

    Returns:
        The ``gr.Blocks`` app; the caller is responsible for launching it.
    """
    with gr.Blocks(title="LoRA Merger") as app:
        # Repo names come from the module constants so the page cannot drift
        # from what merge_model actually downloads and uploads.
        gr.Markdown(f"""
        # 🔧 IB-Math-Ontology LoRA Merger

        This Space merges the LoRA adapter with the base model.

        **Source**: `{SOURCE_REPO}` (LoRA adapter)
        **Base**: `{BASE_MODEL}`
        **Output**: `{OUTPUT_REPO}` (merged model)

        **Steps:**
        1. Download LoRA checkpoint from Hub
        2. Load base model (Qwen2.5-Math-7B-Instruct)
        3. Load LoRA adapter
        4. Merge LoRA weights into base model
        5. Upload merged model to Hub
        """)

        with gr.Row():
            merge_btn = gr.Button("🚀 Start Merge", variant="primary", scale=2)

        output = gr.Textbox(
            label="Logs",
            lines=30,
            max_lines=50,
        )

        # merge_model takes no UI inputs; its returned log fills the textbox.
        merge_btn.click(fn=merge_model, outputs=output)

        gr.Markdown("""
        ---
        **Note**: This process takes about 10-15 minutes. Make sure you have enough GPU memory.
        """)

    return app
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Build the UI and start the Gradio server only when run as a script.
    app = create_ui()
    app.launch()
|
|
|
|
|
|