""" πŸ”§ LoRA Merger Space FSDP 체크포인트λ₯Ό λ‹€μš΄λ°›μ•„ 베이슀 λͺ¨λΈκ³Ό 병합 ν›„ Hub에 μ—…λ‘œλ“œν•©λ‹ˆλ‹€. """ import os import torch import gradio as gr from transformers import AutoModelForCausalLM, AutoTokenizer from peft import PeftModel from huggingface_hub import snapshot_download, HfApi, login import logging # Logging logging.basicConfig(level=logging.INFO, format='%(asctime)s | %(message)s') logger = logging.getLogger(__name__) # Configuration SOURCE_REPO = "ongilLabs/IB-Math-Ontology-7B" # LoRA adapter BASE_MODEL = "Qwen/Qwen2.5-Math-7B-Instruct" OUTPUT_REPO = "ongilLabs/IB-Math-Ontology-7B" # Merged model output def merge_model(progress=gr.Progress()): """메인 병합 ν•¨μˆ˜""" logs = [] def log(msg): logger.info(msg) logs.append(msg) return "\n".join(logs) try: # Step 1: Download checkpoint progress(0.1, desc="πŸ“₯ Downloading checkpoint...") log("πŸ“₯ Downloading checkpoint from Hub...") local_dir = snapshot_download( repo_id=SOURCE_REPO, local_dir="/tmp/checkpoint", token=os.getenv("HF_TOKEN") ) log(f" Downloaded to: {local_dir}") # Step 2: Find adapter progress(0.2, desc="πŸ” Finding adapter...") adapter_path = None # Check locations for path in [f"{local_dir}/last-checkpoint", local_dir]: if os.path.exists(f"{path}/adapter_config.json"): adapter_path = path log(f"βœ… Found adapter at: {path}") break if not adapter_path: # List files for debugging log("❌ adapter_config.json not found!") log("πŸ“‚ Available files:") for root, dirs, files in os.walk(local_dir): for f in files: rel_path = os.path.relpath(os.path.join(root, f), local_dir) log(f" - {rel_path}") return "\n".join(logs) + "\n\n❌ FAILED: No adapter found" # Step 3: Load base model progress(0.3, desc="πŸ“¦ Loading base model...") log(f"πŸ“¦ Loading base model: {BASE_MODEL}") log(" This may take 3-5 minutes...") base_model = AutoModelForCausalLM.from_pretrained( BASE_MODEL, torch_dtype=torch.bfloat16, device_map="auto", trust_remote_code=True, ) log(" βœ… Base model loaded!") # Step 4: Load tokenizer progress(0.4, desc="πŸ“ Loading tokenizer...") log("πŸ“ Loading tokenizer...") tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True) log(" βœ… Tokenizer loaded!") # Step 5: Load LoRA adapter progress(0.5, desc="πŸ”— Loading LoRA adapter...") log(f"πŸ”— Loading LoRA adapter from: {adapter_path}") model = PeftModel.from_pretrained( base_model, adapter_path, torch_dtype=torch.bfloat16, ) log(" βœ… LoRA adapter loaded!") # Step 6: Merge progress(0.6, desc="πŸ”§ Merging LoRA with base model...") log("πŸ”§ Merging LoRA weights with base model...") model = model.merge_and_unload() log(" βœ… Merge complete!") # Step 7: Save progress(0.7, desc="πŸ’Ύ Saving merged model...") output_dir = "/tmp/merged_model" log(f"πŸ’Ύ Saving merged model to: {output_dir}") os.makedirs(output_dir, exist_ok=True) model.save_pretrained(output_dir, safe_serialization=True, max_shard_size="5GB") tokenizer.save_pretrained(output_dir) # List saved files log(" πŸ“‚ Saved files:") for f in os.listdir(output_dir): size_mb = os.path.getsize(os.path.join(output_dir, f)) / (1024 * 1024) log(f" - {f}: {size_mb:.1f} MB") # Step 8: Create model card progress(0.8, desc="πŸ“ Creating model card...") log("πŸ“ Creating model card...") model_card = """--- license: apache-2.0 base_model: Qwen/Qwen2.5-Math-7B-Instruct tags: - math - ib-mathematics - qwen2 - fine-tuned - education - ontology - chain-of-thought language: - en pipeline_tag: text-generation --- # IB-Math-Ontology-7B Fine-tuned Qwen2.5-Math-7B-Instruct for IB Mathematics AA with ontology-based 
        # Step 7: Save
        progress(0.7, desc="πŸ’Ύ Saving merged model...")
        output_dir = "/tmp/merged_model"
        log(f"πŸ’Ύ Saving merged model to: {output_dir}")
        os.makedirs(output_dir, exist_ok=True)
        model.save_pretrained(output_dir, safe_serialization=True, max_shard_size="5GB")
        tokenizer.save_pretrained(output_dir)

        # List saved files
        log("   πŸ“‚ Saved files:")
        for f in os.listdir(output_dir):
            size_mb = os.path.getsize(os.path.join(output_dir, f)) / (1024 * 1024)
            log(f"   - {f}: {size_mb:.1f} MB")

        # Step 8: Create model card
        progress(0.8, desc="πŸ“ Creating model card...")
        log("πŸ“ Creating model card...")
        model_card = """---
license: apache-2.0
base_model: Qwen/Qwen2.5-Math-7B-Instruct
tags:
- math
- ib-mathematics
- qwen2
- fine-tuned
- education
- ontology
- chain-of-thought
language:
- en
pipeline_tag: text-generation
---

# IB-Math-Ontology-7B

Fine-tuned Qwen2.5-Math-7B-Instruct for IB Mathematics AA with ontology-based Chain-of-Thought reasoning.

## Features

- 🎯 **IB Math AA Specialized**: Trained on 1,332 ontology-based examples
- πŸ’­ **Chain-of-Thought**: Uses `` tags for step-by-step reasoning
- πŸ“š **Curriculum-Aligned**: Covers all 5 IB Math AA topics
- ⚠️ **Pitfall Awareness**: Warns about common student mistakes

## Usage

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

model = AutoModelForCausalLM.from_pretrained(
    "ongilLabs/IB-Math-Ontology-7B", torch_dtype="auto", device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained("ongilLabs/IB-Math-Ontology-7B")

prompt = "Find the derivative of f(x) = xΒ³ - 2xΒ² + 5x [6 marks]"
messages = [
    {"role": "system", "content": "You are an expert IB Mathematics AA tutor. Think step-by-step and explain concepts clearly."},
    {"role": "user", "content": prompt},
]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
inputs = tokenizer(text, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=512)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```

## Training Details

- **Base Model**: Qwen2.5-Math-7B-Instruct
- **Method**: LoRA (r=64, alpha=128)
- **Dataset**: 1,332 IB Math Ontology examples with CoT
- **Hardware**: NVIDIA A100 (80GB)
- **Epochs**: 3
- **Precision**: BF16
"""
        with open(os.path.join(output_dir, "README.md"), "w") as f:
            f.write(model_card)
        log("   βœ… Model card created!")

        # Step 9: Upload to Hub
        progress(0.9, desc="πŸš€ Uploading to Hub...")
        log(f"πŸš€ Uploading to Hub: {OUTPUT_REPO}")
        api = HfApi(token=os.getenv("HF_TOKEN"))
        api.upload_folder(
            folder_path=output_dir,
            repo_id=OUTPUT_REPO,
            commit_message="✨ Merged LoRA with base model - Production ready",
        )
        log(f"   βœ… Uploaded to: https://huggingface.co/{OUTPUT_REPO}")

        # Done!
        progress(1.0, desc="πŸŽ‰ Complete!")
        log("")
        log("=" * 50)
        log("πŸŽ‰ SUCCESS! Model merged and uploaded!")
        log("=" * 50)
        log(f"πŸ“ Model URL: https://huggingface.co/{OUTPUT_REPO}")

        return "\n".join(logs)

    except Exception as e:
        log(f"\n❌ ERROR: {e}")
        import traceback
        log(traceback.format_exc())
        return "\n".join(logs)


def create_ui():
    """Build the Gradio UI."""
    with gr.Blocks(title="LoRA Merger") as app:
        gr.Markdown("""
        # πŸ”§ IB-Math-Ontology LoRA Merger

        This Space merges the LoRA adapter with the base model.

        **Source**: `ongilLabs/IB-Math-Ontology-7B` (LoRA adapter)
        **Base**: `Qwen/Qwen2.5-Math-7B-Instruct`
        **Output**: `ongilLabs/IB-Math-Ontology-7B` (merged model)

        **Steps:**
        1. Download LoRA checkpoint from Hub
        2. Load base model (Qwen2.5-Math-7B-Instruct)
        3. Load LoRA adapter
        4. Merge LoRA weights into base model
        5. Upload merged model to Hub
        """)

        with gr.Row():
            merge_btn = gr.Button("πŸš€ Start Merge", variant="primary", scale=2)

        output = gr.Textbox(
            label="Logs",
            lines=30,
            max_lines=50,
        )

        merge_btn.click(fn=merge_model, outputs=output)

        gr.Markdown("""
        ---
        **Note**: This process takes about 10-15 minutes.
        Make sure you have enough GPU memory.
        """)

    return app


if __name__ == "__main__":
    app = create_ui()
    app.launch()
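# Deployment note (an addition, not part of the original script): merge_model
# blocks for roughly 10-15 minutes, so when running this as a hosted Space it
# may help to enable Gradio's request queue so the client connection is not
# dropped mid-run:
#
#   app.queue().launch()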