Miracle12345
/

gemma-3-GRPO

@@ -10,12 +10,8 @@ tags:
 - math-reasoning
 language:
 - en
-base_model: google/gemma-3-1b-it
-datasets:
-- custom-instruction-dataset
-metrics:
-- accuracy
-- f1
 ---
 # Gemma-3 1B IT LoRA Fine-tuned with GRPO
@@ -62,7 +58,7 @@ from peft import PeftModel
 # Model identifiers
 base_model_name = "google/gemma-3-1b-it"
-adapter_repo_id = "your-username/gemma-3-GRPO"  # Replace with your Hugging Face repo
 # Load base model and tokenizer
 model = AutoModelForCausalLM.from_pretrained(
@@ -122,28 +118,6 @@ The model was fine-tuned using the Unsloth framework with the following approach
   - Reward Function: Accuracy-based with format compliance bonus
   - KL Divergence Penalty: 0.01
-### Dataset
-The model was trained on a curated dataset of mathematical problems and reasoning tasks, including:
-- Arithmetic problems
-- Word problems
-- Algebraic equations
-- Geometric calculations
-*Note: Replace placeholders with actual training details if available.*
-## Evaluation
-The model's performance was evaluated on a held-out test set of mathematical problems. Key metrics include:
-- **Accuracy**: Percentage of correct final answers
-- **Format Compliance**: Adherence to specified output format
-- **Reasoning Quality**: Coherence and correctness of intermediate steps
-Example evaluation results:
-- Simple arithmetic: 95% accuracy
-- Complex word problems: 78% accuracy
-- Overall improvement over base model: +15-20% on reasoning tasks
 ## Limitations
@@ -154,26 +128,12 @@ Example evaluation results:
 - **Hallucinations**: Like all language models, this model can generate incorrect information
 - **Bias**: May reflect biases present in the training data
-## Ethical Considerations
-- Use outputs as a tool, not as definitive answers
-- Verify critical information independently
-- Be aware of potential biases in generated content
-- Consider the environmental impact of large language model usage
 ## Citation
 If you use this model in your research or applications, please cite:
 ```bibtex
-@misc{gemma3-grpo-lora,
-  title={Gemma-3 1B IT LoRA Fine-tuned with GRPO},
-  author={Your Name},
-  year={2025},
-  publisher={Hugging Face},
-  url={https://huggingface.co/your-username/gemma-3-GRPO}
-}
 @article{shao2024deepseekmath,
   title={DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open-source Large Language Models},
  author={Shao, Zhihong and Wang, Peiyi and Zhu, Qihao and Xu, Runxin and Song, Junxiao and Bi, Xiao and Zhang, Haowei and Zhang, Mingchuan and Li, Y. K. and Wu, Y. and Guo, Daya},

 - math-reasoning
 language:
 - en
+base_model: google/gemma-3-1b-it
 ---
 # Gemma-3 1B IT LoRA Fine-tuned with GRPO
 # Model identifiers
 base_model_name = "google/gemma-3-1b-it"
+adapter_repo_id = "Miracle12345/gemma-3-GRPO"
 # Load base model and tokenizer
 model = AutoModelForCausalLM.from_pretrained(
   - Reward Function: Accuracy-based with format compliance bonus
   - KL Divergence Penalty: 0.01
 ## Limitations
 - **Hallucinations**: Like all language models, this model can generate incorrect information
 - **Bias**: May reflect biases present in the training data
 ## Citation
 If you use this model in your research or applications, please cite:
 ```bibtex
 @article{shao2024deepseekmath,
   title={DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open-source Large Language Models},
  author={Shao, Zhihong and Wang, Peiyi and Zhu, Qihao and Xu, Runxin and Song, Junxiao and Bi, Xiao and Zhang, Haowei and Zhang, Mingchuan and Li, Y. K. and Wu, Y. and Guo, Daya},

inference_test.ipynb CHANGED Viewed

@@ -55,74 +55,48 @@
     "from transformers import AutoTokenizer\n",
     "from peft import PeftModel\n",
     "\n",
-    "# ===============================\n",
-    "# Tags for GRPO training format\n",
-    "# ===============================\n",
     "reasoning_start = \"<start_working_out>\"\n",
     "reasoning_end   = \"<end_working_out>\"\n",
     "solution_start  = \"<SOLUTION>\"\n",
     "solution_end    = \"</SOLUTION>\"\n",
     "\n",
     "def _normalize_numeric(s: str) -> str:\n",
-    "    \"\"\"Normalize numeric-like strings:\n",
-    "       - remove commas and stray whitespace\n",
-    "       - convert '60.0' -> '60'\n",
-    "       - keep decimals if they are not integer-valued\n",
-    "       - otherwise return the cleaned string as-is\n",
-    "    \"\"\"\n",
     "    s = s.strip().replace(\",\", \"\")\n",
-    "    # remove trailing punctuation commonly found in outputs\n",
     "    s = s.rstrip(\".;)\")\n",
-    "    # try float conversion\n",
     "    try:\n",
     "        f = float(s)\n",
     "    except Exception:\n",
-    "        return s  # not a pure number, return raw cleaned string\n",
-    "    # if it's actually an integer value, return integer form\n",
     "    if f.is_integer():\n",
     "        return str(int(f))\n",
-    "    # else return float without unnecessary trailing zeros\n",
     "    s_float = repr(f)\n",
-    "    # strip trailing zeros like '2.500000' -> '2.5'\n",
     "    if \".\" in s_float:\n",
     "        s_float = s_float.rstrip(\"0\").rstrip(\".\")\n",
     "    return s_float\n",
     "\n",
     "def extract_solution(text: str) -> str | None:\n",
-    "    \"\"\"\n",
-    "    Extract the final solution from `text`.\n",
-    "    1) Look for <SOLUTION>...</SOLUTION>\n",
-    "    2) Otherwise take the last numeric token in the text\n",
-    "    Returns the cleaned numeric answer as a string, or None.\n",
-    "    \"\"\"\n",
-    "    # 1) Strict tag-based extraction (safe escaping)\n",
     "    try:\n",
     "        tag_pattern = re.escape(solution_start) + r\"(.*?)\" + re.escape(solution_end)\n",
     "        m = re.search(tag_pattern, text, flags=re.DOTALL)\n",
     "    except NameError:\n",
-    "        # If solution_start/solution_end not defined for some reason\n",
     "        m = None\n",
     "\n",
     "    if m:\n",
     "        ans = m.group(1).strip()\n",
     "        return _normalize_numeric(ans)\n",
     "\n",
-    "    # 2) Fallback: find all numeric tokens and return the last one\n",
     "    nums = re.findall(r\"-?\\d+(?:\\.\\d+)?\", text)\n",
     "    if not nums:\n",
     "        return None\n",
     "    return _normalize_numeric(nums[-1])\n",
     "\n",
     "\n",
-    "\n",
-    "# ===============================\n",
-    "# Model setup\n",
-    "# ===============================\n",
     "model_name   = \"unsloth/gemma-3-1b-it\"\n",
     "lora_repo_id = \"Miracle12345/gemma-3-GRPO\"\n",
     "max_seq_len  = 4096\n",
     "\n",
-    "# Load base model + tokenizer\n",
     "base_model, tokenizer = FastLanguageModel.from_pretrained(\n",
     "    model_name=model_name,\n",
     "    max_seq_length=max_seq_len,\n",
@@ -137,18 +111,12 @@
     "    is_trainable=False,\n",
     ")\n",
     "\n",
-    "# ===============================\n",
-    "# Prompt setup\n",
-    "# ===============================\n",
     "system_prompt = f\"\"\"You are given a problem.\n",
     "Think about it and provide your working out.\n",
     "Put your reasoning between {reasoning_start} and {reasoning_end}.\n",
     "Then, provide ONLY the final numerical solution between {solution_start}{solution_end}.\n",
     "Do not output anything else.\"\"\"\n",
     "\n",
-    "# ===============================\n",
-    "# Inference function\n",
-    "# ===============================\n",
     "def run_inference(model, tokenizer, question, label=\"\"):\n",
     "    messages = [\n",
     "        {\"role\": \"system\", \"content\": system_prompt},\n",
@@ -177,9 +145,8 @@
     "\n",
     "    return extract_solution(generated_text)\n",
     "\n",
-    "# ===============================\n",
     "# Test set (easy → hard)\n",
-    "# ===============================\n",
     "test_problems = [\n",
     "    (\"What is 12 + 8 - 4 ?\", \"16\"),\n",
     "    (\"If you buy 5 pens at $12 each and 3 notebooks at $20 each, what is the total cost?\", \"120\"),\n",
@@ -191,9 +158,7 @@
     "    (\"Solve: A boat goes 30 km downstream in 2 hours and the same distance upstream in 3 hours. Find the speed of the boat in still water.\", \"12\"),\n",
     "]\n",
     "\n",
-    "# ===============================\n",
     "# Run diagnostic test\n",
-    "# ===============================\n",
     "for q, correct in test_problems:\n",
     "    ans_lora = run_inference(lora_model, tokenizer, q, label=\"(LoRA)\")\n",
     "    ans_base = run_inference(base_model, tokenizer, q, label=\"(Base)\")\n",
@@ -207,9 +172,6 @@
     "        print(\"\\n✅ Found case where LoRA is correct and Base is wrong!\")\n",
     "        break\n",
     "\n",
-    "# ===============================\n",
-    "# Debug memory usage\n",
-    "# ===============================\n",
     "print(\"\\nGPU memory allocated:\", torch.cuda.memory_allocated() / 1024**3, \"GB\")"
    ]
   }

     "from transformers import AutoTokenizer\n",
     "from peft import PeftModel\n",
     "\n",
     "reasoning_start = \"<start_working_out>\"\n",
     "reasoning_end   = \"<end_working_out>\"\n",
     "solution_start  = \"<SOLUTION>\"\n",
     "solution_end    = \"</SOLUTION>\"\n",
     "\n",
     "def _normalize_numeric(s: str) -> str:\n",
+    "\n",
     "    s = s.strip().replace(\",\", \"\")\n",
     "    s = s.rstrip(\".;)\")\n",
     "    try:\n",
     "        f = float(s)\n",
     "    except Exception:\n",
+    "        return s  \n",
     "    if f.is_integer():\n",
     "        return str(int(f))\n",
     "    s_float = repr(f)\n",
     "    if \".\" in s_float:\n",
     "        s_float = s_float.rstrip(\"0\").rstrip(\".\")\n",
     "    return s_float\n",
     "\n",
     "def extract_solution(text: str) -> str | None:\n",
+    "\n",
     "    try:\n",
     "        tag_pattern = re.escape(solution_start) + r\"(.*?)\" + re.escape(solution_end)\n",
     "        m = re.search(tag_pattern, text, flags=re.DOTALL)\n",
     "    except NameError:\n",
     "        m = None\n",
     "\n",
     "    if m:\n",
     "        ans = m.group(1).strip()\n",
     "        return _normalize_numeric(ans)\n",
     "\n",
     "    nums = re.findall(r\"-?\\d+(?:\\.\\d+)?\", text)\n",
     "    if not nums:\n",
     "        return None\n",
     "    return _normalize_numeric(nums[-1])\n",
     "\n",
     "\n",
     "model_name   = \"unsloth/gemma-3-1b-it\"\n",
     "lora_repo_id = \"Miracle12345/gemma-3-GRPO\"\n",
     "max_seq_len  = 4096\n",
     "\n",
     "base_model, tokenizer = FastLanguageModel.from_pretrained(\n",
     "    model_name=model_name,\n",
     "    max_seq_length=max_seq_len,\n",
     "    is_trainable=False,\n",
     ")\n",
     "\n",
     "system_prompt = f\"\"\"You are given a problem.\n",
     "Think about it and provide your working out.\n",
     "Put your reasoning between {reasoning_start} and {reasoning_end}.\n",
     "Then, provide ONLY the final numerical solution between {solution_start}{solution_end}.\n",
     "Do not output anything else.\"\"\"\n",
     "\n",
     "def run_inference(model, tokenizer, question, label=\"\"):\n",
     "    messages = [\n",
     "        {\"role\": \"system\", \"content\": system_prompt},\n",
     "\n",
     "    return extract_solution(generated_text)\n",
     "\n",
     "# Test set (easy → hard)\n",
+    "\n",
     "test_problems = [\n",
     "    (\"What is 12 + 8 - 4 ?\", \"16\"),\n",
     "    (\"If you buy 5 pens at $12 each and 3 notebooks at $20 each, what is the total cost?\", \"120\"),\n",
     "    (\"Solve: A boat goes 30 km downstream in 2 hours and the same distance upstream in 3 hours. Find the speed of the boat in still water.\", \"12\"),\n",
     "]\n",
     "\n",
     "# Run diagnostic test\n",
     "for q, correct in test_problems:\n",
     "    ans_lora = run_inference(lora_model, tokenizer, q, label=\"(LoRA)\")\n",
     "    ans_base = run_inference(base_model, tokenizer, q, label=\"(Base)\")\n",
     "        print(\"\\n✅ Found case where LoRA is correct and Base is wrong!\")\n",
     "        break\n",
     "\n",
     "print(\"\\nGPU memory allocated:\", torch.cuda.memory_allocated() / 1024**3, \"GB\")"
    ]
   }