| import os
|
| import json
|
| import random
|
| import time
|
| from datasets import load_dataset
|
| import google.generativeai as genai
|
|
|
def generate_fine_tuning_dataset(num_samples=1000, output_file="models/local_mvm2_adapter/mvm2_training_data.jsonl"):
    """
    Downloads GSM8K and uses Gemini 2.5 Flash to automatically convert the solutions
    into the strict MVM2 JSON Triplet format for QLoRA fine-tuning.

    Args:
        num_samples: Number of GSM8K training problems to sample. Clamped to
            the dataset size so ``random.sample`` cannot raise ``ValueError``.
        output_file: Destination JSONL path. Parent directories are created
            if the path has any.

    Raises:
        RuntimeError: If the ``GEMINI_API_KEY`` environment variable is unset.
    """
    print(f"Loading GSM8K to generate {num_samples} training examples...")
    dataset = load_dataset("gsm8k", "main", split="train")
    # Clamp so sampling never requests more items than the dataset holds.
    num_samples = min(num_samples, len(dataset))
    indices = random.sample(range(len(dataset)), num_samples)

    # SECURITY: the key must come from the environment. Never embed a real
    # API key in source as a fallback — a committed key is a leaked key.
    gemini_api_key = os.environ.get("GEMINI_API_KEY")
    if not gemini_api_key:
        raise RuntimeError("GEMINI_API_KEY environment variable is not set.")
    genai.configure(api_key=gemini_api_key)
    teacher = genai.GenerativeModel('gemini-2.5-flash')

    # os.makedirs("") raises FileNotFoundError, so only create directories
    # when the output path actually contains a parent component.
    out_dir = os.path.dirname(output_file)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    success_count = 0
    with open(output_file, 'w', encoding='utf-8') as f:
        for i, idx in enumerate(indices):
            problem = dataset[idx]["question"]
            raw_solution = dataset[idx]["answer"]

            prompt = f"""
You are creating a fine-tuning dataset for a math reasoning model.
Convert this problem and solution into the strict MVM2 Triplet schema.

Problem: {problem}
Raw Solution: {raw_solution}

You MUST return ONLY a raw JSON object matching this schema:
{{
"final_answer": "The numerical final answer",
"reasoning_trace": ["step 1 equation", "step 2 equation"],
"confidence_explanation": "A statement confirming the algebraic steps are sound."
}}
"""

            # Retry transient rate-limit failures; skip rows that fail to parse.
            max_retries = 3
            for attempt in range(max_retries):
                try:
                    response = teacher.generate_content(prompt)
                    # Strip Markdown code fences the model sometimes wraps around JSON.
                    text = response.text.replace("```json", "").replace("```", "").strip()
                    json_data = json.loads(text)

                    # Validate the triplet schema before persisting — a row
                    # missing any key would silently pollute the training set.
                    required = {"final_answer", "reasoning_trace", "confidence_explanation"}
                    missing = required - json_data.keys()
                    if missing:
                        raise ValueError(f"Model response missing keys: {sorted(missing)}")

                    training_row = {
                        "messages": [
                            {"role": "system", "content": "You are an MVM2 math reasoning agent. You strictly output JSON triplets: {final_answer, reasoning_trace, confidence_explanation}."},
                            {"role": "user", "content": problem},
                            {"role": "assistant", "content": json.dumps(json_data)}
                        ]
                    }

                    f.write(json.dumps(training_row) + "\n")
                    success_count += 1
                    print(f"Successfully generated triplet for problem {i+1}/{num_samples}")
                    break
                except Exception as e:
                    # Heuristic: Gemini quota errors surface as HTTP 429 /
                    # "quota" in the message — back off and retry those only.
                    if "429" in str(e) or "quota" in str(e).lower():
                        print(f"Rate limited. Sleeping 20s... (Attempt {attempt+1}/{max_retries})")
                        time.sleep(20)
                    else:
                        print(f"Skipping problem {i+1} due to parsing error: {e}")
                        break

    print(f"\n✅ Dataset generation complete! Created {success_count} perfect MVM2 triplets at {output_file}")
    print("You can now run `python scripts/train_qlora_math_agent.py` to commence fine-tuning.")
|
|
|
if __name__ == "__main__":
    import sys
    # Force UTF-8 on stdout so the non-ASCII characters in the completion
    # message (e.g. the ✅ emoji) print correctly on consoles whose default
    # encoding is not UTF-8 (common on Windows).
    sys.stdout.reconfigure(encoding='utf-8')
    # Small default run (100 samples) for the CLI entry point.
    generate_fine_tuning_dataset(num_samples=100)
|
|
|