bbkdevops's picture
download
raw
32.7 kB
"""
Math Forge — สร้าง math training data คุณภาพสูงสำหรับ GRPO
ครอบคลุม 8 หมวด ตั้งแต่ arithmetic → competition math:
L1: Arithmetic & Fractions
L2: Algebra (linear, quadratic)
L3: Word Problems (rates, work, mixture)
L4: Geometry (area, perimeter, 3D)
L5: Probability & Combinatorics
L6: Number Theory
L7: Sequences & Series
L8: Pre-Calculus / Limits
แต่ละโจทย์มี: question, thinking (step-by-step), answer, ground_truth (verified)
"""
from __future__ import annotations
import json
import math
import random
from dataclasses import dataclass, asdict
from pathlib import Path
# Generated datasets are written to a "filtered" folder that sits next to
# this module; the folder is created at import time if it is missing.
OUTPUT_DIR = Path(__file__).parent.joinpath("filtered")
OUTPUT_DIR.mkdir(exist_ok=True)
# Default JSONL destination used by generate_math_dataset().
MATH_OUTPUT = OUTPUT_DIR.joinpath("math_grpo.jsonl")
@dataclass
class MathProblem:
    """One generated math training record.

    The first six fields are required and positional; the remaining four
    carry fixed defaults. Instances are serialised with
    ``dataclasses.asdict`` when the dataset is written.
    """

    # Problem statement shown to the model.
    question: str
    # Step-by-step worked solution.
    thinking: str
    # Final answer as display text.
    answer: str
    # Verifiable value (numeric text) used to check an answer.
    ground_truth: str
    # Difficulty level, 1-8.
    level: int
    # Category label, e.g. "arithmetic" or "geometry".
    category: str
    # Language code of the text fields.
    lang: str = "th"
    # Name of the producing generator.
    source: str = "math_forge"
    # Broad subject tag.
    topic: str = "mathematics"
    # Optional extra context; empty by default.
    context: str = ""
# ─── Level 1: Arithmetic ──────────────────────────────────────────────────────
def gen_arithmetic(rng: random.Random) -> MathProblem:
    """Generate one level-1 arithmetic problem (Thai text).

    The operation is drawn uniformly from: addition, subtraction,
    multiplication, exact division, fraction addition, percentage, and a
    mixed multiply-then-add expression.

    Args:
        rng: Random source. All randomness comes from it, so a seeded
            instance yields reproducible problems.

    Returns:
        A MathProblem with level 1 and category "arithmetic".
    """
    from fractions import Fraction  # local import: not in the module header

    op = rng.choice(["add", "sub", "mul", "div", "frac", "percent", "mixed"])
    if op == "add":
        a, b = rng.randint(100, 9999), rng.randint(100, 9999)
        ans = a + b
        q = f"คำนวณ: {a:,} + {b:,} = ?"
        th = f"บวก: {a:,} + {b:,} = {ans:,}"
    elif op == "sub":
        a = rng.randint(1000, 9999)
        b = rng.randint(100, a)  # b <= a keeps the difference non-negative
        ans = a - b
        q = f"คำนวณ: {a:,} - {b:,} = ?"
        th = f"ลบ: {a:,} - {b:,} = {ans:,}"
    elif op == "mul":
        a, b = rng.randint(12, 999), rng.randint(12, 99)
        ans = a * b
        q = f"คำนวณ: {a} × {b} = ?"
        th = f"คูณ: {a} × {b} = {ans}"
    elif op == "div":
        # Build the dividend from divisor and quotient so division is exact.
        b = rng.randint(2, 20)
        ans = rng.randint(10, 500)
        a = b * ans
        q = f"คำนวณ: {a} ÷ {b} = ?"
        th = f"หาร: {a} ÷ {b} = {ans}"
    elif op == "frac":
        d1 = rng.randint(2, 12)
        d2 = rng.randint(2, 12)
        n1 = rng.randint(1, d1 - 1)
        n2 = rng.randint(1, d2 - 1)
        result = Fraction(n1, d1) + Fraction(n2, d2)
        ans_val = float(result)
        if result.denominator != 1:
            ans = f"{result.numerator}/{result.denominator}"
        else:
            ans = str(result.numerator)
        # The common denominator is lcm(d1, d2), NOT the denominator of the
        # already-reduced sum (which may be smaller and made the old steps
        # wrong, e.g. "1/6 = 0/2").
        lcd = d1 * d2 // math.gcd(d1, d2)
        m1 = n1 * lcd // d1
        m2 = n2 * lcd // d2
        unreduced = f"{m1 + m2}/{lcd}"
        q = f"คำนวณ: {n1}/{d1} + {n2}/{d2} = ?"
        th = (f"หา LCD ของ {d1} และ {d2} = {lcd}\n"
              f"{n1}/{d1} = {m1}/{lcd}\n"
              f"{n2}/{d2} = {m2}/{lcd}\n"
              f"รวม = {unreduced}")
        if unreduced != ans:
            th += f" = {ans}"  # show the reduction step only when one exists
        return MathProblem(q, th, f"= {ans}", str(ans_val), 1, "arithmetic")
    elif op == "percent":
        pct = rng.choice([5, 10, 15, 20, 25, 30, 40, 50, 60, 75])
        base = rng.randint(20, 500) * 4
        # base*pct is not always a multiple of 100 (e.g. 5% of 84 = 4.2), so
        # keep the exact value instead of flooring it.
        whole, rem = divmod(base * pct, 100)
        ans = whole if rem == 0 else base * pct / 100
        q = f"{pct}% ของ {base:,} มีค่าเท่าไร?"
        th = f"{pct}% × {base} = {pct}/100 × {base} = {ans}"
    else:  # mixed
        a = rng.randint(2, 20)
        b = rng.randint(2, 20)
        c = rng.randint(2, 20)
        ans = a * b + c
        q = f"คำนวณ: {a} × {b} + {c} = ?"
        th = f"ทำคูณก่อน: {a} × {b} = {a*b}\nแล้วบวก: {a*b} + {c} = {ans}"
    return MathProblem(q, th, f"= {ans}", str(ans), 1, "arithmetic")
# ─── Level 2: Algebra ─────────────────────────────────────────────────────────
def gen_algebra(rng: random.Random) -> MathProblem:
    """Generate one level-2 algebra problem (Thai text).

    The kind is drawn uniformly from: one-sided linear equation, two-sided
    linear equation, factorable quadratic, 2x2 linear system, and a linear
    inequality.

    Args:
        rng: Random source; a seeded instance yields reproducible problems.

    Returns:
        A MathProblem with level 2 and category "algebra".
    """
    from fractions import Fraction  # local import: not in the module header

    def _signed(value: int) -> str:
        # Parenthesise negatives so steps read "3×(-2)" rather than "3×-2".
        return f"({value})" if value < 0 else str(value)

    kind = rng.choice(["linear1", "linear2", "quadratic", "system", "inequality"])
    if kind == "linear1":
        a = rng.randint(2, 15)
        b = rng.randint(1, 30)
        c = rng.randint(1, 50)
        # ax + b = c; redraw c when the root would not be an integer.
        if (c - b) % a != 0:
            c = a * rng.randint(1, 10) + b
        x = (c - b) // a
        q = f"แก้สมการ: {a}x + {b} = {c} หา x"
        th = (f"ย้าย {b} ข้ามไปอีกข้าง:\n"
              f"{a}x = {c} - {b} = {c-b}\n"
              f"x = {c-b} ÷ {a} = {x}")
        return MathProblem(q, th, f"x = {x}", str(x), 2, "algebra")
    elif kind == "linear2":
        a = rng.randint(2, 8)
        b = rng.randint(1, 10)
        c = rng.randint(2, 8)
        d = rng.randint(1, 10)
        # ax + b = cx + d  ->  (a-c)x = d-b; bumping a guarantees a != c, so
        # the x-coefficient below can never be zero.
        if a == c:
            a += 1
        x_num = d - b
        x_den = a - c
        x_frac = Fraction(x_num, x_den)
        q = f"แก้สมการ: {a}x + {b} = {c}x + {d} หา x"
        th = (f"ย้ายพจน์ x ไปข้างซ้าย:\n"
              f"({a}-{c})x = {d}-{b}\n"
              f"{x_den}x = {x_num}\n"
              f"x = {x_frac}")
        return MathProblem(q, th, f"x = {x_frac}", str(float(x_frac)), 2, "algebra")
    elif kind == "quadratic":
        # Built from roots: (x - r1)(x - r2) = 0 with distinct integer roots.
        r1 = rng.randint(-8, 8)
        r2 = rng.randint(-8, 8)
        if r1 == r2:
            r2 += 1
        b_coef = -(r1 + r2)
        c_coef = r1 * r2
        sign_b = f"+ {b_coef}" if b_coef >= 0 else f"- {abs(b_coef)}"
        sign_c = f"+ {c_coef}" if c_coef >= 0 else f"- {abs(c_coef)}"
        # A negative root r gives the factor (x + |r|), not "(x - -r)".
        factor1 = f"(x - {r1})" if r1 >= 0 else f"(x + {-r1})"
        factor2 = f"(x - {r2})" if r2 >= 0 else f"(x + {-r2})"
        q = f"แก้สมการ: x² {sign_b}x {sign_c} = 0 หา x"
        th = (f"แยกตัวประกอบ: {factor1}{factor2} = 0\n"
              f"ตรวจสอบ: ขยายได้ x² {sign_b}x {sign_c} ✓\n"
              f"ดังนั้น x = {r1} หรือ x = {r2}")
        ans_str = f"x = {min(r1,r2)} หรือ x = {max(r1,r2)}"
        return MathProblem(q, th, ans_str,
                           f"{min(r1,r2)},{max(r1,r2)}", 2, "algebra")
    elif kind == "system":
        # ax+by=c, dx+ey=f, built backwards from a chosen solution (x, y).
        x, y = rng.randint(-5, 10), rng.randint(-5, 10)
        a, b = rng.randint(1, 5), rng.randint(1, 5)
        d, e = rng.randint(1, 5), rng.randint(1, 5)
        if a*e == b*d:
            e += 1  # avoid a singular system (a >= 1, so the determinant changes)
        c = a*x + b*y
        f = d*x + e*y
        q = f"แก้ระบบสมการ:\n {a}x + {b}y = {c}\n {d}x + {e}y = {f}"
        th = (f"วิธีกำจัด: คูณสมการแรก ×{d} คูณสมการสอง ×{a}:\n"
              f" {a*d}x + {b*d}y = {c*d}\n"
              f" {a*d}x + {a*e}y = {a*f}\n"
              f"ลบกัน: ({b*d}-{a*e})y = {c*d-a*f}\n"
              f"y = {_signed(c*d-a*f)} ÷ {_signed(b*d-a*e)} = {y}\n"
              f"แทน y={y}: {a}x = {c} - {b}×{_signed(y)} = {c-b*y}, x = {x}")
        return MathProblem(q, th, f"x = {x}, y = {y}",
                           f"{x},{y}", 2, "algebra")
    else:  # inequality
        a = rng.randint(2, 8)
        b = rng.randint(1, 20)
        c = rng.randint(b+1, 50)
        # ax + b > c  ->  x > (c-b)/a; a is positive so the sign is kept.
        bound = Fraction(c - b, a)
        q = f"แก้อสมการ: {a}x + {b} > {c}"
        th = (f"ย้าย {b}: {a}x > {c-b}\n"
              f"หาร {a} (บวก ไม่เปลี่ยนทิศ): x > {bound}")
        return MathProblem(q, th, f"x > {bound}", str(float(bound)), 2, "algebra")
# ─── Level 3: Word Problems ───────────────────────────────────────────────────
def gen_word_problem(rng: random.Random) -> MathProblem:
    """Generate one level-3 word problem (Thai text).

    The kind is drawn uniformly from: speed/distance, combined work rate,
    mixture concentration, ages, selling price for a target profit, and
    consecutive integers.

    Args:
        rng: Random source; a seeded instance yields reproducible problems.

    Returns:
        A MathProblem with level 3 and category "word_problem".
    """
    from fractions import Fraction  # local import: not in the module header

    kind = rng.choice(["speed", "work", "mixture", "age", "profit", "consecutive"])
    if kind == "speed":
        v = rng.choice([40, 50, 60, 80, 90, 100, 120])
        t_num = rng.randint(1, 8)
        t_den = rng.choice([1, 2, 4])
        t = Fraction(t_num, t_den)
        d = v * t  # exact Fraction; printed as "n" or "n/m"
        q = f"รถยนต์วิ่งด้วยความเร็ว {v} กม./ชม. เป็นเวลา {t} ชั่วโมง จะเดินทางได้กี่กิโลเมตร?"
        th = f"ระยะทาง = ความเร็ว × เวลา\n= {v} × {t} = {d} กม."
        return MathProblem(q, th, f"{d} กิโลเมตร", str(float(d)), 3, "word_problem")
    elif kind == "work":
        a_days = rng.choice([6, 8, 10, 12, 15, 20])
        b_days = rng.choice([4, 6, 8, 10, 12])
        combined = Fraction(1, a_days) + Fraction(1, b_days)
        total = 1 / combined
        q = (f"คน A ทำงานคนเดียวเสร็จใน {a_days} วัน คน B เสร็จใน {b_days} วัน "
             f"ถ้าทำงานพร้อมกัน จะเสร็จในกี่วัน?")
        th = (f"A ทำได้วันละ 1/{a_days} ของงาน\n"
              f"B ทำได้วันละ 1/{b_days} ของงาน\n"
              f"ด้วยกัน = 1/{a_days} + 1/{b_days} = {combined} ต่อวัน\n"
              f"วันที่เสร็จ = 1 ÷ {combined} = {total} วัน")
        return MathProblem(q, th, f"{total} วัน", str(float(total)), 3, "word_problem")
    elif kind == "mixture":
        c1 = rng.choice([10, 20, 30, 40])
        c2 = rng.choice([50, 60, 70, 80])
        # target only steers the volume ratio; the reported concentration is
        # recomputed from the final (integer) volumes below.
        target = rng.randint(c1+5, c2-5)
        v2 = rng.randint(2, 20)
        v1 = v2 * (c2 - target) // (target - c1)
        if v1 <= 0:
            v1 = 10
        actual_c = (v1*c1 + v2*c2) / (v1 + v2)
        q = (f"ผสมน้ำยา {c1}% จำนวน {v1} ลิตร กับน้ำยา {c2}% จำนวน {v2} ลิตร "
             f"ความเข้มข้นของส่วนผสมเป็นเท่าไร?")
        th = (f"ปริมาณสาร = {v1}×{c1}/100 + {v2}×{c2}/100 = {v1*c1/100:.2f} + {v2*c2/100:.2f} = {(v1*c1+v2*c2)/100:.2f}\n"
              f"ปริมาตรรวม = {v1}+{v2} = {v1+v2} ลิตร\n"
              f"ความเข้มข้น = {(v1*c1+v2*c2)/100:.2f}/{v1+v2} × 100 = {actual_c:.2f}%")
        return MathProblem(q, th, f"{actual_c:.2f}%", f"{actual_c:.2f}", 3, "word_problem")
    elif kind == "age":
        now_a = rng.randint(10, 40)
        diff = rng.randint(2, 20)
        now_b = now_a + diff
        years = rng.randint(3, 15)
        sum_then = (now_a + years) + (now_b + years)
        q = (f"ปัจจุบัน A อายุ {now_a} ปี B อายุ {now_b} ปี "
             f"อีก {years} ปีข้างหน้า อายุรวมของทั้งสองจะเป็นเท่าไร?")
        th = (f"A หลัง {years} ปี = {now_a} + {years} = {now_a+years}\n"
              f"B หลัง {years} ปี = {now_b} + {years} = {now_b+years}\n"
              f"รวม = {now_a+years} + {now_b+years} = {sum_then}")
        return MathProblem(q, th, f"{sum_then} ปี", str(sum_then), 3, "word_problem")
    elif kind == "profit":
        cost = rng.randint(100, 2000) * 5
        pct = rng.choice([10, 15, 20, 25, 30, 40])
        # cost is only a multiple of 5, so cost*(100+pct)/100 can carry
        # satang (e.g. 505 at 15% -> 580.75). Keep the exact price instead of
        # flooring it, which produced a wrong ground truth.
        whole, rem = divmod(cost * (100 + pct), 100)
        if rem == 0:
            sell = whole
            sell_txt = f"{sell:,}"
            profit_txt = f"{sell - cost:,}"
        else:
            sell = cost * (100 + pct) / 100
            sell_txt = f"{sell:,.2f}"
            profit_txt = f"{sell - cost:,.2f}"
        q = f"ซื้อของราคา {cost:,} บาท ขายเพื่อกำไร {pct}% ควรตั้งราคาขายเท่าไร?"
        th = (f"ราคาขาย = ต้นทุน × (1 + กำไร%)\n"
              f"= {cost:,} × (1 + {pct}/100)\n"
              f"= {cost:,} × {(100 + pct) / 100}\n"
              f"= {sell_txt} บาท\n"
              f"กำไร = {sell_txt} - {cost:,} = {profit_txt} บาท")
        return MathProblem(q, th, f"ราคาขาย {sell_txt} บาท", str(sell), 3, "word_problem")
    else:  # consecutive
        n = rng.choice([3, 4, 5])
        start = rng.randint(5, 50)
        nums = list(range(start, start + n))
        total = sum(nums)
        offset = n * (n - 1) // 2  # 0 + 1 + ... + (n-1)
        q = f"จำนวนเต็มบวกต่อเนื่องกัน {n} จำนวน ผลรวม = {total} จำนวนแรกคือเท่าไร?"
        th = (f"สมมติจำนวนแรก = x\n"
              f"ผลรวม = x + (x+1) + ... + (x+{n-1}) = {n}x + {offset}\n"
              f"{n}x + {offset} = {total}\n"
              f"{n}x = {total - offset}\n"
              f"x = {start}")
        return MathProblem(q, th, f"จำนวนแรก = {start}", str(start), 3, "word_problem")
# ─── Level 4: Geometry ────────────────────────────────────────────────────────
def gen_geometry(rng: random.Random) -> MathProblem:
    """Generate one level-4 geometry problem (Thai text).

    The kind is drawn uniformly from: triangle area, circle area or
    circumference (pi = 3.14), Pythagorean triple, cylinder volume
    (pi = 3.14), rectangle diagonal, and similar triangles.

    Args:
        rng: Random source; a seeded instance yields reproducible problems.

    Returns:
        A MathProblem with level 4 and category "geometry".
    """
    kind = rng.choice(["triangle_area", "circle", "rect_diag", "pythagorean",
                       "cylinder", "similar_triangle"])
    if kind == "triangle_area":
        base = rng.randint(4, 30)
        height = rng.randint(3, 25)
        area = base * height / 2
        q = f"สามเหลี่ยมมีฐาน {base} ซม. สูง {height} ซม. หาพื้นที่"
        th = f"พื้นที่สามเหลี่ยม = (ฐาน × สูง) / 2 = ({base} × {height}) / 2 = {area:.1f} ตร.ซม."
        return MathProblem(q, th, f"{area:.1f} ตร.ซม.", f"{area:.2f}", 4, "geometry")
    elif kind == "circle":
        r = rng.randint(3, 20)
        kind2 = rng.choice(["area", "circumference"])
        if kind2 == "area":
            q = f"วงกลมรัศมี {r} ซม. หาพื้นที่ (ใช้ π = 3.14)"
            th = f"พื้นที่ = πr² = 3.14 × {r}² = 3.14 × {r*r} = {3.14*r*r:.2f} ตร.ซม."
            return MathProblem(q, th, f"{3.14*r*r:.2f} ตร.ซม.", f"{3.14*r*r:.2f}", 4, "geometry")
        else:
            q = f"วงกลมรัศมี {r} ซม. หาเส้นรอบวง (ใช้ π = 3.14)"
            th = f"เส้นรอบวง = 2πr = 2 × 3.14 × {r} = {2*3.14*r:.2f} ซม."
            return MathProblem(q, th, f"{2*3.14*r:.2f} ซม.", f"{2*3.14*r:.2f}", 4, "geometry")
    elif kind == "pythagorean":
        # Scaled Pythagorean triples keep every side an integer.
        triples = [(3,4,5),(5,12,13),(8,15,17),(7,24,25),(6,8,10),(9,12,15),(10,24,26)]
        a, b, c = rng.choice(triples)
        mult = rng.randint(1, 4)
        a, b, c = a*mult, b*mult, c*mult
        ask = rng.choice(["hyp", "leg"])
        if ask == "hyp":
            q = f"สามเหลี่ยมมุมฉาก ขา {a} ซม. และ {b} ซม. ด้านตรงข้ามมุมฉากยาวเท่าไร?"
            th = (f"ทฤษฎีบทพีทาโกรัส: c² = a² + b²\n"
                  f"c² = {a}² + {b}² = {a*a} + {b*b} = {a*a+b*b}\n"
                  f"c = √{a*a+b*b} = {c} ซม.")
            return MathProblem(q, th, f"{c} ซม.", str(c), 4, "geometry")
        else:
            q = f"สามเหลี่ยมมุมฉาก ด้านตรงข้ามมุมฉาก {c} ซม. ขาด้านหนึ่ง {a} ซม. อีกขายาวเท่าไร?"
            th = (f"b² = c² - a² = {c*c} - {a*a} = {c*c-a*a}\n"
                  f"b = √{b*b} = {b} ซม.")
            return MathProblem(q, th, f"{b} ซม.", str(b), 4, "geometry")
    elif kind == "cylinder":
        r = rng.randint(3, 15)
        h = rng.randint(5, 30)
        vol = 3.14 * r * r * h
        q = f"ทรงกระบอกรัศมี {r} ซม. สูง {h} ซม. หาปริมาตร (ใช้ π = 3.14)"
        th = (f"ปริมาตร = πr²h\n"
              f"= 3.14 × {r}² × {h}\n"
              f"= 3.14 × {r*r} × {h}\n"
              f"= {vol:.2f} ลบ.ซม.")
        return MathProblem(q, th, f"{vol:.2f} ลบ.ซม.", f"{vol:.2f}", 4, "geometry")
    elif kind == "rect_diag":
        w = rng.randint(3, 20)
        h = rng.randint(3, 20)
        d = math.sqrt(w*w + h*h)
        q = f"สี่เหลี่ยมผืนผ้ากว้าง {w} ซม. ยาว {h} ซม. เส้นทแยงมุมยาวเท่าไร?"
        # The radicand and the decimal used to be fused ("√255.00"); separate
        # them with "≈", matching the answer string.
        th = (f"เส้นทแยงมุม = √(กว้าง² + ยาว²)\n"
              f"= √({w}² + {h}²) = √({w*w} + {h*h}) = √{w*w+h*h} ≈ {d:.2f} ซม.")
        return MathProblem(q, th, f"≈ {d:.2f} ซม.", f"{d:.4f}", 4, "geometry")
    else:  # similar_triangle
        scale = rng.randint(2, 5)
        a = rng.randint(3, 10)
        b = rng.randint(3, 10)
        c = rng.randint(5, 15)
        # The question speaks of the "shortest" and "longest" side, so order
        # the draws instead of assuming a <= b <= c.
        shortest, middle, longest = sorted((a, b, c))
        # Clamp the longest side so the three lengths form a real triangle;
        # shortest >= 3 keeps the clamped value >= middle.
        if longest >= shortest + middle:
            longest = shortest + middle - 1
        q = (f"สามเหลี่ยม A มีด้าน {shortest}, {middle}, {longest} ซม. "
             f"สามเหลี่ยม B คล้ายกัน มีด้านสั้นที่สุด = {shortest*scale} ซม. "
             f"หาอัตราส่วนความคล้าย และด้านที่ยาวที่สุดของ B")
        longest_B = longest * scale
        th = (f"อัตราส่วนความคล้าย = {shortest*scale}/{shortest} = {scale}\n"
              f"ด้านที่ยาวที่สุด = {longest} × {scale} = {longest_B} ซม.")
        return MathProblem(q, th, f"อัตราส่วน {scale}:1, ด้านยาวสุด = {longest_B} ซม.",
                           str(longest_B), 4, "geometry")
# ─── Level 5: Probability ─────────────────────────────────────────────────────
def gen_probability(rng: random.Random) -> MathProblem:
    """Generate one level-5 probability or counting problem (Thai text).

    The kind is drawn uniformly from: dice, playing cards, balls in a bag,
    permutations, combinations, and the multiplication rule for
    conditional probability.

    Args:
        rng: Random source; a seeded instance yields reproducible problems.

    Returns:
        A MathProblem with level 5. The category is "combinatorics" for the
        permutation and combination kinds and "probability" otherwise.
    """
    from fractions import Fraction  # local import: not in the module header

    kind = rng.choice(["dice", "cards", "ball_bag", "permutation", "combination", "conditional"])
    if kind == "dice":
        n_dice = rng.choice([1, 2])
        if n_dice == 1:
            target = rng.randint(2, 6)
            q = f"ทอดลูกเต๋า 1 ลูก ความน่าจะเป็นที่จะได้ ≥ {target} คือเท่าไร?"
            favourable = 7 - target
            prob = Fraction(favourable, 6)
            # List the real faces; the old "t, t+1, ..., 6" pattern printed
            # "6, 7, ..., 6" for target 6.
            faces = ", ".join(str(face) for face in range(target, 7))
            raw = f"{favourable}/6"
            reduced = "" if str(prob) == raw else f" = {prob}"
            th = (f"ผลลัพธ์ที่ต้องการ: {faces} → {favourable} แบบ\n"
                  f"ผลลัพธ์ทั้งหมด: 6 แบบ\n"
                  f"P = {raw}{reduced}")
            return MathProblem(q, th, f"P = {prob} ≈ {float(prob):.4f}",
                               f"{float(prob):.4f}", 5, "probability")
        else:
            target_sum = rng.randint(8, 10)
            outcomes = [(i, j) for i in range(1, 7) for j in range(1, 7) if i + j == target_sum]
            prob = Fraction(len(outcomes), 36)
            q = f"ทอดลูกเต๋า 2 ลูก ความน่าจะเป็นที่ผลรวม = {target_sum} คือเท่าไร?"
            th = (f"คู่ที่ได้ผลรวม {target_sum}: {outcomes}\n"
                  f"จำนวน = {len(outcomes)}, ทั้งหมด = 36\n"
                  f"P = {prob} ≈ {float(prob):.4f}")
            return MathProblem(q, th, f"P = {prob}", f"{float(prob):.4f}", 5, "probability")
    elif kind == "cards":
        # "cards" was offered by rng.choice above but had no branch, so it
        # silently produced a conditional-probability problem instead.
        label, favourable = rng.choice([
            ("ไพ่โพแดง", 13),
            ("ไพ่เอซ", 4),
            ("ไพ่ J, Q หรือ K", 12),
            ("ไพ่สีแดง", 26),
        ])
        prob = Fraction(favourable, 52)
        q = f"สุ่มหยิบไพ่ 1 ใบจากสำรับมาตรฐาน 52 ใบ ความน่าจะเป็นที่จะได้ {label} คือเท่าไร?"
        th = (f"จำนวน {label} ในสำรับ = {favourable} ใบ\n"
              f"ไพ่ทั้งหมด = 52 ใบ\n"
              f"P = {favourable}/52 = {prob} ≈ {float(prob):.4f}")
        return MathProblem(q, th, f"P = {prob} ≈ {float(prob):.4f}",
                           f"{float(prob):.4f}", 5, "probability")
    elif kind == "ball_bag":
        r = rng.randint(3, 10)
        b = rng.randint(3, 10)
        total = r + b
        kind2 = rng.choice(["one_red", "both_red_no_replace"])
        if kind2 == "one_red":
            prob = Fraction(r, total)
            q = f"ถุงมีลูกบอลแดง {r} ลูก น้ำเงิน {b} ลูก สุ่มหยิบ 1 ลูก P(แดง) = ?"
            th = f"P(แดง) = แดง/รวม = {r}/{total} = {prob}"
            return MathProblem(q, th, f"P = {prob} ≈ {float(prob):.4f}",
                               f"{float(prob):.4f}", 5, "probability")
        else:
            p = Fraction(r, total) * Fraction(r-1, total-1)
            q = f"ถุงมีลูกบอลแดง {r} ลูก น้ำเงิน {b} ลูก สุ่มหยิบ 2 ลูก (ไม่ใส่คืน) P(แดงทั้งคู่) = ?"
            th = (f"P(แดงทั้งคู่) = P(แดง1) × P(แดง2|แดง1)\n"
                  f"= {r}/{total} × {r-1}/{total-1} = {p} ≈ {float(p):.4f}")
            return MathProblem(q, th, f"P = {p} ≈ {float(p):.4f}",
                               f"{float(p):.4f}", 5, "probability")
    elif kind == "permutation":
        n = rng.randint(4, 8)
        r = rng.randint(2, min(n, 4))
        perm = math.factorial(n) // math.factorial(n - r)
        q = f"จากตัวเลข {n} ตัว จัดเรียง {r} ตำแหน่งได้กี่วิธี? (P({n},{r}))"
        th = (f"P(n,r) = n! / (n-r)!\n"
              f"P({n},{r}) = {n}! / {n-r}! = {math.factorial(n)} / {math.factorial(n-r)} = {perm}")
        return MathProblem(q, th, f"{perm:,} วิธี", str(perm), 5, "combinatorics")
    elif kind == "combination":
        n = rng.randint(5, 12)
        r = rng.randint(2, min(n//2, 5))
        comb = math.factorial(n) // (math.factorial(r) * math.factorial(n-r))
        q = f"เลือก {r} คนจาก {n} คน ทำได้กี่วิธี? (C({n},{r}))"
        th = (f"C(n,r) = n! / (r!(n-r)!)\n"
              f"C({n},{r}) = {n}! / ({r}! × {n-r}!) = {math.factorial(n)} / ({math.factorial(r)} × {math.factorial(n-r)}) = {comb}")
        return MathProblem(q, th, f"{comb:,} วิธี", str(comb), 5, "combinatorics")
    else:  # conditional
        p_a = rng.choice([0.3, 0.4, 0.5, 0.6])
        p_b_given_a = rng.choice([0.7, 0.8, 0.9])
        p_ab = round(p_a * p_b_given_a, 4)  # rounding hides float noise
        q = (f"P(A) = {p_a}, P(B|A) = {p_b_given_a} "
             f"หา P(A ∩ B)")
        th = (f"กฎการคูณ: P(A ∩ B) = P(A) × P(B|A)\n"
              f"= {p_a} × {p_b_given_a} = {p_ab}")
        return MathProblem(q, th, f"P(A∩B) = {p_ab}", str(p_ab), 5, "probability")
# ─── Level 6: Number Theory ────────────────────────────────────────────────────
def gen_number_theory(rng: random.Random) -> MathProblem:
    """Generate one level-6 number-theory problem (Thai text).

    The kind is drawn uniformly from: GCD or LCM of two numbers, prime
    factorisation of a composite, divisibility by 2/3/5/9, and a remainder
    (modular) computation.

    Args:
        rng: Random source; a seeded instance yields reproducible problems.

    Returns:
        A MathProblem with level 6 and category "number_theory".
    """
    kind = rng.choice(["gcd_lcm", "prime", "divisibility", "modular"])
    if kind == "gcd_lcm":
        a = rng.randint(12, 120)
        b = rng.randint(12, 120)
        g = math.gcd(a, b)
        l = a * b // g
        ask = rng.choice(["gcd", "lcm"])
        if ask == "gcd":
            q = f"หา ห.ร.ม. ของ {a} และ {b}"
            # Emit every Euclidean step down to remainder 0. The old text
            # stopped after two lines and printed "—×0 + ..." when b | a.
            euclid = []
            hi, lo = a, b
            while lo:
                euclid.append(f"{hi} = {hi // lo}×{lo} + {hi % lo}")
                hi, lo = lo, hi % lo
            th = ("ใช้อัลกอริทึมยูคลิด:\n"
                  + "\n".join(euclid)
                  + f"\nห.ร.ม.({a},{b}) = {g}")
            return MathProblem(q, th, str(g), str(g), 6, "number_theory")
        else:
            q = f"หา ค.ร.น. ของ {a} และ {b}"
            th = (f"ค.ร.น.(a,b) = a×b / ห.ร.ม.(a,b)\n"
                  f"ห.ร.ม.({a},{b}) = {g}\n"
                  f"ค.ร.น. = {a}×{b} / {g} = {l}")
            return MathProblem(q, th, str(l), str(l), 6, "number_theory")
    elif kind == "prime":
        # Pick a genuinely composite number. The old prime list only covered
        # 50..199, so primes in 200..499 could be offered for factorisation.
        composites = [x for x in range(100, 500)
                      if any(x % i == 0 for i in range(2, int(x ** 0.5) + 1))]
        n = rng.choice(composites)
        # Trial division, recording each successful division as a step.
        factors = []
        steps = []
        temp = n
        d = 2
        while d * d <= temp:
            while temp % d == 0:
                steps.append(f"{temp} ÷ {d} = {temp // d}")
                factors.append(d)
                temp //= d
            d += 1
        if temp > 1:
            factors.append(temp)  # whatever is left is itself prime
        factor_str = " × ".join(str(p) for p in factors)
        q = f"แยกตัวประกอบเฉพาะของ {n}"
        th = "\n".join(steps) + f"\n{n} = {factor_str}"
        return MathProblem(q, th, f"{n} = {factor_str}", factor_str, 6, "number_theory")
    elif kind == "divisibility":
        n = rng.randint(100, 9999)
        digit_sum = sum(int(ch) for ch in str(n))
        last_digit = str(n)[-1]
        rules = {2: n % 2 == 0, 3: digit_sum % 3 == 0,
                 5: n % 5 == 0, 9: digit_sum % 9 == 0}
        divisors = [k for k, ok in rules.items() if ok]
        verdict = {k: ("ลงตัว" if ok else "ไม่ลงตัว") for k, ok in rules.items()}
        q = f"จำนวน {n} หารด้วย 2, 3, 5, 9 ลงตัวหรือไม่? (ใช้กฎการหาร)"
        # " → " separates the observed value from the verdict; previously the
        # two were fused (e.g. "4ลงตัว").
        th = (f"÷2: เลขท้าย = {last_digit} → {verdict[2]}\n"
              f"÷3: ผลรวมหลัก = {digit_sum} → {verdict[3]} ({digit_sum} mod 3 = {digit_sum % 3})\n"
              f"÷5: เลขท้าย = {last_digit} → {verdict[5]}\n"
              f"÷9: ผลรวมหลัก = {digit_sum} → {verdict[9]} ({digit_sum} mod 9 = {digit_sum % 9})")
        if divisors:
            ans = f"÷{', ÷'.join(str(k) for k in divisors)} ลงตัว"
        else:
            ans = "ไม่ลงตัวทั้ง 4"
        return MathProblem(q, th, ans, str(divisors), 6, "number_theory")
    else:  # modular
        a = rng.randint(10, 99)
        m = rng.randint(3, 12)
        r = a % m
        q_full = f"หาเศษจากการหาร {a} ÷ {m}"
        th = f"{a} = {a//m} × {m} + {r}\nดังนั้น {a} mod {m} = {r}"
        return MathProblem(q_full, th, str(r), str(r), 6, "number_theory")
# ─── Level 7: Sequences ───────────────────────────────────────────────────────
def gen_sequence(rng: random.Random) -> MathProblem:
    """Generate one level-7 sequences-and-series problem (Thai text).

    One of four variants is chosen uniformly: n-th term of an arithmetic
    sequence, n-th term of a geometric sequence, partial sum of an
    arithmetic sequence, or partial sum of a geometric sequence.

    Args:
        rng: Random source; a seeded instance yields reproducible problems.

    Returns:
        A MathProblem with level 7 and category "sequences".
    """
    variant = rng.choice(["arithmetic_seq", "geometric_seq", "sum_arith", "sum_geo"])

    if variant == "arithmetic_seq":
        first = rng.randint(1, 20)
        step = rng.randint(2, 15)
        idx = rng.randint(5, 20)
        growth = (idx - 1) * step
        term = first + growth
        question = f"ลำดับเลขคณิต: a₁ = {first}, d = {step} หา a_{idx}"
        lines = [
            "aₙ = a₁ + (n-1)d",
            f"a_{idx} = {first} + ({idx}-1)×{step}",
            f"= {first} + {growth}",
            f"= {term}",
        ]
        return MathProblem(question, "\n".join(lines), str(term), str(term), 7, "sequences")

    if variant == "geometric_seq":
        from fractions import Fraction

        first = rng.randint(2, 10)
        ratio = rng.choice([2, 3, 1/2, 1/3])
        idx = rng.randint(4, 8)
        # Fraction form is used for display only; the term itself is computed
        # from the drawn ratio value.
        ratio_txt = Fraction(ratio).limit_denominator(10)
        term = first * (ratio ** (idx - 1))
        shown = f"{term:.4f}"
        question = f"ลำดับเรขาคณิต: a₁ = {first}, r = {ratio_txt} หา a_{idx}"
        lines = [
            "aₙ = a₁ × rⁿ⁻¹",
            f"a_{idx} = {first} × ({ratio_txt})^{idx-1}",
            f"= {shown}",
        ]
        return MathProblem(question, "\n".join(lines), shown, shown, 7, "sequences")

    if variant == "sum_arith":
        first = rng.randint(1, 10)
        step = rng.randint(1, 8)
        count = rng.randint(10, 30)
        doubled_first = 2 * first
        growth = (count - 1) * step
        bracket = doubled_first + growth
        partial = count * bracket // 2
        question = f"ลำดับเลขคณิต a₁={first}, d={step} หาผลรวม {count} พจน์แรก (Sₙ)"
        lines = [
            "Sₙ = n/2 × (2a₁ + (n-1)d)",
            f"S_{count} = {count}/2 × (2×{first} + ({count}-1)×{step})",
            f"= {count}/2 × ({doubled_first} + {growth})",
            f"= {count}/2 × {bracket}",
            f"= {partial}",
        ]
        return MathProblem(question, "\n".join(lines), str(partial), str(partial), 7, "sequences")

    # sum_geo
    first = rng.randint(2, 8)
    ratio = rng.choice([2, 3])
    count = rng.randint(4, 7)
    power = ratio ** count
    partial = first * (power - 1) // (ratio - 1)
    question = f"ลำดับเรขาคณิต a₁={first}, r={ratio} หาผลรวม {count} พจน์แรก"
    lines = [
        "Sₙ = a₁(rⁿ-1)/(r-1)",
        f"S_{count} = {first}×({ratio}^{count}-1)/({ratio}-1)",
        f"= {first}×({power}-1)/{ratio-1}",
        f"= {first}×{power-1}/{ratio-1}",
        f"= {partial}",
    ]
    return MathProblem(question, "\n".join(lines), str(partial), str(partial), 7, "sequences")
# ─── Level 8: Pre-Calculus ────────────────────────────────────────────────────
def gen_precalc(rng: random.Random) -> MathProblem:
    """Generate one level-8 pre-calculus problem (Thai text).

    The kind is drawn uniformly from: basic logarithm, logarithm product
    law, exact trigonometric value, and the limit of a linear polynomial.

    Args:
        rng: Random source; a seeded instance yields reproducible problems.

    Returns:
        A MathProblem with level 8 and category "precalculus".
    """
    kind = rng.choice(["log_basic", "log_laws", "trig_exact", "limit_poly"])
    if kind == "log_basic":
        base = rng.choice([2, 3, 5, 10])
        exp = rng.randint(2, 6)
        val = base ** exp
        q = f"หา log_{base}({val})"
        th = f"log_{base}({val}) = ? หมายความว่า {base}^? = {val}\n{base}^{exp} = {val}\nดังนั้น log_{base}({val}) = {exp}"
        return MathProblem(q, th, str(exp), str(exp), 8, "precalculus")
    elif kind == "log_laws":
        base = rng.choice([2, 10])
        a = rng.choice([4, 8, 16, 9, 27])
        b = rng.choice([2, 3, 4])
        q = f"หา log_{base}({a}) + log_{base}({b}) (ถ้า log_{base}(x) = log base {base})"
        product = a * b
        log_ab = math.log(product, base)
        th = (f"กฎบวก: log_b(m) + log_b(n) = log_b(m×n)\n"
              f"log_{base}({a}) + log_{base}({b}) = log_{base}({a}×{b}) = log_{base}({product})\n"
              f"= {log_ab:.4f}")
        return MathProblem(q, th, f"{log_ab:.4f}", f"{log_ab:.4f}", 8, "precalculus")
    elif kind == "trig_exact":
        exact = {(30, "sin"): "1/2", (30, "cos"): "√3/2", (45, "sin"): "√2/2",
                 (45, "cos"): "√2/2", (60, "sin"): "√3/2", (60, "cos"): "1/2",
                 (0, "sin"): "0", (0, "cos"): "1", (90, "sin"): "1", (90, "cos"): "0"}
        # Same angle order as the former lookup table, so a given seed still
        # selects the same angle.
        angle = rng.choice([30, 45, 60, 0, 90])
        func = rng.choice(["sin", "cos"])
        # Compute the value from the angle. The former table stored 30° and
        # 60° as (sin, cos) but unpacked them as (cos, sin), so sin(30°) came
        # out as 0.8660 next to the exact label "1/2".
        radians = math.radians(angle)
        val = math.sin(radians) if func == "sin" else math.cos(radians)
        val = round(val, 10) + 0.0  # clear float dust such as cos(90°) ≈ 6e-17
        exact_val = exact[(angle, func)]
        # Irrational exact forms are only approximated by the decimal.
        relation = "≈" if "√" in exact_val else "="
        q = f"หาค่า {func}({angle}°)"
        th = f"ค่ามาตรฐาน: {func}({angle}°) = {exact_val} {relation} {val:.4f}"
        return MathProblem(q, th, f"{exact_val} {relation} {val:.4f}", f"{val:.4f}", 8, "precalculus")
    else:  # limit_poly
        a = rng.randint(1, 5)
        b = rng.randint(1, 10)
        c = rng.randint(-10, 10)
        # lim_{x→c} (ax+b) = ac+b, by continuity of a polynomial.
        limit_val = a * c + b
        c_txt = f"({c})" if c < 0 else str(c)  # keep "2×(-3)" readable
        q = f"หา lim_{{x→{c}}} ({a}x + {b})"
        th = (f"ฟังก์ชัน {a}x + {b} ต่อเนื่อง แทนค่า x = {c}:\n"
              f"lim = {a}×{c_txt} + {b} = {a*c} + {b} = {limit_val}")
        return MathProblem(q, th, str(limit_val), str(limit_val), 8, "precalculus")
# ─── Generator Map & Main ─────────────────────────────────────────────────────
# Generation plan: one (generator function, number of problems, level) tuple
# per category. generate_math_dataset() walks this list in order; it uses the
# function and the count, and ignores the third element.
GENERATORS = [
    (gen_arithmetic, 200, 1),
    (gen_algebra, 200, 2),
    (gen_word_problem, 200, 3),
    (gen_geometry, 150, 4),
    (gen_probability, 150, 5),
    (gen_number_theory, 100, 6),
    (gen_sequence, 100, 7),
    (gen_precalc, 100, 8),
]
def generate_math_dataset(
    seed: int = 42,
    output_path: Path = MATH_OUTPUT,
) -> int:
    """Generate the full math dataset and write it as JSON Lines.

    Each entry of GENERATORS is run ``count`` times against a single seeded
    random.Random, and every resulting problem is written as one JSON object
    per line (UTF-8, non-ASCII characters kept as-is).

    Generation stays best-effort: a generator call that raises is skipped,
    but the failure is now reported on stderr and counted instead of being
    silently discarded.

    Args:
        seed: Seed for the random number generator.
        output_path: Destination file; it is overwritten if it exists.

    Returns:
        The number of problems written.

    Raises:
        OSError: If the output file cannot be created or written.
    """
    import sys  # local import: only needed for failure reporting

    rng = random.Random(seed)
    total = 0
    skipped = 0
    output_path = Path(output_path)
    # Allow a custom destination whose directory does not exist yet.
    output_path.parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, "w", encoding="utf-8") as f:
        for gen_fn, count, _level in GENERATORS:
            for _ in range(count):
                # Only problem construction and serialisation are guarded;
                # file-system errors must propagate to the caller.
                try:
                    line = json.dumps(asdict(gen_fn(rng)), ensure_ascii=False)
                except Exception as exc:
                    skipped += 1
                    name = getattr(gen_fn, "__name__", repr(gen_fn))
                    print(f"[math_forge] {name} failed, problem skipped: {exc!r}",
                          file=sys.stderr)
                    continue
                f.write(line + "\n")
                total += 1
    print(f"Math dataset: {total:,} problems → {output_path}")
    if skipped:
        print(f"[math_forge] skipped {skipped:,} problems due to errors",
              file=sys.stderr)
    return total
# Script entry point: build the dataset with the default seed and output path.
if __name__ == "__main__":
    generate_math_dataset()

Xet Storage Details

Size:
32.7 kB
·
Xet hash:
f31230aa66b8f05342ab2a29cab993971cc5a96fc54d1baca0ebbb9c2682ea15

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.