Buckets:
bbkdevops/unicosys-hypergraph-bucket / tinymind-native-8b-remote-handoff /bundle /data /math_forge.py
"""
Math Forge — build high-quality math training data for GRPO.

Covers 8 categories, ranging from arithmetic to competition math:
  L1: Arithmetic & Fractions
  L2: Algebra (linear, quadratic)
  L3: Word Problems (rates, work, mixture)
  L4: Geometry (area, perimeter, 3D)
  L5: Probability & Combinatorics
  L6: Number Theory
  L7: Sequences & Series
  L8: Pre-Calculus / Limits

Each problem carries: question, thinking (step-by-step), answer,
ground_truth (verified).
"""
| from __future__ import annotations | |
| import json | |
| import math | |
| import random | |
| from dataclasses import dataclass, asdict | |
| from pathlib import Path | |
# Output lives in a "filtered" directory next to this module.
OUTPUT_DIR = Path(__file__).parent / "filtered"
# Import-time side effect: create the output directory if it is missing.
OUTPUT_DIR.mkdir(exist_ok=True)
# Default JSONL target used by generate_math_dataset().
MATH_OUTPUT = OUTPUT_DIR / "math_grpo.jsonl"
@dataclass
class MathProblem:
    """One generated math problem, serialisable with ``dataclasses.asdict``.

    The generators build instances positionally as
    ``MathProblem(question, thinking, answer, ground_truth, level, category)``,
    and the dataset writer calls ``asdict`` on them, so this class must be a
    dataclass: without the decorator both operations raise ``TypeError``.
    """

    question: str
    thinking: str  # step-by-step reasoning text
    answer: str
    ground_truth: str  # verifiable value, stored as a string
    level: int  # 1-8
    category: str
    lang: str = "th"
    source: str = "math_forge"
    topic: str = "mathematics"
    context: str = ""
| # ─── Level 1: Arithmetic ────────────────────────────────────────────────────── | |
def gen_arithmetic(rng: random.Random) -> MathProblem:
    """Generate one level-1 arithmetic problem.

    The sub-type is drawn from: addition, subtraction, multiplication, exact
    division, fraction addition, percentage, and a mixed ``a × b + c``
    expression.

    Args:
        rng: Random source; every draw goes through it, so a seeded instance
            gives reproducible problems.

    Returns:
        A ``MathProblem`` with level 1 and category ``"arithmetic"``.
        ``ground_truth`` is the numeric answer as a string (the float value
        of the sum for the fraction sub-type).
    """
    op = rng.choice(["add", "sub", "mul", "div", "frac", "percent", "mixed"])
    if op == "add":
        a, b = rng.randint(100, 9999), rng.randint(100, 9999)
        ans = a + b
        q = f"คำนวณ: {a:,} + {b:,} = ?"
        th = f"บวก: {a:,} + {b:,} = {ans:,}"
    elif op == "sub":
        a = rng.randint(1000, 9999)
        b = rng.randint(100, a)  # b <= a keeps the difference non-negative
        ans = a - b
        q = f"คำนวณ: {a:,} - {b:,} = ?"
        th = f"ลบ: {a:,} - {b:,} = {ans:,}"
    elif op == "mul":
        a, b = rng.randint(12, 999), rng.randint(12, 99)
        ans = a * b
        q = f"คำนวณ: {a} × {b} = ?"
        th = f"คูณ: {a} × {b} = {ans}"
    elif op == "div":
        b = rng.randint(2, 20)
        ans = rng.randint(10, 500)
        a = b * ans  # dividend is built from the quotient, so it divides exactly
        q = f"คำนวณ: {a} ÷ {b} = ?"
        th = f"หาร: {a} ÷ {b} = {ans}"
    elif op == "frac":
        from fractions import Fraction

        d1 = rng.randint(2, 12)
        d2 = rng.randint(2, 12)
        n1 = rng.randint(1, d1 - 1)
        n2 = rng.randint(1, d2 - 1)
        # Use the true least common denominator for the worked steps. The
        # denominator of the *reduced* sum can be smaller than the LCD
        # (1/4 + 1/4 = 1/2), which would make the conversion lines wrong.
        lcd = d1 * d2 // math.gcd(d1, d2)
        s1 = n1 * lcd // d1
        s2 = n2 * lcd // d2
        result = Fraction(s1 + s2, lcd)
        ans_val = float(result)
        if result.denominator != 1:
            ans = f"{result.numerator}/{result.denominator}"
        else:
            ans = str(result.numerator)
        q = f"คำนวณ: {n1}/{d1} + {n2}/{d2} = ?"
        th = (f"หา LCD ของ {d1} และ {d2} = {lcd}\n"
              f"{n1}/{d1} = {s1}/{lcd}\n"
              f"{n2}/{d2} = {s2}/{lcd}\n"
              f"รวม = {s1 + s2}/{lcd}")
        if ans != f"{s1 + s2}/{lcd}":
            th += f" = {ans}"  # show the reduction step when there is one
        return MathProblem(q, th, f"= {ans}", str(ans_val), 1, "arithmetic")
    elif op == "percent":
        pct = rng.choice([5, 10, 15, 20, 25, 30, 40, 50, 60, 75])
        base = rng.randint(20, 500) * 4
        # base * pct is not always a multiple of 100 (5% of 84 = 4.2), so
        # floor division would produce a wrong answer. Keep the exact value.
        scaled = base * pct
        ans = scaled // 100 if scaled % 100 == 0 else scaled / 100
        q = f"{pct}% ของ {base:,} มีค่าเท่าไร?"
        th = f"{pct}% × {base} = {pct}/100 × {base} = {ans}"
    else:  # mixed
        a = rng.randint(2, 20)
        b = rng.randint(2, 20)
        c = rng.randint(2, 20)
        ans = a * b + c
        q = f"คำนวณ: {a} × {b} + {c} = ?"
        th = f"ทำคูณก่อน: {a} × {b} = {a*b}\nแล้วบวก: {a*b} + {c} = {ans}"
    return MathProblem(q, th, f"= {ans}", str(ans), 1, "arithmetic")
| # ─── Level 2: Algebra ───────────────────────────────────────────────────────── | |
def gen_algebra(rng: random.Random) -> MathProblem:
    """Generate one level-2 algebra problem.

    The sub-type is drawn from: one-sided linear equation, two-sided linear
    equation, factorable monic quadratic, 2×2 linear system, and a linear
    inequality with a positive coefficient.

    Args:
        rng: Random source used for every draw.

    Returns:
        A ``MathProblem`` with level 2 and category ``"algebra"``.
        ``ground_truth`` is a number string, or two comma-separated integers
        for the quadratic (``min,max`` roots) and the system (``x,y``).
    """
    from fractions import Fraction

    def paren(value: int) -> str:
        """Wrap negative numbers in parentheses so products read correctly."""
        return f"({value})" if value < 0 else str(value)

    def factor(root: int) -> str:
        """Render the linear factor that vanishes at ``root``."""
        if root == 0:
            return "x"
        return f"(x - {root})" if root > 0 else f"(x + {-root})"

    kind = rng.choice(["linear1", "linear2", "quadratic", "system", "inequality"])
    if kind == "linear1":
        a = rng.randint(2, 15)
        b = rng.randint(1, 30)
        c = rng.randint(1, 50)
        # ax + b = c; re-draw c when needed so the solution is an integer
        if (c - b) % a != 0:
            c = a * rng.randint(1, 10) + b
        x = (c - b) // a
        q = f"แก้สมการ: {a}x + {b} = {c} หา x"
        th = (f"ย้าย {b} ข้ามไปอีกข้าง:\n"
              f"{a}x = {c} - {b} = {c-b}\n"
              f"x = {c-b} ÷ {a} = {x}")
        return MathProblem(q, th, f"x = {x}", str(x), 2, "algebra")
    elif kind == "linear2":
        a = rng.randint(2, 8)
        b = rng.randint(1, 10)
        c = rng.randint(2, 8)
        d = rng.randint(1, 10)
        # ax + b = cx + d  →  (a-c)x = d-b
        if a == c:
            a += 1  # guarantees a - c != 0, so no further zero check is needed
        x_num = d - b
        x_den = a - c
        x_frac = Fraction(x_num, x_den)
        q = f"แก้สมการ: {a}x + {b} = {c}x + {d} หา x"
        th = (f"ย้ายพจน์ x ไปข้างซ้าย:\n"
              f"({a}-{c})x = {d}-{b}\n"
              f"{x_den}x = {x_num}\n"
              f"x = {x_frac}")
        return MathProblem(q, th, f"x = {x_frac}", str(float(x_frac)), 2, "algebra")
    elif kind == "quadratic":
        # (x - r1)(x - r2) = 0
        r1 = rng.randint(-8, 8)
        r2 = rng.randint(-8, 8)
        if r1 == r2:
            r2 += 1
        b_coef = -(r1 + r2)
        c_coef = r1 * r2
        # Build the polynomial term by term so zero terms are dropped and a
        # unit coefficient is not printed ("x² - x" rather than "x² - 1x").
        poly = "x²"
        if b_coef:
            magnitude = "" if abs(b_coef) == 1 else str(abs(b_coef))
            poly += f" {'+' if b_coef > 0 else '-'} {magnitude}x"
        if c_coef:
            poly += f" {'+' if c_coef > 0 else '-'} {abs(c_coef)}"
        # Put a bare "x" factor first so the product reads "x(x - 3)".
        first, second = sorted((r1, r2), key=lambda root: root != 0)
        q = f"แก้สมการ: {poly} = 0 หา x"
        th = (f"แยกตัวประกอบ: {factor(first)}{factor(second)} = 0\n"
              f"ตรวจสอบ: ขยายได้ {poly} ✓\n"
              f"ดังนั้น x = {r1} หรือ x = {r2}")
        ans_str = f"x = {min(r1,r2)} หรือ x = {max(r1,r2)}"
        return MathProblem(q, th, ans_str,
                           f"{min(r1,r2)},{max(r1,r2)}", 2, "algebra")
    elif kind == "system":
        # ax + by = c, dx + ey = f, built from a known integer solution (x, y)
        x, y = rng.randint(-5, 10), rng.randint(-5, 10)
        a, b = rng.randint(1, 5), rng.randint(1, 5)
        d, e = rng.randint(1, 5), rng.randint(1, 5)
        if a * e == b * d:
            e += 1  # avoid a singular system
        c = a * x + b * y
        f = d * x + e * y
        q = f"แก้ระบบสมการ:\n {a}x + {b}y = {c}\n {d}x + {e}y = {f}"
        th = (f"วิธีกำจัด: คูณสมการแรก ×{d} คูณสมการสอง ×{a}:\n"
              f" {a*d}x + {b*d}y = {c*d}\n"
              f" {a*d}x + {a*e}y = {a*f}\n"
              f"ลบกัน: ({b*d}-{a*e})y = {c*d-a*f}\n"
              f"y = {paren(c*d-a*f)} ÷ {paren(b*d-a*e)} = {y}\n"
              f"แทน y={y}: {a}x = {c} - {b}×{paren(y)} = {c-b*y}, x = {x}")
        return MathProblem(q, th, f"x = {x}, y = {y}",
                           f"{x},{y}", 2, "algebra")
    else:  # inequality
        a = rng.randint(2, 8)
        b = rng.randint(1, 20)
        c = rng.randint(b + 1, 50)
        # ax + b > c  →  x > (c-b)/a ; a is positive so the direction is kept
        bound = Fraction(c - b, a)
        q = f"แก้อสมการ: {a}x + {b} > {c}"
        th = (f"ย้าย {b}: {a}x > {c-b}\n"
              f"หาร {a} (บวก ไม่เปลี่ยนทิศ): x > {bound}")
        return MathProblem(q, th, f"x > {bound}", str(float(bound)), 2, "algebra")
| # ─── Level 3: Word Problems ─────────────────────────────────────────────────── | |
def gen_word_problem(rng: random.Random) -> MathProblem:
    """Generate one level-3 word problem.

    The sub-type is drawn from: distance from speed and time, combined work
    rate, mixture concentration, sum of ages, selling price for a target
    profit, and consecutive integers.

    Args:
        rng: Random source used for every draw.

    Returns:
        A ``MathProblem`` with level 3 and category ``"word_problem"``.
        ``ground_truth`` is the numeric answer as a string.
    """
    from fractions import Fraction

    kind = rng.choice(["speed", "work", "mixture", "age", "profit", "consecutive"])
    if kind == "speed":
        v = rng.choice([40, 50, 60, 80, 90, 100, 120])
        t_num = rng.randint(1, 8)
        t_den = rng.choice([1, 2, 4])
        t = Fraction(t_num, t_den)
        d = v * t
        q = f"รถยนต์วิ่งด้วยความเร็ว {v} กม./ชม. เป็นเวลา {t} ชั่วโมง จะเดินทางได้กี่กิโลเมตร?"
        th = f"ระยะทาง = ความเร็ว × เวลา\n= {v} × {t} = {d} กม."
        return MathProblem(q, th, f"{d} กิโลเมตร", str(float(d)), 3, "word_problem")
    elif kind == "work":
        a_days = rng.choice([6, 8, 10, 12, 15, 20])
        b_days = rng.choice([4, 6, 8, 10, 12])
        combined = Fraction(1, a_days) + Fraction(1, b_days)
        total = 1 / combined
        q = (f"คน A ทำงานคนเดียวเสร็จใน {a_days} วัน คน B เสร็จใน {b_days} วัน "
             f"ถ้าทำงานพร้อมกัน จะเสร็จในกี่วัน?")
        th = (f"A ทำได้วันละ 1/{a_days} ของงาน\n"
              f"B ทำได้วันละ 1/{b_days} ของงาน\n"
              f"ด้วยกัน = 1/{a_days} + 1/{b_days} = {combined} ต่อวัน\n"
              f"วันที่เสร็จ = 1 ÷ {combined} = {total} วัน")
        return MathProblem(q, th, f"{total} วัน", str(float(total)), 3, "word_problem")
    elif kind == "mixture":
        c1 = rng.choice([10, 20, 30, 40])
        c2 = rng.choice([50, 60, 70, 80])
        # target only steers the volume ratio; the stated answer is computed
        # from the actual volumes below
        target = rng.randint(c1 + 5, c2 - 5)
        v2 = rng.randint(2, 20)
        v1 = v2 * (c2 - target) // (target - c1)
        if v1 <= 0:
            v1 = 10
        actual_c = (v1 * c1 + v2 * c2) / (v1 + v2)
        q = (f"ผสมน้ำยา {c1}% จำนวน {v1} ลิตร กับน้ำยา {c2}% จำนวน {v2} ลิตร "
             f"ความเข้มข้นของส่วนผสมเป็นเท่าไร?")
        th = (f"ปริมาณสาร = {v1}×{c1}/100 + {v2}×{c2}/100 = {v1*c1/100:.2f} + {v2*c2/100:.2f} = {(v1*c1+v2*c2)/100:.2f}\n"
              f"ปริมาตรรวม = {v1}+{v2} = {v1+v2} ลิตร\n"
              f"ความเข้มข้น = {(v1*c1+v2*c2)/100:.2f}/{v1+v2} × 100 = {actual_c:.2f}%")
        return MathProblem(q, th, f"{actual_c:.2f}%", f"{actual_c:.2f}", 3, "word_problem")
    elif kind == "age":
        now_a = rng.randint(10, 40)
        diff = rng.randint(2, 20)
        now_b = now_a + diff
        years = rng.randint(3, 15)
        sum_then = (now_a + years) + (now_b + years)
        q = (f"ปัจจุบัน A อายุ {now_a} ปี B อายุ {now_b} ปี "
             f"อีก {years} ปีข้างหน้า อายุรวมของทั้งสองจะเป็นเท่าไร?")
        th = (f"A หลัง {years} ปี = {now_a} + {years} = {now_a+years}\n"
              f"B หลัง {years} ปี = {now_b} + {years} = {now_b+years}\n"
              f"รวม = {now_a+years} + {now_b+years} = {sum_then}")
        return MathProblem(q, th, f"{sum_then} ปี", str(sum_then), 3, "word_problem")
    elif kind == "profit":
        cost = rng.randint(100, 2000) * 5
        pct = rng.choice([10, 15, 20, 25, 30, 40])
        # cost * (100 + pct) is a multiple of 25 but not always of 100
        # (505 at +10% = 555.5), so floor division would understate the
        # price. The remainder is a multiple of a quarter, which a float
        # represents exactly.
        scaled = cost * (100 + pct)
        sell = scaled // 100 if scaled % 100 == 0 else scaled / 100
        profit = sell - cost
        q = f"ซื้อของราคา {cost:,} บาท ขายเพื่อกำไร {pct}% ควรตั้งราคาขายเท่าไร?"
        th = (f"ราคาขาย = ต้นทุน × (1 + กำไร%)\n"
              f"= {cost:,} × (1 + {pct}/100)\n"
              f"= {cost:,} × {1+pct/100}\n"
              f"= {sell:,} บาท\n"
              f"กำไร = {sell:,} - {cost:,} = {profit:,} บาท")
        return MathProblem(q, th, f"ราคาขาย {sell:,} บาท", str(sell), 3, "word_problem")
    else:  # consecutive
        n = rng.choice([3, 4, 5])
        start = rng.randint(5, 50)
        total = sum(range(start, start + n))
        offset = n * (n - 1) // 2  # 0 + 1 + ... + (n-1)
        q = f"จำนวนเต็มบวกต่อเนื่องกัน {n} จำนวน ผลรวม = {total} จำนวนแรกคือเท่าไร?"
        th = (f"สมมติจำนวนแรก = x\n"
              f"ผลรวม = x + (x+1) + ... + (x+{n-1}) = {n}x + {offset}\n"
              f"{n}x + {offset} = {total}\n"
              f"{n}x = {total - offset}\n"
              f"x = {start}")
        return MathProblem(q, th, f"จำนวนแรก = {start}", str(start), 3, "word_problem")
| # ─── Level 4: Geometry ──────────────────────────────────────────────────────── | |
def gen_geometry(rng: random.Random) -> MathProblem:
    """Generate one level-4 geometry problem.

    The sub-type is drawn from: triangle area, circle area or circumference,
    rectangle diagonal, Pythagorean triple (hypotenuse or leg), cylinder
    volume, and similar triangles. Circle and cylinder problems use
    π = 3.14, as stated in the question text.

    Args:
        rng: Random source used for every draw.

    Returns:
        A ``MathProblem`` with level 4 and category ``"geometry"``.
        ``ground_truth`` is the numeric answer as a string.
    """
    kind = rng.choice(["triangle_area", "circle", "rect_diag", "pythagorean",
                       "cylinder", "similar_triangle"])
    if kind == "triangle_area":
        base = rng.randint(4, 30)
        height = rng.randint(3, 25)
        area = base * height / 2
        q = f"สามเหลี่ยมมีฐาน {base} ซม. สูง {height} ซม. หาพื้นที่"
        th = f"พื้นที่สามเหลี่ยม = (ฐาน × สูง) / 2 = ({base} × {height}) / 2 = {area:.1f} ตร.ซม."
        return MathProblem(q, th, f"{area:.1f} ตร.ซม.", f"{area:.2f}", 4, "geometry")
    elif kind == "circle":
        r = rng.randint(3, 20)
        kind2 = rng.choice(["area", "circumference"])
        if kind2 == "area":
            area = 3.14 * r * r
            q = f"วงกลมรัศมี {r} ซม. หาพื้นที่ (ใช้ π = 3.14)"
            th = f"พื้นที่ = πr² = 3.14 × {r}² = 3.14 × {r*r} = {area:.2f} ตร.ซม."
            return MathProblem(q, th, f"{area:.2f} ตร.ซม.", f"{area:.2f}", 4, "geometry")
        else:
            circumference = 2 * 3.14 * r
            q = f"วงกลมรัศมี {r} ซม. หาเส้นรอบวง (ใช้ π = 3.14)"
            th = f"เส้นรอบวง = 2πr = 2 × 3.14 × {r} = {circumference:.2f} ซม."
            return MathProblem(q, th, f"{circumference:.2f} ซม.", f"{circumference:.2f}", 4, "geometry")
    elif kind == "pythagorean":
        # Pythagorean triples, optionally scaled, keep every side an integer
        triples = [(3, 4, 5), (5, 12, 13), (8, 15, 17), (7, 24, 25),
                   (6, 8, 10), (9, 12, 15), (10, 24, 26)]
        a, b, c = rng.choice(triples)
        mult = rng.randint(1, 4)
        a, b, c = a * mult, b * mult, c * mult
        ask = rng.choice(["hyp", "leg"])
        if ask == "hyp":
            q = f"สามเหลี่ยมมุมฉาก ขา {a} ซม. และ {b} ซม. ด้านตรงข้ามมุมฉากยาวเท่าไร?"
            th = (f"ทฤษฎีบทพีทาโกรัส: c² = a² + b²\n"
                  f"c² = {a}² + {b}² = {a*a} + {b*b} = {a*a+b*b}\n"
                  f"c = √{a*a+b*b} = {c} ซม.")
            return MathProblem(q, th, f"{c} ซม.", str(c), 4, "geometry")
        else:
            q = f"สามเหลี่ยมมุมฉาก ด้านตรงข้ามมุมฉาก {c} ซม. ขาด้านหนึ่ง {a} ซม. อีกขายาวเท่าไร?"
            th = (f"b² = c² - a² = {c*c} - {a*a} = {c*c-a*a}\n"
                  f"b = √{b*b} = {b} ซม.")
            return MathProblem(q, th, f"{b} ซม.", str(b), 4, "geometry")
    elif kind == "cylinder":
        r = rng.randint(3, 15)
        h = rng.randint(5, 30)
        vol = 3.14 * r * r * h
        q = f"ทรงกระบอกรัศมี {r} ซม. สูง {h} ซม. หาปริมาตร (ใช้ π = 3.14)"
        th = (f"ปริมาตร = πr²h\n"
              f"= 3.14 × {r}² × {h}\n"
              f"= 3.14 × {r*r} × {h}\n"
              f"= {vol:.2f} ลบ.ซม.")
        return MathProblem(q, th, f"{vol:.2f} ลบ.ซม.", f"{vol:.2f}", 4, "geometry")
    elif kind == "rect_diag":
        w = rng.randint(3, 20)
        h = rng.randint(3, 20)
        d = math.sqrt(w * w + h * h)
        q = f"สี่เหลี่ยมผืนผ้ากว้าง {w} ซม. ยาว {h} ซม. เส้นทแยงมุมยาวเท่าไร?"
        th = (f"เส้นทแยงมุม = √(กว้าง² + ยาว²)\n"
              f"= √({w}² + {h}²) = √({w*w} + {h*h}) = √{w*w+h*h} ≈ {d:.2f} ซม.")
        return MathProblem(q, th, f"≈ {d:.2f} ซม.", f"{d:.4f}", 4, "geometry")
    else:  # similar_triangle
        scale = rng.randint(2, 5)
        # The question calls a*scale the SHORTEST side of B and asks for the
        # LONGEST, so the sides must be sorted and must form a real triangle.
        # Re-draw until the triangle inequality holds.
        while True:
            a, b, c = sorted((rng.randint(3, 10), rng.randint(3, 10), rng.randint(5, 15)))
            if a + b > c:
                break
        q = (f"สามเหลี่ยม A มีด้าน {a}, {b}, {c} ซม. "
             f"สามเหลี่ยม B คล้ายกัน มีด้านสั้นที่สุด = {a*scale} ซม. "
             f"หาอัตราส่วนความคล้าย และด้านที่ยาวที่สุดของ B")
        longest_B = c * scale
        th = (f"อัตราส่วนความคล้าย = {a*scale}/{a} = {scale}\n"
              f"ด้านที่ยาวที่สุด = {c} × {scale} = {longest_B} ซม.")
        return MathProblem(q, th, f"อัตราส่วน {scale}:1, ด้านยาวสุด = {longest_B} ซม.",
                           str(longest_B), 4, "geometry")
| # ─── Level 5: Probability ───────────────────────────────────────────────────── | |
def gen_probability(rng: random.Random) -> MathProblem:
    """Generate one level-5 probability or combinatorics problem.

    The sub-type is drawn from: dice (one die "at least", two dice "sum
    equals"), balls in a bag (one draw, two draws without replacement),
    permutations, combinations, and the multiplication rule for conditional
    probability.

    NOTE(review): ``"cards"`` is in the choice list but has no branch of its
    own, so it falls through to the conditional-probability problem. That
    behaviour is kept as is; add a dedicated branch if card problems are
    wanted.

    Args:
        rng: Random source used for every draw.

    Returns:
        A ``MathProblem`` with level 5 and category ``"probability"`` or
        ``"combinatorics"``. ``ground_truth`` is a 4-decimal probability
        string, or the integer count for permutations and combinations.
    """
    from fractions import Fraction

    kind = rng.choice(["dice", "cards", "ball_bag", "permutation", "combination", "conditional"])
    if kind == "dice":
        n_dice = rng.choice([1, 2])
        if n_dice == 1:
            target = rng.randint(2, 6)
            q = f"ทอดลูกเต๋า 1 ลูก ความน่าจะเป็นที่จะได้ ≥ {target} คือเท่าไร?"
            favourable = 7 - target
            prob = Fraction(favourable, 6)
            # List the faces explicitly; a fixed "t, t+1, ..., 6" template
            # prints nonsense such as "6, 7, ..., 6" for high targets.
            wanted = ", ".join(str(face) for face in range(target, 7))
            th = (f"ผลลัพธ์ที่ต้องการ: {wanted} → {favourable} แบบ\n"
                  f"ผลลัพธ์ทั้งหมด: 6 แบบ\n"
                  f"P = {favourable}/6 = {prob}")
            return MathProblem(q, th, f"P = {prob} ≈ {float(prob):.4f}",
                               f"{float(prob):.4f}", 5, "probability")
        else:
            target_sum = rng.randint(8, 10)
            outcomes = [(i, j) for i in range(1, 7) for j in range(1, 7)
                        if i + j == target_sum]
            prob = Fraction(len(outcomes), 36)
            q = f"ทอดลูกเต๋า 2 ลูก ความน่าจะเป็นที่ผลรวม = {target_sum} คือเท่าไร?"
            th = (f"คู่ที่ได้ผลรวม {target_sum}: {outcomes}\n"
                  f"จำนวน = {len(outcomes)}, ทั้งหมด = 36\n"
                  f"P = {prob} ≈ {float(prob):.4f}")
            return MathProblem(q, th, f"P = {prob}", f"{float(prob):.4f}", 5, "probability")
    elif kind == "ball_bag":
        r = rng.randint(3, 10)
        b = rng.randint(3, 10)
        total = r + b
        kind2 = rng.choice(["one_red", "both_red_no_replace"])
        if kind2 == "one_red":
            prob = Fraction(r, total)
            q = f"ถุงมีลูกบอลแดง {r} ลูก น้ำเงิน {b} ลูก สุ่มหยิบ 1 ลูก P(แดง) = ?"
            th = f"P(แดง) = แดง/รวม = {r}/{total} = {prob}"
            return MathProblem(q, th, f"P = {prob} ≈ {float(prob):.4f}",
                               f"{float(prob):.4f}", 5, "probability")
        else:
            p = Fraction(r, total) * Fraction(r - 1, total - 1)
            q = f"ถุงมีลูกบอลแดง {r} ลูก น้ำเงิน {b} ลูก สุ่มหยิบ 2 ลูก (ไม่ใส่คืน) P(แดงทั้งคู่) = ?"
            th = (f"P(แดงทั้งคู่) = P(แดง1) × P(แดง2|แดง1)\n"
                  f"= {r}/{total} × {r-1}/{total-1} = {p} ≈ {float(p):.4f}")
            return MathProblem(q, th, f"P = {p} ≈ {float(p):.4f}",
                               f"{float(p):.4f}", 5, "probability")
    elif kind == "permutation":
        n = rng.randint(4, 8)
        r = rng.randint(2, min(n, 4))
        perm = math.factorial(n) // math.factorial(n - r)
        q = f"จากตัวเลข {n} ตัว จัดเรียง {r} ตำแหน่งได้กี่วิธี? (P({n},{r}))"
        th = (f"P(n,r) = n! / (n-r)!\n"
              f"P({n},{r}) = {n}! / {n-r}! = {math.factorial(n)} / {math.factorial(n-r)} = {perm}")
        return MathProblem(q, th, f"{perm:,} วิธี", str(perm), 5, "combinatorics")
    elif kind == "combination":
        n = rng.randint(5, 12)
        r = rng.randint(2, min(n // 2, 5))
        comb = math.factorial(n) // (math.factorial(r) * math.factorial(n - r))
        q = f"เลือก {r} คนจาก {n} คน ทำได้กี่วิธี? (C({n},{r}))"
        th = (f"C(n,r) = n! / (r!(n-r)!)\n"
              f"C({n},{r}) = {n}! / ({r}! × {n-r}!) = {math.factorial(n)} / ({math.factorial(r)} × {math.factorial(n-r)}) = {comb}")
        return MathProblem(q, th, f"{comb:,} วิธี", str(comb), 5, "combinatorics")
    else:  # "conditional", plus the branch-less "cards" kind
        p_a = rng.choice([0.3, 0.4, 0.5, 0.6])
        p_b_given_a = rng.choice([0.7, 0.8, 0.9])
        p_ab = round(p_a * p_b_given_a, 4)  # rounding hides float noise
        q = (f"P(A) = {p_a}, P(B|A) = {p_b_given_a} "
             f"หา P(A ∩ B)")
        th = (f"กฎการคูณ: P(A ∩ B) = P(A) × P(B|A)\n"
              f"= {p_a} × {p_b_given_a} = {p_ab}")
        return MathProblem(q, th, f"P(A∩B) = {p_ab}", str(p_ab), 5, "probability")
| # ─── Level 6: Number Theory ──────────────────────────────────────────────────── | |
def gen_number_theory(rng: random.Random) -> MathProblem:
    """Generate one level-6 number-theory problem.

    The sub-type is drawn from: GCD or LCM of two integers, prime
    factorisation of a composite number, divisibility by 2/3/5/9 using the
    digit rules, and a remainder (mod) computation.

    Args:
        rng: Random source used for every draw.

    Returns:
        A ``MathProblem`` with level 6 and category ``"number_theory"``.
        ``ground_truth`` is the integer answer as a string, the
        ``" × "``-joined prime factors for factorisation, or ``str(list)``
        of the divisors that apply for the divisibility sub-type.
    """
    kind = rng.choice(["gcd_lcm", "prime", "divisibility", "modular"])
    if kind == "gcd_lcm":
        a = rng.randint(12, 120)
        b = rng.randint(12, 120)
        g = math.gcd(a, b)
        l = a * b // g
        ask = rng.choice(["gcd", "lcm"])
        if ask == "gcd":
            q = f"หา ห.ร.ม. ของ {a} และ {b}"
            # Write out every Euclidean division until the remainder is 0,
            # so the trace is complete and never divides by a zero remainder.
            steps = []
            hi, lo = a, b
            while lo:
                steps.append(f"{hi} = {hi // lo}×{lo} + {hi % lo}")
                hi, lo = lo, hi % lo
            th = ("ใช้อัลกอริทึมยูคลิด:\n"
                  + "\n".join(steps)
                  + f"\nห.ร.ม.({a},{b}) = {g}")
            return MathProblem(q, th, str(g), str(g), 6, "number_theory")
        else:
            q = f"หา ค.ร.น. ของ {a} และ {b}"
            th = (f"ค.ร.น.(a,b) = a×b / ห.ร.ม.(a,b)\n"
                  f"ห.ร.ม.({a},{b}) = {g}\n"
                  f"ค.ร.น. = {a}×{b} / {g} = {l}")
            return MathProblem(q, th, str(l), str(l), 6, "number_theory")
    elif kind == "prime":
        def is_prime(k: int) -> bool:
            """Trial-division primality test (inputs here are below 500)."""
            return k > 1 and all(k % i != 0 for i in range(2, int(k ** 0.5) + 1))

        # Test primality over the same range the candidates come from;
        # otherwise primes such as 211 are offered for factorisation.
        n = rng.choice([x for x in range(100, 500) if not is_prime(x)])
        # factorize by trial division
        factors = []
        temp = n
        d = 2
        while d * d <= temp:
            while temp % d == 0:
                factors.append(d)
                temp //= d
            d += 1
        if temp > 1:
            factors.append(temp)
        factor_str = " × ".join(str(f) for f in factors)
        q = f"แยกตัวประกอบเฉพาะของ {n}"
        # Show the successive divisions that peel off one prime at a time.
        steps = []
        rest = n
        for p in factors:
            steps.append(f"{rest} ÷ {p} = {rest // p}")
            rest //= p
        th = "\n".join(steps) + f"\n{n} = {factor_str}"
        return MathProblem(q, th, f"{n} = {factor_str}", factor_str, 6, "number_theory")
    elif kind == "divisibility":
        n = rng.randint(100, 9999)
        digit_sum = sum(int(d) for d in str(n))
        last_digit = str(n)[-1]
        rules = {2: n % 2 == 0, 3: digit_sum % 3 == 0,
                 5: n % 5 == 0, 9: digit_sum % 9 == 0}
        divisors = [k for k, v in rules.items() if v]
        verdict = {k: ("ลงตัว" if v else "ไม่ลงตัว") for k, v in rules.items()}
        q = f"จำนวน {n} หารด้วย 2, 3, 5, 9 ลงตัวหรือไม่? (ใช้กฎการหาร)"
        th = (f"÷2: เลขท้าย = {last_digit} → {verdict[2]}\n"
              f"÷3: ผลรวมหลัก = {digit_sum} → {verdict[3]} ({digit_sum}÷3={verdict[3]})\n"
              f"÷5: เลขท้าย = {last_digit} → {verdict[5]}\n"
              f"÷9: ผลรวมหลัก = {digit_sum} → {verdict[9]}")
        if divisors:
            joined = ", ÷".join(str(d) for d in divisors)
            ans = f"÷{joined} ลงตัว"
        else:
            ans = "ไม่ลงตัวทั้ง 4"
        return MathProblem(q, th, ans, str(divisors), 6, "number_theory")
    else:  # modular
        a = rng.randint(10, 99)
        m = rng.randint(3, 12)
        r = a % m
        q_full = f"หาเศษจากการหาร {a} ÷ {m}"
        th = f"{a} = {a//m} × {m} + {r}\nดังนั้น {a} mod {m} = {r}"
        return MathProblem(q_full, th, str(r), str(r), 6, "number_theory")
| # ─── Level 7: Sequences ─────────────────────────────────────────────────────── | |
def gen_sequence(rng: random.Random) -> MathProblem:
    """Build one level-7 problem about arithmetic or geometric sequences.

    Depending on the drawn flavour the problem asks for the n-th term of an
    arithmetic sequence, the n-th term of a geometric sequence, the sum of
    the first n terms of an arithmetic sequence, or the sum of the first n
    terms of a geometric sequence.

    Args:
        rng: Random source used for every draw.

    Returns:
        A ``MathProblem`` with level 7 and category ``"sequences"``; answer
        and ``ground_truth`` hold the same string (4 decimals for the
        geometric n-th term, an integer otherwise).
    """
    flavour = rng.choice(["arithmetic_seq", "geometric_seq", "sum_arith", "sum_geo"])

    if flavour == "arithmetic_seq":
        first = rng.randint(1, 20)
        step = rng.randint(2, 15)
        idx = rng.randint(5, 20)
        jump = (idx - 1) * step
        term = first + jump
        question = f"ลำดับเลขคณิต: a₁ = {first}, d = {step} หา a_{idx}"
        reasoning = "\n".join([
            "aₙ = a₁ + (n-1)d",
            f"a_{idx} = {first} + ({idx}-1)×{step}",
            f"= {first} + {jump}",
            f"= {term}",
        ])
        return MathProblem(question, reasoning, str(term), str(term), 7, "sequences")

    if flavour == "geometric_seq":
        from fractions import Fraction

        first = rng.randint(2, 10)
        ratio = rng.choice([2, 3, 1/2, 1/3])
        idx = rng.randint(4, 8)
        # the ratio is displayed as a small fraction (1/2, 1/3) or an integer
        shown_ratio = Fraction(ratio).limit_denominator(10)
        term = first * (ratio ** (idx - 1))
        term_text = f"{term:.4f}"
        question = f"ลำดับเรขาคณิต: a₁ = {first}, r = {shown_ratio} หา a_{idx}"
        reasoning = "\n".join([
            "aₙ = a₁ × rⁿ⁻¹",
            f"a_{idx} = {first} × ({shown_ratio})^{idx - 1}",
            f"= {term_text}",
        ])
        return MathProblem(question, reasoning, term_text, term_text, 7, "sequences")

    if flavour == "sum_arith":
        first = rng.randint(1, 10)
        step = rng.randint(1, 8)
        count = rng.randint(10, 30)
        doubled_first = 2 * first
        jump = (count - 1) * step
        bracket = doubled_first + jump
        partial_sum = count * bracket // 2
        question = f"ลำดับเลขคณิต a₁={first}, d={step} หาผลรวม {count} พจน์แรก (Sₙ)"
        reasoning = "\n".join([
            "Sₙ = n/2 × (2a₁ + (n-1)d)",
            f"S_{count} = {count}/2 × (2×{first} + ({count}-1)×{step})",
            f"= {count}/2 × ({doubled_first} + {jump})",
            f"= {count}/2 × {bracket}",
            f"= {partial_sum}",
        ])
        return MathProblem(question, reasoning, str(partial_sum), str(partial_sum), 7, "sequences")

    # remaining flavour: sum_geo
    first = rng.randint(2, 8)
    ratio = rng.choice([2, 3])
    count = rng.randint(4, 7)
    power = ratio ** count
    partial_sum = first * (power - 1) // (ratio - 1)
    question = f"ลำดับเรขาคณิต a₁={first}, r={ratio} หาผลรวม {count} พจน์แรก"
    reasoning = "\n".join([
        "Sₙ = a₁(rⁿ-1)/(r-1)",
        f"S_{count} = {first}×({ratio}^{count}-1)/({ratio}-1)",
        f"= {first}×({power}-1)/{ratio - 1}",
        f"= {first}×{power - 1}/{ratio - 1}",
        f"= {partial_sum}",
    ])
    return MathProblem(question, reasoning, str(partial_sum), str(partial_sum), 7, "sequences")
| # ─── Level 8: Pre-Calculus ──────────────────────────────────────────────────── | |
def gen_precalc(rng: random.Random) -> MathProblem:
    """Generate one level-8 pre-calculus problem.

    The sub-type is drawn from: an exact logarithm, the product rule for
    logarithms, sine or cosine of a standard angle, and the limit of a
    linear function.

    Args:
        rng: Random source used for every draw.

    Returns:
        A ``MathProblem`` with level 8 and category ``"precalculus"``.
        ``ground_truth`` is an integer string (log_basic, limit_poly) or a
        4-decimal string (log_laws, trig_exact).
    """
    kind = rng.choice(["log_basic", "log_laws", "trig_exact", "limit_poly"])
    if kind == "log_basic":
        base = rng.choice([2, 3, 5, 10])
        exp = rng.randint(2, 6)
        val = base ** exp
        q = f"หา log_{base}({val})"
        th = f"log_{base}({val}) = ? หมายความว่า {base}^? = {val}\n{base}^{exp} = {val}\nดังนั้น log_{base}({val}) = {exp}"
        return MathProblem(q, th, str(exp), str(exp), 8, "precalculus")
    elif kind == "log_laws":
        base = rng.choice([2, 10])
        a = rng.choice([4, 8, 16, 9, 27])
        b = rng.choice([2, 3, 4])
        q = f"หา log_{base}({a}) + log_{base}({b}) (ถ้า log_{base}(x) = log base {base})"
        product = a * b
        log_ab = math.log(product, base)
        th = (f"กฎบวก: log_b(m) + log_b(n) = log_b(m×n)\n"
              f"log_{base}({a}) + log_{base}({b}) = log_{base}({a}×{b}) = log_{base}({product})\n"
              f"= {log_ab:.4f}")
        return MathProblem(q, th, f"{log_ab:.4f}", f"{log_ab:.4f}", 8, "precalculus")
    elif kind == "trig_exact":
        # Every entry is (sin, cos). Mixing the order between entries swaps
        # the 30° and 60° values and reports e.g. sin(30°) ≈ 0.8660. The key
        # order is kept so a given seed still picks the same angle.
        half, root2_2, root3_2 = 0.5, math.sqrt(2) / 2, math.sqrt(3) / 2
        sin_cos = {30: (half, root3_2), 45: (root2_2, root2_2),
                   60: (root3_2, half), 0: (0.0, 1.0), 90: (1.0, 0.0)}
        angle = rng.choice(list(sin_cos.keys()))
        sin_a, cos_a = sin_cos[angle]
        func = rng.choice(["sin", "cos"])
        val = sin_a if func == "sin" else cos_a
        exact = {(30, "sin"): "1/2", (30, "cos"): "√3/2", (45, "sin"): "√2/2",
                 (45, "cos"): "√2/2", (60, "sin"): "√3/2", (60, "cos"): "1/2",
                 (0, "sin"): "0", (0, "cos"): "1", (90, "sin"): "1", (90, "cos"): "0"}
        exact_val = exact.get((angle, func), f"{val:.4f}")
        q = f"หาค่า {func}({angle}°)"
        th = f"ค่ามาตรฐาน: {func}({angle}°) = {exact_val} ≈ {val:.4f}"
        return MathProblem(q, th, f"{exact_val} ≈ {val:.4f}", f"{val:.4f}", 8, "precalculus")
    else:  # limit_poly
        a = rng.randint(1, 5)
        b = rng.randint(1, 10)
        c = rng.randint(-10, 10)
        # lim_{x→c} (ax+b) = ac+b
        limit_val = a * c + b
        # parenthesise a negative point so the product reads "2×(-3)"
        c_shown = f"({c})" if c < 0 else str(c)
        q = f"หา lim_{{x→{c}}} ({a}x + {b})"
        th = (f"ฟังก์ชัน {a}x + {b} ต่อเนื่อง แทนค่า x = {c}:\n"
              f"lim = {a}×{c_shown} + {b} = {a*c} + {b} = {limit_val}")
        return MathProblem(q, th, str(limit_val), str(limit_val), 8, "precalculus")
| # ─── Generator Map & Main ───────────────────────────────────────────────────── | |
# Generation plan consumed by generate_math_dataset(): one entry per level,
# as (generator function, number of problems to attempt, level number).
GENERATORS = [
    (gen_arithmetic, 200, 1),
    (gen_algebra, 200, 2),
    (gen_word_problem, 200, 3),
    (gen_geometry, 150, 4),
    (gen_probability, 150, 5),
    (gen_number_theory, 100, 6),
    (gen_sequence, 100, 7),
    (gen_precalc, 100, 8),
]
def generate_math_dataset(
    seed: int = 42,
    output_path: Path = MATH_OUTPUT,
) -> int:
    """Generate the full math dataset and write it as JSON Lines.

    Each entry of ``GENERATORS`` is called ``count`` times with one shared
    seeded ``random.Random``; every problem is written as a single JSON
    object per line (UTF-8, non-ASCII kept as is).

    Generation stays best-effort: a problem whose generator or serialisation
    raises is skipped. Skips are counted per generator and exception type
    and reported after the summary line, so a systematic failure no longer
    produces a silently short or empty file.

    Args:
        seed: Seed for the random source; the same seed reproduces the
            same dataset.
        output_path: Destination file. It is overwritten, and its parent
            directory is created if missing.

    Returns:
        The number of problems written.

    Raises:
        OSError: If the output file cannot be created or written.
    """
    rng = random.Random(seed)
    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    total = 0
    skipped = {}  # (generator name, exception type) -> [count, first message]
    with open(output_path, "w", encoding="utf-8") as f:
        for gen_fn, count, _level in GENERATORS:
            for _ in range(count):
                try:
                    record = asdict(gen_fn(rng))
                    line = json.dumps(record, ensure_ascii=False)
                except Exception as exc:  # best-effort: skip, but remember why
                    key = (gen_fn.__name__, type(exc).__name__)
                    entry = skipped.setdefault(key, [0, str(exc)])
                    entry[0] += 1
                    continue
                f.write(line + "\n")
                total += 1
    print(f"Math dataset: {total:,} problems → {output_path}")
    for (gen_name, exc_name), (n_skipped, message) in skipped.items():
        print(f"  skipped {n_skipped:,} from {gen_name}: {exc_name}: {message}")
    return total
# Script entry point: build the dataset with the default seed and output path.
if __name__ == "__main__":
    generate_math_dataset()
Xet Storage Details
- Size:
- 32.7 kB
- Xet hash:
- f31230aa66b8f05342ab2a29cab993971cc5a96fc54d1baca0ebbb9c2682ea15
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.