# brad-ai-1.12.2x / generate_training_data.py
# jowilke77's picture
# Update generate_training_data.py
# a2aaa4a verified
import json
import random
def create_training_example(
    user_msg: str,
    assistant_msg: str,
    *,
    system_prompt: str = "You are Brad AI 1.12.2x.",
) -> dict:
    """Build one chat-format training record.

    Args:
        user_msg: Text placed in the "user" turn.
        assistant_msg: Text placed in the "assistant" turn.
        system_prompt: Text placed in the "system" turn. It defaults to the
            Brad AI persona string, so existing two-argument calls produce
            exactly the same record as before.

    Returns:
        A dict with a single "messages" key whose value is a list of three
        {"role": ..., "content": ...} dicts, ordered system, user, assistant.
    """
    return {
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_msg},
            {"role": "assistant", "content": assistant_msg},
        ]
    }
# Accumulator for the finished chat-format records; it starts out empty
# and is filled later in the script.
training_data = []

# Arithmetic prompts, each stored as a (question, answer) pair.
math_problems = [
    (
        "What is 25 * 4?",
        "25 × 4 = 100",
    ),
    (
        "Calculate 144 / 12",
        "144 ÷ 12 = 12",
    ),
    (
        "What is the square root of 64?",
        "The square root of 64 is 8, because 8 × 8 = 64.",
    ),
    (
        "If a shirt costs $25 and is 20% off, what's the sale price?",
        "20% of $25 is $5. So the sale price is $25 - $5 = $20.",
    ),
    (
        "What is 7³?",
        "7³ (7 cubed) means 7 × 7 × 7 = 343.",
    ),
]
# Programming questions, each stored as a (question, answer) pair.
# The Python snippets embedded in the answers follow PEP 8 (4-space
# indentation, two spaces before an inline comment) so the training text
# demonstrates conventional formatting.
programming_qa = [
    (
        "What is a for loop?",
        "A for loop is a control structure that repeats a block of code a specific number of times. Example in Python:\n\n"
        "```python\n"
        "for i in range(5):\n"
        "    print(i)\n"
        "```\n\n"
        "This prints numbers 0 through 4.",
    ),
    (
        "How do you create a dictionary in Python?",
        "You create a dictionary using curly braces:\n\n"
        "```python\n"
        "my_dict = {'name': 'Brad', 'version': '1.12.2x'}\n"
        "print(my_dict['name'])  # Output: Brad\n"
        "```",
    ),
    (
        "What is an API?",
        "An API (Application Programming Interface) is a set of rules that allows different software applications to communicate with each other. It defines methods and data formats for requesting and exchanging information.",
    ),
    (
        "Explain what a variable is.",
        "A variable is a named storage location in memory that holds a value. It can be changed during program execution. Example: x = 5 stores the value 5 in variable x.",
    ),
]
# Science prompts, each stored as a (question, answer) pair. Long answers
# are split across adjacent string literals, which Python joins into a
# single string.
science_qa = [
    (
        "What are the three states of matter?",
        "The three common states of matter are solid, liquid, and gas. "
        "Solids have fixed shape and volume, liquids have fixed volume but "
        "take the shape of their container, and gases have neither fixed "
        "shape nor volume.",
    ),
    (
        "What is gravity?",
        "Gravity is a fundamental force that attracts objects with mass "
        "toward each other. On Earth, it gives weight to objects and causes "
        "them to fall toward the ground at 9.8 m/s².",
    ),
    (
        "What is DNA?",
        "DNA (Deoxyribonucleic Acid) is a molecule that carries genetic "
        "instructions for life. It has a double helix structure and contains "
        "genes that determine traits and characteristics.",
    ),
]
# General-knowledge prompts, each stored as a (question, answer) pair.
general_qa = [
    (
        "Who invented the telephone?",
        "Alexander Graham Bell is credited with inventing the telephone in 1876.",
    ),
    (
        "How many continents are there?",
        "There are 7 continents: Africa, Antarctica, Asia, Europe, North America, Oceania, and South America.",
    ),
    (
        "What is the largest ocean?",
        "The Pacific Ocean is the largest ocean, covering about 63 million square miles.",
    ),
]
# Logic and reasoning prompts, each stored as a (question, answer) pair.
# The penguin answer separates validity from soundness: the conclusion
# does follow from the stated premises, and the real problem is that the
# first premise is false. Labelling the argument a "logical fallacy"
# would teach the wrong distinction.
logic_qa = [
    (
        "If all birds can fly and penguins are birds, can penguins fly?",
        "If we accept both premises, the conclusion that penguins can fly "
        "follows logically, so the argument is valid. However, it is unsound "
        "because the first premise is false: not all birds can fly. Penguins "
        "are flightless birds, so in reality they cannot fly.",
    ),
    (
        "A farmer has 17 sheep, and all but 9 die. How many are left?",
        "9 sheep are left. 'All but 9 die' means that 9 survive.",
    ),
]
# Combine all categories into one flat list of (question, answer) pairs,
# keeping the category order: math, programming, science, general, logic.
all_qa = math_problems + programming_qa + science_qa + general_qa + logic_qa

# Turn every pair into a chat-format record and append it to the
# accumulator, preserving the order of all_qa.
training_data.extend(
    create_training_example(question, answer) for question, answer in all_qa
)
# Write the records as JSON Lines: one JSON object per line.
output_file = "train_expanded.jsonl"
# The encoding is pinned so the file does not depend on the platform's
# locale default. json.dumps escapes non-ASCII characters by default, so
# the bytes written are the same as before on any ASCII-compatible locale.
with open(output_file, 'w', encoding="utf-8") as f:
    # Hand all lines to the buffered writer in one call instead of issuing
    # one write() per record.
    f.writelines(json.dumps(example) + '\n' for example in training_data)
# Report how many records were produced and where they were saved.
print(f"Generated {len(training_data)} training examples!")
print(f"Saved to {output_file}")

# Show the first three records (fewer if the list is shorter) as a preview.
print("\nSample examples:")
for i, example in enumerate(training_data[:3], 1):
    # messages[1] is the user turn and messages[2] is the assistant turn.
    user_text = example['messages'][1]['content']
    assistant_text = example['messages'][2]['content']
    # Cap the assistant preview at 100 characters and add the ellipsis only
    # when text was actually cut, so short answers are not shown as
    # truncated.
    preview = assistant_text[:100]
    if len(assistant_text) > 100:
        preview += "..."
    print(f"\n{i}. User: {user_text}")
    print(f" Assistant: {preview}")