|
|
import json |
|
|
import random |
|
|
|
|
|
def create_training_example(
    user_msg,
    assistant_msg,
    system_msg="You are Brad AI 1.12.2x.",
):
    """Build one chat-format training example.

    Args:
        user_msg: Content of the user turn.
        assistant_msg: Content of the assistant turn.
        system_msg: Content of the system turn. Defaults to the prompt
            this script has always used, so existing two-argument calls
            produce exactly the same result as before.

    Returns:
        A dict with a single "messages" key whose value is a list of three
        role/content dicts, in the order system, user, assistant.
    """
    return {
        "messages": [
            {"role": "system", "content": system_msg},
            {"role": "user", "content": user_msg},
            {"role": "assistant", "content": assistant_msg},
        ]
    }
|
|
|
|
|
|
|
|
# Accumulator for the finished chat-format examples; filled further down
# the script.
training_data = []

# Each table below is a list of (question, answer) string pairs.

# Arithmetic and basic math.
math_problems = [
    ("What is 25 * 4?", "25 × 4 = 100"),
    ("Calculate 144 / 12", "144 ÷ 12 = 12"),
    ("What is the square root of 64?", "The square root of 64 is 8, because 8 × 8 = 64."),
    ("If a shirt costs $25 and is 20% off, what's the sale price?", "20% of $25 is $5. So the sale price is $25 - $5 = $20."),
    ("What is 7³?", "7³ (7 cubed) means 7 × 7 × 7 = 343."),
]

# Programming concepts.
# NOTE(review): the embedded for-loop sample indents `print(i)` by a single
# space. That is valid Python, but confirm whether four spaces were intended.
programming_qa = [
    ("What is a for loop?", "A for loop is a control structure that repeats a block of code a specific number of times. Example in Python:\n\n```python\nfor i in range(5):\n print(i)\n```\n\nThis prints numbers 0 through 4."),
    ("How do you create a dictionary in Python?", "You create a dictionary using curly braces:\n\n```python\nmy_dict = {'name': 'Brad', 'version': '1.12.2x'}\nprint(my_dict['name']) # Output: Brad\n```"),
    ("What is an API?", "An API (Application Programming Interface) is a set of rules that allows different software applications to communicate with each other. It defines methods and data formats for requesting and exchanging information."),
    ("Explain what a variable is.", "A variable is a named storage location in memory that holds a value. It can be changed during program execution. Example: x = 5 stores the value 5 in variable x."),
]

# Basic science.
science_qa = [
    ("What are the three states of matter?", "The three common states of matter are solid, liquid, and gas. Solids have fixed shape and volume, liquids have fixed volume but take the shape of their container, and gases have neither fixed shape nor volume."),
    ("What is gravity?", "Gravity is a fundamental force that attracts objects with mass toward each other. On Earth, it gives weight to objects and causes them to fall toward the ground at 9.8 m/s²."),
    ("What is DNA?", "DNA (Deoxyribonucleic Acid) is a molecule that carries genetic instructions for life. It has a double helix structure and contains genes that determine traits and characteristics."),
]

# General knowledge.
general_qa = [
    ("Who invented the telephone?", "Alexander Graham Bell is credited with inventing the telephone in 1876."),
    ("How many continents are there?", "There are 7 continents: Africa, Antarctica, Asia, Europe, North America, Oceania, and South America."),
    ("What is the largest ocean?", "The Pacific Ocean is the largest ocean, covering about 63 million square miles."),
]

# Logic and reasoning puzzles.
# NOTE(review): the penguin answer calls the argument a "logical fallacy",
# but the syllogism is formally valid; the problem is a false premise
# (valid but unsound). Consider rewording before training on it.
logic_qa = [
    ("If all birds can fly and penguins are birds, can penguins fly?", "This is a logical fallacy. While the premise states all birds can fly, in reality, not all birds can fly. Penguins are flightless birds, so they cannot fly."),
    ("A farmer has 17 sheep, and all but 9 die. How many are left?", "9 sheep are left. 'All but 9 die' means that 9 survive."),
]

# Single flat list of every pair, in category order.
all_qa = math_problems + programming_qa + science_qa + general_qa + logic_qa
|
|
|
|
|
|
|
|
# Wrap every (question, answer) pair in the chat-message structure and
# collect the results, preserving the order of all_qa.
training_data.extend(
    create_training_example(prompt, reply) for prompt, reply in all_qa
)
|
|
|
|
|
|
|
|
output_file = "train_expanded.jsonl"

# JSON Lines output: one JSON object per line. The encoding is pinned so the
# file does not depend on the platform's locale default.
with open(output_file, "w", encoding="utf-8") as f:
    f.writelines(json.dumps(example) + "\n" for example in training_data)
|
|
|
|
|
# Summary of what was generated and where it was written.
print(f"Generated {len(training_data)} training examples!")
print(f"Saved to {output_file}")
print("\nSample examples:")

# Preview the first three examples. In each example, messages[1] is the
# user turn and messages[2] is the assistant turn.
for i, example in enumerate(training_data[:3], 1):
    user_text = example['messages'][1]['content']
    assistant_text = example['messages'][2]['content']
    # Only mark the preview with an ellipsis when text was actually cut off;
    # previously "..." was appended even to short, complete answers.
    if len(assistant_text) > 100:
        assistant_text = assistant_text[:100] + "..."
    print(f"\n{i}. User: {user_text}")
    print(f" Assistant: {assistant_text}")