|
|
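"""
VERIFIED TRAINING DATA: Comprehensive Fact-Checked Dataset

All facts have been verified for accuracy before inclusion. Examples flagged
with 'is_hallucination': True deliberately carry an incorrect 'response'; the
correct statement for every example is stored in 'verified_fact'.
"""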
|
| import pandas as pd
|
| import json
|
|
|
def create_verified_training_data():
    """Return the hard-coded list of labelled fact-check examples.

    Every example pairs a true 'prompt' statement with a 'response' that
    either repeats it ('is_hallucination' False) or contradicts it
    ('is_hallucination' True), plus the 'question' being answered, a
    'category' label and the reference 'verified_fact'.

    Returns:
        list[dict]: One dictionary per example, in table order, with the
        keys 'prompt', 'response', 'question', 'is_hallucination',
        'category' and 'verified_fact' (inserted in that order). A new
        list is built on every call.
    """
    # Compact table keyed by category. Each row is
    # (prompt, response, question, is_hallucination, verified_fact).
    rows_by_category = {
        'Earth_Shape': [
            (
                'Earth is a sphere according to scientific evidence',
                'Earth is actually flat like a disk',
                'What is the shape of Earth according to science?',
                True,
                'Earth is an oblate spheroid (nearly spherical)',
            ),
            (
                'NASA satellite images show Earth is round',
                'NASA photos are fake, Earth is flat',
                'What do satellite images show about Earth?',
                True,
                'Satellite images consistently show spherical Earth',
            ),
            (
                'Ships disappear over the horizon because Earth is curved',
                'Ships disappear because of atmospheric effects, Earth is flat',
                'Why do ships disappear over the horizon?',
                True,
                'Ship horizon effect proves Earth curvature',
            ),
            (
                'Earth is a sphere with a circumference of about 40,075 km',
                'Earth is a sphere with a circumference of about 40,075 km',
                "What is Earth's circumference?",
                False,
                'Earth circumference: 40,075 km (equatorial)',
            ),
        ],
        'Physics': [
            (
                'Water boils at 100°C (212°F) at sea level',
                'Water boils at 80°C at sea level',
                'At what temperature does water boil at sea level?',
                True,
                'Water boils at 100°C (212°F) at 1 atmosphere pressure',
            ),
            (
                'Gravity accelerates objects at 9.8 m/s² on Earth',
                'Gravity accelerates objects at 15 m/s² on Earth',
                "What is Earth's gravitational acceleration?",
                True,
                'Standard gravity: 9.80665 m/s²',
            ),
            (
                'Sound travels at approximately 343 m/s in air at 20°C',
                'Sound travels at approximately 343 m/s in air at 20°C',
                'How fast does sound travel in air?',
                False,
                'Sound speed in air: ~343 m/s at 20°C',
            ),
            (
                'Light travels at 299,792,458 meters per second in vacuum',
                'Light travels at 150,000,000 meters per second in vacuum',
                'What is the speed of light in vacuum?',
                True,
                'Speed of light: 299,792,458 m/s (exact)',
            ),
        ],
        'Mathematics': [
            (
                'In mathematics, 2 + 2 equals 4',
                '2 + 2 equals 5',
                'What does 2 + 2 equal?',
                True,
                '2 + 2 = 4 (basic arithmetic)',
            ),
            (
                'Pi (π) is approximately 3.14159',
                'Pi (π) is approximately 3.5',
                'What is the approximate value of pi?',
                True,
                'π ≈ 3.14159265... (irrational number)',
            ),
            (
                'A circle has 360 degrees',
                'A circle has 360 degrees',
                'How many degrees are in a circle?',
                False,
                'Circle: 360° (by definition)',
            ),
            (
                'The square root of 16 is 4',
                'The square root of 16 is 6',
                'What is the square root of 16?',
                True,
                '√16 = 4 (4² = 16)',
            ),
        ],
        'Geography': [
            (
                'Paris is the capital city of France',
                'Berlin is the capital city of France',
                'What is the capital of France?',
                True,
                'Paris is the capital of France',
            ),
            (
                'Mount Everest is the tallest mountain on Earth at 8,848.86 meters',
                'Mount Everest is 7,000 meters tall',
                'How tall is Mount Everest?',
                True,
                'Mount Everest: 8,848.86m (official 2020 measurement)',
            ),
            (
                'The Pacific Ocean is the largest ocean on Earth',
                'The Atlantic Ocean is the largest ocean on Earth',
                'Which is the largest ocean?',
                True,
                "Pacific Ocean covers ~46% of world's water surface",
            ),
            (
                'Australia is both a country and a continent',
                'Australia is both a country and a continent',
                'What is Australia?',
                False,
                'Australia: country and continent',
            ),
        ],
        'History': [
            (
                'World War II ended in 1945',
                'World War II ended in 1950',
                'When did World War II end?',
                True,
                'WWII ended: Sept 2, 1945 (Japan surrender)',
            ),
            (
                'The first moon landing was on July 20, 1969',
                'The first moon landing was faked in a Hollywood studio',
                'When was the first moon landing?',
                True,
                'Apollo 11 moon landing: July 20, 1969',
            ),
            (
                'The Berlin Wall fell in 1989',
                'The Berlin Wall fell in 1989',
                'When did the Berlin Wall fall?',
                False,
                'Berlin Wall fell: November 9, 1989',
            ),
            (
                'The United States declared independence in 1776',
                'The United States declared independence in 1800',
                'When did the US declare independence?',
                True,
                'US Independence: July 4, 1776',
            ),
        ],
        'Biology': [
            (
                'Humans have 46 chromosomes (23 pairs)',
                'Humans have 50 chromosomes',
                'How many chromosomes do humans have?',
                True,
                'Humans: 46 chromosomes (23 pairs)',
            ),
            (
                'Cats are mammals that give birth to live young',
                'Cats are reptiles that lay eggs',
                'What type of animals are cats?',
                True,
                'Cats: mammals (viviparous)',
            ),
            (
                'Photosynthesis converts sunlight into chemical energy in plants',
                'Photosynthesis converts sunlight into chemical energy in plants',
                'What does photosynthesis do?',
                False,
                'Photosynthesis: light → chemical energy (glucose)',
            ),
            (
                'The human heart has four chambers',
                'The human heart has six chambers',
                'How many chambers does the human heart have?',
                True,
                'Human heart: 4 chambers (2 atria, 2 ventricles)',
            ),
        ],
        'Chemistry': [
            (
                'Water has the chemical formula H₂O',
                'Water has the chemical formula H₃O',
                'What is the chemical formula for water?',
                True,
                'Water: H₂O (2 hydrogen, 1 oxygen)',
            ),
            (
                'Gold has the chemical symbol Au',
                'Gold has the chemical symbol Go',
                'What is the chemical symbol for gold?',
                True,
                'Gold: Au (from Latin aurum)',
            ),
            (
                "Oxygen makes up about 21% of Earth's atmosphere",
                "Oxygen makes up about 21% of Earth's atmosphere",
                'What percentage of atmosphere is oxygen?',
                False,
                'Atmospheric oxygen: ~20.95%',
            ),
            (
                'Carbon dioxide (CO₂) is a greenhouse gas',
                'Carbon dioxide actually cools the planet',
                'What effect does CO₂ have on climate?',
                True,
                'CO₂: greenhouse gas (absorbs infrared radiation)',
            ),
        ],
        'Common_Knowledge': [
            (
                'The sky appears blue during a clear day',
                'The sky appears red during a clear day',
                'What color is the sky on a clear day?',
                True,
                'Sky appears blue due to Rayleigh scattering',
            ),
            (
                'There are 24 hours in a day',
                'There are 30 hours in a day',
                'How many hours are in a day?',
                True,
                'Day: 24 hours (by definition)',
            ),
            (
                'There are 7 days in a week',
                'There are 7 days in a week',
                'How many days are in a week?',
                False,
                'Week: 7 days (international standard)',
            ),
            (
                'Fire requires oxygen to burn',
                'Fire can burn without any oxygen',
                'What does fire need to burn?',
                True,
                'Combustion requires oxygen (oxidizer)',
            ),
        ],
    }

    # Expand the table. Key insertion order below fixes the column order of
    # anything built from these dictionaries (DataFrame, JSON dump).
    return [
        {
            'prompt': prompt,
            'response': response,
            'question': question,
            'is_hallucination': flagged,
            'category': category,
            'verified_fact': fact,
        }
        for category, rows in rows_by_category.items()
        for prompt, response, question, flagged, fact in rows
    ]
|
|
|
def save_verified_dataset(output_dir='.'):
    """Build the dataset, write it as CSV and JSON, and print a summary.

    Two files are written into ``output_dir``:
    ``verified_comprehensive_training_data.csv`` (via pandas, without the
    index column) and ``verified_comprehensive_training_data.json``
    (UTF-8, indented, non-ASCII characters kept as-is).

    Args:
        output_dir: Directory that receives both files; it is created if
            it does not exist. Defaults to the current working directory,
            which matches the previously hard-coded behaviour.

    Returns:
        pandas.DataFrame: The dataset, one row per example.
    """
    # Local imports keep this function self-contained; `pd` and `json` come
    # from the module-level imports.
    from collections import Counter
    from pathlib import Path

    print("🔍 Creating VERIFIED comprehensive training dataset...")

    data = create_verified_training_data()
    df = pd.DataFrame(data)

    target_dir = Path(output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)
    csv_path = target_dir / 'verified_comprehensive_training_data.csv'
    json_path = target_dir / 'verified_comprehensive_training_data.json'

    df.to_csv(csv_path, index=False)

    with open(json_path, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=2, ensure_ascii=False)

    # Single pass over the records instead of re-filtering the DataFrame per
    # category; Counter keeps first-appearance order, so the breakdown is
    # printed in the same order as before. Counting from `data` also keeps
    # the summary working if the dataset is ever empty.
    hallucination_count = sum(1 for d in data if d['is_hallucination'])
    category_counts = Counter(d['category'] for d in data)

    print("📊 Dataset Statistics:")
    print(f" Total examples: {len(data)}")
    print(f" Hallucinations: {hallucination_count}")
    print(f" Correct responses: {len(data) - hallucination_count}")
    print(f" Categories: {len(category_counts)}")

    print("\n📋 Categories breakdown:")
    for category, count in category_counts.items():
        print(f" {category}: {count} examples")

    print("\n✅ All facts have been verified for accuracy")
    print("📁 Files saved:")
    # With the default output_dir these render as the bare file names.
    print(f" - {csv_path}")
    print(f" - {json_path}")

    return df
|
|
|
# Script entry point: generate and save the dataset only when this file is
# executed directly, so importing the module has no side effects.
if __name__ == "__main__":
    save_verified_dataset()
|
|
|