Spaces:
Runtime error
Runtime error
# Custom Training Data
# Add high-quality, manually created QA pairs and serialize them to a
# JSON Lines file (one JSON object per line).
import json

# Every entry carries the same three keys: "instruction", "input", "output".
custom_qa_pairs = [
    {
        "instruction": "Which prefecture has the largest population in Japan?",
        "input": "Context: Japan Census data.",
        "output": "According to the 2020 Population Census, Tokyo has the largest population with 14,047,594 residents."
    },
    {
        "instruction": "What is the population density of Tokyo?",
        "input": "Context: Japan Census data.",
        "output": "Tokyo has a population density of approximately 6,402 people per square kilometer, calculated from its population of 14,047,594 and area of 2,194 km²."
    },
    {
        "instruction": "Compare the population of Tokyo and Osaka.",
        "input": "Context: Japan Census data.",
        "output": "Tokyo has a population of 14,047,594 while Osaka has 8,837,685. Tokyo's population is approximately 5.2 million larger than Osaka's."
    },
    {
        "instruction": "What percentage of Japan's population lives in Tokyo?",
        "input": "Context: Japan Census data.",
        "output": "Tokyo accounts for approximately 11.1% of Japan's total population of 126.1 million people."
    },
    # Add more custom pairs here
]

# Save to JSONL: the file is opened in 'w' mode, so any existing
# custom_train.jsonl in the working directory is overwritten.
with open('custom_train.jsonl', 'w', encoding='utf-8') as f:
    for pair in custom_qa_pairs:
        # ensure_ascii=False writes non-ASCII characters (e.g. the "²" in
        # "km²") as-is instead of as \uXXXX escapes; the file is UTF-8.
        json.dump(pair, f, ensure_ascii=False)
        # One record per line is what makes the output JSON Lines.
        f.write('\n')

print(f"Created {len(custom_qa_pairs)} custom QA pairs")