#!/usr/bin/env python3
"""Split the QuestionDatabase CSV into one JSON file per subject and category.

Input:  knowledge/v1.8.0/超天才クイズv3 - QuestionDatabase.csv
Output: knowledge/v1.8.1/{subject}/{category}.json
"""

import csv
import json
from pathlib import Path
from collections import defaultdict

# Path configuration
PROJECT_ROOT = Path(__file__).parent.parent
INPUT_CSV = PROJECT_ROOT / "knowledge" / "v1.8.0" / "超天才クイズv3 - QuestionDatabase.csv"
OUTPUT_DIR = PROJECT_ROOT / "knowledge" / "v1.8.1"

# Subject code -> display name
SUBJECT_NAMES = {
    "jp": "国語",
    "math": "算数",
    "sci": "理科",
    "soc": "社会"
}

# Category code -> display name
CATEGORY_NAMES = {
    # Japanese
    "JP01": "漢字の読み書き",
    "JP02": "語句・ことわざ",
    "JP03": "文法",
    "JP04": "読解",
    "JP05": "作文",
    "JP06": "古典",
    # Arithmetic
    "MA01": "計算",
    "MA02": "割合・比",
    "MA03": "速さ",
    "MA04": "図形",
    "MA05": "面積・体積",
    "MA06": "数列・規則性",
    "MA07": "場合の数",
    "MA08": "濃度・仕事算",
    "MA09": "グラフ・表",
    "MA10": "文章題",
    # Science
    "SC01": "植物",
    "SC02": "動物",
    "SC03": "人体",
    "SC04": "天気",
    "SC05": "天体",
    "SC06": "地層・岩石",
    "SC07": "物質・水溶液",
    "SC08": "燃焼・気体",
    "SC09": "電気・磁石",
    "SC10": "力・運動",
    # Social studies
    "SO01": "地理(日本)",
    "SO02": "地理(世界)",
    "SO03": "歴史(古代〜中世)",
    "SO04": "歴史(近世)",
    "SO05": "歴史(近代〜現代)",
    "SO06": "政治",
    "SO07": "経済",
    "SO08": "国際",
    "SO09": "環境・資源",
    "SO10": "時事問題"
}

# CSV columns copied into each exported question, in output order.
# Any other column (e.g. usage_count) is intentionally left out.
_QUESTION_FIELDS = (
    "answer_id",
    "answer",
    "question_hint",
    "difficulty",
    "source_context",
)


def _load_grouped_questions(csv_path):
    """Read the question CSV and group its rows by (subject, category).

    Args:
        csv_path: Path of the CSV file. It must have a header row containing
            ``subject``, ``category`` and every name in ``_QUESTION_FIELDS``.

    Returns:
        A dict mapping ``(subject, category)`` tuples to the list of question
        dicts for that group, in the order the rows appear in the file.

    Raises:
        FileNotFoundError: If ``csv_path`` does not exist.
        KeyError: If a required column is missing from the header.
    """
    grouped = defaultdict(list)
    # utf-8-sig strips a leading BOM if present (otherwise the first header
    # would be read as '\ufeffsubject') and reads plain UTF-8 unchanged.
    # newline="" is what the csv module expects, so that newlines embedded in
    # quoted fields are passed through untouched.
    with open(csv_path, "r", encoding="utf-8-sig", newline="") as f:
        for row in csv.DictReader(f):
            key = (row["subject"], row["category"])
            grouped[key].append({field: row[field] for field in _QUESTION_FIELDS})
    return grouped


def _write_category_file(output_dir, subject, category, questions):
    """Write one group of questions to ``{output_dir}/{subject}/{category}.json``.

    Args:
        output_dir: Root directory of the generated JSON tree.
        subject: Subject code; also used as the sub-directory name.
        category: Category code; also used as the file stem.
        questions: List of question dicts belonging to this group.

    Returns:
        The path of the file that was written.
    """
    output_path = Path(output_dir) / subject / f"{category}.json"
    # The per-subject directory may not exist yet (e.g. on a first run);
    # without this the open() below fails with FileNotFoundError.
    output_path.parent.mkdir(parents=True, exist_ok=True)

    data = {
        "subject": subject,
        # Unknown codes fall back to the raw code rather than failing.
        "subject_name": SUBJECT_NAMES.get(subject, subject),
        "category": category,
        "category_name": CATEGORY_NAMES.get(category, category),
        "question_count": len(questions),
        "questions": questions
    }
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=2)
    return output_path


def _display_path(path):
    """Return ``path`` relative to PROJECT_ROOT, or unchanged if it is outside it."""
    try:
        return path.relative_to(PROJECT_ROOT)
    except ValueError:
        return path


def main(input_csv=INPUT_CSV, output_dir=OUTPUT_DIR):
    """Split the question CSV into per-subject/per-category JSON files.

    Args:
        input_csv: CSV file to read. Defaults to ``INPUT_CSV``.
        output_dir: Root directory for the JSON output. Defaults to
            ``OUTPUT_DIR``. Missing directories are created.

    Progress and a final summary are printed to stdout.
    """
    print(f"入力ファイル: {input_csv}")
    print(f"出力先: {output_dir}")
    print()

    # Read the CSV and group rows by subject + category.
    grouped = _load_grouped_questions(input_csv)
    total_count = sum(len(questions) for questions in grouped.values())
    print(f"読み込み完了: {total_count}問")
    print()

    # Emit one JSON file per group, in sorted key order.
    file_count = 0
    for (subject, category), questions in sorted(grouped.items()):
        output_path = _write_category_file(output_dir, subject, category, questions)
        print(f" {_display_path(output_path)}: {len(questions)}問")
        file_count += 1

    print()
    print(f"完了: {file_count}ファイル, {total_count}問")


if __name__ == "__main__":
    main()