Spaces:
Sleeping
Sleeping
#!/usr/bin/env python3
"""
Split the QuestionDatabase CSV by subject and category and generate JSON files.

Input:  knowledge/v1.8.0/超天才クイズv3 - QuestionDatabase.csv
Output: knowledge/v1.8.1/{subject}/{category}.json
"""
| import csv | |
| import json | |
| from pathlib import Path | |
| from collections import defaultdict | |
# Path configuration: everything is anchored two levels above this script.
PROJECT_ROOT = Path(__file__).parent.parent
INPUT_CSV = PROJECT_ROOT.joinpath(
    "knowledge", "v1.8.0", "超天才クイズv3 - QuestionDatabase.csv"
)
OUTPUT_DIR = PROJECT_ROOT.joinpath("knowledge", "v1.8.1")
# Subject code -> subject display name.
SUBJECT_NAMES = dict(
    jp="国語",
    math="算数",
    sci="理科",
    soc="社会",
)
# Category code -> category display name, declared per subject and merged
# below in subject order (Japanese, arithmetic, science, social studies).
_JP_CATEGORIES = {
    "JP01": "漢字の読み書き",
    "JP02": "語句・ことわざ",
    "JP03": "文法",
    "JP04": "読解",
    "JP05": "作文",
    "JP06": "古典",
}
_MA_CATEGORIES = {
    "MA01": "計算",
    "MA02": "割合・比",
    "MA03": "速さ",
    "MA04": "図形",
    "MA05": "面積・体積",
    "MA06": "数列・規則性",
    "MA07": "場合の数",
    "MA08": "濃度・仕事算",
    "MA09": "グラフ・表",
    "MA10": "文章題",
}
_SC_CATEGORIES = {
    "SC01": "植物",
    "SC02": "動物",
    "SC03": "人体",
    "SC04": "天気",
    "SC05": "天体",
    "SC06": "地層・岩石",
    "SC07": "物質・水溶液",
    "SC08": "燃焼・気体",
    "SC09": "電気・磁石",
    "SC10": "力・運動",
}
_SO_CATEGORIES = {
    "SO01": "地理(日本)",
    "SO02": "地理(世界)",
    "SO03": "歴史(古代〜中世)",
    "SO04": "歴史(近世)",
    "SO05": "歴史(近代〜現代)",
    "SO06": "政治",
    "SO07": "経済",
    "SO08": "国際",
    "SO09": "環境・資源",
    "SO10": "時事問題",
}
CATEGORY_NAMES = {
    **_JP_CATEGORIES,
    **_MA_CATEGORIES,
    **_SC_CATEGORIES,
    **_SO_CATEGORIES,
}
def main():
    """Split the question CSV into one JSON file per (subject, category).

    Reads ``INPUT_CSV``, groups every row by its ``subject`` and ``category``
    columns, and writes each group to ``OUTPUT_DIR/{subject}/{category}.json``.
    Each file also carries the display names looked up in ``SUBJECT_NAMES``
    and ``CATEGORY_NAMES``; when a code has no registered name, the raw code
    is used instead. Progress and totals are printed to stdout.

    Raises:
        FileNotFoundError: If ``INPUT_CSV`` does not exist.
        KeyError: If the CSV header lacks one of the required columns.
    """
    print(f"入力ファイル: {INPUT_CSV}")
    print(f"出力先: {OUTPUT_DIR}")
    print()

    # Read the CSV and group the questions by (subject, category).
    grouped = defaultdict(list)
    total_count = 0
    # newline='' is what the csv module requires so that line breaks inside
    # quoted fields are parsed correctly. utf-8-sig decodes plain UTF-8
    # unchanged but strips a leading BOM, which would otherwise become part
    # of the first header name and break the column lookups below.
    with open(INPUT_CSV, 'r', encoding='utf-8-sig', newline='') as f:
        reader = csv.DictReader(f)
        for row in reader:
            key = (row['subject'], row['category'])
            # Extract the question data (usage_count is deliberately left out).
            question = {
                "answer_id": row['answer_id'],
                "answer": row['answer'],
                "question_hint": row['question_hint'],
                "difficulty": row['difficulty'],
                "source_context": row['source_context'],
            }
            grouped[key].append(question)
            total_count += 1

    print(f"読み込み完了: {total_count}問")
    print()

    # Write every group out as its own JSON file, in sorted key order.
    file_count = 0
    for (subject, category), questions in sorted(grouped.items()):
        output_path = OUTPUT_DIR / subject / f"{category}.json"
        # open(..., 'w') does not create missing parent directories, so make
        # sure the per-subject directory exists before writing into it.
        output_path.parent.mkdir(parents=True, exist_ok=True)

        data = {
            "subject": subject,
            "subject_name": SUBJECT_NAMES.get(subject, subject),
            "category": category,
            "category_name": CATEGORY_NAMES.get(category, category),
            "question_count": len(questions),
            "questions": questions,
        }
        with open(output_path, 'w', encoding='utf-8') as f:
            # ensure_ascii=False keeps the Japanese text readable in the file.
            json.dump(data, f, ensure_ascii=False, indent=2)

        print(f" {output_path.relative_to(PROJECT_ROOT)}: {len(questions)}問")
        file_count += 1

    print()
    print(f"完了: {file_count}ファイル, {total_count}問")
# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()