#!/usr/bin/env python3
"""
QuestionDatabase CSV を教科・ジャンル別に分割してJSONファイルを生成する
入力: knowledge/v1.8.0/超天才クイズv3 - QuestionDatabase.csv
出力: knowledge/v1.8.1/{subject}/{category}.json
"""
import csv
import json
from pathlib import Path
from collections import defaultdict
# Path settings: every location is derived from the project root, which is the
# directory two levels above this script file.
PROJECT_ROOT = Path(__file__).parent.parent
INPUT_CSV = PROJECT_ROOT.joinpath(
    "knowledge",
    "v1.8.0",
    "超天才クイズv3 - QuestionDatabase.csv",
)
OUTPUT_DIR = PROJECT_ROOT.joinpath("knowledge", "v1.8.1")
# Subject-code -> display-name mapping (display names are kept in Japanese
# because they are written verbatim into the generated JSON files).
SUBJECT_NAMES = dict(
    jp="国語",
    math="算数",
    sci="理科",
    soc="社会",
)
# Category-code -> display-name mapping. Each code is a two-letter subject
# prefix followed by a two-digit, 1-based sequence number ("JP01", "MA10", ...),
# so the table is generated from one ordered tuple of labels per prefix.
CATEGORY_NAMES = {
    f"{prefix}{number:02d}": label
    for prefix, labels in (
        # Japanese language
        ("JP", (
            "漢字の読み書き",
            "語句・ことわざ",
            "文法",
            "読解",
            "作文",
            "古典",
        )),
        # Arithmetic
        ("MA", (
            "計算",
            "割合・比",
            "速さ",
            "図形",
            "面積・体積",
            "数列・規則性",
            "場合の数",
            "濃度・仕事算",
            "グラフ・表",
            "文章題",
        )),
        # Science
        ("SC", (
            "植物",
            "動物",
            "人体",
            "天気",
            "天体",
            "地層・岩石",
            "物質・水溶液",
            "燃焼・気体",
            "電気・磁石",
            "力・運動",
        )),
        # Social studies
        ("SO", (
            "地理(日本)",
            "地理(世界)",
            "歴史(古代〜中世)",
            "歴史(近世)",
            "歴史(近代〜現代)",
            "政治",
            "経済",
            "国際",
            "環境・資源",
            "時事問題",
        )),
    )
    for number, label in enumerate(labels, start=1)
}
def main() -> None:
    """Split the question CSV into one JSON file per (subject, category).

    Reads ``INPUT_CSV``, groups its rows by the ``subject`` and ``category``
    columns, and writes each group to ``OUTPUT_DIR/<subject>/<category>.json``.
    Each question keeps only the ``answer_id``, ``answer``, ``question_hint``,
    ``difficulty`` and ``source_context`` columns; every other column (for
    example ``usage_count``) is dropped. Progress is printed to stdout.

    Raises:
        FileNotFoundError: If ``INPUT_CSV`` does not exist.
        KeyError: If the CSV header lacks one of the required columns.
    """
    print(f"入力ファイル: {INPUT_CSV}")
    print(f"出力先: {OUTPUT_DIR}")
    print()

    # Columns copied into each question entry, in output order.
    question_fields = (
        "answer_id",
        "answer",
        "question_hint",
        "difficulty",
        "source_context",
    )

    # Read the CSV and group the rows by (subject, category).
    grouped = defaultdict(list)
    total_count = 0
    # newline='' lets the csv module handle newlines embedded in quoted fields;
    # utf-8-sig reads plain UTF-8 and also strips a leading BOM, which would
    # otherwise become part of the first header name.
    with open(INPUT_CSV, 'r', encoding='utf-8-sig', newline='') as f:
        for row in csv.DictReader(f):
            key = (row['subject'], row['category'])
            grouped[key].append({field: row[field] for field in question_fields})
            total_count += 1

    print(f"読み込み完了: {total_count}問")
    print()

    # Write every group as its own JSON file.
    file_count = 0
    for (subject, category), questions in sorted(grouped.items()):
        output_path = OUTPUT_DIR / subject / f"{category}.json"
        # The per-subject directory may not exist yet (e.g. on a first run);
        # without this the open() below fails with FileNotFoundError.
        output_path.parent.mkdir(parents=True, exist_ok=True)

        data = {
            "subject": subject,
            # Unknown codes fall back to the raw code rather than failing.
            "subject_name": SUBJECT_NAMES.get(subject, subject),
            "category": category,
            "category_name": CATEGORY_NAMES.get(category, category),
            "question_count": len(questions),
            "questions": questions,
        }
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(data, f, ensure_ascii=False, indent=2)

        print(f" {output_path.relative_to(PROJECT_ROOT)}: {len(questions)}問")
        file_count += 1

    print()
    print(f"完了: {file_count}ファイル, {total_count}問")


if __name__ == "__main__":
    main()
|