File size: 20,237 Bytes
3f49919
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
import json
import sqlite3
import requests
import time
from pathlib import Path

# Location of the SQLite database file opened by create_database(), which
# creates the parent directory on demand. The path is relative.
DB_PATH = Path("data/quotes.db")

def create_database(db_path=None):
    """Open the quotes database, creating the file, table and indexes if needed.

    Args:
        db_path: Optional database location (string or path-like). When
            omitted, the module-level ``DB_PATH`` is used. The SQLite special
            name ``":memory:"`` is passed through without touching the
            filesystem.

    Returns:
        An open ``sqlite3.Connection``. The caller is responsible for
        closing it.

    Raises:
        sqlite3.Error: If the schema cannot be created. The connection is
            closed before the error propagates.
    """
    target = DB_PATH if db_path is None else db_path
    if str(target) != ":memory:":
        target = Path(target)
        target.parent.mkdir(parents=True, exist_ok=True)

    conn = sqlite3.connect(target)
    try:
        conn.execute("""
            CREATE TABLE IF NOT EXISTS quotes (
                id TEXT PRIMARY KEY,
                text TEXT NOT NULL,
                author TEXT,
                source TEXT,
                dynasty TEXT,
                type TEXT,
                tags TEXT,
                emotion TEXT
            )
        """)
        conn.execute("CREATE INDEX IF NOT EXISTS idx_type ON quotes(type)")
        conn.execute("CREATE INDEX IF NOT EXISTS idx_dynasty ON quotes(dynasty)")
        conn.commit()
    except Exception:
        # Do not leak the handle when schema creation fails.
        conn.close()
        raise
    return conn

def insert_quotes(conn, quotes):
    """Insert quote records into the ``quotes`` table, skipping duplicates.

    Each record is inserted with ``INSERT OR IGNORE``, so a row whose ``id``
    already exists is left untouched and not counted. A record that cannot
    be inserted (for example one missing the ``id`` or ``text`` key) is
    reported on stdout and skipped; the remaining records are still
    processed and committed.

    Args:
        conn: An open ``sqlite3.Connection`` with a ``quotes`` table.
        quotes: Iterable of dicts with required keys ``id`` and ``text`` and
            optional keys ``author``, ``source``, ``dynasty``, ``type``,
            ``tags`` and ``emotion``. ``tags`` and ``emotion`` are stored as
            JSON text.

    Returns:
        The number of rows actually inserted.
    """
    insert_sql = """
        INSERT OR IGNORE INTO quotes (id, text, author, source, dynasty, type, tags, emotion)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?)
    """
    cursor = conn.cursor()
    inserted = 0

    for quote in quotes:
        try:
            cursor.execute(insert_sql, (
                quote['id'],
                quote['text'],
                quote.get('author'),
                quote.get('source'),
                quote.get('dynasty'),
                quote.get('type'),
                json.dumps(quote.get('tags', []), ensure_ascii=False),
                json.dumps(quote.get('emotion', []), ensure_ascii=False),
            ))
        except Exception as e:
            # Deliberately broad: one malformed record must not abort the
            # batch. The id is looked up defensively because a missing 'id'
            # may be the very reason we are here; indexing it again would
            # raise from inside the handler.
            if isinstance(quote, dict):
                quote_id = quote.get('id', '<missing id>')
            else:
                quote_id = repr(quote)
            print(f"Error inserting {quote_id}: {e}")
        else:
            # rowcount is 0 when OR IGNORE skipped an existing id.
            if cursor.rowcount > 0:
                inserted += 1

    conn.commit()
    return inserted

_POETRY_BASE_URL = "https://raw.githubusercontent.com/chinese-poetry/chinese-poetry/master"


def _fetch_poem_batches(url_template, batch_starts, id_prefix, dynasty, http_get, delay):
    """Download poem batches and convert each poem into a quote record.

    Args:
        url_template: ``str.format`` template with ``{base}`` and ``{start}``
            placeholders that yields the URL of one batch file.
        batch_starts: Iterable of batch numbers substituted for ``{start}``.
        id_prefix: Prefix for record ids (for example ``"tang"``).
        dynasty: Value stored in each record's ``dynasty`` field.
        http_get: Callable used like ``requests.get(url, timeout=...)``, or
            ``None`` to use ``requests.get``.
        delay: Seconds to pause after every batch; a falsy value disables it.

    Returns:
        A list of quote dicts for poems whose joined text has at least 10
        characters.
    """
    fetch = requests.get if http_get is None else http_get
    poems = []

    for start in batch_starts:
        url = url_template.format(base=_POETRY_BASE_URL, start=start)
        try:
            response = fetch(url, timeout=15)
            if response.status_code == 200:
                data = response.json()
                for position, poem in enumerate(data):
                    text = "".join(poem.get("paragraphs", []))
                    if len(text) < 10:
                        continue
                    poem_id = poem.get("id")
                    if poem_id is None:
                        # Fall back to batch + position so id-less poems in
                        # the same batch do not share one primary key.
                        poem_id = f"{start}_{position}"
                    poems.append({
                        "id": f"{id_prefix}_{poem_id}",
                        "text": text,
                        "author": poem.get("author", ""),
                        "source": poem.get("title", ""),
                        "dynasty": dynasty,
                        "type": "诗词",
                        "tags": [],
                        "emotion": [],
                    })
                print(f"  Fetched {len(data)} poems from batch {start}")
            else:
                # Report the status so a wrong URL is visible instead of
                # silently yielding an empty result.
                print(f"  Skipping batch {start}: HTTP {response.status_code}")
        except Exception as e:
            # Best effort: a failed batch must not abort the whole download.
            print(f"  Error fetching batch {start}: {e}")
        if delay:
            # Pause after failures too, to stay polite to the remote host.
            time.sleep(delay)

    return poems


def fetch_chinese_poetry(*, http_get=None, delay=0.5, batch_starts=None):
    """Fetch Tang Dynasty poems as quote records.

    Args:
        http_get: Optional replacement for ``requests.get`` (for example a
            ``requests.Session().get``); called as ``http_get(url, timeout=15)``.
        delay: Seconds to pause after each batch request.
        batch_starts: Batch numbers to request; defaults to
            ``range(0, 58000, 1000)``.

    Returns:
        A list of quote dicts with ``dynasty`` set to ``"唐"``. Batches that
        fail or return a non-200 status are reported on stdout and skipped.
    """
    print("Fetching Tang Dynasty poems...")
    # NOTE(review): the upstream repository appears to name these files
    # poet.tang.<n>.json under a different directory - confirm that this
    # path resolves.
    return _fetch_poem_batches(
        "{base}/poet/poet_{start}.json",
        range(0, 58000, 1000) if batch_starts is None else batch_starts,
        "tang",
        "唐",
        http_get,
        delay,
    )


def fetch_song_poetry(*, http_get=None, delay=0.5, batch_starts=None):
    """Fetch Song Dynasty poems as quote records.

    Args:
        http_get: Optional replacement for ``requests.get`` (for example a
            ``requests.Session().get``); called as ``http_get(url, timeout=15)``.
        delay: Seconds to pause after each batch request.
        batch_starts: Batch numbers to request; defaults to
            ``range(0, 25000, 1000)``.

    Returns:
        A list of quote dicts with ``dynasty`` set to ``"宋"``. Batches that
        fail or return a non-200 status are reported on stdout and skipped.
    """
    print("Fetching Song Dynasty poems...")
    # NOTE(review): the upstream repository appears to name these files
    # poet.song.<n>.json under a different directory - confirm that this
    # path resolves.
    return _fetch_poem_batches(
        "{base}/poet/poet.song_{start}.json",
        range(0, 25000, 1000) if batch_starts is None else batch_starts,
        "song",
        "宋",
        http_get,
        delay,
    )

def fetch_idioms(*, http_get=None):
    """Fetch idioms from the chinese-xinhua data set as quote records.

    Args:
        http_get: Optional replacement for ``requests.get`` (for example a
            ``requests.Session().get``); called as ``http_get(url, timeout=30)``.

    Returns:
        A list of quote dicts, one per entry whose ``word`` has at least 3
        characters. The list is empty when the request fails or returns a
        non-200 status; both cases are reported on stdout.
    """
    fetch = requests.get if http_get is None else http_get
    url = "https://raw.githubusercontent.com/pwxcoo/chinese-xinhua/master/json/idioms.json"
    idioms = []

    print("Fetching idioms...")
    try:
        response = fetch(url, timeout=30)
        if response.status_code != 200:
            # Report the status instead of silently returning nothing.
            print(f"  Skipping idioms: HTTP {response.status_code}")
            return idioms

        for item in response.json():
            word = item.get('word', '')
            if not word or len(word) < 3:
                continue
            idioms.append({
                "id": f"idiom_{word}",
                "text": word,
                "author": None,
                "source": item.get('derivation', ''),
                "dynasty": None,
                "type": "成语",
                "tags": [],
                "emotion": [],
            })
        print(f"  Fetched {len(idioms)} idioms")
    except Exception as e:
        # Best effort: keep whatever was collected before the failure.
        print(f"  Error fetching idioms: {e}")

    return idioms

def fetch_classic_quotes():
    """Return the built-in collection of classic quotes as quote records.

    No network access is involved. Each record gets the id
    ``classic_<position>`` (its zero-based position in the table below), the
    type ``"名句"`` and fresh empty ``tags`` / ``emotion`` lists.
    """
    # Columns: text, author, source, dynasty.
    table = (
        ("学而时习之,不亦说乎", "孔子", "论语", "春秋"),
        ("温故而知新,可以为师矣", "孔子", "论语", "春秋"),
        ("学而不思则罔,思而不学则殆", "孔子", "论语", "春秋"),
        ("知之为知之,不知为不知,是知也", "孔子", "论语", "春秋"),
        ("己所不欲,勿施于人", "孔子", "论语", "春秋"),
        ("三人行,必有我师焉", "孔子", "论语", "春秋"),
        ("君子坦荡荡,小人长戚戚", "孔子", "论语", "春秋"),
        ("生于忧患,死于安乐", "孟子", "孟子", "战国"),
        ("得道多助,失道寡助", "孟子", "孟子", "战国"),
        ("富贵不能淫,贫贱不能移,威武不能屈", "孟子", "孟子", "战国"),
        ("天行健,君子以自强不息", "佚名", "周易", "先秦"),
        ("地势坤,君子以厚德载物", "佚名", "周易", "先秦"),
        ("路漫漫其修远兮,吾将上下而求索", "屈原", "离骚", "战国"),
        ("长太息以掩涕兮,哀民生之多艰", "屈原", "离骚", "战国"),
        ("亦余心之所善兮,虽九死其犹未悔", "屈原", "离骚", "战国"),
        ("老骥伏枥,志在千里", "曹操", "龟虽寿", "东汉"),
        ("烈士暮年,壮心不已", "曹操", "龟虽寿", "东汉"),
        ("山不厌高,海不厌深", "曹操", "短歌行", "东汉"),
        ("周公吐哺,天下归心", "曹操", "短歌行", "东汉"),
        ("捐躯赴国难,视死忽如归", "曹植", "白马篇", "三国"),
        ("本是同根生,相煎何太急", "曹植", "七步诗", "三国"),
        ("鞠躬尽瘁,死而后已", "诸葛亮", "后出师表", "三国"),
        ("非淡泊无以明志,非宁静无以致远", "诸葛亮", "诫子书", "三国"),
        ("采菊东篱下,悠然见南山", "陶渊明", "饮酒", "东晋"),
        ("羁鸟恋旧林,池鱼思故渊", "陶渊明", "归园田居", "东晋"),
        ("海内存知己,天涯若比邻", "王勃", "送杜少府之任蜀州", "唐"),
        ("落霞与孤鹜齐飞,秋水共长天一色", "王勃", "滕王阁序", "唐"),
        ("前不见古人,后不见来者", "陈子昂", "登幽州台歌", "唐"),
        ("念天地之悠悠,独怆然而涕下", "陈子昂", "登幽州台歌", "唐"),
        ("春江潮水连海平,海上明月共潮生", "张若虚", "春江花月夜", "唐"),
        ("人生代代无穷已,江月年年望相似", "张若虚", "春江花月夜", "唐"),
        ("欲穷千里目,更上一层楼", "王之涣", "登鹳雀楼", "唐"),
        ("黄河远上白云间,一片孤城万仞山", "王之涣", "凉州词", "唐"),
        ("独在异乡为异客,每逢佳节倍思亲", "王维", "九月九日忆山东兄弟", "唐"),
        ("劝君更尽一杯酒,西出阳关无故人", "王维", "送元二使安西", "唐"),
        ("大漠孤烟直,长河落日圆", "王维", "使至塞上", "唐"),
        ("明月松间照,清泉石上流", "王维", "山居秋暝", "唐"),
        ("天生我材必有用,千金散尽还复来", "李白", "将进酒", "唐"),
        ("长风破浪会有时,直挂云帆济沧海", "李白", "行路难", "唐"),
        ("举杯邀明月,对影成三人", "李白", "月下独酌", "唐"),
        ("抽刀断水水更流,举杯消愁愁更愁", "李白", "宣州谢朓楼饯别校书叔云", "唐"),
        ("安能摧眉折腰事权贵,使我不得开心颜", "李白", "梦游天姥吟留别", "唐"),
        ("会当凌绝顶,一览众山小", "杜甫", "望岳", "唐"),
        ("读书破万卷,下笔如有神", "杜甫", "奉赠韦左丞丈二十二韵", "唐"),
        ("出师未捷身先死,长使英雄泪满襟", "杜甫", "蜀相", "唐"),
        ("无边落木萧萧下,不尽长江滚滚来", "杜甫", "登高", "唐"),
        ("安得广厦千万间,大庇天下寒士俱欢颜", "杜甫", "茅屋为秋风所破歌", "唐"),
        ("忽如一夜春风来,千树万树梨花开", "岑参", "白雪歌送武判官归京", "唐"),
        ("沉舟侧畔千帆过,病树前头万木春", "刘禹锡", "酬乐天扬州初逢席上见赠", "唐"),
        ("旧时王谢堂前燕,飞入寻常百姓家", "刘禹锡", "乌衣巷", "唐"),
        ("千淘万漉虽辛苦,吹尽狂沙始到金", "刘禹锡", "浪淘沙", "唐"),
        ("野火烧不尽,春风吹又生", "白居易", "赋得古原草送别", "唐"),
        ("同是天涯沦落人,相逢何必曾相识", "白居易", "琵琶行", "唐"),
        ("在天愿作比翼鸟,在地愿为连理枝", "白居易", "长恨歌", "唐"),
        ("千呼万唤始出来,犹抱琵琶半遮面", "白居易", "琵琶行", "唐"),
        ("曾经沧海难为水,除却巫山不是云", "元稹", "离思", "唐"),
        ("黑云压城城欲摧,甲光向日金鳞开", "李贺", "雁门太守行", "唐"),
        ("男儿何不带吴钩,收取关山五十州", "李贺", "南园", "唐"),
        ("商女不知亡国恨,隔江犹唱后庭花", "杜牧", "泊秦淮", "唐"),
        ("停车坐爱枫林晚,霜叶红于二月花", "杜牧", "山行", "唐"),
        ("东风不与周郎便,铜雀春深锁二乔", "杜牧", "赤壁", "唐"),
        ("春蚕到死丝方尽,蜡炬成灰泪始干", "李商隐", "无题", "唐"),
        ("身无彩凤双飞翼,心有灵犀一点通", "李商隐", "无题", "唐"),
        ("夕阳无限好,只是近黄昏", "李商隐", "登乐游原", "唐"),
        ("先天下之忧而忧,后天下之乐而乐", "范仲淹", "岳阳楼记", "宋"),
        ("不以物喜,不以己悲", "范仲淹", "岳阳楼记", "宋"),
        ("醉翁之意不在酒,在乎山水之间也", "欧阳修", "醉翁亭记", "宋"),
        ("人生自是有情痴,此恨不关风与月", "欧阳修", "玉楼春", "宋"),
        ("衣带渐宽终不悔,为伊消得人憔悴", "柳永", "蝶恋花", "宋"),
        ("今宵酒醒何处?杨柳岸,晓风残月", "柳永", "雨霖铃", "宋"),
        ("但愿人长久,千里共婵娟", "苏轼", "水调歌头", "宋"),
        ("大江东去,浪淘尽,千古风流人物", "苏轼", "念奴娇·赤壁怀古", "宋"),
        ("竹杖芒鞋轻胜马,谁怕?一蓑烟雨任平生", "苏轼", "定风波", "宋"),
        ("回首向来萧瑟处,归去,也无风雨也无晴", "苏轼", "定风波", "宋"),
        ("十年生死两茫茫,不思量,自难忘", "苏轼", "江城子", "宋"),
        ("人生到处知何似,应似飞鸿踏雪泥", "苏轼", "和子由渑池怀旧", "宋"),
        ("不识庐山真面目,只缘身在此山中", "苏轼", "题西林壁", "宋"),
        ("两情若是久长时,又岂在朝朝暮暮", "秦观", "鹊桥仙", "宋"),
        ("此情无计可消除,才下眉头,却上心头", "李清照", "一剪梅", "宋"),
        ("寻寻觅觅,冷冷清清,凄凄惨惨戚戚", "李清照", "声声慢", "宋"),
        ("生当作人杰,死亦为鬼雄", "李清照", "夏日绝句", "宋"),
        ("三十功名尘与土,八千里路云和月", "岳飞", "满江红", "宋"),
        ("莫等闲,白了少年头,空悲切", "岳飞", "满江红", "宋"),
        ("壮志饥餐胡虏肉,笑谈渴饮匈奴血", "岳飞", "满江红", "宋"),
        ("山重水复疑无路,柳暗花明又一村", "陆游", "游山西村", "宋"),
        ("王师北定中原日,家祭无忘告乃翁", "陆游", "示儿", "宋"),
        ("小楼一夜听春雨,深巷明朝卖杏花", "陆游", "临安春雨初霁", "宋"),
        ("出师一表真名世,千载谁堪伯仲间", "陆游", "书愤", "宋"),
        ("人生自古谁无死,留取丹心照汗青", "文天祥", "过零丁洋", "宋"),
        ("臣心一片磁针石,不指南方不肯休", "文天祥", "扬子江", "宋"),
        ("问渠那得清如许,为有源头活水来", "朱熹", "观书有感", "宋"),
        ("等闲识得东风面,万紫千红总是春", "朱熹", "春日", "宋"),
        ("众里寻他千百度,蓦然回首,那人却在,灯火阑珊处", "辛弃疾", "青玉案·元夕", "宋"),
        ("想当年,金戈铁马,气吞万里如虎", "辛弃疾", "永遇乐·京口北固亭怀古", "宋"),
        ("醉里挑灯看剑,梦回吹角连营", "辛弃疾", "破阵子", "宋"),
        ("稻花香里说丰年,听取蛙声一片", "辛弃疾", "西江月·夜行黄沙道中", "宋"),
        ("春色满园关不住,一枝红杏出墙来", "叶绍翁", "游园不值", "宋"),
        ("落红不是无情物,化作春泥更护花", "龚自珍", "己亥杂诗", "清"),
        ("我劝天公重抖擞,不拘一格降人才", "龚自珍", "己亥杂诗", "清"),
        ("苟利国家生死以,岂因祸福避趋之", "林则徐", "赴戍登程口占示家人", "清"),
        ("海到无边天作岸,山登绝顶我为峰", "林则徐", "出老", "清"),
        ("我自横刀向天笑,去留肝胆两昆仑", "谭嗣同", "狱中题壁", "清"),
        ("横眉冷对千夫指,俯首甘为孺子牛", "鲁迅", "自嘲", "近代"),
        ("寄意寒星荃不察,我以我血荐轩辕", "鲁迅", "自题小像", "近代"),
        ("心事浩茫连广宇,于无声处听惊雷", "鲁迅", "无题", "近代"),
    )

    return [
        {
            "id": f"classic_{position}",
            "text": text,
            "author": author,
            "source": source,
            "dynasty": dynasty,
            "type": "名句",
            "tags": [],
            "emotion": [],
        }
        for position, (text, author, source, dynasty) in enumerate(table)
    ]

def main():
    """Collect quotes from every source and store them in the database.

    Runs the four collection steps in order (Tang poems, Song poems, idioms,
    built-in classic quotes), inserting each step's records before starting
    the next, and prints per-step and total insert counts. The database
    connection is closed even if a step raises.
    """
    banner = "=" * 60
    print(banner)
    print("名句数据收集")
    print(banner)

    # (progress heading, fetcher, noun used in the "Inserted ..." line)
    steps = (
        ("Fetching Tang Dynasty poems...", fetch_chinese_poetry, "Tang poems"),
        ("Fetching Song Dynasty poems...", fetch_song_poetry, "Song poems"),
        ("Fetching idioms...", fetch_idioms, "idioms"),
        ("Adding classic quotes...", fetch_classic_quotes, "classic quotes"),
    )

    conn = create_database()
    total_inserted = 0
    try:
        for number, (heading, fetch, label) in enumerate(steps, start=1):
            print(f"\n[{number}/{len(steps)}] {heading}")
            # Insert straight away so each step's records can be released
            # before the next download starts.
            inserted = insert_quotes(conn, fetch())
            total_inserted += inserted
            print(f"  Inserted {inserted} {label}")
    finally:
        conn.close()

    print("\n" + banner)
    print(f"Total inserted: {total_inserted} quotes")
    print(f"Database saved to: {DB_PATH}")
    print(banner)

# Run the collection pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()