SarahXia0405 committed on
Commit
74f25bb
·
verified ·
1 Parent(s): b3aa8c6

Create clare_core.py

Browse files
Files changed (1) hide show
  1. clare_core.py +670 -0
clare_core.py ADDED
@@ -0,0 +1,670 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # clare_core.py
2
+ import re
3
+ import math
4
+ from typing import List, Dict, Tuple, Optional
5
+
6
+ from docx import Document
7
+
8
+ from config import (
9
+ client,
10
+ DEFAULT_MODEL,
11
+ EMBEDDING_MODEL,
12
+ DEFAULT_COURSE_TOPICS,
13
+ CLARE_SYSTEM_PROMPT,
14
+ LEARNING_MODE_INSTRUCTIONS,
15
+ )
16
+
17
+ # ---------- syllabus 解析 ----------
18
def parse_syllabus_docx(file_path: str, max_lines: int = 15) -> List[str]:
    """
    Extract a rough list of topic lines from a syllabus .docx file.

    This is intentionally simple: the first ``max_lines`` non-empty paragraphs
    (stripped of surrounding whitespace) are returned in document order. The
    goal is only to provide some course context, not an exact structure.

    Args:
        file_path: Path passed to ``Document(...)``.
        max_lines: Maximum number of paragraphs to collect. Values <= 0 yield
            an empty list.

    Returns:
        The collected paragraph texts, or a single-element list of the form
        ``"[Error parsing syllabus: ...]"`` if reading the document raised.
    """
    # Guard the boundary: without this the loop below would append one
    # paragraph before noticing that the limit had already been reached.
    if max_lines <= 0:
        return []

    topics: List[str] = []
    try:
        doc = Document(file_path)
        for para in doc.paragraphs:
            text = para.text.strip()
            if not text:
                continue
            topics.append(text)
            if len(topics) >= max_lines:
                break
    except Exception as e:
        # Best-effort: report the failure as a pseudo-topic instead of raising.
        topics = [f"[Error parsing syllabus: {e}]"]

    return topics
37
+
38
+
39
+ # ---------- 简单“弱项”检测 ----------
40
# Substrings that mark a student message as expressing confusion or difficulty.
# English entries are lower-case; the list order is kept as originally written.
WEAKNESS_KEYWORDS = [
    # English phrases
    *(
        "don't understand",
        "do not understand",
        "not understand",
        "not sure",
        "confused",
        "hard to",
        "difficult",
        "struggle",
    ),
    # Chinese phrases
    *(
        "不会",
        "不懂",
        "看不懂",
        "搞不清",
        "很难",
    ),
]
55
+
56
+ # ---------- 简单“掌握”检测 ----------
57
# Substrings that mark a student message as expressing understanding.
# English entries are lower-case; the list order is kept as originally written.
MASTERY_KEYWORDS = [
    # English phrases
    *(
        "got it",
        "makes sense",
        "now i see",
        "i see",
        "understand now",
        "clear now",
        "easy",
        "no problem",
    ),
    # Chinese phrases
    *(
        "没问题",
        "懂了",
        "明白了",
        "清楚了",
    ),
]
71
+
72
def update_weaknesses_from_message(message: str, weaknesses: List[str]) -> List[str]:
    """
    Record ``message`` as a difficulty if it contains a weakness keyword.

    The message is lower-cased and checked for any substring listed in
    ``WEAKNESS_KEYWORDS``; on a match the original (un-lowered) message is
    appended.

    Args:
        message: The student's message. Empty or ``None`` never matches.
        weaknesses: Difficulties recorded so far; ``None`` is treated as empty.

    Returns:
        A new list (the argument is never mutated) holding the previous
        entries plus ``message`` when it matched. Always a list, never
        ``None``.
    """
    # Copy first so the caller's list is left untouched; previously the input
    # was mutated in place only when it happened to be non-empty.
    updated: List[str] = list(weaknesses) if weaknesses else []
    if not message:
        return updated

    lowered = message.lower()
    if any(keyword in lowered for keyword in WEAKNESS_KEYWORDS):
        updated.append(message)
    return updated
78
+
79
+
80
def update_cognitive_state_from_message(
    message: str,
    state: Optional[Dict[str, int]],
) -> Dict[str, int]:
    """
    Update simple confusion/mastery counters from one student message.

    - a weakness keyword in the message  -> ``confusion`` + 1
    - a mastery keyword in the message   -> ``mastery`` + 1

    A ``None`` state is replaced by a fresh zeroed dict; an existing dict is
    updated in place and returned.
    """
    if state is None:
        state = {"confusion": 0, "mastery": 0}

    text = message.lower()
    counters = (
        ("confusion", WEAKNESS_KEYWORDS),
        ("mastery", MASTERY_KEYWORDS),
    )
    for counter_name, keywords in counters:
        # Each counter moves by at most one per message, however many
        # of its keywords appear.
        if any(keyword in text for keyword in keywords):
            state[counter_name] = state.get(counter_name, 0) + 1
    return state
98
+
99
+
100
def describe_cognitive_state(state: Optional[Dict[str, int]]) -> str:
    """
    Turn the confusion/mastery counters into a short English description.

    Returns "unknown" for a missing or empty state. Otherwise one side must
    have at least 2 hits and lead the other by at least 1 to be reported;
    anything else is described as mixed.
    """
    if not state:
        return "unknown"

    n_confused = state.get("confusion", 0)
    n_mastered = state.get("mastery", 0)

    often_confused = n_confused >= 2 and n_confused >= n_mastered + 1
    if often_confused:
        return "student shows signs of HIGH cognitive load (often confused)."

    mostly_comfortable = n_mastered >= 2 and n_mastered >= n_confused + 1
    if mostly_comfortable:
        return "student seems COMFORTABLE; material may be slightly easy."

    return "mixed or uncertain cognitive state."
111
+
112
+
113
+ # ---------- Session Memory ----------
114
def _clip_summary_entry(text: str, limit: int = 120) -> str:
    """Strip ``text`` and cut it to ``limit`` characters, ending in '...' when cut."""
    text = text.strip()
    if len(text) > limit:
        text = text[: limit - 3] + "..."
    return text


def _last_items(items: Optional[list], count: int) -> list:
    """Return the last ``count`` entries of ``items`` (empty when count <= 0)."""
    # A plain ``items[-count:]`` would return the whole list for count == 0.
    if not items or count <= 0:
        return []
    return list(items[-count:])


def build_session_memory_summary(
    history: List[Tuple[str, str]],
    weaknesses: Optional[List[str]],
    cognitive_state: Optional[Dict[str, int]],
    max_questions: int = 4,
    max_weaknesses: int = 3,
) -> str:
    """
    Build a one-line "memory summary" used only within the current session.

    The summary contains, when available:
    - the most recent student questions (up to ``max_questions``)
    - the most recent messages the student found difficult
      (up to ``max_weaknesses``)
    - a description of the current cognitive state

    Each quoted entry is stripped and clipped to 120 characters. A limit of
    0 (or less) omits that section. Sections and entries are joined with
    " | ".

    Returns:
        The summary string, or a fixed "No prior session memory..." hint when
        there is nothing to report.
    """
    parts: List[str] = []

    # Recent questions: only the student side of each turn is used.
    recent_questions = [
        _clip_summary_entry(question)
        for (question, _answer) in _last_items(history, max_questions)
    ]
    if recent_questions:
        parts.append("Recent student questions: " + " | ".join(recent_questions))

    # Recent difficulties.
    recent_difficulties = [
        _clip_summary_entry(item)
        for item in _last_items(weaknesses, max_weaknesses)
    ]
    if recent_difficulties:
        parts.append(
            "Recent difficulties mentioned by the student: "
            + " | ".join(recent_difficulties)
        )

    # Current cognitive state.
    if cognitive_state:
        parts.append(
            "Current cognitive state: " + describe_cognitive_state(cognitive_state)
        )

    if not parts:
        return (
            "No prior session memory. You can treat this as an early stage of the conversation; "
            "start with simple explanations and ask a quick check-up question."
        )

    return " | ".join(parts)
163
+
164
+
165
+ # ---------- 语言检测(用于 Auto 模式) ----------
166
def detect_language(message: str, preference: str) -> str:
    """
    Decide which language to answer in.

    preference:
    - 'English' -> always English
    - '中文'    -> always Chinese
    - anything else (e.g. 'Auto') -> Chinese if the message contains at least
      one character in the range U+4E00..U+9FFF, otherwise English
    """
    forced_choices = ("English", "中文")
    if preference in forced_choices:
        return preference

    # Auto mode: a single character in the range is enough to pick Chinese.
    has_chinese = re.search(r"[\u4e00-\u9fff]", message) is not None
    return "中文" if has_chinese else "English"
179
+
180
+
181
+ # ---------- Session 状态展示 ----------
182
def render_session_status(
    learning_mode: str,
    weaknesses: Optional[List[str]],
    cognitive_state: Optional[Dict[str, int]],
) -> str:
    """
    Render the session status as a short Markdown snippet.

    Shows the learning mode, the described cognitive state, and the last
    three recorded difficulties (or a "none yet" placeholder).
    """
    status = [
        "### Session status\n",
        f"- Learning mode: **{learning_mode}**",
        f"- Cognitive state: {describe_cognitive_state(cognitive_state)}",
    ]

    if not weaknesses:
        status.append("- Recent difficulties: *(none yet)*")
    else:
        status.append("- Recent difficulties (last 3):")
        status.extend(f" - {item}" for item in weaknesses[-3:])

    return "\n".join(status)
200
+
201
+
202
+ # ---------- Same Question Check helpers ----------
203
+ def _normalize_text(text: str) -> str:
204
+ """
205
+ 将文本转为小写、去除标点和多余空格,用于简单相似度计算。
206
+ """
207
+ text = text.lower().strip()
208
+ text = re.sub(r"[^\w\s]", " ", text)
209
+ text = re.sub(r"\s+", " ", text)
210
+ return text
211
+
212
+
213
+ def _jaccard_similarity(a: str, b: str) -> float:
214
+ tokens_a = set(a.split())
215
+ tokens_b = set(b.split())
216
+ if not tokens_a or not tokens_b:
217
+ return 0.0
218
+ return len(tokens_a & tokens_b) / len(tokens_a | tokens_b)
219
+
220
+
221
def cosine_similarity(a: List[float], b: List[float]) -> float:
    """
    Cosine similarity of two equal-length vectors.

    Returns 0.0 when either vector is empty, the lengths differ, or either
    vector has zero magnitude.
    """
    if not a or not b or len(a) != len(b):
        return 0.0

    magnitude_a = math.sqrt(sum(value * value for value in a))
    magnitude_b = math.sqrt(sum(value * value for value in b))
    if magnitude_a == 0 or magnitude_b == 0:
        return 0.0

    inner_product = sum(left * right for left, right in zip(a, b))
    return inner_product / (magnitude_a * magnitude_b)
230
+
231
+
232
def get_embedding(text: str) -> Optional[List[float]]:
    """
    Encode ``text`` as a vector via ``client.embeddings.create``.

    Uses ``EMBEDDING_MODEL`` and sends the text as a one-element input list.

    Returns:
        The embedding of the first (only) result, or ``None`` if the call or
        the response access raised.
    """
    try:
        first_result = client.embeddings.create(
            model=EMBEDDING_MODEL,
            input=[text],
        ).data[0]
        return first_result.embedding
    except Exception as err:
        # Printed (not raised) so the failure is visible in the app log.
        print(f"[Embedding error] {repr(err)}")
        return None
246
+
247
+
248
def find_similar_past_question(
    message: str,
    history: List[Tuple[str, str]],
    jaccard_threshold: float = 0.65,
    embedding_threshold: float = 0.85,
    max_turns_to_check: int = 6,
) -> Optional[Tuple[str, str, float]]:
    """
    Look for an earlier question in recent history that resembles ``message``.

    Two-stage check over the most recent ``max_turns_to_check`` turns,
    newest first:
      1. Token-level Jaccard similarity on normalized text (an exact
         normalized match returns immediately with score 1.0).
      2. If stage 1 finds nothing above ``jaccard_threshold``, cosine
         similarity between embeddings of the raw texts.

    Returns:
        ``(past_question, past_answer, similarity_score)`` for the best match
        that reaches the relevant threshold, otherwise ``None``.
    """
    query_norm = _normalize_text(message)
    if not query_norm:
        return None

    # ---- Stage 1: Jaccard on normalized text ----
    top_jaccard = 0.0
    top_jaccard_turn: Optional[Tuple[str, str]] = None

    for turn_index, (past_q, past_a) in enumerate(reversed(history)):
        if turn_index >= max_turns_to_check:
            break

        past_norm = _normalize_text(past_q)
        if not past_norm:
            continue

        if query_norm == past_norm:
            return past_q, past_a, 1.0

        score = _jaccard_similarity(query_norm, past_norm)
        # Strict '>' keeps the most recent turn when scores tie.
        if score > top_jaccard:
            top_jaccard, top_jaccard_turn = score, (past_q, past_a)

    if top_jaccard_turn and top_jaccard >= jaccard_threshold:
        return top_jaccard_turn[0], top_jaccard_turn[1], top_jaccard

    # ---- Stage 2: embedding-based semantic similarity ----
    if not history:
        return None

    query_embedding = get_embedding(message)
    if query_embedding is None:
        return None

    top_cosine = 0.0
    top_cosine_turn: Optional[Tuple[str, str]] = None

    for turn_index, (past_q, past_a) in enumerate(reversed(history)):
        if turn_index >= max_turns_to_check:
            break

        past_embedding = get_embedding(past_q)
        if past_embedding is None:
            # Skip turns whose embedding could not be obtained.
            continue

        score = cosine_similarity(query_embedding, past_embedding)
        if score > top_cosine:
            top_cosine, top_cosine_turn = score, (past_q, past_a)

    if top_cosine_turn and top_cosine >= embedding_threshold:
        return top_cosine_turn[0], top_cosine_turn[1], top_cosine

    return None
319
+
320
+
321
+ # ---------- 构建 messages ----------
322
def build_messages(
    user_message: str,
    history: List[Tuple[str, str]],
    language_preference: str,
    learning_mode: str,
    doc_type: str,
    course_outline: Optional[List[str]],
    weaknesses: Optional[List[str]],
    cognitive_state: Optional[Dict[str, int]],
) -> List[Dict[str, str]]:
    """
    Assemble the chat message list for one turn.

    Order of the resulting messages:
      1. base system prompt
      2. learning-mode instruction (only for known modes)
      3. syllabus context (``course_outline`` or the default topics)
      4. note about an uploaded non-syllabus document
      5. the student's last five recorded difficulties
      6. pacing hint derived from the cognitive state
      7. language instruction ('English' / '中文' only)
      8. session memory summary
      9. prior turns (user / assistant), then the current user message
    """
    messages: List[Dict[str, str]] = [
        {"role": "system", "content": CLARE_SYSTEM_PROMPT}
    ]

    def add_system(content: str) -> None:
        """Append one system-role message."""
        messages.append({"role": "system", "content": content})

    # Learning mode
    if learning_mode in LEARNING_MODE_INSTRUCTIONS:
        mode_instruction = LEARNING_MODE_INSTRUCTIONS[learning_mode]
        add_system(f"Current learning mode: {learning_mode}. {mode_instruction}")

    # Course outline
    topics = course_outline if course_outline else DEFAULT_COURSE_TOPICS
    add_system(
        "Here is the course syllabus context. Use this to stay aligned "
        "with the course topics when answering: "
        + " | ".join(topics)
    )

    # Hint about the type of the uploaded file
    if doc_type and doc_type != "Syllabus":
        add_system(
            f"The student also uploaded a {doc_type} document as supporting material. "
            "You do not see the full content directly, but you may assume it is relevant "
            "to the same course and topics."
        )

    # Student difficulties
    if weaknesses:
        add_system(
            "The student seems to struggle with the following questions or topics. "
            "Be extra gentle and clear when these appear: "
            + " | ".join(weaknesses[-5:])
        )

    # Cognitive-state hint
    if cognitive_state:
        n_confused = cognitive_state.get("confusion", 0)
        n_mastered = cognitive_state.get("mastery", 0)
        if n_confused >= 2 and n_confused >= n_mastered + 1:
            pacing_hint = (
                "The student is currently under HIGH cognitive load. "
                "Use simpler language, shorter steps, and more concrete examples. "
                "Avoid long derivations in a single answer, and check understanding "
                "frequently."
            )
        elif n_mastered >= 2 and n_mastered >= n_confused + 1:
            pacing_hint = (
                "The student seems comfortable with the material. "
                "You may increase difficulty slightly, introduce deeper follow-up "
                "questions, and connect concepts across topics."
            )
        else:
            pacing_hint = (
                "The student's cognitive state is mixed or uncertain. "
                "Keep explanations clear and moderately paced, and probe for "
                "understanding with short questions."
            )
        add_system(pacing_hint)

    # Language preference (no instruction for any other value, e.g. 'Auto')
    if language_preference == "English":
        add_system("Please answer in English.")
    elif language_preference == "中文":
        add_system("请用中文回答学生的问题。")

    # In-session memory summary
    add_system(
        "Here is a short summary of this session's memory (only within the current chat; "
        "it is not persisted across sessions). Use it to stay consistent with the "
        "student's previous questions, difficulties, and cognitive state: "
        + build_session_memory_summary(
            history=history,
            weaknesses=weaknesses,
            cognitive_state=cognitive_state,
        )
    )

    # Prior turns
    for past_user, past_assistant in history:
        messages.append({"role": "user", "content": past_user})
        if past_assistant is not None:
            messages.append({"role": "assistant", "content": past_assistant})

    # Current input
    messages.append({"role": "user", "content": user_message})
    return messages
462
+
463
+
464
def chat_with_clare(
    message: str,
    history: List[Tuple[str, str]],
    model_name: str,
    language_preference: str,
    learning_mode: str,
    doc_type: str,
    course_outline: Optional[List[str]],
    weaknesses: Optional[List[str]],
    cognitive_state: Optional[Dict[str, int]],
) -> Tuple[str, List[Tuple[str, str]]]:
    """
    Send one student message to the model and return the reply.

    Builds the prompt with ``build_messages`` and calls
    ``client.chat.completions.create`` with ``model_name`` (falling back to
    ``DEFAULT_MODEL`` when empty) at temperature 0.5.

    Args:
        message: The student's current message.
        history: Prior ``(user, assistant)`` turns; ``None`` is treated as an
            empty history.
        Remaining arguments are passed through to ``build_messages``.

    Returns:
        ``(answer, new_history)`` where ``new_history`` is a new list made of
        the prior turns plus ``(message, answer)``. The input history is not
        mutated. If anything raises while building the prompt or calling the
        model, ``answer`` is an error string instead.
    """
    # Normalize up front so a missing history cannot raise after the try
    # block, when the new turn is appended.
    prior_turns: List[Tuple[str, str]] = list(history) if history else []

    try:
        messages = build_messages(
            user_message=message,
            history=prior_turns,
            language_preference=language_preference,
            learning_mode=learning_mode,
            doc_type=doc_type,
            course_outline=course_outline,
            weaknesses=weaknesses,
            cognitive_state=cognitive_state,
        )
        response = client.chat.completions.create(
            model=model_name or DEFAULT_MODEL,
            messages=messages,
            temperature=0.5,
        )
        # The message content may be None; keep the declared str return type.
        answer = response.choices[0].message.content or ""
    except Exception as e:
        answer = f"⚠️ Error talking to the model: {e}"

    return answer, prior_turns + [(message, answer)]
497
+
498
+
499
+ # ---------- 导出对话为 Markdown ----------
500
def export_conversation(
    history: List[Tuple[str, str]],
    course_outline: List[str],
    learning_mode_val: str,
    weaknesses: List[str],
    cognitive_state: Optional[Dict[str, int]],
) -> str:
    """
    Export the conversation as a Markdown document.

    The header lists the learning mode, the first five course topics, a
    cognitive-state snapshot and (if any) the last five recorded
    difficulties; each turn then follows as a Student / Clare pair separated
    by horizontal rules.

    Args:
        history: ``(user, assistant)`` turns; ``None`` is treated as empty.
        course_outline: Course topics; when missing or empty,
            ``DEFAULT_COURSE_TOPICS`` is used, as in ``build_messages``.
        learning_mode_val: Name of the current learning mode.
        weaknesses: Recorded difficulties; may be empty or ``None``.
        cognitive_state: Confusion/mastery counters, or ``None``.

    Returns:
        The Markdown text.
    """
    # Fall back to the default topics instead of failing on a missing outline.
    topics = course_outline if course_outline else DEFAULT_COURSE_TOPICS

    lines: List[str] = [
        "# Clare – Conversation Export\n",
        f"- Learning mode: **{learning_mode_val}**\n",
        "- Course topics (short): " + "; ".join(topics[:5]) + "\n",
        f"- Cognitive state snapshot: {describe_cognitive_state(cognitive_state)}\n",
    ]

    if weaknesses:
        lines.append("- Observed student difficulties:\n")
        lines.extend(f" - {w}\n" for w in weaknesses[-5:])
    lines.append("\n---\n\n")

    for user, assistant in history or []:
        # A turn without a reply is rendered empty rather than as "None".
        reply = "" if assistant is None else assistant
        lines.append(f"**Student:** {user}\n\n")
        lines.append(f"**Clare:** {reply}\n\n")
        lines.append("---\n\n")

    return "".join(lines)
525
+
526
+
527
+ # ---------- 生成 3 个 quiz 题目 ----------
528
def generate_quiz_from_history(
    history: List[Tuple[str, str]],
    course_outline: List[str],
    weaknesses: List[str],
    cognitive_state: Optional[Dict[str, int]],
    model_name: str,
    language_preference: str,
) -> str:
    """
    Ask the model for a three-question concept quiz based on the session.

    The prompt contains the last eight turns, the first eight course topics,
    the last five recorded difficulties and the described cognitive state.

    Args:
        history: ``(user, assistant)`` turns; ``None`` is treated as empty.
        course_outline: Course topics; when missing or empty,
            ``DEFAULT_COURSE_TOPICS`` is used, as in ``build_messages``.
        weaknesses: Recorded difficulties; may be empty or ``None``.
        cognitive_state: Confusion/mastery counters, or ``None``.
        model_name: Model to use; empty falls back to ``DEFAULT_MODEL``.
        language_preference: 'English' or '中文' adds an explicit language
            instruction; any other value adds none.

    Returns:
        The quiz text, or an error string if the model call raised.
    """
    turns: List[str] = []
    for user, assistant in (history or [])[-8:]:
        # A turn without a reply is rendered empty rather than as "None".
        reply = "" if assistant is None else assistant
        turns.append(f"Student: {user}\nClare: {reply}\n")
    conversation_text = "".join(turns)

    # Fall back to the default topics instead of failing on a missing outline.
    topics = course_outline if course_outline else DEFAULT_COURSE_TOPICS
    topics_text = "; ".join(topics[:8])
    weakness_text = "; ".join(weaknesses[-5:]) if weaknesses else "N/A"
    cog_text = describe_cognitive_state(cognitive_state)

    messages = [
        {"role": "system", "content": CLARE_SYSTEM_PROMPT},
        {
            "role": "system",
            "content": (
                "Now your task is to create a **short concept quiz** for the student. "
                "Based on the conversation and course topics, generate **3 questions** "
                "(a mix of multiple-choice and short-answer is fine). After listing the "
                "questions, provide an answer key at the end under a heading 'Answer Key'. "
                "Number the questions Q1, Q2, Q3. Adjust the difficulty according to the "
                "student's cognitive state."
            ),
        },
        {
            "role": "system",
            "content": f"Course topics: {topics_text}",
        },
        {
            "role": "system",
            "content": f"Student known difficulties: {weakness_text}",
        },
        {
            "role": "system",
            "content": f"Student cognitive state: {cog_text}",
        },
        {
            "role": "user",
            "content": (
                "Here is the recent conversation between you and the student:\n\n"
                + conversation_text
                + "\n\nPlease create the quiz now."
            ),
        },
    ]

    # Honour both explicit language choices, as build_messages does.
    if language_preference == "中文":
        messages.append(
            {
                "role": "system",
                "content": "请用中文给出问题和答案。",
            }
        )
    elif language_preference == "English":
        messages.append(
            {
                "role": "system",
                "content": "Please answer in English.",
            }
        )

    try:
        response = client.chat.completions.create(
            model=model_name or DEFAULT_MODEL,
            messages=messages,
            temperature=0.5,
        )
        # The message content may be None; keep the declared str return type.
        quiz_text = response.choices[0].message.content or ""
    except Exception as e:
        quiz_text = f"⚠️ Error generating quiz: {e}"

    return quiz_text
598
+
599
+
600
+ # ---------- 概念总结(知识点摘要) ----------
601
def summarize_conversation(
    history: List[Tuple[str, str]],
    course_outline: List[str],
    weaknesses: List[str],
    cognitive_state: Optional[Dict[str, int]],
    model_name: str,
    language_preference: str,
) -> str:
    """
    Ask the model for a concept-only summary of the tutoring session.

    The prompt contains the last ten turns, the first eight course topics,
    the last five recorded difficulties and the described cognitive state.

    Args:
        history: ``(user, assistant)`` turns; ``None`` is treated as empty.
        course_outline: Course topics; when missing or empty,
            ``DEFAULT_COURSE_TOPICS`` is used, as in ``build_messages``.
        weaknesses: Recorded difficulties; may be empty or ``None``.
        cognitive_state: Confusion/mastery counters, or ``None``.
        model_name: Model to use; empty falls back to ``DEFAULT_MODEL``.
        language_preference: 'English' or '中文' adds an explicit language
            instruction; any other value adds none.

    Returns:
        The summary text, or an error string if the model call raised.
    """
    turns: List[str] = []
    for user, assistant in (history or [])[-10:]:
        # A turn without a reply is rendered empty rather than as "None".
        reply = "" if assistant is None else assistant
        turns.append(f"Student: {user}\nClare: {reply}\n")
    conversation_text = "".join(turns)

    # Fall back to the default topics instead of failing on a missing outline.
    topics = course_outline if course_outline else DEFAULT_COURSE_TOPICS
    topics_text = "; ".join(topics[:8])
    weakness_text = "; ".join(weaknesses[-5:]) if weaknesses else "N/A"
    cog_text = describe_cognitive_state(cognitive_state)

    messages = [
        {"role": "system", "content": CLARE_SYSTEM_PROMPT},
        {
            "role": "system",
            "content": (
                "Your task now is to produce a **concept-only summary** of this tutoring "
                "session. Only include knowledge points, definitions, key formulas, "
                "examples, and main takeaways. Do **not** include any personal remarks, "
                "jokes, or off-topic chat. Write in clear bullet points. This summary "
                "should be suitable for the student to paste into their study notes. "
                "Take into account what the student struggled with and their cognitive state."
            ),
        },
        {
            "role": "system",
            "content": f"Course topics context: {topics_text}",
        },
        {
            "role": "system",
            "content": f"Student known difficulties: {weakness_text}",
        },
        {
            "role": "system",
            "content": f"Student cognitive state: {cog_text}",
        },
        {
            "role": "user",
            "content": (
                "Here is the recent conversation between you and the student:\n\n"
                + conversation_text
                + "\n\nPlease summarize only the concepts and key ideas learned."
            ),
        },
    ]

    # Honour both explicit language choices, as build_messages does.
    if language_preference == "中文":
        messages.append(
            {
                "role": "system",
                "content": "请用中文给出要点总结,只保留知识点和结论,使用条目符号。",
            }
        )
    elif language_preference == "English":
        messages.append(
            {
                "role": "system",
                "content": "Please answer in English.",
            }
        )

    try:
        response = client.chat.completions.create(
            model=model_name or DEFAULT_MODEL,
            messages=messages,
            temperature=0.4,
        )
        # The message content may be None; keep the declared str return type.
        summary_text = response.choices[0].message.content or ""
    except Exception as e:
        summary_text = f"⚠️ Error generating summary: {e}"

    return summary_text