Spaces:
Sleeping
Sleeping
Update api/clare_core.py
Browse files- api/clare_core.py +33 -2
api/clare_core.py
CHANGED
|
@@ -15,8 +15,24 @@ from .config import (
|
|
| 15 |
LEARNING_MODE_INSTRUCTIONS,
|
| 16 |
)
|
| 17 |
|
| 18 |
-
|
| 19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
|
| 21 |
|
| 22 |
# ----------------------------
|
|
@@ -26,6 +42,9 @@ MAX_HISTORY_TURNS = int(os.getenv("CLARE_MAX_HISTORY_TURNS", "10"))
|
|
| 26 |
MAX_RAG_CHARS_IN_PROMPT = int(os.getenv("CLARE_MAX_RAG_CHARS", "2000"))
|
| 27 |
DEFAULT_MAX_OUTPUT_TOKENS = int(os.getenv("CLARE_MAX_OUTPUT_TOKENS", "384"))
|
| 28 |
|
|
|
|
|
|
|
|
|
|
| 29 |
|
| 30 |
# ---------- syllabus 解析 ----------
|
| 31 |
def parse_syllabus_docx(file_path: str, max_lines: int = 15) -> List[str]:
|
|
@@ -251,6 +270,12 @@ def find_similar_past_question(
|
|
| 251 |
embedding_threshold: float = 0.85,
|
| 252 |
max_turns_to_check: int = 6,
|
| 253 |
) -> Optional[Tuple[str, str, float]]:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 254 |
norm_msg = _normalize_text(message)
|
| 255 |
if not norm_msg:
|
| 256 |
return None
|
|
@@ -279,6 +304,10 @@ def find_similar_past_question(
|
|
| 279 |
if best_pair_j and best_sim_j >= jaccard_threshold:
|
| 280 |
return best_pair_j[0], best_pair_j[1], best_sim_j
|
| 281 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 282 |
if not history:
|
| 283 |
return None
|
| 284 |
|
|
@@ -462,6 +491,7 @@ def chat_with_clare(
|
|
| 462 |
cognitive_state: Optional[Dict[str, int]],
|
| 463 |
rag_context: Optional[str] = None,
|
| 464 |
) -> Tuple[str, List[Tuple[str, str]]]:
|
|
|
|
| 465 |
try:
|
| 466 |
set_run_metadata(
|
| 467 |
learning_mode=learning_mode,
|
|
@@ -469,6 +499,7 @@ def chat_with_clare(
|
|
| 469 |
doc_type=doc_type,
|
| 470 |
)
|
| 471 |
except Exception as e:
|
|
|
|
| 472 |
print(f"[LangSmith metadata error in chat_with_clare] {repr(e)}")
|
| 473 |
|
| 474 |
messages = build_messages(
|
|
|
|
| 15 |
LEARNING_MODE_INSTRUCTIONS,
|
| 16 |
)
|
| 17 |
|
| 18 |
+
# ----------------------------
|
| 19 |
+
# Tracing toggle (LangSmith)
|
| 20 |
+
# ----------------------------
|
| 21 |
+
# Default OFF for speed + stability in HF cold start environments.
|
| 22 |
+
ENABLE_TRACING = os.getenv("CLARE_ENABLE_TRACING", "0").strip() == "1"

if ENABLE_TRACING:
    from langsmith import traceable  # type: ignore
    from langsmith.run_helpers import set_run_metadata  # type: ignore
else:
    # Tracing is off: provide no-op stand-ins so the rest of the module can
    # call the tracing API unconditionally without importing langsmith.

    def traceable(*args, **kwargs):  # type: ignore
        """No-op replacement for ``langsmith.traceable``.

        Works in both decorator forms:

        * ``@traceable`` (bare) -- the decorated function is returned as-is.
        * ``@traceable(...)`` -- returns a decorator that hands the function
          back unchanged; all configuration arguments are ignored.
        """
        # Bare usage passes the decorated function as the sole positional
        # argument. Returning the inner decorator here would replace the
        # function with a wrapper that echoes its first argument.
        if len(args) == 1 and not kwargs and callable(args[0]):
            return args[0]

        def _decorator(fn):
            return fn

        return _decorator

    def set_run_metadata(**kwargs):  # type: ignore
        """Accept and discard run metadata; always returns ``None``."""
        return None
|
| 36 |
|
| 37 |
|
| 38 |
# ----------------------------
|
|
|
|
| 42 |
MAX_RAG_CHARS_IN_PROMPT = int(os.getenv("CLARE_MAX_RAG_CHARS", "2000"))
|
| 43 |
DEFAULT_MAX_OUTPUT_TOKENS = int(os.getenv("CLARE_MAX_OUTPUT_TOKENS", "384"))
|
| 44 |
|
| 45 |
+
# Similarity knobs
|
| 46 |
+
ENABLE_EMBEDDING_SIM = os.getenv("CLARE_ENABLE_EMBEDDING_SIMILARITY", "0").strip() == "1"
|
| 47 |
+
|
| 48 |
|
| 49 |
# ---------- syllabus 解析 ----------
|
| 50 |
def parse_syllabus_docx(file_path: str, max_lines: int = 15) -> List[str]:
|
|
|
|
| 270 |
embedding_threshold: float = 0.85,
|
| 271 |
max_turns_to_check: int = 6,
|
| 272 |
) -> Optional[Tuple[str, str, float]]:
|
| 273 |
+
"""
|
| 274 |
+
Fast path:
|
| 275 |
+
- Always do Jaccard on normalized text for up to max_turns_to_check.
|
| 276 |
+
Optional path (disabled by default for speed/stability):
|
| 277 |
+
- Embedding-based similarity if ENABLE_EMBEDDING_SIM is true (env CLARE_ENABLE_EMBEDDING_SIMILARITY=1)
|
| 278 |
+
"""
|
| 279 |
norm_msg = _normalize_text(message)
|
| 280 |
if not norm_msg:
|
| 281 |
return None
|
|
|
|
| 304 |
if best_pair_j and best_sim_j >= jaccard_threshold:
|
| 305 |
return best_pair_j[0], best_pair_j[1], best_sim_j
|
| 306 |
|
| 307 |
+
# Optional: embedding similarity (OFF by default)
|
| 308 |
+
if not ENABLE_EMBEDDING_SIM:
|
| 309 |
+
return None
|
| 310 |
+
|
| 311 |
if not history:
|
| 312 |
return None
|
| 313 |
|
|
|
|
| 491 |
cognitive_state: Optional[Dict[str, int]],
|
| 492 |
rag_context: Optional[str] = None,
|
| 493 |
) -> Tuple[str, List[Tuple[str, str]]]:
|
| 494 |
+
# avoid any tracing overhead when disabled (set_run_metadata is no-op in that case)
|
| 495 |
try:
|
| 496 |
set_run_metadata(
|
| 497 |
learning_mode=learning_mode,
|
|
|
|
| 499 |
doc_type=doc_type,
|
| 500 |
)
|
| 501 |
except Exception as e:
|
| 502 |
+
# safe even if tracing enabled but misconfigured
|
| 503 |
print(f"[LangSmith metadata error in chat_with_clare] {repr(e)}")
|
| 504 |
|
| 505 |
messages = build_messages(
|