SarahXia0405 committed on
Commit
e440afe
·
verified ·
1 Parent(s): 7bd3326

Update rag_engine.py

Browse files
Files changed (1) hide show
  1. rag_engine.py +15 -22
rag_engine.py CHANGED
@@ -7,6 +7,7 @@ from clare_core import (
7
  cosine_similarity,
8
  )
9
  from langsmith import traceable
 
10
 
11
 
12
  def build_rag_chunks_from_file(file, doc_type_val: str) -> List[Dict]:
@@ -57,18 +58,13 @@ def retrieve_relevant_chunks(
57
  返回拼接后的文本,供 prompt 使用。
58
  (增强版本:将检索内容记录到 LangSmith metadata)
59
  """
60
- from langsmith import get_current_run
61
-
62
- # 1) 空安全检查
63
  if not rag_chunks:
64
  return ""
65
 
66
- # 2) 问题 embedding
67
  q_emb = get_embedding(question)
68
  if q_emb is None:
69
  return ""
70
 
71
- # 3) 计算相似度
72
  scored = []
73
  for item in rag_chunks:
74
  emb = item.get("embedding")
@@ -81,26 +77,23 @@ def retrieve_relevant_chunks(
81
  if not scored:
82
  return ""
83
 
84
- # 4) 按相似度排序
85
  scored.sort(key=lambda x: x[0], reverse=True)
86
  top_items = scored[:top_k]
87
  top_chunks = [t for _sim, t in top_items]
88
 
89
- # 5) 记录到 LangSmith(每个 chunk 的文本 + 相似度)
90
- run = get_current_run()
91
- if run:
92
- run.update(
93
- metadata={
94
- "question": question,
95
- "retrieved_chunks": [
96
- {
97
- "score": float(sim),
98
- "text_preview": text[:300], # 避免 UI 太长,取前300字
99
- }
100
- for sim, text in top_items
101
- ]
102
- }
103
  )
 
 
 
104
 
105
- # 6) 返回原格式的拼接结果
106
- return "\n---\n".join(top_chunks)
 
7
  cosine_similarity,
8
  )
9
  from langsmith import traceable
10
+ from langsmith.run_helpers import set_run_metadata
11
 
12
 
13
  def build_rag_chunks_from_file(file, doc_type_val: str) -> List[Dict]:
 
58
  返回拼接后的文本,供 prompt 使用。
59
  (增强版本:将检索内容记录到 LangSmith metadata)
60
  """
 
 
 
61
  if not rag_chunks:
62
  return ""
63
 
 
64
  q_emb = get_embedding(question)
65
  if q_emb is None:
66
  return ""
67
 
 
68
  scored = []
69
  for item in rag_chunks:
70
  emb = item.get("embedding")
 
77
  if not scored:
78
  return ""
79
 
 
80
  scored.sort(key=lambda x: x[0], reverse=True)
81
  top_items = scored[:top_k]
82
  top_chunks = [t for _sim, t in top_items]
83
 
84
+ # 使用 set_run_metadata 给当前 retriever run metadata
85
+ try:
86
+ previews = [
87
+ {"score": float(sim), "text_preview": text[:300]}
88
+ for sim, text in top_items
89
+ ]
90
+ set_run_metadata(
91
+ question=question,
92
+ retrieved_chunks=previews,
 
 
 
 
 
93
  )
94
+ except Exception as e:
95
+ # observability 出错不能影响主流程
96
+ print(f"[LangSmith metadata error in retrieve_relevant_chunks] {repr(e)}")
97
 
98
+ # 用分隔线拼接,方便模型辨认不同片段
99
+ return "\n---\n".join(top_chunks)