fikri0o0 committed on
Commit
fcbf394
·
verified ·
1 Parent(s): e596ae3

Add conversation memory: history-aware LLM + context-aware retrieval

Browse files
Files changed (1) hide show
  1. app.py +13 -2
app.py CHANGED
@@ -148,9 +148,20 @@ def respond_stream(message: str, history: list, philosopher: str, llm_label: str
148
  yield history + [{"role": "assistant", "content": err}], "", gr.update(), gr.update()
149
  return
150
 
 
 
 
 
 
 
 
 
 
 
 
151
  # — Retrieval (fast, happens before streaming) —
152
  t0 = time.perf_counter()
153
- docs, scores = retrieve_docs(message, philosopher)
154
  retrieve_time = time.perf_counter() - t0
155
  context_str = "\n\n".join(d.page_content for d in docs)
156
 
@@ -174,7 +185,7 @@ def respond_stream(message: str, history: list, philosopher: str, llm_label: str
174
  t1 = time.perf_counter()
175
  full_response = ""
176
  try:
177
- for text_chunk in stream_llm(provider, model_id, context_str, message):
178
  full_response += text_chunk
179
  history[-1]["content"] = _format_think_blocks(full_response)
180
  yield history, "", gr.update(value=chunks_md), gr.update()
 
148
  yield history + [{"role": "assistant", "content": err}], "", gr.update(), gr.update()
149
  return
150
 
151
+ # — Build retrieval query —
152
+ # For short follow-ups ("bahas lebih lanjut", "elaborate", etc.) that lack
153
+ # standalone meaning, prepend the last user message so retrieval has context.
154
+ retrieval_query = message
155
+ if len(message.split()) <= 8 and history:
156
+ last_user = next(
157
+ (t["content"] for t in reversed(history) if t["role"] == "user"), ""
158
+ )
159
+ if last_user:
160
+ retrieval_query = f"{last_user} {message}"
161
+
162
  # — Retrieval (fast, happens before streaming) —
163
  t0 = time.perf_counter()
164
+ docs, scores = retrieve_docs(retrieval_query, philosopher)
165
  retrieve_time = time.perf_counter() - t0
166
  context_str = "\n\n".join(d.page_content for d in docs)
167
 
 
185
  t1 = time.perf_counter()
186
  full_response = ""
187
  try:
188
+ for text_chunk in stream_llm(provider, model_id, context_str, message, history=history[:-2]):
189
  full_response += text_chunk
190
  history[-1]["content"] = _format_think_blocks(full_response)
191
  yield history, "", gr.update(value=chunks_md), gr.update()