fikri0o0 committed on
Commit
e596ae3
·
verified ·
1 Parent(s): 183a996

Add conversation memory: history-aware LLM + context-aware retrieval

Browse files
Files changed (1) hide show
  1. rag_chain.py +59 -19
rag_chain.py CHANGED
@@ -1,3 +1,4 @@
 
1
  from functools import lru_cache
2
  from pathlib import Path
3
  from typing import Generator
@@ -30,6 +31,19 @@ SYSTEM_PROMPT = (
30
  )
31
 
32
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  # ---------------------------------------------------------------------------
34
  # Cached singletons
35
  # ---------------------------------------------------------------------------
@@ -143,18 +157,26 @@ def retrieve_docs(
143
  # LLM calls — non-streaming
144
  # ---------------------------------------------------------------------------
145
 
146
- def _call_llm(provider: str, model_id: str, context_str: str, input_text: str) -> str:
147
- user_content = (
148
- f"Context from philosophical texts:\n{context_str}\n\nQuestion: {input_text}"
149
- )
 
150
 
151
  if provider == "google":
152
  if not GOOGLE_API_KEY:
153
  env_var, site = PROVIDER_KEYS["google"]
154
  raise ValueError(f"{env_var} not set. Get a free key at {site}")
 
 
 
 
 
 
 
155
  response = _get_genai_client().models.generate_content(
156
  model=model_id,
157
- contents=user_content,
158
  config=types.GenerateContentConfig(
159
  system_instruction=SYSTEM_PROMPT, temperature=0.3
160
  ),
@@ -181,12 +203,16 @@ def _call_llm(provider: str, model_id: str, context_str: str, input_text: str) -
181
  else:
182
  raise ValueError(f"Unknown provider: {provider!r}")
183
 
 
 
 
 
 
 
 
184
  resp = client.chat.completions.create(
185
  model=model_id,
186
- messages=[
187
- {"role": "system", "content": SYSTEM_PROMPT},
188
- {"role": "user", "content": user_content},
189
- ],
190
  temperature=0.3,
191
  )
192
  return resp.choices[0].message.content
@@ -197,20 +223,30 @@ def _call_llm(provider: str, model_id: str, context_str: str, input_text: str) -
197
  # ---------------------------------------------------------------------------
198
 
199
  def stream_llm(
200
- provider: str, model_id: str, context_str: str, input_text: str
 
201
  ) -> Generator[str, None, None]:
202
- """Yield text chunks for real-time streaming."""
203
- user_content = (
204
- f"Context from philosophical texts:\n{context_str}\n\nQuestion: {input_text}"
205
- )
 
 
206
 
207
  if provider == "google":
208
  if not GOOGLE_API_KEY:
209
  env_var, site = PROVIDER_KEYS["google"]
210
  raise ValueError(f"{env_var} not set. Get a free key at {site}")
 
 
 
 
 
 
 
211
  for chunk in _get_genai_client().models.generate_content_stream(
212
  model=model_id,
213
- contents=user_content,
214
  config=types.GenerateContentConfig(
215
  system_instruction=SYSTEM_PROMPT, temperature=0.3
216
  ),
@@ -239,12 +275,16 @@ def stream_llm(
239
  "HTTP-Referer": "https://github.com/Fikri645/philosopher-chat"
240
  },
241
  )
 
 
 
 
 
 
 
242
  stream = client.chat.completions.create(
243
  model=model_id,
244
- messages=[
245
- {"role": "system", "content": SYSTEM_PROMPT},
246
- {"role": "user", "content": user_content},
247
- ],
248
  temperature=0.3,
249
  stream=True,
250
  )
 
1
+ import re
2
  from functools import lru_cache
3
  from pathlib import Path
4
  from typing import Generator
 
31
  )
32
 
33
 
34
+ def _clean_for_history(text: str) -> str:
35
+ """Strip HTML tags and source footer from stored assistant messages.
36
+
37
+ Assistant responses contain <details>/<div> think blocks and a
38
+ '--- **Sources:**' footer injected by the UI — remove both before
39
+ passing prior turns as LLM history, so models see clean prose only.
40
+ """
41
+ text = re.sub(r"<[^>]+>", " ", text) # strip HTML
42
+ text = re.sub(r"\n\n---\n\*\*Sources:\*\*.*$", "", text, # strip footer
43
+ flags=re.DOTALL)
44
+ return " ".join(text.split()) # normalise whitespace
45
+
46
+
47
  # ---------------------------------------------------------------------------
48
  # Cached singletons
49
  # ---------------------------------------------------------------------------
 
157
  # LLM calls — non-streaming
158
  # ---------------------------------------------------------------------------
159
 
160
+ def _call_llm(
161
+ provider: str, model_id: str, context_str: str, input_text: str,
162
+ history: list[dict] | None = None,
163
+ ) -> str:
164
+ final_user = f"Context from philosophical texts:\n{context_str}\n\nQuestion: {input_text}"
165
 
166
  if provider == "google":
167
  if not GOOGLE_API_KEY:
168
  env_var, site = PROVIDER_KEYS["google"]
169
  raise ValueError(f"{env_var} not set. Get a free key at {site}")
170
+ contents = []
171
+ for turn in (history or []):
172
+ role = "model" if turn["role"] == "assistant" else "user"
173
+ content = _clean_for_history(turn["content"]) if turn["role"] == "assistant" else turn["content"]
174
+ if content:
175
+ contents.append({"role": role, "parts": [content]})
176
+ contents.append({"role": "user", "parts": [final_user]})
177
  response = _get_genai_client().models.generate_content(
178
  model=model_id,
179
+ contents=contents,
180
  config=types.GenerateContentConfig(
181
  system_instruction=SYSTEM_PROMPT, temperature=0.3
182
  ),
 
203
  else:
204
  raise ValueError(f"Unknown provider: {provider!r}")
205
 
206
+ messages = [{"role": "system", "content": SYSTEM_PROMPT}]
207
+ for turn in (history or []):
208
+ role = "assistant" if turn["role"] == "assistant" else "user"
209
+ content = _clean_for_history(turn["content"]) if turn["role"] == "assistant" else turn["content"]
210
+ if content:
211
+ messages.append({"role": role, "content": content})
212
+ messages.append({"role": "user", "content": final_user})
213
  resp = client.chat.completions.create(
214
  model=model_id,
215
+ messages=messages,
 
 
 
216
  temperature=0.3,
217
  )
218
  return resp.choices[0].message.content
 
223
  # ---------------------------------------------------------------------------
224
 
225
  def stream_llm(
226
+ provider: str, model_id: str, context_str: str, input_text: str,
227
+ history: list[dict] | None = None,
228
  ) -> Generator[str, None, None]:
229
+ """Yield text chunks for real-time streaming.
230
+
231
+ history: previous turns as [{"role": "user"|"assistant", "content": "..."}].
232
+ Pass all completed turns so the model understands follow-up questions.
233
+ """
234
+ final_user = f"Context from philosophical texts:\n{context_str}\n\nQuestion: {input_text}"
235
 
236
  if provider == "google":
237
  if not GOOGLE_API_KEY:
238
  env_var, site = PROVIDER_KEYS["google"]
239
  raise ValueError(f"{env_var} not set. Get a free key at {site}")
240
+ contents = []
241
+ for turn in (history or []):
242
+ role = "model" if turn["role"] == "assistant" else "user"
243
+ content = _clean_for_history(turn["content"]) if turn["role"] == "assistant" else turn["content"]
244
+ if content:
245
+ contents.append({"role": role, "parts": [content]})
246
+ contents.append({"role": "user", "parts": [final_user]})
247
  for chunk in _get_genai_client().models.generate_content_stream(
248
  model=model_id,
249
+ contents=contents,
250
  config=types.GenerateContentConfig(
251
  system_instruction=SYSTEM_PROMPT, temperature=0.3
252
  ),
 
275
  "HTTP-Referer": "https://github.com/Fikri645/philosopher-chat"
276
  },
277
  )
278
+ messages = [{"role": "system", "content": SYSTEM_PROMPT}]
279
+ for turn in (history or []):
280
+ role = "assistant" if turn["role"] == "assistant" else "user"
281
+ content = _clean_for_history(turn["content"]) if turn["role"] == "assistant" else turn["content"]
282
+ if content:
283
+ messages.append({"role": role, "content": content})
284
+ messages.append({"role": "user", "content": final_user})
285
  stream = client.chat.completions.create(
286
  model=model_id,
287
+ messages=messages,
 
 
 
288
  temperature=0.3,
289
  stream=True,
290
  )