fikri0o0 committed on
Commit
b031ba8
·
verified ·
1 Parent(s): b9ea71d

Add test_history.py: multi-turn conversation memory test

Browse files
Files changed (1) hide show
  1. test_history.py +70 -0
test_history.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""
Test conversation memory: send a follow-up message with history attached
and verify all three providers handle the new multi-turn format correctly.

For each probed model the script streams an answer to a follow-up question
(with a mocked first turn passed as ``history``) and prints one table row:
PASS with a short preview of the answer, or FAIL with the error text. An
empty answer is reported as FAIL, since it shows nothing about whether the
history was handled.

Usage: python test_history.py
"""

import sys
import io
import time

# Re-wrap stdout as UTF-8 with errors="replace" so the non-ASCII characters
# printed below ("—", "…", "Ü") cannot raise an encoding error on consoles
# whose default encoding is narrower.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace")

# Project imports are kept after the stdout re-wrap, as in the original
# script, so anything they print at import time goes through the wrapper.
from config import LLM_OPTIONS
from rag_chain import stream_llm, retrieve_docs

Q1 = "What is Nietzsche's view on the death of God?"
Q2 = "How does this relate to the concept of nihilism?"  # follow-up — needs history

# Fake first-turn history the way Gradio passes it back
MOCK_HISTORY = [
    {"role": "user", "content": Q1},
    {
        "role": "assistant",
        "content": (
            "Nietzsche argues that the death of God signals the collapse of "
            "transcendent moral frameworks, forcing humanity to create its own values. "
            "As he writes in <em>Thus Spoke Zarathustra</em>, the Übermensch "
            "must fill the void left by divine authority through acts of self-overcoming."
            "\n\n---\n**Sources:**\n- `0.30` Nietzsche — *Thus Spoke Zarathustra*"
        ),
    },
]

# Pick one representative model per provider
PROBE_LABELS = (
    "Gemma 4 MoE 26B [Google]",
    "Llama 3.1 8B [Groq]",
    "Llama 3.3 70B [OpenRouter]",
)
PROBE_MODELS = {
    label: (provider, mid)
    for label, (provider, mid) in LLM_OPTIONS.items()
    if label in PROBE_LABELS
}
# Labels that no longer exist in LLM_OPTIONS would otherwise be dropped
# silently, leaving a provider untested without any visible sign.
MISSING_LABELS = [label for label in PROBE_LABELS if label not in PROBE_MODELS]

PASS = "\033[92mPASS\033[0m"
FAIL = "\033[91mFAIL\033[0m"

# Retrieve context once, using both questions as the query, and share it
# across every probed model. The returned scores are not needed here.
docs, _scores = retrieve_docs(f"{Q1} {Q2}", "All")
context_str = "\n\n".join(d.page_content for d in docs)

print(f"\nHistory test — follow-up: \"{Q2}\"\n")
if MISSING_LABELS:
    print(f"WARNING: not found in LLM_OPTIONS, skipped: {', '.join(MISSING_LABELS)}\n")
print(f"{'Model':<45} {'Provider':<12} {'Status':<6} {'Time (s)':>8} Preview")
print("-" * 110)

for label, (provider, model_id) in PROBE_MODELS.items():
    # Show a pending row first; it is overwritten via "\r" once the call ends.
    sys.stdout.write(f" {label:<43} {provider:<12} ... ")
    sys.stdout.flush()
    t0 = time.perf_counter()
    try:
        chunks = list(stream_llm(provider, model_id, context_str, Q2, history=MOCK_HISTORY))
    except Exception as exc:
        # Deliberately broad: any provider error becomes a FAIL row so the
        # remaining models are still probed.
        elapsed = time.perf_counter() - t0
        status = FAIL
        detail = str(exc)[:90]
    else:
        elapsed = time.perf_counter() - t0
        answer = "".join(chunks).replace("\n", " ").strip()
        if answer:
            status = PASS
            detail = answer[:90] + ("…" if len(answer) > 90 else "")
        else:
            # A stream that yields no text is not a successful answer.
            status = FAIL
            detail = "(empty response)"
    print(f"\r {label:<43} {provider:<12} {status} {elapsed:>8.2f}s {detail}")

print()