File size: 3,564 Bytes
bedfb0c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 |
#!/usr/bin/env python3
"""
Regression test: verify EOS token fix improves completeness without breaking anything
"""
import httpx
import json
import time
BASE_URL = "https://jeanbaptdzd-open-finance-llm-8b.hf.space"
print("="*80)
print("REGRESSION & IMPROVEMENT TEST")
print("="*80)
# Test 1: Basic functionality still works.
# A trivial prompt should come back without an "error" key in the JSON body.
print("\n[1] Basic functionality check")
try:
    response = httpx.post(
        f"{BASE_URL}/v1/chat/completions",
        json={
            "model": "DragonLLM/qwen3-8b-fin-v1.0",
            "messages": [{"role": "user", "content": "What is 2+2?"}],
            "max_tokens": 100,
            "temperature": 0.3,
        },
        timeout=30.0,
    )
    data = response.json()
    if "error" not in data:
        print("✅ Basic request works")
    else:
        print(f"❌ Error: {data['error']['message']}")
except Exception as e:
    # Smoke test: report the failure and keep going so later tests still run.
    print(f"❌ Exception: {e}")
time.sleep(3)  # pause between tests to avoid hammering the shared endpoint
# Test 2: French answer with a reasonable token budget.
# The EOS fix should let the model stop naturally (finish_reason == "stop")
# instead of running into the 500-token limit (finish_reason == "length").
print("\n[2] French answer (500 tokens)")
try:
    response = httpx.post(
        f"{BASE_URL}/v1/chat/completions",
        json={
            "model": "DragonLLM/qwen3-8b-fin-v1.0",
            "messages": [{"role": "user", "content": "Qu'est-ce qu'une obligation? Réponse courte."}],
            "max_tokens": 500,
            "temperature": 0.3,
        },
        timeout=45.0,
    )
    data = response.json()
    if "error" in data:
        # Truncate server error text so one bad response doesn't flood output.
        print(f"❌ Error: {data['error']['message'][:100]}")
    else:
        content = data["choices"][0]["message"]["content"]
        finish = data["choices"][0]["finish_reason"]
        tokens = data.get("usage", {}).get("completion_tokens", 0)
        # Qwen3 emits chain-of-thought inside <think>...</think>; keep only
        # the visible answer that follows the closing tag, if present.
        answer = content.split("</think>")[1].strip() if "</think>" in content else content
        print(f"Tokens: {tokens}/500")
        print(f"Finish: {finish}")
        print(f"Answer: {answer}")
        print(f"Ends properly: {answer.rstrip().endswith(('.', '!', '?'))}")
        if finish == "stop":
            print("✅ IMPROVEMENT: Stopped naturally at EOS (was hitting length before)")
        elif finish == "length":
            print("⚠️ Still hitting length limit")
except Exception as e:
    print(f"❌ Exception: {e}")
time.sleep(3)  # pause between tests to avoid hammering the shared endpoint
# Test 3: Three sequential requests — guards against an out-of-memory
# regression on the server (each request must succeed back-to-back).
print("\n[3] Sequential requests (memory check)")
success = 0
for i in range(1, 4):
    try:
        response = httpx.post(
            f"{BASE_URL}/v1/chat/completions",
            json={
                "model": "DragonLLM/qwen3-8b-fin-v1.0",
                "messages": [{"role": "user", "content": f"Calculate {i}+{i}"}],
                "max_tokens": 200,
                "temperature": 0.3,
            },
            timeout=30.0,
        )
        data = response.json()
        if "error" not in data:
            success += 1
            print(f" [{i}] ✅")
        elif "out of memory" in data["error"]["message"].lower():
            print(f" [{i}] ❌ OOM!")
        else:
            print(f" [{i}] ❌ Error")
    except Exception:
        # Narrowed from a bare `except:` so Ctrl-C still interrupts the loop.
        print(f" [{i}] ❌ Timeout/Exception")
    finally:
        # Pace requests even after a failure (previously skipped on exception).
        time.sleep(2)
if success == 3:
    print("✅ NO REGRESSION: Memory management still working")
else:
    print(f"❌ REGRESSION: Only {success}/3 succeeded")
# Final summary: how to interpret the three tests above.
print("\n" + "="*80)
print("VERDICT")
print("="*80)
print("If Test 2 shows finish='stop' → EOS fix is working ✅")
print("If Test 2 shows finish='length' → Need more investigation ⚠️")
print("If Test 3 passes → No memory regression ✅")