#!/usr/bin/env python3
"""
Test that the EOS token fix is working properly
Verify: no regressions, better completion, proper finish_reason
"""
import sys

import httpx

BASE_URL = "https://jeanbaptdzd-open-finance-llm-8b.hf.space"

def check_space_status():
    """Check if Space is running"""
    try:
        response = httpx.get(f"{BASE_URL}/", timeout=10.0)
        data = response.json()
        return data.get("status") == "ok" and data.get("backend") == "Transformers"
    except (httpx.HTTPError, ValueError):
        return False

print("="*80)
print("TESTING EOS TOKEN FIX")
print("="*80)

if not check_space_status():
    print("❌ Space not ready. Please wait for rebuild.")
    sys.exit(1)

print("✅ Space is ready\n")

# Test 1: Check finish_reason is accurate
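# In the OpenAI-compatible schema, finish_reason is "stop" when the model
# emitted its EOS token and "length" when generation hit max_tokens.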
print("[TEST 1] Verify finish_reason accuracy")
print("-" * 80)

response = httpx.post(
    f"{BASE_URL}/v1/chat/completions",
    json={
        "model": "DragonLLM/qwen3-8b-fin-v1.0",
        "messages": [{"role": "user", "content": "What is 2+2? Answer in 5 words."}],
        "max_tokens": 50,
        "temperature": 0.3
    },
    timeout=60.0
)

data = response.json()
finish = data["choices"][0]["finish_reason"]
content = data["choices"][0]["message"]["content"]
tokens = data.get("usage", {}).get("completion_tokens", 0)

print(f"Max tokens: 50")
print(f"Generated: {tokens} tokens")
print(f"Finish reason: {finish}")
print(f"Response: {content[:150]}...")

if finish == "stop" and tokens < 50:
    print("✅ PASS: Stopped naturally with EOS token (not length limit)")
elif finish == "length" and tokens >= 50:
    print("✅ PASS: Correctly detected length limit")
else:
    print(f"⚠️  Unexpected: finish={finish}, tokens={tokens}")

# Test 2: Check complete French answer
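# (The reply should be in French and read as a complete sentence when
# finish_reason is "stop".)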
print("\n[TEST 2] Complete French answer")
print("-" * 80)

response = httpx.post(
    f"{BASE_URL}/v1/chat/completions",
    json={
        "model": "DragonLLM/qwen3-8b-fin-v1.0",
        "messages": [{"role": "user", "content": "Qu'est-ce qu'une obligation? Soyez concis."}],
        "max_tokens": 300,
        "temperature": 0.3
    },
    timeout=60.0
)

data = response.json()
content = data["choices"][0]["message"]["content"]
finish = data["choices"][0]["finish_reason"]
tokens = data.get("usage", {}).get("completion_tokens", 0)

# Extract answer
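# (Qwen3-style models may wrap chain-of-thought in <think>...</think> tags;
# only the text after the closing tag is the user-facing answer.)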
if "</think>" in content:
    answer = content.split("</think>", 1)[1].strip()
else:
    answer = content

print(f"Generated: {tokens} tokens")
print(f"Finish reason: {finish}")
print(f"\nFull answer:\n{answer}\n")

# Heuristic completeness checks: a truncated reply usually ends mid-sentence,
# and a French answer should contain accented characters.
ends_properly = answer.rstrip().endswith((".", "!", "?", ")", "]"))
has_french = any(c in answer for c in ["é", "è", "à", "ç"])

print(f"Ends properly: {ends_properly}")
print(f"Is French: {has_french}")
print(f"Finish: {finish}")

if ends_properly and finish == "stop" and has_french:
    print("✅ PASS: Complete French answer with proper EOS")
else:
    print(f"⚠️  Check: ends={ends_properly}, finish={finish}, french={has_french}")

# Test 3: Long answer completeness
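# No explicit max_tokens: exercises the server-side default to check that
# long answers still terminate naturally at EOS.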
print("\n[TEST 3] Long answer completeness")
print("-" * 80)

response = httpx.post(
    f"{BASE_URL}/v1/chat/completions",
    json={
        "model": "DragonLLM/qwen3-8b-fin-v1.0",
        "messages": [{"role": "user", "content": "Expliquez en détail le nantissement de compte-titres."}],
        "temperature": 0.3
        # Use default max_tokens (1500)
    },
    timeout=90.0
)

data = response.json()
content = data["choices"][0]["message"]["content"]
finish = data["choices"][0]["finish_reason"]
tokens = data.get("usage", {}).get("completion_tokens", 0)

if "</think>" in content:
    answer = content.split("</think>", 1)[1].strip()
else:
    answer = content

print(f"Generated: {tokens} tokens (default max: 1500)")
print(f"Finish reason: {finish}")
print(f"Answer length: {len(answer)} chars")
print(f"Last 150 chars: ...{answer[-150:]}")

if finish == "stop":
    print("✅ PASS: Model stopped naturally at EOS (complete answer)")
elif finish == "length":
    print(f"⚠️  Hit token limit - may need higher max_tokens for complex questions")
else:
    print(f"❌ Unexpected finish_reason: {finish}")

print("\n" + "="*80)
print("SUMMARY")
print("="*80)
print("If all tests show 'stop' finish_reason and proper sentence endings,")
print("the EOS token fix is working correctly!")