File size: 3,564 Bytes
bedfb0c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
#!/usr/bin/env python3
"""
Regression test: verify EOS token fix improves completeness without breaking anything
"""
import httpx
import json
import time

# Root URL of the service under test; the requests in this script are sent to
# its /v1/chat/completions endpoint.
BASE_URL = "https://jeanbaptdzd-open-finance-llm-8b.hf.space"

# Banner: the title framed by an 80-character rule above and below.
print("=" * 80, "REGRESSION & IMPROVEMENT TEST", "=" * 80, sep="\n")

# Test 1: Basic functionality still works
# Sends one small chat-completion request and reports whether the service
# answered without an error.
print("\n[1] Basic functionality check")
try:
    response = httpx.post(
        f"{BASE_URL}/v1/chat/completions",
        json={
            "model": "DragonLLM/qwen3-8b-fin-v1.0",
            "messages": [{"role": "user", "content": "What is 2+2?"}],
            "max_tokens": 100,
            "temperature": 0.3,
        },
        timeout=30.0,
    )

    data = response.json()
    if "error" in data:
        # The error payload is expected to look like {"message": ...}; fall
        # back to the raw value so an unexpected shape is still reported
        # instead of raising while formatting the message.
        error = data["error"]
        message = error.get("message", error) if isinstance(error, dict) else error
        print(f"❌ Error: {message}")
    elif response.status_code >= 400:
        # A failing HTTP status without an "error" key (e.g. a gateway or
        # framework error body) must not be counted as a success.
        print(f"❌ Error: HTTP {response.status_code}")
    else:
        print("✅ Basic request works")
except Exception as e:
    # Boundary of a diagnostic script: report any failure (timeout,
    # connection error, non-JSON body) and continue with the next test.
    print(f"❌ Exception: {e}")

# Pause before the next test.
time.sleep(3)

# Test 2: French answer with reasonable token limit
# Asks a short French question with a 500-token budget and reports how the
# generation ended ("stop" vs "length") together with the extracted answer.
print("\n[2] French answer (500 tokens)")
try:
    response = httpx.post(
        f"{BASE_URL}/v1/chat/completions",
        json={
            "model": "DragonLLM/qwen3-8b-fin-v1.0",
            # The prompt previously contained mis-encoded text ("RΓ©ponse");
            # it is restored to proper UTF-8 so the model receives real French.
            "messages": [{"role": "user", "content": "Qu'est-ce qu'une obligation? Réponse courte."}],
            "max_tokens": 500,
            "temperature": 0.3,
        },
        timeout=45.0,
    )

    data = response.json()
    if "error" in data:
        # Tolerate an error payload that is not the expected {"message": ...}.
        error = data["error"]
        message = error.get("message", error) if isinstance(error, dict) else error
        print(f"❌ Error: {str(message)[:100]}")
    elif response.status_code >= 400:
        # A failing HTTP status without an "error" key is still a failure.
        print(f"❌ Error: HTTP {response.status_code}")
    else:
        choice = data["choices"][0]
        # Treat a null content as an empty answer rather than raising.
        content = choice["message"]["content"] or ""
        finish = choice["finish_reason"]
        tokens = data.get("usage", {}).get("completion_tokens", 0)

        # Keep everything after the first "</think>" marker. partition()
        # preserves the full remainder even if the marker appears again,
        # whereas split(...)[1] would cut the answer at a second marker.
        _, marker, remainder = content.partition("</think>")
        answer = remainder.strip() if marker else content

        print(f"Tokens: {tokens}/500")
        print(f"Finish: {finish}")
        print(f"Answer: {answer}")
        print(f"Ends properly: {answer.rstrip().endswith(('.', '!', '?'))}")

        if finish == "stop":
            print("✅ IMPROVEMENT: Stopped naturally at EOS (was hitting length before)")
        elif finish == "length":
            print("⚠️  Still hitting length limit")
        else:
            # Make any other finish reason visible instead of silently
            # printing no conclusion.
            print(f"⚠️  Unexpected finish reason: {finish}")

except Exception as e:
    # Boundary of a diagnostic script: report the failure and move on.
    print(f"❌ Exception: {e}")

# Pause before the next test.
time.sleep(3)

# Test 3: Sequential requests (no OOM regression)
# Sends several requests one after another and counts how many succeed; an
# error message mentioning "out of memory" is reported specifically.
print("\n[3] Sequential requests (memory check)")
total_requests = 3  # single source of truth for the loop bound and the verdict
success = 0
for i in range(1, total_requests + 1):
    try:
        response = httpx.post(
            f"{BASE_URL}/v1/chat/completions",
            json={
                "model": "DragonLLM/qwen3-8b-fin-v1.0",
                "messages": [{"role": "user", "content": f"Calculate {i}+{i}"}],
                "max_tokens": 200,
                "temperature": 0.3,
            },
            timeout=30.0,
        )

        data = response.json()
        if "error" in data:
            # Tolerate an error payload that is not the expected
            # {"message": ...} dict by falling back to its string form.
            error = data["error"]
            message = error.get("message", "") if isinstance(error, dict) else error
            if "out of memory" in str(message).lower():
                print(f"  [{i}] ❌ OOM!")
            else:
                print(f"  [{i}] ❌ Error")
        elif response.status_code >= 400:
            # A failing HTTP status without an "error" key is not a success.
            print(f"  [{i}] ❌ Error")
        else:
            success += 1
            print(f"  [{i}] ✅")
    except Exception as e:
        # Catch Exception rather than using a bare except so Ctrl-C
        # (KeyboardInterrupt) and SystemExit still stop the script, and name
        # the failure so timeouts can be told apart from other errors.
        print(f"  [{i}] ❌ Timeout/Exception: {type(e).__name__}")
    # Pace the requests after every attempt, including failed ones.
    time.sleep(2)

if success == total_requests:
    print("✅ NO REGRESSION: Memory management still working")
else:
    print(f"❌ REGRESSION: Only {success}/{total_requests} succeeded")

# Closing summary: a static legend explaining how to read the results printed
# by the tests above (it does not inspect those results itself).
# The arrow and check-mark characters were mis-encoded ("β†’", "βœ…") and are
# restored to the intended "→" and "✅".
print("\n" + "=" * 80)
print("VERDICT")
print("=" * 80)
print("If Test 2 shows finish='stop' → EOS fix is working ✅")
print("If Test 2 shows finish='length' → Need more investigation ⚠️")
print("If Test 3 passes → No memory regression ✅")