open-finance-llm-8b / test_advanced_finance.py
jeanbaptdzd's picture
Fix generation: increase tokens for complete answers, add EOS handling
78f67d6
raw
history blame
8.73 kB
#!/usr/bin/env python3
"""
Advanced finance tests including streaming and complex scenarios.
"""
import httpx
import json
import time
BASE_URL = "https://jeanbaptdzd-open-finance-llm-8b.hf.space"
def test_streaming_response():
"""Test streaming chat completion."""
print("\n" + "="*80)
print("TESTING STREAMING RESPONSE")
print("="*80)
payload = {
"model": "DragonLLM/qwen3-8b-fin-v1.0",
"messages": [
{
"role": "user",
"content": "Explain the Black-Scholes option pricing model in simple terms."
}
],
"stream": True,
"max_tokens": 150,
"temperature": 0.4
}
print(f"\nQuestion: {payload['messages'][0]['content']}")
print(f"\nStreaming response:")
print("─" * 80)
start_time = time.time()
chunks_received = 0
full_response = ""
try:
with httpx.stream(
"POST",
f"{BASE_URL}/v1/chat/completions",
json=payload,
timeout=60.0
) as response:
for line in response.iter_lines():
if line.startswith("data: "):
data_str = line[6:] # Remove "data: " prefix
if data_str == "[DONE]":
break
try:
chunk_data = json.loads(data_str)
delta = chunk_data.get("choices", [{}])[0].get("delta", {})
content = delta.get("content", "")
if content:
print(content, end="", flush=True)
full_response += content
chunks_received += 1
except json.JSONDecodeError:
pass
elapsed = time.time() - start_time
print("\n" + "─" * 80)
print(f"\nβœ… Streaming test successful!")
print(f" ⏱️ Time: {elapsed:.2f}s")
print(f" πŸ“¦ Chunks received: {chunks_received}")
print(f" πŸ“ Total characters: {len(full_response)}")
return True
except Exception as e:
print(f"\n❌ Error: {e}")
return False
def test_complex_finance_scenario():
"""Test complex multi-step finance reasoning."""
print("\n" + "="*80)
print("TESTING COMPLEX FINANCE SCENARIO")
print("="*80)
question = """A company has the following financials:
- Revenue: $10 million
- Cost of Goods Sold: $4 million
- Operating Expenses: $3 million
- Interest Expense: $500,000
- Tax Rate: 25%
Calculate the company's:
1. Gross Profit Margin
2. Operating Income
3. Net Income
4. EBITDA (assuming $200k depreciation)"""
payload = {
"model": "DragonLLM/qwen3-8b-fin-v1.0",
"messages": [
{"role": "user", "content": question}
],
"temperature": 0.1,
"max_tokens": 300
}
print(f"\nQuestion:\n{question}")
print("\n" + "─" * 80)
start_time = time.time()
try:
response = httpx.post(
f"{BASE_URL}/v1/chat/completions",
json=payload,
timeout=60.0
)
elapsed = time.time() - start_time
if response.status_code == 200:
data = response.json()
answer = data['choices'][0]['message']['content']
usage = data.get('usage', {})
print(f"\nπŸ’¬ Answer:\n{answer}")
print("\n" + "─" * 80)
print(f"\nβœ… Complex scenario test successful!")
print(f" ⏱️ Time: {elapsed:.2f}s")
print(f" πŸ“ Tokens: {usage.get('total_tokens', 'N/A')}")
# Check for key calculations in response
calculations = ["gross profit", "operating income", "net income", "ebitda"]
found = [calc for calc in calculations if calc in answer.lower()]
print(f" 🎯 Calculations mentioned: {len(found)}/{len(calculations)}")
return True
else:
print(f"❌ Error: HTTP {response.status_code}")
return False
except Exception as e:
print(f"❌ Error: {e}")
return False
def test_financial_advice():
"""Test investment advice generation."""
print("\n" + "="*80)
print("TESTING FINANCIAL ADVICE")
print("="*80)
question = """I'm 30 years old with $50,000 to invest. My risk tolerance is moderate,
and I'm investing for retirement in 35 years. What asset allocation would you recommend?"""
payload = {
"model": "DragonLLM/qwen3-8b-fin-v1.0",
"messages": [
{"role": "user", "content": question}
],
"temperature": 0.5,
"max_tokens": 250
}
print(f"\nQuestion: {question}")
print("\n" + "─" * 80)
try:
response = httpx.post(
f"{BASE_URL}/v1/chat/completions",
json=payload,
timeout=60.0
)
if response.status_code == 200:
data = response.json()
answer = data['choices'][0]['message']['content']
print(f"\nπŸ’¬ Answer:\n{answer}")
print("\n" + "─" * 80)
print(f"\nβœ… Financial advice test successful!")
# Check for relevant concepts
concepts = ["stocks", "bonds", "diversification", "allocation", "risk"]
found = [c for c in concepts if c in answer.lower()]
print(f" 🎯 Relevant concepts: {', '.join(found)}")
return True
else:
print(f"❌ Error: HTTP {response.status_code}")
return False
except Exception as e:
print(f"❌ Error: {e}")
return False
def test_market_interpretation():
"""Test market data interpretation."""
print("\n" + "="*80)
print("TESTING MARKET DATA INTERPRETATION")
print("="*80)
question = """A stock has the following characteristics:
- Current Price: $100
- 52-week High: $120
- 52-week Low: $75
- P/E Ratio: 25
- Beta: 1.5
- Dividend Yield: 2%
What does this data tell you about the stock's risk and valuation?"""
payload = {
"model": "DragonLLM/qwen3-8b-fin-v1.0",
"messages": [
{"role": "user", "content": question}
],
"temperature": 0.3,
"max_tokens": 250
}
print(f"\nQuestion:\n{question}")
print("\n" + "─" * 80)
try:
response = httpx.post(
f"{BASE_URL}/v1/chat/completions",
json=payload,
timeout=60.0
)
if response.status_code == 200:
data = response.json()
answer = data['choices'][0]['message']['content']
print(f"\nπŸ’¬ Answer:\n{answer}")
print("\n" + "─" * 80)
print(f"\nβœ… Market interpretation test successful!")
# Check for key concepts
concepts = ["beta", "p/e", "volatility", "risk", "valuation"]
found = [c for c in concepts if c in answer.lower()]
print(f" 🎯 Key concepts addressed: {', '.join(found)}")
return True
else:
print(f"❌ Error: HTTP {response.status_code}")
return False
except Exception as e:
print(f"❌ Error: {e}")
return False
def main():
"""Run all advanced tests."""
print("="*80)
print("ADVANCED FINANCE LLM TESTING")
print("="*80)
print(f"Target: {BASE_URL}")
results = []
# Test 1: Streaming
results.append(("Streaming Response", test_streaming_response()))
time.sleep(2)
# Test 2: Complex scenario
results.append(("Complex Finance Calculations", test_complex_finance_scenario()))
time.sleep(2)
# Test 3: Financial advice
results.append(("Investment Advice", test_financial_advice()))
time.sleep(2)
# Test 4: Market interpretation
results.append(("Market Data Interpretation", test_market_interpretation()))
# Summary
print("\n" + "="*80)
print("ADVANCED TESTS SUMMARY")
print("="*80)
passed = sum(1 for _, success in results if success)
total = len(results)
print(f"\nβœ… Passed: {passed}/{total}")
for test_name, success in results:
status = "βœ…" if success else "❌"
print(f" {status} {test_name}")
print("\n" + "="*80)
if __name__ == "__main__":
main()