finnie / tests /test_workflow.py
Vishnu Rama
Initial deployment
2701365
"""
tests/test_workflow.py
Integration tests for the Finnie LangGraph ReAct workflow.
Covers:
1. Return shape — invoke() always returns the right keys/types
2. Message accumulation — history grows turn by turn
3. Agent routing — each query type reaches the right tool
4. Out-of-scope guardrail — insurance/traffic/legal get the refusal message
5. Multi-turn memory — risk profile, savings, and goal persist across turns
6. Thread isolation — separate thread_ids never share state
7. Disclaimer — every substantive answer carries the educational disclaimer
8. Multi-tool in one turn — LLM can call two tools in a single reasoning step
Run:
uv run pytest tests/test_workflow.py -v
"""
import uuid
import pytest
from src.workflow.graph import invoke
def _tid() -> str:
    """Return a fresh random UUID4 string to use as a per-test thread ID.

    Giving every test its own thread ID keeps conversation state from one
    test out of another.
    """
    fresh_id = uuid.uuid4()
    return str(fresh_id)
# ── 1. Return shape ───────────────────────────────────────────────────────────
def test_invoke_returns_required_keys():
    """invoke() returns a container exposing "answer" and "messages"."""
    response = invoke("What is an index fund?", thread_id=_tid())
    for required_key in ("answer", "messages"):
        assert required_key in response
def test_answer_is_non_empty_string():
    """The "answer" field is a string with non-whitespace content."""
    response = invoke("What is compound interest?", thread_id=_tid())
    answer = response["answer"]
    assert isinstance(answer, str)
    # A stripped string is truthy exactly when its length is > 0.
    assert answer.strip()
def test_messages_is_non_empty_list():
    """The "messages" field is a list holding at least one entry."""
    response = invoke("What is a Roth IRA?", thread_id=_tid())
    history = response["messages"]
    assert isinstance(history, list)
    # A list is truthy exactly when it has at least one element.
    assert history
# ── 2. Message accumulation ───────────────────────────────────────────────────
def test_messages_grow_with_each_turn():
    """A second turn on the same thread returns a longer message history."""
    conversation = _tid()
    first_turn = invoke("What is dollar cost averaging?", thread_id=conversation)
    second_turn = invoke("Can you give me a simple example?", thread_id=conversation)
    # Lengths are measured only after both turns have completed.
    first_count = len(first_turn["messages"])
    second_count = len(second_turn["messages"])
    assert second_count > first_count
def test_three_turns_accumulate_messages():
    """Message history length strictly increases over three turns on one thread."""
    conversation = _tid()
    prompts = (
        "What is an ETF?",
        "How is it different from a stock?",
        "Which is better for a beginner?",
    )
    # Run every turn first, then measure, so all lengths are read at the end.
    turns = [invoke(prompt, thread_id=conversation) for prompt in prompts]
    first, second, third = (len(turn["messages"]) for turn in turns)
    assert third > second > first
# ── 3. Agent routing ──────────────────────────────────────────────────────────
def test_routes_to_goal_agent():
    """Retirement goal query → answer contains a goal-planning keyword."""
    prompt = "I want $2 million in 20 years. I have $50,000 saved."
    keywords = ("monthly", "savings", "contribute", "invest", "goal")
    reply = invoke(prompt, thread_id=_tid())["answer"].lower()
    assert any(keyword in reply for keyword in keywords)
def test_routes_to_tax_agent_capital_gains():
    """Capital gains query → answer contains a tax-related keyword."""
    prompt = "I sold AAPL after 8 months with a $5,000 gain. I'm in the 22% bracket."
    keywords = ("tax", "short-term", "short term", "gain", "bracket")
    reply = invoke(prompt, thread_id=_tid())["answer"].lower()
    assert any(keyword in reply for keyword in keywords)
def test_routes_to_tax_agent_account_limits():
    """401k query → answer contains a contribution-limit keyword."""
    prompt = "What is the 401k contribution limit for 2024?"
    keywords = ("401k", "limit", "contribute", "23,000", "pre-tax")
    reply = invoke(prompt, thread_id=_tid())["answer"].lower()
    assert any(keyword in reply for keyword in keywords)
def test_routes_to_news_agent():
    """News query → answer names the queried ticker or its company."""
    response = invoke("What's the latest news on NVDA?", thread_id=_tid())
    # Upper-cased so the match is case-insensitive.
    reply = response["answer"].upper()
    assert any(name in reply for name in ("NVDA", "NVIDIA"))
def test_routes_to_market_agent():
    """Stock price query → answer contains a market-data keyword."""
    keywords = ("apple", "aapl", "stock", "price", "market", "share")
    response = invoke("How is AAPL stock doing today?", thread_id=_tid())
    reply = response["answer"].lower()
    assert any(keyword in reply for keyword in keywords)
def test_routes_to_portfolio_agent():
    """Portfolio holdings query → answer contains a portfolio-analysis keyword."""
    prompt = "I have 10 AAPL shares and 5 MSFT shares. Analyze my portfolio."
    keywords = ("portfolio", "diversif", "allocation", "aapl", "msft", "sector")
    reply = invoke(prompt, thread_id=_tid())["answer"].lower()
    assert any(keyword in reply for keyword in keywords)
def test_routes_to_qa_agent():
    """General finance education query → answer contains an educational keyword."""
    prompt = "What is the difference between a stock and a bond?"
    keywords = ("stock", "bond", "equity", "debt", "return", "risk")
    reply = invoke(prompt, thread_id=_tid())["answer"].lower()
    assert any(keyword in reply for keyword in keywords)
# ── 4. Out-of-scope guardrail ─────────────────────────────────────────────────
def test_guardrail_blocks_car_insurance():
    """A car-insurance question gets an answer containing a refusal phrase."""
    refusal_markers = (
        "not able to help",
        "i'm finnie",
        "financial education",
        "can help you with",
        "out of scope",
    )
    response = invoke("I need car insurance. What's the best policy?", thread_id=_tid())
    reply = response["answer"].lower()
    assert any(marker in reply for marker in refusal_markers)
def test_guardrail_blocks_business_start():
    """A business-loan question gets an answer containing a refusal phrase."""
    refusal_markers = (
        "not able to help",
        "financial education",
        "can help you with",
    )
    prompt = "I want to start a business. How do I get a business loan?"
    reply = invoke(prompt, thread_id=_tid())["answer"].lower()
    assert any(marker in reply for marker in refusal_markers)
def test_guardrail_blocks_legal_advice():
    """A legal-advice request gets an answer containing a refusal phrase."""
    refusal_markers = (
        "not able to help",
        "financial education",
        "can help you with",
    )
    response = invoke("I need legal advice about a contract.", thread_id=_tid())
    reply = response["answer"].lower()
    assert any(marker in reply for marker in refusal_markers)
def test_guardrail_blocks_medical_query():
    """A medication question gets an answer containing a refusal phrase."""
    refusal_markers = (
        "not able to help",
        "financial education",
        "can help you with",
    )
    prompt = "What medication should I take for high blood pressure?"
    reply = invoke(prompt, thread_id=_tid())["answer"].lower()
    assert any(marker in reply for marker in refusal_markers)
def test_guardrail_in_scope_after_out_of_scope():
    """An in-scope question still gets a relevant answer after an out-of-scope one."""
    conversation = _tid()
    keywords = ("roth", "ira", "tax", "retire", "contribute")
    # First turn is the out-of-scope query; its result is deliberately ignored.
    invoke("What is the best car insurance?", thread_id=conversation)
    follow_up = invoke("What is a Roth IRA?", thread_id=conversation)
    reply = follow_up["answer"].lower()
    assert any(keyword in reply for keyword in keywords)
# ── 5. Multi-turn memory ──────────────────────────────────────────────────────
def test_risk_profile_persists_to_goal_planning():
    """Risk profile set in turn 1 should be visible to goal planner in turn 2.

    Turn 1 states an aggressive risk tolerance; turn 2 asks for a retirement
    plan on the same thread. The test passes when the turn-2 answer contains
    at least one of the expected keywords.
    """
    thread = _tid()
    # The prompt uses a real em dash; the previous text carried a mis-decoded
    # byte sequence in its place, which would be sent to the model verbatim.
    invoke("I'm very aggressive with risk — I can handle high volatility.", thread_id=thread)
    r2 = invoke("I want to retire with $2 million in 15 years.", thread_id=thread)
    answer = r2["answer"].lower()
    assert any(w in answer for w in [
        "aggressive", "growth", "10%", "15 year", "monthly", "invest", "higher return",
    ])
def test_savings_update_overrides_earlier_value():
    """
    Three turns on one thread:
      1. state the goal and initial savings,
      2. correct the savings figure upward,
      3. ask for a recalculation.
    The turn-3 answer must contain at least one savings-related keyword.
    """
    conversation = _tid()
    keywords = ("monthly", "contribute", "200", "savings")
    setup_prompts = (
        "I want $1 million for retirement in 25 years. I have $50,000 saved.",
        "I now have $200,000 saved, not $50,000.",
    )
    for prompt in setup_prompts:
        invoke(prompt, thread_id=conversation)
    recalculation = invoke(
        "Can you recalculate how much I need to save monthly?",
        thread_id=conversation,
    )
    reply = recalculation["answer"].lower()
    assert any(keyword in reply for keyword in keywords)
def test_goal_amount_persists_to_follow_up():
    """A follow-up question on the same thread is answered with goal-related terms."""
    conversation = _tid()
    keywords = ("monthly", "contribute", "500", "invest", "month")
    invoke("I want to save $500,000 for a house in 10 years.", thread_id=conversation)
    follow_up = invoke(
        "How much do I need to set aside each month if I invest?",
        thread_id=conversation,
    )
    reply = follow_up["answer"].lower()
    assert any(keyword in reply for keyword in keywords)
def test_multi_turn_qa_stays_coherent():
    """Three educational turns on one thread each yield a non-empty string answer."""
    conversation = _tid()
    questions = (
        "What is a Roth IRA?",
        "How is it different from a Traditional IRA?",
        "Which one is better if I expect to be in a higher tax bracket later?",
    )
    # All three turns run before any assertion is evaluated.
    turns = [invoke(question, thread_id=conversation) for question in questions]
    for turn in turns:
        assert isinstance(turn["answer"], str)
        assert turn["answer"].strip()
# ── 6. Thread isolation ───────────────────────────────────────────────────────
def test_separate_threads_do_not_share_state():
    """
    Thread A sets an aggressive risk profile and goal.
    Thread B starts fresh — it must not inherit A's profile.

    Besides checking that thread B gets a non-empty string answer, this
    verifies that a distinctive fragment of thread A's prompt appears neither
    in thread B's answer nor in thread B's returned message history.
    """
    thread_a = _tid()
    thread_b = _tid()
    # Distinctive fragment of thread A's prompt; an unrelated educational
    # answer has no reason to contain it.
    leak_marker = "$5M in 10 years"
    invoke(
        "I'm super aggressive. I want $5M in 10 years. I have $500K saved.",
        thread_id=thread_a,
    )
    r_b = invoke("What is dollar cost averaging?", thread_id=thread_b)
    assert isinstance(r_b["answer"], str)
    assert len(r_b["answer"].strip()) > 0
    assert leak_marker not in r_b["answer"]
    # NOTE(review): the element type of "messages" is not visible from this
    # file, so each entry is stringified before searching — confirm that
    # str(message) exposes the message text.
    assert all(leak_marker not in str(message) for message in r_b["messages"])
def test_two_concurrent_sessions_independent():
    """Two threads with different goals each return a string, and the answers differ."""
    conservative_thread = _tid()
    aggressive_thread = _tid()
    conservative = invoke(
        "I want $100,000 in 3 years. I'm conservative.",
        thread_id=conservative_thread,
    )
    aggressive = invoke(
        "I want $2,000,000 in 30 years. I'm aggressive.",
        thread_id=aggressive_thread,
    )
    for response in (conservative, aggressive):
        assert isinstance(response["answer"], str)
    assert conservative["answer"] != aggressive["answer"]
# ── 7. Disclaimer always present ──────────────────────────────────────────────
# Lower-case fragments; an answer counts as carrying a disclaimer when it
# contains at least one of them. Kept as a list because a test in this module
# extends it with list concatenation.
_DISCLAIMER_PHRASES = [
    "not financial advice",
    "educational purposes",
    "consult",
    "disclaimer",
    "educational",
]
def test_disclaimer_in_goal_answer():
    """A goal-planning answer contains at least one disclaimer phrase."""
    response = invoke("I want $200,000 in 8 years.", thread_id=_tid())
    reply = response["answer"].lower()
    assert any(phrase in reply for phrase in _DISCLAIMER_PHRASES)
def test_disclaimer_in_tax_answer():
    """A tax answer contains a disclaimer phrase or mentions a tax professional."""
    prompt = "I sold TSLA after 2 years with a $15,000 gain. 24% bracket."
    # Tax answers may point to a tax professional instead of a generic disclaimer.
    accepted_phrases = [*_DISCLAIMER_PHRASES, "tax professional"]
    reply = invoke(prompt, thread_id=_tid())["answer"].lower()
    assert any(phrase in reply for phrase in accepted_phrases)
def test_disclaimer_in_portfolio_answer():
    """A portfolio-analysis answer contains at least one disclaimer phrase."""
    # The prompt uses a real em dash; the previous text carried a mis-decoded
    # byte sequence in its place, which would be sent to the model verbatim.
    result = invoke("I have 20 AAPL and 10 MSFT — is my portfolio healthy?", thread_id=_tid())
    assert any(p in result["answer"].lower() for p in _DISCLAIMER_PHRASES)
# ── 8. Multi-tool in one turn ─────────────────────────────────────────────────
def test_tax_and_news_in_one_turn():
    """
    A single query asks about 401k limits AND AAPL news.
    The answer must contain at least one keyword for each topic.
    """
    prompt = "What is the 401k contribution limit and what's the latest news on AAPL?"
    tax_keywords = ("401k", "limit", "contribute")
    news_keywords = ("apple", "aapl", "news")
    reply = invoke(prompt, thread_id=_tid())["answer"].lower()
    # Checked in the same order as before: tax topic first, then news.
    assert any(keyword in reply for keyword in tax_keywords)
    assert any(keyword in reply for keyword in news_keywords)
def test_goal_and_tax_in_one_turn():
    """
    A retirement goal and a Roth IRA tax question arrive in one turn.
    The answer must contain at least one keyword for each topic.
    """
    prompt = "I want $2M in 20 years and I also want to know how a Roth IRA helps with taxes."
    tax_keywords = ("roth", "ira", "tax")
    goal_keywords = ("monthly", "goal", "invest", "retire", "million")
    reply = invoke(prompt, thread_id=_tid())["answer"].lower()
    # Checked in the same order as before: tax topic first, then goal.
    assert any(keyword in reply for keyword in tax_keywords)
    assert any(keyword in reply for keyword in goal_keywords)