Spaces:
Running
Running
"""
tests/test_workflow.py

Integration tests for the Finnie LangGraph ReAct workflow.

Covers:
  1. Return shape            — invoke() always returns the right keys/types
  2. Message accumulation    — history grows turn by turn
  3. Agent routing           — each query type reaches the right tool
  4. Out-of-scope guardrail  — insurance/business-loan/legal/medical queries get the refusal message
  5. Multi-turn memory       — risk profile, savings, and goal persist across turns
  6. Thread isolation        — separate thread_ids never share state
  7. Disclaimer              — every substantive answer carries the educational disclaimer
  8. Multi-tool in one turn  — LLM can call two tools in a single reasoning step

Run:
    uv run pytest tests/test_workflow.py -v
"""
| import uuid | |
| import pytest | |
| from src.workflow.graph import invoke | |
| def _tid() -> str: | |
| """Unique thread ID per test β prevents state bleed between tests.""" | |
| return str(uuid.uuid4()) | |
# ── 1. Return shape ──────────────────────────────────────────────────────────
def test_invoke_returns_required_keys():
    """The value returned by ``invoke`` contains both ``answer`` and ``messages``."""
    response = invoke("What is an index fund?", thread_id=_tid())
    for required_key in ("answer", "messages"):
        assert required_key in response
def test_answer_is_non_empty_string():
    """``answer`` is a ``str`` that still has content after stripping whitespace."""
    response = invoke("What is compound interest?", thread_id=_tid())
    answer = response["answer"]
    assert isinstance(answer, str)
    assert answer.strip() != ""
def test_messages_is_non_empty_list():
    """``messages`` is a ``list`` holding at least one entry."""
    history = invoke("What is a Roth IRA?", thread_id=_tid())["messages"]
    assert isinstance(history, list)
    assert len(history) >= 1
# ── 2. Message accumulation ──────────────────────────────────────────────────
def test_messages_grow_with_each_turn():
    """A second turn on the same thread returns a longer message history."""
    shared_thread = _tid()
    first_turn = invoke("What is dollar cost averaging?", thread_id=shared_thread)
    second_turn = invoke("Can you give me a simple example?", thread_id=shared_thread)
    # Lengths are read only after both turns have completed.
    assert len(first_turn["messages"]) < len(second_turn["messages"])
def test_three_turns_accumulate_messages():
    """Across three turns on one thread the message count strictly increases."""
    shared_thread = _tid()
    prompts = (
        "What is an ETF?",
        "How is it different from a stock?",
        "Which is better for a beginner?",
    )
    # Run every turn first, then measure, so lengths are read after all calls.
    turns = [invoke(prompt, thread_id=shared_thread) for prompt in prompts]
    first_len, second_len, third_len = (len(turn["messages"]) for turn in turns)
    assert third_len > second_len > first_len
# ── 3. Agent routing ─────────────────────────────────────────────────────────
def test_routes_to_goal_agent():
    """A retirement-goal prompt yields an answer containing a goal-planning keyword.

    Keyword heuristic: passes when the lower-cased answer contains at least
    one of the expected terms.
    """
    expected_terms = ("monthly", "savings", "contribute", "invest", "goal")
    reply = invoke(
        "I want $2 million in 20 years. I have $50,000 saved.",
        thread_id=_tid(),
    )["answer"].lower()
    assert any(term in reply for term in expected_terms)
def test_routes_to_tax_agent_capital_gains():
    """A capital-gains prompt yields an answer containing a tax-related keyword."""
    prompt = "I sold AAPL after 8 months with a $5,000 gain. I'm in the 22% bracket."
    expected_terms = ("tax", "short-term", "short term", "gain", "bracket")
    response = invoke(prompt, thread_id=_tid())
    reply = response["answer"].lower()
    assert any(term in reply for term in expected_terms)
def test_routes_to_tax_agent_account_limits():
    """A 401k-limit prompt yields an answer containing a contribution-limit keyword."""
    expected_terms = ("401k", "limit", "contribute", "23,000", "pre-tax")
    response = invoke(
        "What is the 401k contribution limit for 2024?",
        thread_id=_tid(),
    )
    reply = response["answer"].lower()
    assert any(term in reply for term in expected_terms)
def test_routes_to_news_agent():
    """A news prompt about NVDA yields an answer naming the ticker or the company."""
    response = invoke("What's the latest news on NVDA?", thread_id=_tid())
    # Upper-cased so the match is case-insensitive.
    reply = response["answer"].upper()
    assert any(name in reply for name in ("NVDA", "NVIDIA"))
def test_routes_to_market_agent():
    """A stock-performance prompt yields an answer containing a market keyword."""
    expected_terms = ("apple", "aapl", "stock", "price", "market", "share")
    reply = invoke("How is AAPL stock doing today?", thread_id=_tid())["answer"].lower()
    assert any(term in reply for term in expected_terms)
def test_routes_to_portfolio_agent():
    """A holdings prompt yields an answer containing a portfolio-analysis keyword."""
    prompt = "I have 10 AAPL shares and 5 MSFT shares. Analyze my portfolio."
    # "diversif" is a stem so it matches both "diversify" and "diversification".
    expected_terms = ("portfolio", "diversif", "allocation", "aapl", "msft", "sector")
    response = invoke(prompt, thread_id=_tid())
    reply = response["answer"].lower()
    assert any(term in reply for term in expected_terms)
def test_routes_to_qa_agent():
    """A general finance-education prompt yields an answer with an educational keyword."""
    expected_terms = ("stock", "bond", "equity", "debt", "return", "risk")
    response = invoke(
        "What is the difference between a stock and a bond?",
        thread_id=_tid(),
    )
    reply = response["answer"].lower()
    assert any(term in reply for term in expected_terms)
# ── 4. Out-of-scope guardrail ────────────────────────────────────────────────
def test_guardrail_blocks_car_insurance():
    """A car-insurance prompt is answered with one of the known refusal phrases."""
    refusal_markers = (
        "not able to help",
        "i'm finnie",
        "financial education",
        "can help you with",
        "out of scope",
    )
    response = invoke("I need car insurance. What's the best policy?", thread_id=_tid())
    reply = response["answer"].lower()
    assert any(marker in reply for marker in refusal_markers)
def test_guardrail_blocks_business_start():
    """A business-loan prompt is answered with one of the known refusal phrases."""
    refusal_markers = ("not able to help", "financial education", "can help you with")
    prompt = "I want to start a business. How do I get a business loan?"
    reply = invoke(prompt, thread_id=_tid())["answer"].lower()
    assert any(marker in reply for marker in refusal_markers)
def test_guardrail_blocks_legal_advice():
    """A legal-advice prompt is answered with one of the known refusal phrases."""
    refusal_markers = ("not able to help", "financial education", "can help you with")
    response = invoke("I need legal advice about a contract.", thread_id=_tid())
    reply = response["answer"].lower()
    assert any(marker in reply for marker in refusal_markers)
def test_guardrail_blocks_medical_query():
    """A medical prompt is answered with one of the known refusal phrases."""
    refusal_markers = ("not able to help", "financial education", "can help you with")
    prompt = "What medication should I take for high blood pressure?"
    response = invoke(prompt, thread_id=_tid())
    reply = response["answer"].lower()
    assert any(marker in reply for marker in refusal_markers)
def test_guardrail_in_scope_after_out_of_scope():
    """An in-scope question asked after an out-of-scope one is still answered.

    Both prompts share one thread; only the second answer is inspected.
    """
    shared_thread = _tid()
    expected_terms = ("roth", "ira", "tax", "retire", "contribute")
    # First turn is out of scope; its result is intentionally discarded.
    invoke("What is the best car insurance?", thread_id=shared_thread)
    follow_up = invoke("What is a Roth IRA?", thread_id=shared_thread)
    reply = follow_up["answer"].lower()
    assert any(term in reply for term in expected_terms)
# ── 5. Multi-turn memory ─────────────────────────────────────────────────────
def test_risk_profile_persists_to_goal_planning():
    """A risk profile stated in turn 1 should influence the goal answer in turn 2.

    Both turns share one thread. Keyword heuristic: the test passes when the
    lower-cased second answer contains at least one of the expected terms.
    """
    thread = _tid()
    # The prompt uses a real em dash; the previous literal carried a
    # mis-encoded character in its place.
    invoke("I'm very aggressive with risk — I can handle high volatility.", thread_id=thread)
    r2 = invoke("I want to retire with $2 million in 15 years.", thread_id=thread)
    answer = r2["answer"].lower()
    assert any(w in answer for w in [
        "aggressive", "growth", "10%", "15 year", "monthly", "invest", "higher return",
    ])
def test_savings_update_overrides_earlier_value():
    """A corrected savings figure should be used in a later recalculation.

    Three turns on one thread:
      1. state the goal and an initial savings amount,
      2. correct the savings amount upward,
      3. ask for a recalculation.
    Only the third answer is inspected, using a keyword heuristic.
    """
    shared_thread = _tid()
    setup_prompts = (
        "I want $1 million for retirement in 25 years. I have $50,000 saved.",
        "I now have $200,000 saved, not $50,000.",
    )
    for prompt in setup_prompts:
        invoke(prompt, thread_id=shared_thread)

    recalculated = invoke(
        "Can you recalculate how much I need to save monthly?",
        thread_id=shared_thread,
    )
    reply = recalculated["answer"].lower()
    expected_terms = ("monthly", "contribute", "200", "savings")
    assert any(term in reply for term in expected_terms)
def test_goal_amount_persists_to_follow_up():
    """A goal stated in turn 1 should still be in context for a follow-up in turn 2."""
    shared_thread = _tid()
    expected_terms = ("monthly", "contribute", "500", "invest", "month")
    invoke("I want to save $500,000 for a house in 10 years.", thread_id=shared_thread)
    follow_up = invoke(
        "How much do I need to set aside each month if I invest?",
        thread_id=shared_thread,
    )
    reply = follow_up["answer"].lower()
    assert any(term in reply for term in expected_terms)
def test_multi_turn_qa_stays_coherent():
    """Three educational turns on one thread each produce a non-empty string answer."""
    shared_thread = _tid()
    prompts = (
        "What is a Roth IRA?",
        "How is it different from a Traditional IRA?",
        "Which one is better if I expect to be in a higher tax bracket later?",
    )
    # All turns run before any assertion is evaluated.
    turns = [invoke(prompt, thread_id=shared_thread) for prompt in prompts]
    for turn in turns:
        assert isinstance(turn["answer"], str)
        assert turn["answer"].strip() != ""
# ── 6. Thread isolation ──────────────────────────────────────────────────────
def test_separate_threads_do_not_share_state():
    """Check that a fresh thread does not inherit another thread's conversation.

    Thread A states an aggressive profile using distinctive figures. Thread B
    then asks an unrelated question. Thread B's answer must be a non-empty
    string, and none of thread A's distinctive figures may appear anywhere in
    thread B's returned message history.
    """
    thread_a = _tid()
    thread_b = _tid()
    invoke(
        "I'm super aggressive. I want $5M in 10 years. I have $500K saved.",
        thread_id=thread_a,
    )
    r_b = invoke("What is dollar cost averaging?", thread_id=thread_b)

    assert isinstance(r_b["answer"], str)
    assert len(r_b["answer"].strip()) > 0

    # A non-empty answer would also be produced if state had leaked, so the
    # history itself is inspected. str() is used because the element type of
    # ``messages`` is not visible from this file; this assumes str(message)
    # includes the message text. TODO confirm against invoke().
    history_text = " ".join(str(message) for message in r_b["messages"])
    for thread_a_marker in ("$5M", "$500K"):
        assert thread_a_marker not in history_text
def test_two_concurrent_sessions_independent():
    """Two threads with different goals each get a string answer, and the answers differ.

    The two calls are issued one after the other on separate thread IDs.
    """
    conservative_thread = _tid()
    aggressive_thread = _tid()
    conservative = invoke(
        "I want $100,000 in 3 years. I'm conservative.",
        thread_id=conservative_thread,
    )
    aggressive = invoke(
        "I want $2,000,000 in 30 years. I'm aggressive.",
        thread_id=aggressive_thread,
    )
    for session in (conservative, aggressive):
        assert isinstance(session["answer"], str)
    assert conservative["answer"] != aggressive["answer"]
# ── 7. Disclaimer always present ─────────────────────────────────────────────
| _DISCLAIMER_PHRASES = [ | |
| "not financial advice", "educational purposes", | |
| "consult", "disclaimer", "educational", | |
| ] | |
def test_disclaimer_in_goal_answer():
    """A goal-planning answer contains at least one disclaimer phrase."""
    response = invoke("I want $200,000 in 8 years.", thread_id=_tid())
    reply = response["answer"].lower()
    assert any(phrase in reply for phrase in _DISCLAIMER_PHRASES)
def test_disclaimer_in_tax_answer():
    """A tax answer contains a disclaimer phrase or mentions a tax professional."""
    prompt = "I sold TSLA after 2 years with a $15,000 gain. 24% bracket."
    # Tax answers may also satisfy the check by pointing to a tax professional.
    accepted_phrases = _DISCLAIMER_PHRASES + ["tax professional"]
    response = invoke(prompt, thread_id=_tid())
    reply = response["answer"].lower()
    assert any(phrase in reply for phrase in accepted_phrases)
def test_disclaimer_in_portfolio_answer():
    """A portfolio answer contains at least one disclaimer phrase."""
    # The prompt uses a real em dash; the previous literal carried a
    # mis-encoded character in its place.
    result = invoke("I have 20 AAPL and 10 MSFT — is my portfolio healthy?", thread_id=_tid())
    assert any(p in result["answer"].lower() for p in _DISCLAIMER_PHRASES)
# ── 8. Multi-tool in one turn ────────────────────────────────────────────────
def test_tax_and_news_in_one_turn():
    """A single prompt about 401k limits and AAPL news is answered on both topics.

    The intent is that the workflow consults both the tax-education and the
    financial-news tools in one turn. Only the final answer text is checked:
    it must contain at least one keyword from each topic group.
    """
    response = invoke(
        "What is the 401k contribution limit and what's the latest news on AAPL?",
        thread_id=_tid(),
    )
    reply = response["answer"].lower()
    topic_groups = (
        ("401k", "limit", "contribute"),
        ("apple", "aapl", "news"),
    )
    for terms in topic_groups:
        assert any(term in reply for term in terms)
def test_goal_and_tax_in_one_turn():
    """A single prompt combining a retirement goal and a Roth IRA tax question is answered on both.

    The final answer must contain at least one keyword from each topic group.
    """
    prompt = "I want $2M in 20 years and I also want to know how a Roth IRA helps with taxes."
    reply = invoke(prompt, thread_id=_tid())["answer"].lower()
    topic_groups = (
        ("roth", "ira", "tax"),
        ("monthly", "goal", "invest", "retire", "million"),
    )
    for terms in topic_groups:
        assert any(term in reply for term in terms)