Claude committed on
Commit
0c23cd2
·
unverified ·
1 Parent(s): 29b2eb2

test: Add conversational flow and LLM context tests

Browse files

New test classes:
- TestConversationalFlow: Multi-turn conversations, history handling
- TestLLMContextPassing: Verify correct context sent to LLM

Tests verify:
- Follow-up questions work with history
- User messages preserved in history
- System prompts instruct RAG behavior
- User questions included in LLM prompts
- Relevant video content passed as context

Total: 64 tests

Files changed (1) hide show
  1. tests/test_app.py +331 -0
tests/test_app.py CHANGED
@@ -859,3 +859,334 @@ class TestGetKnowledgeStatsWithSession:
859
  # Should show chunk count and video count
860
  assert "chunks" in result.lower() or "2" in result
861
  assert "Test Video" in result
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
859
  # Should show chunk count and video count
860
  assert "chunks" in result.lower() or "2" in result
861
  assert "Test Video" in result
862
+
863
+
864
+ class TestConversationalFlow:
865
+ """Tests for multi-turn conversational interactions with the chatbot."""
866
+
867
+ def test_multi_turn_conversation(self):
868
+ """Test that chatbot can handle follow-up questions using history."""
869
+ from app import SessionState, add_to_vector_db, handle_chat
870
+
871
+ state = SessionState("convo_test_1")
872
+
873
+ # Add content about a cooking video
874
+ add_to_vector_db(
875
+ title="Italian Cooking",
876
+ transcript="Today we make authentic Italian pasta. First boil water. Add salt. "
877
+ "Cook pasta for 8 minutes. The sauce uses fresh tomatoes, garlic, and basil.",
878
+ visual_contexts=["Chef chopping tomatoes", "Boiling pot of pasta"],
879
+ session_state=state,
880
+ )
881
+
882
+ mock_profile = MagicMock()
883
+ mock_token = MagicMock()
884
+ mock_token.token = "test_token"
885
+
886
+ with patch("app.InferenceClient") as mock_client:
887
+ # First question
888
+ mock_response1 = MagicMock()
889
+ mock_response1.choices = [MagicMock()]
890
+ mock_response1.choices[0].message.content = "The video shows how to make Italian pasta with a tomato sauce."
891
+
892
+ # Follow-up question
893
+ mock_response2 = MagicMock()
894
+ mock_response2.choices = [MagicMock()]
895
+ mock_response2.choices[0].message.content = "The sauce ingredients are fresh tomatoes, garlic, and basil."
896
+
897
+ mock_client.return_value.chat.completions.create.side_effect = [mock_response1, mock_response2]
898
+
899
+ # First turn
900
+ history1, _, state = handle_chat(
901
+ message="What is this video about?",
902
+ history=[],
903
+ session_state=state,
904
+ profile=mock_profile,
905
+ oauth_token=mock_token,
906
+ )
907
+
908
+ first_turn_len = len(history1)
909
+ assert first_turn_len >= 2
910
+ assert "pasta" in history1[-1]["content"].lower() or "Italian" in history1[-1]["content"]
911
+
912
+ # Second turn - follow-up question using history
913
+ history2, _, state = handle_chat(
914
+ message="What ingredients are in the sauce?",
915
+ history=history1, # Pass previous history
916
+ session_state=state,
917
+ profile=mock_profile,
918
+ oauth_token=mock_token,
919
+ )
920
+
921
+ # Should have more messages now (history is mutated in place)
922
+ assert len(history2) == 4 # 2 turns x 2 messages each
923
+ # Last response should be about ingredients
924
+ assert "tomatoes" in history2[-1]["content"].lower() or "sauce" in history2[-1]["content"].lower()
925
+
926
+ def test_history_preserves_context(self):
927
+ """Test that conversation history preserves context for follow-ups."""
928
+ from app import SessionState, add_to_vector_db, handle_chat
929
+
930
+ state = SessionState("convo_test_2")
931
+
932
+ # Add content
933
+ add_to_vector_db(
934
+ title="Python Tutorial",
935
+ transcript="Python is a programming language. Variables store data. "
936
+ "Functions are defined with def keyword. Classes use the class keyword.",
937
+ visual_contexts=["Code editor showing Python"],
938
+ session_state=state,
939
+ )
940
+
941
+ mock_profile = MagicMock()
942
+ mock_token = MagicMock()
943
+ mock_token.token = "test_token"
944
+
945
+ with patch("app.InferenceClient") as mock_client:
946
+ mock_response = MagicMock()
947
+ mock_response.choices = [MagicMock()]
948
+ mock_response.choices[0].message.content = "Functions are defined using the def keyword."
949
+ mock_client.return_value.chat.completions.create.return_value = mock_response
950
+
951
+ # Build up a conversation
952
+ history = []
953
+
954
+ # Turn 1: Ask about functions
955
+ history, _, state = handle_chat(
956
+ message="How do you define functions in Python?",
957
+ history=history,
958
+ session_state=state,
959
+ profile=mock_profile,
960
+ oauth_token=mock_token,
961
+ )
962
+
963
+ # Verify history structure
964
+ assert len(history) == 2 # User + Assistant
965
+ assert history[0]["role"] == "user"
966
+ assert history[1]["role"] == "assistant"
967
+ assert "function" in history[0]["content"].lower()
968
+
969
+ def test_user_messages_added_to_history(self):
970
+ """Test that user messages are properly added to history."""
971
+ from app import SessionState, handle_chat
972
+
973
+ state = SessionState("convo_test_3")
974
+ mock_profile = MagicMock()
975
+
976
+ history, _, state = handle_chat(
977
+ message="Hello chatbot!",
978
+ history=[],
979
+ session_state=state,
980
+ profile=mock_profile,
981
+ oauth_token=MagicMock(),
982
+ )
983
+
984
+ # User message should be in history
985
+ user_messages = [h for h in history if h["role"] == "user"]
986
+ assert len(user_messages) >= 1
987
+ assert user_messages[0]["content"] == "Hello chatbot!"
988
+
989
+ def test_assistant_responses_added_to_history(self):
990
+ """Test that assistant responses are properly added to history."""
991
+ from app import SessionState, add_to_vector_db, handle_chat
992
+
993
+ state = SessionState("convo_test_4")
994
+
995
+ add_to_vector_db(
996
+ title="Test",
997
+ transcript="Test content.",
998
+ visual_contexts=[],
999
+ session_state=state,
1000
+ )
1001
+
1002
+ mock_profile = MagicMock()
1003
+ mock_token = MagicMock()
1004
+ mock_token.token = "test"
1005
+
1006
+ with patch("app.InferenceClient") as mock_client:
1007
+ mock_response = MagicMock()
1008
+ mock_response.choices = [MagicMock()]
1009
+ mock_response.choices[0].message.content = "This is my response."
1010
+ mock_client.return_value.chat.completions.create.return_value = mock_response
1011
+
1012
+ history, _, _ = handle_chat(
1013
+ message="Tell me about the test",
1014
+ history=[],
1015
+ session_state=state,
1016
+ profile=mock_profile,
1017
+ oauth_token=mock_token,
1018
+ )
1019
+
1020
+ # Assistant message should be in history
1021
+ assistant_messages = [h for h in history if h["role"] == "assistant"]
1022
+ assert len(assistant_messages) >= 1
1023
+
1024
+ def test_can_ask_about_specific_parts(self):
1025
+ """Test asking specific questions about video content."""
1026
+ from app import SessionState, add_to_vector_db, search_knowledge
1027
+
1028
+ state = SessionState("specific_test")
1029
+
1030
+ # Add detailed content
1031
+ add_to_vector_db(
1032
+ title="Science Documentary",
1033
+ transcript="The documentary covers three topics. First, black holes are massive objects. "
1034
+ "Second, neutron stars are extremely dense. Third, galaxies contain billions of stars.",
1035
+ visual_contexts=[
1036
+ "Animation of black hole",
1037
+ "Diagram of neutron star",
1038
+ "Hubble image of galaxy",
1039
+ ],
1040
+ session_state=state,
1041
+ )
1042
+
1043
+ # Search for specific topic
1044
+ results = search_knowledge("black holes", session_state=state)
1045
+ assert len(results) > 0
1046
+ assert any("black hole" in r["content"].lower() for r in results)
1047
+
1048
+ # Search for another topic
1049
+ results = search_knowledge("neutron stars", session_state=state)
1050
+ assert len(results) > 0
1051
+ assert any("neutron" in r["content"].lower() for r in results)
1052
+
1053
+ # Search for visual content
1054
+ results = search_knowledge("galaxy image", session_state=state)
1055
+ assert len(results) > 0
1056
+
1057
+
1058
+ class TestLLMContextPassing:
1059
+ """Tests to verify correct context is passed to the LLM."""
1060
+
1061
+ def test_context_includes_relevant_video_content(self):
1062
+ """Test that the LLM receives relevant video content in its prompt."""
1063
+ from app import SessionState, add_to_vector_db, chat_with_videos
1064
+
1065
+ state = SessionState("context_test_1")
1066
+
1067
+ add_to_vector_db(
1068
+ title="Machine Learning Basics",
1069
+ transcript="Neural networks consist of layers. Input layer, hidden layers, and output layer.",
1070
+ visual_contexts=["Diagram of neural network architecture"],
1071
+ session_state=state,
1072
+ )
1073
+
1074
+ mock_profile = MagicMock()
1075
+ mock_token = MagicMock()
1076
+ mock_token.token = "test"
1077
+
1078
+ captured_messages = None
1079
+
1080
+ with patch("app.InferenceClient") as mock_client:
1081
+ def capture_call(*args, **kwargs):
1082
+ nonlocal captured_messages
1083
+ captured_messages = kwargs.get("messages", [])
1084
+ mock_resp = MagicMock()
1085
+ mock_resp.choices = [MagicMock()]
1086
+ mock_resp.choices[0].message.content = "Response"
1087
+ return mock_resp
1088
+
1089
+ mock_client.return_value.chat.completions.create.side_effect = capture_call
1090
+
1091
+ chat_with_videos(
1092
+ message="Tell me about neural networks",
1093
+ history=[],
1094
+ profile=mock_profile,
1095
+ oauth_token=mock_token,
1096
+ session_state=state,
1097
+ )
1098
+
1099
+ # Verify the context was passed to LLM
1100
+ assert captured_messages is not None
1101
+ assert len(captured_messages) == 2 # system + user
1102
+
1103
+ # User message should contain the video content
1104
+ user_msg = captured_messages[1]["content"]
1105
+ assert "neural" in user_msg.lower()
1106
+ assert "layers" in user_msg.lower()
1107
+
1108
+ def test_system_prompt_instructs_rag_behavior(self):
1109
+ """Test that system prompt instructs LLM to use provided context."""
1110
+ from app import SessionState, add_to_vector_db, chat_with_videos
1111
+
1112
+ state = SessionState("context_test_2")
1113
+
1114
+ add_to_vector_db(
1115
+ title="Test",
1116
+ transcript="Content here.",
1117
+ visual_contexts=[],
1118
+ session_state=state,
1119
+ )
1120
+
1121
+ mock_profile = MagicMock()
1122
+ mock_token = MagicMock()
1123
+ mock_token.token = "test"
1124
+
1125
+ captured_messages = None
1126
+
1127
+ with patch("app.InferenceClient") as mock_client:
1128
+ def capture_call(*args, **kwargs):
1129
+ nonlocal captured_messages
1130
+ captured_messages = kwargs.get("messages", [])
1131
+ mock_resp = MagicMock()
1132
+ mock_resp.choices = [MagicMock()]
1133
+ mock_resp.choices[0].message.content = "Response"
1134
+ return mock_resp
1135
+
1136
+ mock_client.return_value.chat.completions.create.side_effect = capture_call
1137
+
1138
+ chat_with_videos(
1139
+ message="Question",
1140
+ history=[],
1141
+ profile=mock_profile,
1142
+ oauth_token=mock_token,
1143
+ session_state=state,
1144
+ )
1145
+
1146
+ # System prompt should instruct RAG behavior
1147
+ system_msg = captured_messages[0]["content"]
1148
+ assert "video" in system_msg.lower()
1149
+ assert "context" in system_msg.lower()
1150
+
1151
+ def test_user_question_included_in_prompt(self):
1152
+ """Test that the user's actual question is included in the prompt."""
1153
+ from app import SessionState, add_to_vector_db, chat_with_videos
1154
+
1155
+ state = SessionState("context_test_3")
1156
+
1157
+ add_to_vector_db(
1158
+ title="Test",
1159
+ transcript="Content.",
1160
+ visual_contexts=[],
1161
+ session_state=state,
1162
+ )
1163
+
1164
+ mock_profile = MagicMock()
1165
+ mock_token = MagicMock()
1166
+ mock_token.token = "test"
1167
+
1168
+ specific_question = "What are the three main ingredients mentioned?"
1169
+ captured_messages = None
1170
+
1171
+ with patch("app.InferenceClient") as mock_client:
1172
+ def capture_call(*args, **kwargs):
1173
+ nonlocal captured_messages
1174
+ captured_messages = kwargs.get("messages", [])
1175
+ mock_resp = MagicMock()
1176
+ mock_resp.choices = [MagicMock()]
1177
+ mock_resp.choices[0].message.content = "Response"
1178
+ return mock_resp
1179
+
1180
+ mock_client.return_value.chat.completions.create.side_effect = capture_call
1181
+
1182
+ chat_with_videos(
1183
+ message=specific_question,
1184
+ history=[],
1185
+ profile=mock_profile,
1186
+ oauth_token=mock_token,
1187
+ session_state=state,
1188
+ )
1189
+
1190
+ # User's question should be in the prompt
1191
+ user_msg = captured_messages[1]["content"]
1192
+ assert specific_question in user_msg