chuckfinca committed on
Commit
b0ef2dc
·
1 Parent(s): 23ef32a

test: Add initial test suite for core application logic

Browse files

Adds unit and integration tests for the project's most critical components, ensuring their correctness and stability.

- A pure unit test was created for the `semantic_chunker` to validate its data grouping logic.
- Mocked tests were implemented for the `rag_pipeline` to test the search filtering and prompt generation logic in isolation from the FAISS index and the external Gemini API.
- Tests have been separated into `test_chunking.py` (fast) and `test_pipeline.py` (slower, mocked) to improve the development testing cycle.

tests/test_chunking.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def test_chunk_by_concept_groups_correctly():
    """
    Ensures that items are correctly grouped by (source_document, concept)
    and that their content is concatenated in the right order.
    """
    from src.fot_recommender.semantic_chunker import chunk_by_concept

    # 1. Arrange: create simple, predictable raw data. The two doc_A items
    # share a concept and should be merged; the doc_B item stands alone.
    sample_raw_kb = [
        {"source_document": "doc_A", "concept": "Mentoring", "absolute_page": 1, "content": "First part."},
        {"source_document": "doc_B", "concept": "Tutoring", "absolute_page": 10, "content": "Tutoring info."},
        {"source_document": "doc_A", "concept": "Mentoring", "absolute_page": 2, "content": "Second part."},
    ]

    # 2. Act: run the function we're testing
    final_chunks = chunk_by_concept(sample_raw_kb)

    # 3. Assert: check the results
    assert len(final_chunks) == 2  # Should have grouped into 2 concepts

    # Find the 'Mentoring' chunk for detailed checks.
    # Pass a default of None so a missing chunk fails the assertion below
    # with a clear message instead of raising an unhandled StopIteration.
    mentoring_chunk = next((c for c in final_chunks if c["title"] == "Mentoring"), None)

    assert mentoring_chunk is not None
    assert mentoring_chunk["source_document"] == "doc_A"
    assert mentoring_chunk["fot_pages"] == "Pages: 1, 2"
    assert "First part.\n\nSecond part." in mentoring_chunk["original_content"]
    assert "Title: Mentoring. Content: First part.\n\nSecond part." in mentoring_chunk["content_for_embedding"]
tests/test_main.py DELETED
@@ -1,12 +0,0 @@
1
from io import StringIO
from contextlib import redirect_stdout

from demo_application.main import main


def test_main():
    """Test that main() prints the expected message."""
    captured_output = StringIO()
    # redirect_stdout restores sys.stdout even if main() raises,
    # unlike a manual sys.stdout reassignment, which would leak the
    # redirection into every subsequent test on failure.
    with redirect_stdout(captured_output):
        main()
    assert "Hello from demo application!" in captured_output.getvalue()
 
 
 
 
 
 
 
 
 
 
 
 
 
tests/test_pipeline.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from unittest.mock import MagicMock, patch
2
+ import numpy as np
3
+
4
def test_search_interventions_filters_by_score():
    """
    Ensures the search function correctly filters out results
    that are below the minimum similarity score threshold.
    """
    from src.fot_recommender.rag_pipeline import search_interventions

    # Arrange: stub out the embedding model and the FAISS index entirely.
    fake_model = MagicMock()
    fake_index = MagicMock()

    # A two-entry knowledge base matching the two fake hits configured below.
    kb = [
        {"id": 1, "content": "high score"},
        {"id": 2, "content": "low score"},
    ]

    # The stubbed index reports one hit above the cutoff (0.9)
    # and one below it (0.3), in that order.
    fake_scores = np.array([[0.9, 0.3]])
    fake_indices = np.array([[0, 1]])
    fake_index.search.return_value = (fake_scores, fake_indices)

    # Act: run the search with a minimum score of 0.5.
    results = search_interventions(
        query="test query",
        model=fake_model,
        index=fake_index,
        knowledge_base=kb,
        k=2,
        min_similarity_score=0.5,
    )

    # Assert: only the high-scoring chunk survives the filter.
    assert len(results) == 1
    chunk, score = results[0]
    assert chunk["content"] == "high score"  # Check the chunk content
    assert score == 0.9  # Check the score
40
+
41
def test_generate_recommendation_summary_builds_correct_prompt():
    """
    Ensures that the context from retrieved chunks and the student narrative
    are correctly formatted into the final prompt sent to the LLM.
    """
    from src.fot_recommender.rag_pipeline import generate_recommendation_summary

    # Arrange: one retrieved chunk paired with its similarity score,
    # plus a short student narrative.
    retrieved = [
        ({"title": "Tip 1", "original_content": "Do this.", "source_document": "doc_A"}, 0.9),
    ]
    narrative = "Student is struggling."

    # Act: patch genai.GenerativeModel so no real Gemini API call is made.
    with patch("src.fot_recommender.rag_pipeline.genai.GenerativeModel") as fake_model_cls:
        fake_model = MagicMock()
        fake_model_cls.return_value = fake_model

        generate_recommendation_summary(
            retrieved_chunks=retrieved,
            student_narrative=narrative,
            api_key="fake_key",
            persona="teacher",
        )

        # Assert: exactly one generation call was issued ...
        fake_model.generate_content.assert_called_once()

        # ... and the prompt it received contains every key piece of context.
        prompt = fake_model.generate_content.call_args[0][0]
        expected_fragments = (
            "Student is struggling.",
            "--- Intervention Chunk 1 ---",
            "Title: Tip 1",
            "Content: Do this.",
            "(Source Document: doc_A)",
        )
        for fragment in expected_fragments:
            assert fragment in prompt