| """ |
| Tests for paragraph generation via convergence-driven sentence retrieval. |
| |
| Verifies: |
| 1. Planning convergence finds relevant concept clusters |
| 2. Sentence retrieval returns taught sentences in correct order |
| 3. Relevance floor filters noise sentences |
| 4. Multi-sentence output maintains coherence |
| 5. Configurable sentence separator |
| """ |
|
|
import contextlib
import os
import sys

import numpy as np
import pytest
|
|
| sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'src')) |
|
|
| from engine import Engine |
|
|
|
|
def make_engine(tmp_path, seed=42, dim=300):
    """Create an Engine preloaded with deterministic random word embeddings.

    No sentences are taught here; each test teaches the ones it needs.

    Args:
        tmp_path: Directory handed to the engine as ``data_dir``.
        seed: Seed for the ``RandomState`` that generates the embeddings,
            so the vocabulary is reproducible between runs.
        dim: Embedding dimensionality. Used for both the engine and the
            generated vectors so the two sizes cannot drift apart.

    Returns:
        The engine, with one L2-normalized ``float32`` vector loaded for
        every word in the fixed vocabulary below.
    """
    engine = Engine(data_dir=str(tmp_path), dim=dim)
    rng = np.random.RandomState(seed)
    word_list = [
        "shakespeare", "wrote", "hamlet", "macbeth", "playwright",
        "english", "was", "an", "a", "is", "the", "of",
        "einstein", "discovered", "relativity", "physicist", "german",
        "newton", "invented", "calculus", "gravity",
        "paris", "capital", "france", "london", "england",
        "python", "programming", "language", "created", "guido",
        "famous", "tragedy", "who", "what", "in", "by",
    ]
    words = {}
    for w in word_list:
        vec = rng.randn(dim).astype(np.float32)
        # The epsilon guards against division by zero for a degenerate draw.
        words[w] = vec / (np.linalg.norm(vec) + 1e-10)
    engine.load_embeddings_from_dict(words)
    return engine
|
|
|
|
class TestParagraphGeneration:
    """Tests for paragraph generation through the engine and its generator.

    Every test builds a fresh engine with ``make_engine`` and wraps it in
    ``contextlib.closing`` so that ``engine.close()`` runs even when an
    assertion fails. Most content assertions are guarded by
    ``strategy == "paragraph"`` and are skipped for any other strategy.
    """

    def test_single_topic_paragraph(self, tmp_path):
        """Two taught sentences about one topic → non-abstaining answer."""
        with contextlib.closing(make_engine(tmp_path)) as engine:
            engine.teach_sentence("shakespeare wrote hamlet")
            engine.teach_sentence("shakespeare wrote macbeth")

            result = engine.query_paragraph("shakespeare")
            assert result.strategy != "abstain"

            # The queried topic word must appear in the produced answer.
            answer = result.answer.lower()
            assert "shakespeare" in answer

    def test_paragraph_preserves_word_order(self, tmp_path):
        """Sentences should appear in their taught word order."""
        with contextlib.closing(make_engine(tmp_path)) as engine:
            engine.teach_sentence("paris is the capital of france")

            result = engine.query_paragraph("capital of france")
            if result.strategy == "paragraph":
                # NOTE(review): only word presence is checked here, not the
                # relative order the docstring describes.
                answer = result.answer.lower()
                assert "paris" in answer
                assert "capital" in answer

    def test_relevance_floor_filters_noise(self, tmp_path):
        """Low-relevance sentences should be excluded."""
        with contextlib.closing(make_engine(tmp_path)) as engine:
            engine.teach_sentence("shakespeare wrote hamlet")
            engine.teach_sentence("paris is the capital of france")

            # Query only the Shakespeare topic; the Paris sentence is noise.
            result = engine.query_paragraph("shakespeare hamlet")
            answer = result.answer.lower()
            if result.strategy == "paragraph":
                assert "shakespeare" in answer or "hamlet" in answer

    def test_multi_topic_retrieval(self, tmp_path):
        """Query spanning multiple topics returns a sentence from at least one."""
        with contextlib.closing(make_engine(tmp_path)) as engine:
            engine.teach_sentence("einstein discovered relativity")
            engine.teach_sentence("newton invented calculus")

            result = engine.query_paragraph("einstein newton")
            answer = result.answer.lower()
            if result.strategy == "paragraph":
                has_einstein = "einstein" in answer or "relativity" in answer
                has_newton = "newton" in answer or "calculus" in answer
                assert has_einstein or has_newton

    def test_configurable_separator(self, tmp_path):
        """Sentence separator should be configurable."""
        with contextlib.closing(make_engine(tmp_path)) as engine:
            engine.teach_sentence("shakespeare wrote hamlet")
            engine.teach_sentence("shakespeare wrote macbeth")

            # Call the generator directly so a custom separator can be passed.
            query_vec = engine.encoder.encode_sentence("shakespeare")
            result = engine.generator.generate_paragraph(
                query_vector=query_vec,
                convergence_loop=engine.convergence,
                query_words=["shakespeare"],
                sentence_separator=" | ",
            )
            # Only checked when a pipe shows up at all: it must then be the
            # full, space-padded separator that was requested.
            if result.strategy == "paragraph" and "|" in result.text:
                assert " | " in result.text

    def test_max_sentences_limit(self, tmp_path):
        """max_sentences should cap the output."""
        with contextlib.closing(make_engine(tmp_path)) as engine:
            engine.teach_sentence("shakespeare wrote hamlet")
            engine.teach_sentence("shakespeare wrote macbeth")
            engine.teach_sentence("einstein discovered relativity")
            engine.teach_sentence("newton invented calculus")

            result = engine.query_paragraph("shakespeare einstein newton", max_sentences=2)
            if result.strategy == "paragraph":
                # NOTE(review): the bound of 3 is looser than max_sentences=2;
                # presumably it leaves room for a trailing fragment after
                # splitting on ". " — confirm against the generator's output.
                parts = result.answer.split(". ")
                assert len(parts) <= 3

    def test_empty_kb_abstains(self, tmp_path):
        """No taught sentences → abstain."""
        with contextlib.closing(make_engine(tmp_path)) as engine:
            result = engine.query_paragraph("shakespeare")
            assert result.strategy == "abstain"

    def test_paragraph_confidence(self, tmp_path):
        """Paragraph confidence should lie within [0, 1]."""
        with contextlib.closing(make_engine(tmp_path)) as engine:
            engine.teach_sentence("shakespeare wrote hamlet")
            result = engine.query_paragraph("shakespeare hamlet")
            assert 0 <= result.confidence <= 1.0

    def test_no_duplicate_sentences(self, tmp_path):
        """Same sentence taught twice shouldn't appear twice in output."""
        with contextlib.closing(make_engine(tmp_path)) as engine:
            engine.teach_sentence("shakespeare wrote hamlet")
            engine.teach_sentence("shakespeare wrote hamlet")

            result = engine.query_paragraph("shakespeare")
            if result.strategy == "paragraph":
                count = result.answer.lower().count("shakespeare wrote hamlet")
                # Zero occurrences is tolerated; only repetition is a failure.
                assert count <= 1

    def test_trace_shows_planning(self, tmp_path):
        """Trace should show the planning phase."""
        with contextlib.closing(make_engine(tmp_path)) as engine:
            engine.teach_sentence("shakespeare wrote hamlet")
            result = engine.query_paragraph("shakespeare")
            # The generation record may be missing or empty; skip the check then.
            generation = getattr(result, "generation", None)
            if result.strategy == "paragraph" and generation:
                trace_text = "\n".join(generation.trace)
                assert "Plan" in trace_text
|
|