File size: 2,746 Bytes
f18a082
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
"""
Quick validation test for chunked analysis functions.
Run: python test_chunked_analysis.py
"""
import sys
sys.path.insert(0, '/backend')

from server import (
    _determine_chunk_size,
    _extract_pages_text,
    _normalize_concept_name,
    _deduplicate_concepts,
    _merge_diagrams,
    _synthesize_summary_from_chunks,
    _analyze_batch,
    _analyze_pdf_legacy,
    analyze_pdf
)

def test_determine_chunk_size():
    """Check `_determine_chunk_size` at sampled inputs on both sides of each step.

    The expectations pin a result of 2 for inputs 5 and 15, 3 for 16 and 30,
    and 4 for 31 and 60.
    """
    # (input, expected chunk size) pairs in ascending input order.
    # NOTE(review): the inputs are presumably page counts -- confirm in server.
    expectations = (
        (5, 2),
        (15, 2),
        (16, 3),
        (30, 3),
        (31, 4),
        (60, 4),
    )
    for size_input, expected in expectations:
        assert _determine_chunk_size(size_input) == expected


def test_normalize_concept_name():
    """Check the normalized form `_normalize_concept_name` yields for sample names.

    The expected values are lowercase, carry no surrounding whitespace, have a
    leading "The"/"A" removed, and have parentheses stripped.
    """
    # (raw concept name, expected normalized name)
    cases = [
        ("The Neural Network", "neural network"),
        ("  Backpropagation Algorithm  ", "backpropagation algorithm"),
        ("Convolutional NN (CNN)", "convolutional nn cnn"),
        ("A Fast Fourier Transform", "fast fourier transform"),
    ]
    for raw_name, normalized in cases:
        assert _normalize_concept_name(raw_name) == normalized


def test_deduplicate_concepts():
    """Check that `_deduplicate_concepts` collapses names differing only by case.

    Three entries go in, two of which share a name up to letter case; two
    entries are expected back, and the surviving "neural network" entry must
    carry the longer explanation.
    """
    field_names = ("concept", "explanation", "code_example")
    rows = (
        ("Neural Network", "A", ""),
        ("neural network", "B longer", "code"),
        ("CNN", "C", ""),
    )
    concepts = [dict(zip(field_names, row)) for row in rows]

    deduped = _deduplicate_concepts(concepts)
    assert len(deduped) == 2

    # Should keep longer explanation+code
    neural_entries = [
        entry for entry in deduped if "neural" in entry["concept"].lower()
    ]
    survivor = neural_entries[0]
    assert survivor["explanation"] == "B longer"


def test_merge_diagrams():
    """Check that `_merge_diagrams` merges diagrams whose names differ only by case.

    Two "flowchart" entries named "Architecture"/"architecture" plus one
    unrelated diagram go in; two entries are expected back, and the flowchart
    that remains must carry the longer explanation.
    """
    field_names = ("name", "type", "explanation")
    rows = (
        ("Architecture", "flowchart", "Short"),
        ("architecture", "flowchart", "Very detailed long"),
        ("Data Flow", "diagram", "X"),
    )
    diagrams = [dict(zip(field_names, row)) for row in rows]

    merged = _merge_diagrams(diagrams)
    assert len(merged) == 2

    flowcharts = [entry for entry in merged if entry["type"] == "flowchart"]
    survivor = flowcharts[0]
    assert survivor["explanation"] == "Very detailed long"


def test_synthesize_summary():
    """Placeholder check for `_synthesize_summary_from_chunks`.

    The helper is not called here: per the original author's note it is
    async and not easy to exercise from this plain script. The function only
    prints a confirmation line; that the name is importable is already
    enforced by the module-level `from server import ...`.

    Returns:
        None.
    """
    # Can't test async easily, but function exists
    print("_synthesize_summary_from_chunks function OK")


if __name__ == "__main__":
    # Run every check in order; a failing assert aborts the script before its
    # "[OK]" line (and the final summary) is printed.
    checks = (
        (test_determine_chunk_size, "[OK] _determine_chunk_size"),
        (test_normalize_concept_name, "[OK] _normalize_concept_name"),
        (test_deduplicate_concepts, "[OK] _deduplicate_concepts"),
        (test_merge_diagrams, "[OK] _merge_diagrams"),
        (test_synthesize_summary, "[OK] _synthesize_summary_from_chunks"),
    )
    for run_check, ok_message in checks:
        run_check()
        print(ok_message)
    print("\nAll unit tests passed!")