File size: 2,038 Bytes
253246d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import os
import shutil
from backend import SemanticAnalyzer

def create_dummy_files(base_dir="test_data"):
    """Create a fresh directory containing the two sample documents.

    If ``base_dir`` already exists it is removed first, so stale files from
    an earlier run never leak into the next one. Two files are then written:

    * ``doc_a.txt`` - three baseline statements.
    * ``doc_b.txt`` - a statement with a different release date than the one
      in ``doc_a.txt``, a statement identical to one in ``doc_a.txt``, and
      one further statement.

    Args:
        base_dir: Directory to (re)create. Defaults to ``"test_data"``, the
            location used when the function is called without arguments.
    """
    if os.path.exists(base_dir):
        shutil.rmtree(base_dir)
    os.makedirs(base_dir)

    documents = {
        # File A
        "doc_a.txt": (
            "The software release is scheduled for Q3 2024.\n",
            "Machine learning models require vast amounts of data.\n",
            "This is a generic statement about AI capabilities.\n",
        ),
        # File B
        "doc_b.txt": (
            "The software release is strictly scheduled for Q4 2025.\n",  # Contradiction
            "Machine learning models require vast amounts of data.\n",  # Duplicate
            "AI generates images from text prompts.\n",  # Unrelated
        ),
    }

    for filename, lines in documents.items():
        # Explicit encoding keeps the bytes on disk independent of the
        # platform's locale default.
        with open(os.path.join(base_dir, filename), "w", encoding="utf-8") as f:
            f.writelines(lines)

def run_test():
    """Generate the sample documents, analyze them, and print a verdict.

    The two fixture files are rebuilt, passed to
    ``SemanticAnalyzer.analyze_documents``, and every entry under the
    ``'duplicates'`` and ``'contradictions'`` keys of the result is printed.
    The run counts as passed only when a duplicate whose first chunk mentions
    "vast amounts of data" and a contradiction whose first chunk mentions
    "software release" are both present.
    """
    create_dummy_files()
    doc_paths = ["test_data/doc_a.txt", "test_data/doc_b.txt"]

    print("Initializing Analyzer...")
    analyzer = SemanticAnalyzer()

    print("Analyzing...")
    report = analyzer.analyze_documents(doc_paths)

    print("\n=== RESULTS ===")
    duplicates = report['duplicates']
    print(f"Duplicates found: {len(duplicates)}")
    for match in duplicates:
        print(f"  [Match] ({match['score']:.4f})")
        print(f"  A: {match['chunk_a']['text']}")
        print(f"  B: {match['chunk_b']['text']}")

    contradictions = report['contradictions']
    print(f"\nContradictions found: {len(contradictions)}")
    for conflict in contradictions:
        print(f"  [Conflict] (Conf: {conflict['confidence']:.4f})")
        print(f"  A: {conflict['chunk_a']['text']}")
        print(f"  B: {conflict['chunk_b']['text']}")

    # Validation logic: look for the one duplicate and the one contradiction
    # that the fixture files were written to trigger.
    dup_detected = False
    for match in duplicates:
        if "vast amounts of data" in match['chunk_a']['text']:
            dup_detected = True
            break

    contra_detected = False
    for conflict in contradictions:
        if "software release" in conflict['chunk_a']['text']:
            contra_detected = True
            break

    verdict = (
        "\n✅ VERIFICATION PASSED: Core logic works."
        if dup_detected and contra_detected
        else "\n❌ VERIFICATION FAILED: Missing expected detections."
    )
    print(verdict)

# Run the verification only when this file is executed as a script, not on import.
if __name__ == "__main__":
    run_test()