File size: 5,278 Bytes

9b1c753

"""
Comprehensive test for LDARiskDiscovery compatibility with trainer.

This is a flat smoke-test script: it runs when executed, prints a
step-by-step report to stdout and exits with status 1 on the first failing
step.

Steps:
    1.  Import ``LDARiskDiscovery`` from ``risk_discovery`` (and numpy).
    2.  Instantiate it with ``n_clusters=3``.
    3.  Check that every required attribute exists.
    4.  Check that every required method exists and is callable.
    5.  Call ``discover_risk_patterns()`` on five sample clauses.
    6.  Call ``extract_risk_features()`` and verify every value is numeric.
    7.  Call ``clean_clause_text()`` and print the before/after text.
    8.  Call ``get_risk_labels()`` and print the result.
    9.  Call ``get_topic_distribution()`` and print the result.
    10. Call ``get_discovered_risk_names()`` and print the result.
"""

import sys
import traceback


class _MissingInterfaceError(AttributeError):
    """Raised by steps 3 and 4 when a required attribute/method is absent.

    A dedicated subclass keeps "the interface is incomplete" separate from
    an AttributeError raised *inside* one of the methods under test; the
    latter is a genuine bug and must be reported with a traceback.
    """


print("=" * 60)
print("Testing LDARiskDiscovery Complete Interface")
print("=" * 60)

try:
    from risk_discovery import LDARiskDiscovery
    import numpy as np

    print("\n✅ Step 1: Import successful")

    # Create instance
    print("\n🔧 Step 2: Creating LDARiskDiscovery instance...")
    lda = LDARiskDiscovery(n_clusters=3)
    print("   ✅ Instance created")

    # Check all required attributes
    print("\n📋 Step 3: Checking required attributes...")
    required_attrs = [
        'n_clusters',
        'discovered_patterns',
        'cluster_labels',
        'feature_matrix',
        'legal_indicators',
        'complexity_indicators',
    ]

    # Report the status of every attribute before failing, so one run
    # shows the complete list of gaps instead of only the first one.
    missing_attrs = []
    for attr in required_attrs:
        if hasattr(lda, attr):
            print(f"   ✅ {attr}: Present")
        else:
            print(f"   ❌ {attr}: MISSING")
            missing_attrs.append(attr)
    if missing_attrs:
        raise _MissingInterfaceError(
            f"Missing attribute: {', '.join(missing_attrs)}"
        )

    # Check all required methods
    print("\n🔍 Step 4: Checking required methods...")
    required_methods = [
        'discover_risk_patterns',
        'get_risk_labels',
        'get_discovered_risk_names',
        'get_topic_distribution',
        'clean_clause_text',
        'extract_risk_features',
    ]

    missing_methods = []
    for method in required_methods:
        # getattr(..., None) yields a non-callable None when the name is absent.
        if callable(getattr(lda, method, None)):
            print(f"   ✅ {method}(): Present")
        else:
            print(f"   ❌ {method}(): MISSING")
            missing_methods.append(method)
    if missing_methods:
        raise _MissingInterfaceError(
            f"Missing method: {', '.join(missing_methods)}"
        )

    # Test discover_risk_patterns
    print("\n🎯 Step 5: Testing discover_risk_patterns()...")
    sample_clauses = [
        "The party shall indemnify and hold harmless all damages and losses.",
        "This agreement shall be governed by the laws of the state of California.",
        "Payment shall be made within thirty days of invoice date.",
        "The licensee must not disclose confidential information to third parties.",
        "Company agrees to comply with all applicable laws and regulations.",
    ]

    # The return value is not inspected; the step reads the
    # `discovered_patterns` attribute of the instance afterwards.
    lda.discover_risk_patterns(sample_clauses)
    print(f"   ✅ Discovered {len(lda.discovered_patterns)} patterns")

    # Test extract_risk_features
    print("\n⚙️  Step 6: Testing extract_risk_features()...")
    test_clause = "The party shall indemnify and hold harmless against all liability."
    # Used below via .get() / .items(), i.e. expected to be a mapping.
    features = lda.extract_risk_features(test_clause)

    print(f"   ✅ Extracted {len(features)} features")
    print("   📊 Sample features:")
    print(f"      - risk_intensity: {features.get('risk_intensity', 0):.3f}")
    print(f"      - obligation_strength: {features.get('obligation_strength', 0):.3f}")
    print(f"      - legal_complexity: {features.get('legal_complexity', 0):.3f}")
    print(f"      - liability_terms_density: {features.get('liability_terms_density', 0):.3f}")

    # Verify feature types: plain Python numbers and numpy scalars are accepted.
    for key, value in features.items():
        if not isinstance(value, (int, float, np.integer, np.floating)):
            print(f"   ❌ Feature '{key}' has wrong type: {type(value)}")
            raise TypeError(f"Feature '{key}' must be numeric")

    print(f"   ✅ All {len(features)} features are numeric")

    # NOTE(review): steps 7-10 only call each method and print what it
    # returns; they fail only if the call (or the printing) raises.

    # Test clean_clause_text
    print("\n🧹 Step 7: Testing clean_clause_text()...")
    dirty_text = "  This   is  a    test  clause   with   extra    spaces.  "
    clean_text = lda.clean_clause_text(dirty_text)
    print(f"   Before: '{dirty_text}'")
    print(f"   After:  '{clean_text}'")
    print("   ✅ Text cleaned successfully")

    # Test get_risk_labels
    print("\n🏷️  Step 8: Testing get_risk_labels()...")
    new_clauses = [
        "Party agrees to indemnify all damages.",
        "Governed by California law.",
    ]
    labels = lda.get_risk_labels(new_clauses)
    print(f"   ✅ Labels: {labels}")
    print(f"   ✅ Type: {type(labels)}")
    print(f"   ✅ Length: {len(labels)}")

    # Test get_topic_distribution
    print("\n📊 Step 9: Testing get_topic_distribution()...")
    # `.shape` and `.sum(axis=1)` below assume an array-like 2-D result
    # (presumably a numpy array — confirm against risk_discovery).
    dist = lda.get_topic_distribution(new_clauses)
    print(f"   ✅ Distribution shape: {dist.shape}")
    print(f"   ✅ Sample distribution: {dist[0]}")
    print(f"   ✅ Sum per document: {dist.sum(axis=1)}")

    # Test get_discovered_risk_names
    print("\n📝 Step 10: Testing get_discovered_risk_names()...")
    names = lda.get_discovered_risk_names()
    print(f"   ✅ Risk names: {names[:3]}...")
    print(f"   ✅ Total names: {len(names)}")

    print("\n" + "=" * 60)
    print("🎉 ALL TESTS PASSED!")
    print("=" * 60)
    print("\n✅ LDARiskDiscovery is fully compatible with trainer")
    print("✅ All required methods implemented")
    print("✅ All required attributes present")
    print("\n🚀 Ready to run: python3 train.py")

except ImportError as e:
    print(f"\n❌ Import error: {e}")
    print("   sklearn may not be installed")
    # sys.exit rather than the bare exit(): the latter is injected by the
    # `site` module for interactive use and is not guaranteed to exist.
    sys.exit(1)

except _MissingInterfaceError as e:
    # Only the explicit interface checks (steps 3-4) land here.
    print(f"\n❌ Attribute error: {e}")
    print("   Missing required method or attribute")
    sys.exit(1)

except Exception as e:
    # Anything else, including an AttributeError raised inside a method
    # under test, is a real failure: show the traceback.
    print(f"\n❌ Test failed: {e}")
    traceback.print_exc()
    sys.exit(1)