Spaces:
Sleeping
Sleeping
| import sys | |
| import os | |
| import numpy as np | |
| from sentence_transformers import SentenceTransformer, util | |
# Make the current working directory importable so the project packages
# (``analysis`` and ``preprocessing``) can be resolved from it.
sys.path.append(os.getcwd())

try:
    from analysis.common_analyzer import analyze_pair
    from preprocessing.clause_extraction import extract_clauses
except ImportError:
    # First attempt failed: additionally expose the ``analysis`` and
    # ``preprocessing`` sub-directories of the working directory on
    # sys.path, then retry the very same imports.
    sys.path.extend(
        os.path.join(os.getcwd(), package_dir)
        for package_dir in ('analysis', 'preprocessing')
    )
    from analysis.common_analyzer import analyze_pair
    from preprocessing.clause_extraction import extract_clauses
def test_reproduction():
    """Run a two-part, print-only reproduction of the analysis pipeline.

    Section 1 embeds two hard-coded audit-retention sentences with the
    ``all-MiniLM-L6-v2`` sentence-transformer, computes their cosine
    similarity, and passes both texts plus the score to ``analyze_pair``.
    When the returned label is ``"CANDIDATE"`` the pair is additionally
    sent through ``NLIVerifier.predict``.

    Section 2 calls ``extract_clauses`` on a small list of page dicts and
    reports whether the first extracted clause carries both a ``'page'``
    and a ``'line'`` key.

    Nothing is asserted or returned; every outcome is reported via
    ``print`` only.
    """
    print("--- Section 1: Core Logic Test ---")

    retain_text = "Audit reports must be retained for a minimum of three (3) years."
    delete_text = "Audit reports shall be deleted after one (1) year to reduce storage overhead."
    for position, sentence in enumerate((retain_text, delete_text), start=1):
        print(f"Text {position}: {sentence}")

    # Step 1: cosine similarity between the two sentence embeddings.
    print("Loading embedding model...")
    encoder = SentenceTransformer('all-MiniLM-L6-v2')
    retain_vec = encoder.encode(retain_text)
    delete_vec = encoder.encode(delete_text)
    similarity = util.cos_sim(retain_vec, delete_vec).item()
    print(f"Similarity Score: {similarity:.4f}")

    # Step 2: first-phase classification of the pair.
    print("Running analyze_pair...")
    verdict = analyze_pair(retain_text, delete_text, similarity)
    label, conf, reason = verdict
    print(f"Result: Label={label}, Conf={conf}, Reason={reason}")

    if label == "CANDIDATE":
        print("!!! PASSED Phase 1: ACCEPTED as CANDIDATE")

        # Step 3: only candidates go on to NLI verification. The verifier
        # is imported here, so it is only loaded when this branch runs.
        from analysis.nli_verifier import NLIVerifier

        print("\nRunning NLI Verification (Phase 2)...")
        nli = NLIVerifier()
        nli_outcome = nli.predict(retain_text, delete_text)
        is_contra, nli_conf, nli_label = nli_outcome
        print(f"NLI Result: IsContra={is_contra}, Conf={nli_conf}, Label={nli_label}")
    elif label:
        print(f"!!! PASSED Phase 1: ACCEPTED as {label} (No NLI needed usually, but logic might vary)")
    else:
        print("!!! PASSED Phase 1: REJECTED (None)")

    print("\n--- Section 2: Pipeline & Metadata Test ---")

    mock_pages = [
        {"text": "Section 1. This is a test clause on page 1.", "page": 1},
        {"text": "Section 2. This is another clause on page 2.", "page": 2},
    ]

    print("Testing extract_clauses with structured input...")
    extracted = extract_clauses(mock_pages)

    # The first clause must expose both metadata keys; the emptiness check
    # comes first so ``extracted[0]`` is never evaluated on an empty result.
    has_metadata = len(extracted) > 0 and all(
        key in extracted[0] for key in ('page', 'line')
    )
    if not has_metadata:
        print("FAIL: Metadata extraction failed.")
    else:
        print(f"SUCCESS: Extracted {len(extracted)} clauses with metadata.")
        print(f"Sample: {extracted[0]}")
# Script entry point: run the reproduction only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    test_reproduction()