# Clause-similarity analysis pipeline — script entry point.
from ingestion.pdf_reader import extract_text_from_pdf
from preprocessing.clause_extraction import extract_clauses
from embeddings.sbert_encoder import generate_embeddings
from storage.faiss_index import create_faiss_index
from analysis.similarity_search import get_similar
from analysis.common_analyzer import analyze_pair
from output.report_generator import generate_report
import numpy as np


def main() -> None:
    """Run the end-to-end clause-similarity pipeline on the sample policy PDF.

    Steps: extract text from the PDF, split it into clauses, embed the
    clauses, index the embeddings with FAISS, then compare every clause
    against its nearest neighbours and collect flagged issues into a report.
    """
    # Load document
    text = extract_text_from_pdf("data/sample_docs/policy.pdf")

    # Clause extraction — assumes each clause is a dict with a "text" key
    # (indexed as clauses[i]["text"] below).
    clauses = extract_clauses(text)

    # Embeddings and FAISS index
    embeddings = generate_embeddings(clauses)
    index = create_faiss_index(embeddings)

    results = []
    for i, emb in enumerate(embeddings):
        # Nearest-neighbour indices and distances for this clause.
        idxs, dists = get_similar(index, emb)
        for j, dist in zip(idxs, dists):
            if i == j:
                # Skip the trivial self-match.
                continue
            # Map a non-negative FAISS distance to a similarity in (0, 1]:
            # distance 0 -> similarity 1, growing distance -> similarity -> 0.
            similarity = 1 / (1 + dist)
            # Use new Common Analyzer (Centralized Logic)
            issue_type, score = analyze_pair(
                clauses[i]["text"], clauses[j]["text"], similarity
            )
            if issue_type:
                results.append(
                    {
                        "type": issue_type,
                        "confidence": score,
                        "clause_1": clauses[i]["text"],
                        "clause_2": clauses[j]["text"],
                    }
                )

    generate_report(results)
    print("✅ Analysis completed. Report generated.")


if __name__ == "__main__":
    main()