{ "cells": [ { "cell_type": "markdown", "id": "bad00304-0e69-495f-9ce2-e60ba694054f", "metadata": {}, "source": [ "# Contract Analysis System - Testing Notebook\n", "\n", "## Overview\n", "This notebook provides comprehensive testing for the Contract Analysis System, which includes document reading, text processing, contract validation, and AI-powered analysis using local LLMs (Ollama).\n", "\n", "## System Architecture\n", "- **Document Reader**: Handles PDF, DOCX, and text files\n", "- **Text Processor**: Advanced NLP for legal text analysis\n", "- **Contract Validator**: Determines if document is a valid contract\n", "- **LLM Manager**: Unified interface for Ollama, OpenAI, and Anthropic\n", "- **Contract Classifier**: AI-powered contract categorization\n", "- **Model Manager**: Handles model loading and caching\n" ] }, { "cell_type": "markdown", "id": "07d05757-5201-4bdd-9284-9dd5be7c65a6", "metadata": {}, "source": [ "## Import Dependencies" ] }, { "cell_type": "code", "execution_count": 1, "id": "4cc1afe5-a2cb-4d2b-acb1-a79908b5327b", "metadata": {}, "outputs": [ { "ename": "ImportError", "evalue": "cannot import name 'UniversalMarketComparator' from 'services.market_comparator' (/Users/itobuz/projects/satyaki/contract_guard_ai/notebooks/../services/market_comparator.py)", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[1], line 30\u001b[0m\n\u001b[1;32m 28\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mservices\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mrisk_analyzer\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m MultiFactorRiskAnalyzer\n\u001b[1;32m 29\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m 
\u001b[39m\u001b[38;5;21;01mservices\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcontract_classifier\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m ContractClassifier\n\u001b[0;32m---> 30\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mservices\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmarket_comparator\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m UniversalMarketComparator\n\u001b[1;32m 32\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m✅ All modules imported successfully!\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", "\u001b[0;31mImportError\u001b[0m: cannot import name 'UniversalMarketComparator' from 'services.market_comparator' (/Users/itobuz/projects/satyaki/contract_guard_ai/notebooks/../services/market_comparator.py)" ] } ], "source": [ "import os\n", "import sys\n", "import json\n", "from pathlib import Path\n", "from pprint import pprint\n", "\n", "\n", "# Add parent directory to path for module imports\n", "sys.path.append('..')\n", "\n", "# Import all system components\n", "from utils.logger import log_info\n", "from utils.logger import log_error\n", "from config.risk_rules import ContractType\n", "from utils.validators import ContractValidator\n", "from utils.text_processor import TextProcessor\n", "from utils.logger import ContractAnalyzerLogger\n", "from services.term_analyzer import TermAnalyzer\n", "from utils.document_reader import DocumentReader\n", "from model_manager.llm_manager import LLMManager\n", "from model_manager.llm_manager import LLMProvider\n", "from model_manager.model_loader import ModelLoader\n", "from services.clause_extractor import ClauseExtractor\n", "from services.clause_extractor import ExtractedClause\n", "from services.protection_checker import ProtectionChecker\n", "from services.llm_interpreter import LLMClauseInterpreter\n", "from 
services.negotiation_engine import NegotiationEngine\n", "from services.risk_analyzer import MultiFactorRiskAnalyzer\n", "from services.contract_classifier import ContractClassifier\n", "from services.market_comparator import UniversalMarketComparator\n", "\n", "print(\"✅ All modules imported successfully!\")\n" ] }, { "cell_type": "markdown", "id": "41de1c33-63f7-4641-88f8-cdd34904b471", "metadata": {}, "source": [ "## Configuration" ] }, { "cell_type": "code", "execution_count": 2, "id": "656f2ca4-3a2e-41bb-96e9-3b464ed138b3", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Configuration Settings:\n", " pdf_file_path: ../../../../Downloads/Satyaki Mitra - Employee Agreement - 2021.pdf\n", " use_spacy: True\n", " ollama_base_url: http://localhost:11434\n", " log_directory: contract_analysis_logs\n" ] } ], "source": [
 "# Configuration settings.\n",
 "# Paths and endpoints may be overridden through environment variables so the\n",
 "# notebook can run on another machine without editing this cell; when a\n",
 "# variable is unset the original default is used.\n",
 "CONFIG = {\n",
 "    \"pdf_file_path\"   : os.environ.get(\"CONTRACT_PDF_PATH\", \"../../../../Downloads/Satyaki Mitra - Employee Agreement - 2021.pdf\"),\n",
 "    \"use_spacy\"       : True,  # Set to False if spaCy not installed\n",
 "    \"ollama_base_url\" : os.environ.get(\"OLLAMA_BASE_URL\", \"http://localhost:11434\"),\n",
 "    \"log_directory\"   : os.environ.get(\"CONTRACT_LOG_DIR\", \"contract_analysis_logs\"),\n",
 "}\n",
 "\n",
 "# Display configuration\n",
 "print(\"Configuration Settings:\")\n",
 "for key, value in CONFIG.items():\n",
 "    print(f\" {key}: {value}\")"
 ] }, { "cell_type": "markdown", "id": "868a3e47-58d2-4fdf-b311-0dda005cc722", "metadata": {}, "source": [ "## Document Reader Testing" ] }, { "cell_type": "code", "execution_count": 3, "id": "cd257232-7e9d-4d6e-9488-45b1fc1b8283", "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "📄 STEP 1: Testing Document Reader\n", "\n", "============================================================\n", "✅ Document read successfully!\n", "\n", "📊 Text length: 26,469 characters\n", "\n", "\n", "Text preview:\n", "\n", "--------------------------------------------------\n", "Itobuz Technologies Pvt. Ltd. 
Private and Confidential\n", "\n", "Agreement of Employment\n", "\n", "This Agreement for service (hereinafter referred to as “Agreement”) made and entered into on the 01st day of December 2022, by\n", "\n", "and between Itobuz Technologies Private Limited a Company registered under the Companies Act 2013 having registered office at\n", "\n", "STEP, IIT KHARAGPUR, P.S.- IIT KHARAGPUR, KHARAGPUR, WEST BENGAL 721302, INDIA, CIN No. U72200WB2010PTC150305\n", "\n", "(hereinafter referred to as the “Employer”)\n", "\n", "And\n", "\n", "Satyaki Mitra son of Debdas Mitra residing at 28/6, Nabin Senapati Lane, P.O. - Baishnab Para Bazaar, P.S. - Shibpur, Howrah,\n", "\n", "West Bengal - 711101 (hereinafter referred to as the “Employee”)\n", "\n", "RECITALS\n", "\n", "A. The Employer is engaged in the business of Software development and Information Technology based services (hereinafter\n", "\n", "referred to as the “Business”).\n", "\n", "B. The Employer had called for applications from the eligible candidates for the post of Data Scientist.\n", "\n", "C. After due process being carried out and a successful interview thereto an offer letter dated 30th November 2022 was\n", "\n", "forwarded by the Employer to the Employee.\n", "\n", "D. On processing the application and the relevant documents, the Employer found the Employee adequately qualified for the post\n", "\n", "and offered to appoint him as Data Scientist in the Company.\n", "\n", "E. The employee is willing to be employed by the Employer, and Employer is willing to employ Employee, on the terms and\n", "\n", "conditions herein set forth.\n", "\n", "FOR REASONS SET FORTH ABOVE, AND IN CONSIDERATION OF THE MUTUAL COVENANTS AND PROMISES OF THE PARTIES\n", "\n", "HERETO, EMPLOYER AND EMPLOYEE COVENANT AND AGREE AS FOLLOWS:\n", "\n", "1. 
Definition:\n", "\n", "The Parties to this Agreement hereby unconditionally agree that unless the context otherwise requires, the terms listed below when\n", "\n", "used in this Agreement shall have the meanings attached to them and these terms shall be interpreted accordingly. The\n", "\n", "terms listed below as used in this Agreement may be identified by the capitalization of the first letter of each principal word\n", "\n", "thereof. In addition to the terms defined below, certain other capitalized terms are defined elsewhere in this Agreement and\n", "\n", "whenever such terms are used in this Agreement they shall have their respective defined meanings, unless the context,\n", "\n", "expressly or by necessary implication, requires otherwise:\n", "\n", "1.1. “Client” shall mean any Person, introduced to the Company, with whom the Company enters into a business transaction.\n", "\n", "1.2. “Confidential Information” means all of the Company’s business plans, mechanisms, business-related functions, activities\n", "\n", "and services, customer lists, knowledge of customer needs and preferences, trade secrets, business strategies, marketing\n", "\n", "strategies, methods of operation, tax records, markets, other valuable information, confidential information and trade-related\n", "\n", "information relating to the business and activities of the Company and useful or necessary for the success of the Company’s\n", "\n", "business and activities. 
Confidential Information shall also include financial information, such as Company’s earnings,\n", "\n", "assets, debts, prices, pricing structure, estimates, volumes of customers, transaction details such as names or address,\n", "\n", "terms of services, contracts of particular transactions, or related information about Company employees, customers, potential\n", "\n", "customers; marketing information, such as details about ongoing or proposed marketing programs or agreements by or on\n", "\n", "behalf of the Company, projections, sales forecasts or results of marketing efforts or information about impending\n", "\n", "transactions; personnel information; and customer information, such as any compilation of past, existing or prospective\n", "\n", "customers, customers’ proposals or agreements between customers and status of customers’ accounts or credit, or related\n", "\n", "information about actual or prospective customers.\n", "\n", "1.3. “Dispute” shall have the meaning ascribed to it in Clause 19.1.\n", "\n", "1.4. “Effective Date” shall have the meaning ascribed to it Clause 4.\n", "\n", "1\n", "\n", "Itobuz Technologies Pvt. Ltd. Private and Confidential\n", "\n", "1.5. “Events of Default” shall have the meaning ascribed to it in Clause 13.\n", "\n", "1.6. “Indemnified Liabilities” shall have the meaning ascribed to it in Clause 14.\n", "\n", "1.7. “Indemnified Parties” shall have the meaning ascribed to it in Clause 14.\n", "\n", "1.8. “Intellectual Properties” shall have the meaning ascribed to it in Clause 9.1.\n", "\n", "1.9. “Law” includes all applicable statutes, enactments, acts of state legislatures or parliament, laws, ordinances, rules, bye-laws,\n", "\n", "regulations, notifications, guidelines, policies, directions, directives, and orders of any governmental authority, statutory\n", "\n", "authority, tribunal, board, court or recognized stock exchange of India or any other relevant jurisdiction.\n", "\n", "1.10. 
“Manual” shall refer to the Human Resource Manual governing the workplace policies for all employees and employee code\n", "\n", "of Conduct.\n", "\n", "1.11. “Person” means and includes an individual, a sole proprietorship, an association, syndicate, a corporation, a firm, a\n", "\n", "partnership, a joint venture, a trust, an unincorporated organization, a joint-stock company, a limited liability company or\n", "\n", "other entity or organization, body corporate, governmental authority, judicial authority, a natural person in his capacity as\n", "\n", "trustee, executor, administrator, or other legal representative and any other entity including a government or political\n", "\n", "subdivision, or an agency or instrumentality thereof and/or any other legal entity.\n", "\n", "1.12. “Proprietary Information” shall mean the business, technical and financial information (including, without limitation, the\n", "\n", "identity of and information relating to customers or employees) the Employer develops, learns, or obtains in connection\n", "\n", "with its engagement with a Prospective Client/Client and/or that are received by or for Company in confidence.\n", "\n", "1.13. “Prospective Clients” shall mean any person to whom the Employer under this Agreement approaches, advertises, and/or\n", "\n", "otherwise communicates in relation to the initiation of a Transaction.\n", "\n", "1.14. “Rupees” or “Rs.” or “INR” shall mean Indian Rupees, the lawful currency of the Republic of India.\n", "\n", "1.15. “Transaction” shall refer to (including but not limited to the License Agreement) the engagement between a Client and the\n", "\n", "Company.\n", "\n", "2. Interpretation\n", "\n", "In this Agreement, except to the extent that the context otherwise requires:\n", "\n", "2.1. 
References to Clauses and Schedules are a reference to clauses in and annexures and schedules to this Agreement unless the\n", "\n", "context requires otherwise and the Schedules to this Agreement shall always be deemed to form part of this Agreement;\n", "\n", "2.2. The headings are inserted for convenience only and shall not affect the construction of this Agreement;\n", "\n", "3. Position held:\n", "\n", "The said Employee is hereby appointed as Data Scientist.\n", "\n", "4. Effective Date:\n", "\n", "4.1. The Effective Date of this Agreement shall be the date of execution of the Agreement.\n", "\n", "5. Terms of service:\n", "\n", "5.1. Employer hereby enters into this Agreement with the Employee to act as per the directions of the Employer.\n", "\n", "5.2. Subject to this Agreement and the supervision and pursuant to the orders, advice, and direction of Employer, the Employee\n", "\n", "shall perform such duties as are customarily performed by one holding such position in other businesses or enterprises of\n", "\n", "the same and similar nature as that engaged in by the Employer.\n", "\n", "5.3. Employee shall additionally render such other services and duties as may be assigned to him from time to time by the\n", "\n", "Employer.\n", "\n", "5.4. The Employee shall not initiate, maintain and/or otherwise make any form of communication to the Client in connection\n", "\n", "with the project allotted to him/her in any manner whatsoever.\n", "\n", "2\n", "\n", "Itobuz Technologies Pvt. Ltd. Private and Confidential\n", "\n", "5.5. The Employee shall not make any misrepresentations and/or provide false information with regards to the products and\n", "\n", "services of the Company.\n", "\n", "5.6. The Employee understands, acknowledges, and accepts that his conduct at the workplace shall be governed by the Human\n", "\n", "Resource Manual made available to all employees and that he shall abide by all terms stipulated in the said Manual.\n", "\n", "6. 
Probation and confirmation:\n", "\n", "6.1. The Employee shall be on probation for a period of 6 months. The decision of the management on the performance of the\n", "\n", "Employee during the period of probation is final and binding on the Employee.\n", "\n", "6.2. If the candidate wants to leave the company in the probation period, he/she has to serve a notice period of 1 month (not\n", "\n", "negotiable).\n", "\n", "7. Hours of work:\n", "\n", "The Employee is required to work from 10 a.m. to 7 p.m. during the Weekdays. The weekly holiday would be on Saturday and Sunday.\n", "\n", "8. Remuneration:\n", "\n", "8.1. The Employer shall pay the Employee a salary of INR 7.06 LPA during the period of probation.\n", "\n", "8.2. Upon successful completion of probation, the Employer shall pay the Employee a salary as discussed.\n", "\n", "9. Representations, Warranties, And Covenants Of The Employee:\n", "\n", "9.1. The Employee shall at all times faithfully, industriously and to the best of his ability, experience, and talent, perform all duties\n", "\n", "that may be required of and from him/her pursuant to the express and implicit terms hereof, to the reasonable satisfaction\n", "\n", "of the Employer. Such duties shall be rendered at the above-mentioned premises and at such other places or places as the\n", "\n", "Employer shall in good faith require or as the interests, needs, and opportunities of the Employer (including but not limited\n", "\n", "to the Business) shall require or make advisable.\n", "\n", "9.2. The Employee shall devote all of his/her time, attention, knowledge, and skill solely and exclusively to the Business and the\n", "\n", "interests of Employer, and Employer shall be entitled to all benefits, emoluments, profits, or other issues arising from or\n", "\n", "incident to any and all work services and advice of Employee.\n", "\n", "9.3. 
The Employee represents and covenants that he/she is not having and or he shall not have the right to make any contracts\n", "\n", "or other commitments for or on behalf of the Employer without the written consent of the Employer.\n", "\n", "9.4. The Employee shall not delegate any of his/her duties and obligations under this Agreement in any manner whatsoever.\n", "\n", "9.5. The Employee represents warrants and covenants that he /she is not and or he/she shall not apply in any company\n", "\n", "associated with the same Industry as the Employer carries out and during the probation period or during his term of service\n", "\n", "as well as a calendar period of twenty-four months after termination.\n", "\n", "9.6. The Employee expressly agrees that during the term thereof he/she will not be interested directly or indirectly, in any form,\n", "\n", "fashion, or manner as partner, officer, director, stockholder, advisor, Employee or any other forum or capacity in any other\n", "\n", "business or any other allied trade.\n", "\n", "9.7. The employee agrees and accepts that upon termination all company assets, resources, online accounts, passwords etc.\n", "\n", "needed to be handed over to the company within 7 working days.\n", "\n", "10. Restrictive Covenants Of The Employee:\n", "\n", "10.1. The Employee hereby agrees, during the term of this Agreement, not to directly or indirectly carry on or be directly engaged\n", "\n", "or interested in any business or demonstrably anticipated business which competes with the business of the Company.\n", "\n", "3\n", "\n", "Itobuz Technologies Pvt. Ltd. Private and Confidential\n", "\n", "10.2. The Employee agrees that he/she shall not assist any other person or organization in competing or in preparing to compete\n", "\n", "with any business or demonstrably anticipated business of the Company.\n", "\n", "10.3. 
The Employee agrees that in case of new business and or service opportunities which are similar in all material respects to\n", "\n", "the business of the Company in relation to in any geographical region other than those in which the Company operates, the\n", "\n", "Employee shall not engage in such business opportunity, without the prior written approval of the Company post two years\n", "\n", "of resignation or termination.\n", "\n", "10.4. The employee agrees acknowledges and accepts that he/she cannot take on side projects or approach a possible client with\n", "\n", "intention to take work for themselves. The employee further agrees that the employee shall not carry out the business\n", "\n", "carried out by the Company through contracts executed in the name family members, relatives and friends.\n", "\n", "10.5. The Employee agrees and accepts that he/she shall not take up professional courses without informing and taking approval\n", "\n", "from management during course of employment.\n", "\n", "10.6. The Employee agrees acknowledges and accepts that during the Term of this Agreement the Employee shall not, either\n", "\n", "directly or indirectly solicit or entice away or endeavor to assist any other Third Party in an endeavor to solicit or to entice\n", "\n", "away from the Company any employee, Prospective Client and/or Client.\n", "\n", "10.7. The Employee agrees and acknowledges that he shall not during the tenure of his employment apply and/or enter into any\n", "\n", "part-time, full-time, contractual, freelance, or other form of transaction with financial consideration with any third party.\n", "\n", "10.8. The Employee agrees and accepts that he/she cannot contact any of the clients in any other capacity except for the\n", "\n", "company’s requirement.\n", "\n", "10.9. The Employee agrees and accepts that the employee shall not work in paid or free consultation on the company’s domain.\n", "\n", "10.10. 
The Employee agrees, acknowledges, and accepts that during the Term of this Agreement, the Employee shall take no action\n", "\n", "which is intended, or would reasonably be expected, to harm the Company or its or their reputation or which would\n", "\n", "reasonably be expected to lead to unwanted or unfavorable publicity to the Company.\n", "\n", "11. Penalties for policy violations\n", "\n", "11.1. The Employee agrees and accepts that he/she can be immediately suspended from his position roles, and privileges upon\n", "\n", "Company policy Violations and restrictions set forth in clause 10.\n", "\n", "11.2. The Employee agrees and accepts that he/she will not be paid his/her running month salary, earned leaves encashments,\n", "\n", "and any other form of compensation in lieu of the damages caused to the company subject to proper enquiry and\n", "\n", "investigation.\n", "\n", "11.3. The Employee will not get any experience letter, release letter or any other certificate from the company upon breach of\n", "\n", "Clause 10. The employee shall only get a termination letter which shall state the reason for his/her termination mentioned.\n", "\n", "11.4. Upon any malpractice proven upon instances of subsequent verification for future employment in other institutions and\n", "\n", "background checks from any other company, negative feedback shall be provided with the reason for termination mentioned.\n", "\n", "11.5. The company has the right to sue the employee for damages upon instances of malpractice, fraud, data theft, theft, breach of\n", "\n", "confidentiality as the case may be.\n", "\n", "12. Leave:\n", "\n", "4\n", "\n", "Itobuz Technologies Pvt. Ltd. Private and Confidential\n", "\n", "12.1. The Employee who is serving a probationary period i.e. for 6 months from the date of joining will not be entitled to any leave\n", "\n", "in the said period. If any leave taken by the employee will be treated as leave without pay. 
During the period of LOP, the\n", "\n", "employee is not entitled to any pay or allowance.\n", "\n", "The following leaves are allotted to the employee after completion of a successful probationary period -\n", "\n", "12.1.1. Casual Leave (CL)\n", "\n", "12.1.2. Sick Leave (SL)\n", "\n", "12.1.3. Earned Leave (EL)\n", "\n", "12.1.4. Maternity Leave (ML)\n", "\n", "12.1.5. Loss Of Pay (LOP)\n", "\n", "12.2. The duration of each form of leave is stipulated in the Human Resource Handbook provided to the employees. Period of leave\n", "\n", "shall be subject to leave policies framed by the management which shall be published in the handbook and duly notified to\n", "\n", "the employees.\n", "\n", "12.3. Notwithstanding anything provided in the above mentioned clauses an employee shall not be entitled to any form of leave\n", "\n", "other than the one stipulated in the Leave policy provided in the Human Resource Handbook.\n", "\n", "13. Confidentiality and non- disclosure:\n", "\n", "13.1. Employee shall not at any time, in any fashion, form or manner, either directly or indirectly divulge disclose or communicate\n", "\n", "to any person, firm or corporation, including but not limited to any and all persons directly and/or indirectly engaged by the\n", "\n", "Employer, in any manner whatsoever disclose any information of any kind, nature, description concerning any matters\n", "\n", "affecting or relating to the Business, including without limitation, the names of any of the customers, the prices it obtains or\n", "\n", "has obtained or at which it sells or has sold its services or any other information concerning the Business, its manner of\n", "\n", "operation, or its plans, process, or other date of any kind, nature, or description without regard to whether any or all of the\n", "\n", "foregoing matters would be deemed confidential, material, or important, the project cost or client contact information.\n", "\n", "13.2. 
Subject to clause 13.1 the Employee shall not showcase any project worked upon by him during the tenure of his\n", "\n", "employment at the Company as a personal portfolio or reflect the same on his Curriculum Vitae.\n", "\n", "13.3. The parties hereby stipulate that as between them, the foregoing matters are important, material and confidential and\n", "\n", "gravely effective to the successful conduct of the Business, its goodwill and that any breach of the terms of this section is a\n", "\n", "material breach of this Agreement.\n", "\n", "13.4. The Employee understands, acknowledges, agrees, and affirms that any form of conversation whether through email, SMS,\n", "\n", "chat and/or any other offline and/or online networking and communication platform between any and all people engaged by\n", "\n", "the Employer, is private and confidential. Any such disclosure shall be deemed to be material breach of this Agreement.\n", "\n", "13.5. Any all terms mentioned in this Agreement, including but not limited to payment terms referred to in Clause 8, shall be treated\n", "\n", "confidential and such disclosure to any person shall be deemed to be a material breach of this Agreement.\n", "\n", "14. Events Of Default:\n", "\n", "14.1. The Company shall, pursuant to Clause 14.2, have the right to terminate the Agreement with the Employee in case of the\n", "\n", "following events (hereinafter referred to as “Events of Default”):\n", "\n", "14.2. Any breach of the duties of the Employee under Clauses 5,7 and 10 of this Agreement.\n", "\n", "14.3. Violation of any of the rights of the Company under this Agreement.\n", "\n", "14.4. Breach of any representations and/or warranties of the Employee under this Agreement under Clause 9 of this Agreement.\n", "\n", "5\n", "\n", "Itobuz Technologies Pvt. Ltd. Private and Confidential\n", "\n", "14.5. Breach of any of the provisions of Clauses 11 and 12 of this Agreement.\n", "\n", "14.6. 
Breach of any of the provisions of Clause 18 of this Agreement.\n", "\n", "15. Indemnification:\n", "\n", "15.1. The Employee shall defend, protect, indemnify and hold harmless the Employer and his agents, successors, and assignees\n", "\n", "(“Indemnified Parties”) from and against any and all claims in connection therewith (collectively, the “Indemnified\n", "\n", "Liabilities”), incurred by the Indemnified Parties as a result of, arising out of or relating to\n", "\n", "15.1.1. any misrepresentation by the Employee to the Indemnified Parties,\n", "\n", "15.1.2. the breach of representations and warranties of the Employee made in this Agreement,\n", "\n", "15.1.3. The breach of any representations, warranties, covenants and declarations made by the Employee under this\n", "\n", "Agreement.\n", "\n", "15.1.4. Any material breaches of this Agreement.\n", "\n", "15.1.5. Breach of any and all provisions in relation to confidentiality and non-disclosure.\n", "\n", "15.2. The Employer reserves the right to deduct the amount payable to the Employer under this Clause from the compensation\n", "\n", "provided to the Employee under Clause 8 of this Agreement.\n", "\n", "16. Discontinuance Of Business As Termination Of Employment:\n", "\n", "Notwithstanding anything contained herein, in the event that Employer shall discontinue the Business, then this Agreement shall\n", "\n", "cease and terminate as of the last day of the month in which operations cease with the same force and effect as if such last day of the\n", "\n", "month were originally set forth as the termination date hereof.\n", "\n", "17. Termination of Employment and notice period:\n", "\n", "17.1. Notwithstanding anything contained herein the Employer reserves to terminate this Agreement at any point of time, for any\n", "\n", "reason whatsoever, with a notice of 15 days.\n", "\n", "17.2. 
The Employer can immediately terminate this Agreement, in case of any and all breaches by the Employee of the provisions\n", "\n", "of this Agreement.\n", "\n", "17.3. On termination, the Employee shall surrender to the management and stop the usage of all electronic devices, sim cards,\n", "\n", "visiting cards, stationery, gadgets, passwords, software or hardware and ancillary perks as awarded by the Employer during\n", "\n", "the course of employment.\n", "\n", "17.4. In instances of resignation on the part of the Employee, the Employee shall submit his application for resignation\n", "\n", "addressing the head of his department stating reasons for the same.\n", "\n", "17.5. Pursuant to clause 17.4 the employee shall be liable to serve a notice period of ninety days and shall attend to his workplace\n", "\n", "in a professional manner to execute his pending projects and transfer of such job descriptions allotted to his concerned\n", "\n", "colleagues.\n", "\n", "17.6. The employee agrees and accepts that he/she shall not contact any existing and previous clients for 5 years post-termination.\n", "Upon such malpractice, the Company shall approach the courts of law for damages and penal provisions.\n", "\n", "18. Intellectual Property Rights:\n", "\n", "6\n", "\n", "Itobuz Technologies Pvt. Ltd. Private and Confidential\n", "\n", "18.1. The Employee shall take all necessary steps to protect the Employers intellectual property rights and as such shall under all\n", "\n", "circumstances protect the intellectual property owned by the company failure or negligence to which shall result in\n", "\n", "stringent action taken by the Employer.\n", "\n", "18.2. 
The Employee affirms, acknowledges, agrees and understands that the Employer is the first owner of the copyright in\n", "\n", "relation to any website, the mobile application and its content developed by him during the course of his employment or\n", "\n", "such person that the Employer deems fit, for the purposes of the Indian Copyright Act, 1957.\n", "\n", "19. Waiver Of Modification Ineffective Unless In Writing:\n", "\n", "No waiver or modification of this Agreement or of any covenant, condition, or limitation herein contained shall be valid unless in\n", "\n", "writing and duly executed by the party to be charged therewith. Furthermore, no evidence of any waiver or modification shall be\n", "\n", "offered or received in evidence in any proceeding, arbitration or litigation between the parties arising out of or affecting this\n", "\n", "Agreement or the rights or obligations of any party hereunder, unless such waiver or modification is in writing, duly executed as\n", "\n", "aforesaid. The provisions of this paragraph may not be waived except as herein set forth.\n", "\n", "20. Governing Law, Jurisdiction And Dispute Resolution:\n", "\n", "20.1. Any dispute, controversy or claim arising out of or relating to this Agreement or the validity, interpretation, breach or\n", "\n", "termination thereof (“Dispute”), including claims seeking redress or asserting rights under the applicable law shall be\n", "\n", "amicably settled through mediation, in the offices of the Employer in Kolkata through mutual consultation and escalation at\n", "\n", "such offices of the Employer as Employer may designate.\n", "\n", "20.2. If the Dispute is not settled amicably as aforesaid within a period of [14] (Fourteen) calendar days, the matter would be\n", "\n", "referred to the courts of law.\n", "\n", "20.3. The governing law of the Agreement shall be the laws of the Republic of India.\n", "\n", "20.4. 
Subject to the aforesaid clause, the High Court of Calcutta and subordinate courts shall have exclusive jurisdiction in all\n", "\n", "matters arising out of this Agreement.\n", "\n", "21. Severability:\n", "\n", "Any provision in this Agreement, which is or may become prohibited or unenforceable in any jurisdiction, shall, as to such jurisdiction, be\n", "\n", "ineffective to the extent of such prohibition or unenforceability without invalidating the remaining provisions of this Agreement or\n", "\n", "affecting the validity or enforceability of such provision in the same or any other jurisdiction. Without prejudice to the foregoing, the\n", "\n", "Parties will immediately negotiate in good faith to replace such provision with a proviso, which is not prohibited or unenforceable\n", "\n", "and has, as far as possible, the same legal and commercial effect as that which it replaces\n", "\n", "22. Survival:\n", "\n", "Notwithstanding anything contained in this Agreement, the rights and obligations under Clauses 10 (Restrictive Covenants),\n", "\n", "13(Confidentiality and Non-disclosure), 15 (Indemnification) 18 (Intellectual Property Rights), 20 (Governing Law, Jurisdiction and\n", "\n", "Dispute Resolution), 22 (Binding effect of Contract) shall survive the termination of this Agreement.\n", "\n", "23. Agreement terms to be exclusive:\n", "\n", "7\n", "\n", "Itobuz Technologies Pvt. Ltd. Private and Confidential\n", "\n", "23.1. This written Agreement contains the sole and entire agreement between the parties and supersedes any other agreements\n", "\n", "between them.\n", "\n", "23.2. 
The parties acknowledge and agree that neither of them has made any representation with respect to the subject matter of\n", "\n", "this Agreement or any representations inducing the execution and delivery hereof except such representations as are\n", "\n", "specifically set forth herein, and each party acknowledges that he or it has relied on his or its own judgment in entering into\n", "\n", "this Agreement.\n", "\n", "23.3. The parties further acknowledge that any statements or representations that may have heretofore been made by either of\n", "\n", "them to the other are void and of no effect and that neither of them has relied thereon in connection with his or its dealings\n", "\n", "with the other.\n", "\n", "23.4. The rights of the Parties under this Agreement are cumulative and not alternative. Notwithstanding anything contained\n", "\n", "herein, none of the terms of this Agreement shall be prejudicial to the rights the Employer that may otherwise exist under\n", "\n", "Applicable Law.\n", "\n", "24. Binding Effect Of Contract:\n", "\n", "This Agreement shall be binding on and inure to the benefit of the respective parties and their respective heirs, legal representatives,\n", "\n", "successors, and assignees.\n", "\n", "8\n", "\n", "Itobuz Technologies Pvt. Ltd. Private and Confidential\n", "\n", "IN WITNESS WHEREOF THIS AGREEMENT HAS BEEN SIGNED BY THE DULY AUTHORIZED REPRESENTATIVES OF THE\n", "\n", "PARTIES THE DAY AND YEAR FIRST BEFORE WRITTEN.\n", "\n", "Employer. 
Employee.\n", "\n", "FOR Itobuz Technologies Private Limited\n", "\n", "Signature: Signature:\n", "\n", "Name: Sneh Sagar Prajapati Name:\n", "\n", "Designation: Director Date:\n", "\n", "Date: 01-12-2022\n", "\n", "9\n", "--------------------------------------------------\n", "\n", "\n", "\n" ] } ], "source": [ "# Initialize Document Reader\n", "print(\"📄 STEP 1: Testing Document Reader\\n\")\n", "print(\"=\" * 60)\n", "\n", "def test_document_reader(file_path):\n", " \"\"\"\n", " Test the document reader with various file types\n", " \"\"\"\n", " reader = DocumentReader()\n", " \n", " try:\n", " # Read the document\n", " file_contents = reader.read_file(file_path_or_bytes = file_path,\n", " file_type = \"pdf\",\n", " )\n", " \n", " # Extract text content\n", " if isinstance(file_contents, dict):\n", " text = file_contents.get('text', '') or file_contents.get('content', '')\n", " metadata = {k: v for k, v in file_contents.items() if k != 'text'}\n", " \n", " else:\n", " text = str(file_contents)\n", " metadata = dict()\n", " \n", " \n", " # Display results\n", " print(f\"✅ Document read successfully!\\n\")\n", " print(f\"📊 Text length: {len(text):,} characters\\n\")\n", " print(f\"\\nText preview:\\n\")\n", " print(\"-\" * 50)\n", " print(text)\n", " print(\"-\" * 50)\n", " print(\"\\n\\n\")\n", " \n", " if metadata:\n", " print(f\"📋 Metadata: {list(metadata.keys())}\")\n", " \n", " return text, metadata\n", " \n", " except Exception as e:\n", " print(f\"❌ Error reading document: {e}\")\n", " return None, None\n", "\n", "\n", "# Test with configured PDF file\n", "document_text, document_metadata = test_document_reader(file_path = CONFIG[\"pdf_file_path\"])\n", "\n", "if not document_text:\n", " print(\"⚠️ No text extracted.\")\n" ] }, { "cell_type": "markdown", "id": "921523bd-257a-4970-aef6-8ecd1b4e7e1f", "metadata": {}, "source": [ "## Contract Validation Testing" ] }, { "cell_type": "code", "execution_count": 4, "id": "6ad923b8-f2a3-4a6d-bf03-6e735af771a7", 
"metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "🔍 STEP 2: Testing Contract Validator\n", "\n", "============================================================\n", "📋 Running contract validation...\n", "📁 File Integrity: True - File check simulated - always passes in notebook\n", "\n", "📑 Contract Validation Results:\n", " Is Contract: True\n", " Confidence: high_confidence\n", " Message: Strong contract indicators detected (score: 130). This is highly likely a legal contract.\n", "\n", "\n", "📊 Detailed Validation Report:\n", " Total Score: 123\n", " Found Indicators: 37\n", " Anti-patterns: 1\n", "\n", " Key Features:\n", " - has_signature_block: False\n", " - has_effective_date: True\n", " - has_party_identification: True\n", " Top Indicators: ['agreement', 'contract', 'party', 'parties', 'hereinafter']\n" ] } ], "source": [ "# Initialize Contract Validator\n", "print(\"\\n🔍 STEP 2: Testing Contract Validator\\n\")\n", "print(\"=\" * 60)\n", "\n", "def test_contract_validation(text):\n", " \"\"\"\n", " Test if the document is a valid contract\n", " \"\"\"\n", " validator = ContractValidator()\n", " \n", " print(\"📋 Running contract validation...\")\n", " \n", " # Test 1: File integrity check (simulated)\n", " file_valid, file_message = True, \"File check simulated - always passes in notebook\"\n", " print(f\"📁 File Integrity: {file_valid} - {file_message}\\n\")\n", " \n", " # Test 2: Contract validation\n", " is_contract, validation_type, validation_message = validator.is_valid_contract(text = text)\n", " \n", " print(f\"📑 Contract Validation Results:\")\n", " print(f\" Is Contract: {is_contract}\")\n", " print(f\" Confidence: {validation_type}\")\n", " print(f\" Message: {validation_message}\\n\")\n", " \n", " # Test 3: Detailed validation report\n", " validation_report = validator.get_validation_report(text = text)\n", " \n", " print(f\"\\n📊 Detailed Validation Report:\")\n", " print(f\" Total Score: 
{validation_report['scores']['total']}\")\n", " print(f\" Found Indicators: {len(validation_report['found_indicators'])}\")\n", " print(f\" Anti-patterns: {len(validation_report['found_anti_patterns'])}\\n\")\n", " \n", " # Display key features\n", " features = validation_report['features']\n", " print(f\" Key Features:\")\n", " for feature, value in features.items():\n", " print(f\" - {feature}: {value}\")\n", " \n", " # Display top indicators\n", " if validation_report['found_indicators']:\n", " print(f\" Top Indicators: {validation_report['found_indicators'][:5]}\")\n", " \n", " return validation_report\n", "\n", "# Run validation test\n", "validation_report = test_contract_validation(text = document_text)\n" ] }, { "cell_type": "markdown", "id": "65c2ba01-6630-4d32-adec-8e3012f25139", "metadata": {}, "source": [ "## Text Processing Testing" ] }, { "cell_type": "code", "execution_count": 5, "id": "7ff06a32-9981-425e-9632-1a42e1fa6737", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "📝 STEP 3: Testing Text Processor\n", "============================================================\n", "[TextProcessor] spaCy model loaded successfully\n", "🔧 Initializing text processor...\n", "\n", "📊 1. Text Statistics:\n", " Character Count: 26469\n", " Word Count: 4061\n", " Sentence Count: 168\n", " Paragraph Count: 264\n", " Avg Words Per Sentence: 24.172619047619047\n", " Avg Chars Per Word: 6.517852745629155\n", " Language: en\n", "\n", "🏛️ 2. Legal Entity Extraction:\n", " Parties: 5 found\n", "\n", " Samples: ['The Company', 'Itobuz Technologies Private Limited', 'Employer', 'Client', 'Employee']\n", " Dates: 1 found\n", "\n", " Samples: ['01-12-2022']\n", " References: 15 found\n", "\n", "\n", "📝 3. Sentence Analysis:\n", " Total sentences: 218\n", " First 10 sentences with entities:\n", " 1. Itobuz Technologies Pvt.\n", "\n", " Entities: [('Itobuz Technologies Pvt', 'ORG')]\n", " 2. Ltd. 
Private and Confidential\n", "\n", "Agreement of Employment\n", "\n", "This Agreement for service (hereinafter referred to as “Agreement”) made and entered into on the 01st day of December 2022, by\n", "\n", "and between Itobuz Technologies Private Limited a Company registered under the Companies Act 2013 having registered office at\n", "\n", "STEP, IIT KHARAGPUR, P.S.- IIT KHARAGPUR, KHARAGPUR, WEST BENGAL 721302, INDIA, CIN No. U72200WB2010PTC150305\n", "\n", "(hereinafter referred to as the “Employer”)\n", "\n", "And\n", "\n", "Satyaki Mitra son of Debdas Mitra residing at 28/6, Nabin Senapati Lane, P.O. - Baishnab Para Bazaar, P.S. - Shibpur, Howrah,\n", "\n", "West Bengal - 711101 (hereinafter referred to as the “Employee”)\n", "\n", "RECITALS\n", "\n", " Entities: [('the 01st day of December 2022', 'DATE'), ('Itobuz Technologies Private Limited a Company', 'ORG'), ('the Companies Act 2013', 'LAW'), ('STEP', 'ORG'), ('IIT KHARAGPUR', 'ORG'), ('P.S.- IIT KHARAGPUR', 'ORG'), ('KHARAGPUR', 'ORG'), ('WEST BENGAL 721302', 'DATE'), ('INDIA', 'GPE'), ('CIN', 'ORG'), ('Satyaki Mitra', 'PERSON'), ('Debdas Mitra', 'PERSON'), ('28/6', 'CARDINAL'), ('Nabin Senapati Lane', 'ORG'), ('Para Bazaar', 'PERSON'), ('P.S.', 'GPE'), ('Howrah', 'PERSON'), ('West Bengal - 711101', 'PERSON')]\n", " 3. A. The Employer is engaged in the business of Software development and Information Technology based services (hereinafter\n", "\n", "referred to as the “Business”).\n", "\n", " Entities: [('A. The Employer', 'PRODUCT'), ('Software', 'NORP'), ('Information Technology', 'ORG')]\n", " 4. B. The Employer had called for applications from the eligible candidates for the post of Data Scientist.\n", "\n", " Entities: [('B. The Employer', 'PERSON'), ('Data Scientist', 'ORG')]\n", " 5. C. 
After due process being carried out and a successful interview thereto an offer letter dated 30th November 2022 was\n", "\n", "forwarded by the Employer to the Employee.\n", "\n", " Entities: [('30th November 2022', 'DATE'), ('Employer', 'LOC'), ('Employee', 'FAC')]\n", " 6. D. On processing the application and the relevant documents, the Employer found the Employee adequately qualified for the post\n", "\n", "and offered to appoint him as Data Scientist in the Company.\n", "\n", " Entities: [('D.', 'NORP'), ('Employer', 'ORG'), ('Data Scientist', 'ORG')]\n", " 7. E. The employee is willing to be employed by the Employer, and Employer is willing to employ Employee, on the terms and\n", "\n", "conditions herein set forth.\n", "\n", " Entities: [('Employer', 'LOC'), ('Employer', 'ORG')]\n", " 8. FOR REASONS SET FORTH ABOVE, AND IN CONSIDERATION OF THE MUTUAL COVENANTS AND PROMISES OF THE PARTIES\n", "\n", "HERETO, EMPLOYER AND EMPLOYEE COVENANT AND AGREE AS FOLLOWS:\n", "\n", "1.\n", "\n", " Entities: [('EMPLOYER', 'ORG'), ('1', 'CARDINAL')]\n", " 9. Definition:\n", "\n", "The Parties to this Agreement hereby unconditionally agree that unless the context otherwise requires, the terms listed below when\n", "\n", "used in this Agreement shall have the meanings attached to them and these terms shall be interpreted accordingly.\n", "\n", " 10. The\n", "\n", "terms listed below as used in this Agreement may be identified by the capitalization of the first letter of each principal word\n", "\n", "thereof.\n", "\n", " Entities: [('first', 'ORDINAL')]\n", "\n", "📦 4. 
Text Chunking:\n", " Created 9 chunks for analysis\n", "\n", " First chunk preview:\n", " Text: Itobuz Technologies Pvt Private and Confidential\n", "\n", "Agreement of Employment\n", "\n", "This Agreement for service (hereinafter referred to as “Agreement”) made and entered into on the 01st day of December 2022, by\n", "\n", "and between Itobuz Technologies Private Limited a Company registered under the Companies Act 2013 having registered office at\n", "\n", "STEP, IIT KHARAGPUR, P - IIT KHARAGPUR, KHARAGPUR, WEST BENGAL 721302, INDIA, CIN No U72200WB2010PTC150305\n", "\n", "(hereinafter referred to as the “Employer”)\n", "\n", "And\n", "\n", "Satyaki Mitra son of Debdas Mitra residing at 28/6, Nabin Senapati Lane, P - Baishnab Para Bazaar, P - Shibpur, Howrah,\n", "\n", "West Bengal - 711101 (hereinafter referred to as the “Employee”)\n", "\n", "RECITALS\n", "\n", "A The Employer is engaged in the business of Software development and Information Technology based services (hereinafter\n", "\n", "referred to as the “Business”) The Employer had called for applications from the eligible candidates for the post of Data Scientist After due process being carried out and a successful interview thereto an offer letter dated 30th November 2022 was\n", "\n", "forwarded by the Employer to the Employee On processing the application and the relevant documents, the Employer found the Employee adequately qualified for the post\n", "\n", "and offered to appoint him as Data Scientist in the Company The employee is willing to be employed by the Employer, and Employer is willing to employ Employee, on the terms and\n", "\n", "conditions herein set forth FOR REASONS SET FORTH ABOVE, AND IN CONSIDERATION OF THE MUTUAL COVENANTS AND PROMISES OF THE PARTIES\n", "\n", "HERETO, EMPLOYER AND EMPLOYEE COVENANT AND AGREE AS FOLLOWS:\n", "\n", "1 Definition:\n", "\n", "The Parties to this Agreement hereby unconditionally agree that unless the context otherwise requires, the terms listed 
below when\n", "\n", "used in this Agreement shall have the meanings attached to them and these terms shall be interpreted accordingly The\n", "\n", "terms listed below as used in this Agreement may be identified by the capitalization of the first letter of each principal word\n", "\n", "thereof In addition to the terms defined below, certain other capitalized terms are defined elsewhere in this Agreement and\n", "\n", "whenever such terms are used in this Agreement they shall have their respective defined meanings, unless the context,\n", "\n", "expressly or by necessary implication, requires otherwise:\n", "\n", "1 “Client” shall mean any Person, introduced to the Company, with whom the Company enters into a business transaction “Confidential Information” means all of the Company’s business plans, mechanisms, business-related functions, activities\n", "\n", "and services, customer lists, knowledge of customer needs and preferences, trade secrets, business strategies, marketing\n", "\n", "strategies, methods of operation, tax records, markets, other valuable information, confidential information and trade-related\n", "\n", "information relating to the business and activities of the Company and useful or necessary for the success of the Company’s\n", "\n", "business and activities\n", "\n", " Word count: 436\n", "\n", " Sentences: 0-16\n", "\n", "\n", "\n", "💰 5. 
Financial & Legal Elements:\n", " Monetary amounts: []\n", " Durations: [{'amount': '6', 'unit': 'month'}, {'amount': '1', 'unit': 'month'}, {'amount': '6', 'unit': 'month'}, {'amount': '15', 'unit': 'day'}, {'amount': '5', 'unit': 'year'}]\n", " Percentages: []\n", "\n", "\n" ] } ], "source": [ "# Initialize Text Processor\n", "print(\"\\n📝 STEP 3: Testing Text Processor\")\n", "print(\"=\" * 60)\n", "\n", "def test_text_processing(text: str, use_spacy: bool = True):\n", " \"\"\"\n", " Test advanced text processing capabilities\n", " \"\"\"\n", " processor = TextProcessor(use_spacy = use_spacy)\n", " \n", " print(\"🔧 Initializing text processor...\")\n", " \n", " # Test 1: Basic text statistics\n", " print(\"\\n📊 1. Text Statistics:\")\n", " text_statistics = processor.get_text_statistics(text = text)\n", " \n", " for key, value in text_statistics.items():\n", " print(f\" {key.replace('_', ' ').title()}: {value}\")\n", " \n", " # Test 2: Legal entity extraction\n", " print(\"\\n🏛️ 2. Legal Entity Extraction:\")\n", " legal_entities = processor.extract_legal_entities(text = text)\n", " legal_entity_counts = {k: len(v) for k, v in legal_entities.items() if v}\n", " \n", " for entity_type, count in legal_entity_counts.items():\n", " print(f\" {entity_type.title()}: {count} found\\n\")\n", " \n", " if ((entity_type in ['parties', 'dates', 'amounts']) and legal_entities[entity_type]):\n", " # Show first 10 samples\n", " samples = legal_entities[entity_type][:10] \n", " print(f\" Samples: {samples}\")\n", " \n", " # Test 3: Sentence extraction\n", " print(\"\\n📝 3. Sentence Analysis:\")\n", " sentences = processor.extract_sentences_advanced(text = text)\n", " print(f\" Total sentences: {len(sentences)}\")\n", " \n", " if sentences:\n", " print(\" First 10 sentences with entities:\")\n", " for i, sent in enumerate(sentences[:10]):\n", " print(f\" {i+1}. 
{sent['text']}\\n\")\n", " if sent['entities']:\n", " print(f\" Entities: {sent['entities']}\")\n", " \n", " # Test 4: Text chunking for analysis\n", " print(\"\\n📦 4. Text Chunking:\")\n", " chunks = processor.chunk_text_for_embedding(text = text, \n", " chunk_size = 512, \n", " overlap = 50,\n", " )\n", " \n", " print(f\" Created {len(chunks)} chunks for analysis\\n\")\n", " \n", " if chunks:\n", " print(f\" First chunk preview:\")\n", " print(f\" Text: {chunks[0]['text']}\\n\")\n", " print(f\" Word count: {chunks[0]['word_count']}\\n\")\n", " print(f\" Sentences: {chunks[0]['start_sentence']}-{chunks[0]['end_sentence']}\\n\\n\")\n", " \n", " # Test 5: Specialized legal extraction\n", " print(\"\\n💰 5. Financial & Legal Elements:\")\n", " monetary_amounts = processor.extract_monetary_amounts(text = text)\n", " durations = processor.extract_durations(text = text)\n", " percentages = processor.extract_percentages(text = text)\n", " \n", " print(f\" Monetary amounts: {monetary_amounts}\")\n", " print(f\" Durations: {durations}\")\n", " print(f\" Percentages: {percentages}\\n\\n\")\n", " \n", " return {'statistics' : text_statistics,\n", " 'legal_entities' : legal_entities,\n", " 'sentences' : sentences,\n", " 'chunks' : chunks,\n", " }\n", "\n", "# Run text processing test\n", "processing_results = test_text_processing(text = document_text, \n", " use_spacy = CONFIG[\"use_spacy\"],\n", " )\n" ] }, { "cell_type": "markdown", "id": "8cd98247-dc1d-4f19-a985-ffb666e557bb", "metadata": {}, "source": [ "## LLM Manager Testing (Ollama)" ] }, { "cell_type": "code", "execution_count": 6, "id": "cd182862-82b9-4e47-bed4-7bcd4b1a6d20", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "💬 STEP 4: Testing LLM Manager with Ollama\n", "============================================================\n", "🚀 Initializing LLM Manager...\n", "[Logger] Logging initialized. 
Logs: logs\n", "✅ Available LLM Providers: ['ollama']\n", "📚 Available Ollama Models: ['llama3:8b', 'mistral:7b', 'deepseek-r1:32b', 'qwen3:32b']\n", "\n", "🧪 1. Testing Basic Completion:\n", " Prompt: What are the key elements of an employment agreement?\n", " Response: An employment agreement, also known as an employment contract or employee agreement, is a written document that outlines the terms and conditions of an individual's employment with a company. The key elements of an employment agreement typically include:\n", "\n", "1. **Job Title and Responsibilities**: A clear description of the job title, duties, and responsibilities.\n", "2. **Term of Employment**: The duration of the employment agreement, which can be a fixed term or ongoing.\n", "3. **Compensation and Benefits**: Information about salary, wages, bonuses, commissions, benefits (e.g., health insurance, retirement plans), and any other forms of compensation.\n", "4. **Confidentiality and Non-Disclosure**: Clauses that prohibit the employee from disclosing confidential information or trade secrets during or after employment.\n", "5. **Non-Compete Clause**: A provision that restricts the employee from working for a competitor or starting a competing business during or after their employment.\n", "6. **Intellectual Property Rights**: Provisions that address ownership and use of intellectual property, such as patents, copyrights, trademarks, and trade secrets.\n", "7. **Workplace Conduct**: Rules governing behavior in the workplace, including policies on harassment, discrimination, and professional conduct.\n", "8. **Termination**: The circumstances under which employment can be terminated, including notice periods, severance packages, and any applicable laws (e.g., wrongful termination).\n", "9. **Governing Law and Jurisdiction**: The law that governs the agreement and the jurisdiction in which disputes will be resolved.\n", "10. 
**Entire Agreement**: A provision stating that the employment agreement is the entire understanding between the parties and supersedes all prior agreements, understandings, or representations.\n", "11. **Amendments**: Procedures for modifying or amending the agreement, including any required notice periods or approvals.\n", "12. **Dispute Resolution**: Mechanisms for resolving disputes, such as arbitration or mediation.\n", "13. **Notices**: Provisions governing how notices will be given and received, including contact information and timing requirements.\n", "14. **Indemnification**: Clauses that require the employee to indemnify the employer against certain claims or damages.\n", "15. **Waivers and Releases**: Provisions that waive or release the employee's rights to sue the employer for certain claims or damages.\n", "\n", "These key elements may vary depending on the industry, location, and specific circumstances of the employment agreement. It is essential to have a comprehensive and well-drafted employment agreement in place to protect both the employer and the employee.\n", " Success: True\n", " Tokens: 371\n", " Latency: 12.62s\n", "\n", "📑 2. Testing Contract Analysis:\n", "\n", " Contract Analysis Results:\n", " ========================================\n", "Based on the provided contract text, I've identified the following:\n", "\n", "**1. Parties involved:**\n", "\n", "* Itobuz Technologies Private Limited (Employer)\n", "* Satyaki Mitra (Employee)\n", "\n", "**2. Main obligations:**\n", "\n", "The main obligation of this agreement is the employment of Satyaki Mitra as a Data Scientist by Itobuz Technologies Private Limited, on the terms and conditions set forth in the agreement.\n", "\n", "**3. Key financial terms:**\n", "\n", "There are no specific financial terms mentioned in this contract text. 
However, it can be inferred that the Employee will receive compensation for their services as a Data Scientist, but the details of this compensation (e.g., salary, benefits) are not specified.\n", "\n", "**4. Duration/term:**\n", "\n", "The duration or term of this agreement is not explicitly stated. However, based on the language used in the contract, it can be inferred that the employment will continue until the parties agree to terminate the agreement or until the Employee's services are no longer required by the Employer.\n", " ========================================\n", "\n", "📋 3. Testing Structured Output:\n", " Structured JSON Output:\n", "{'agreement_type': 'Agreement of Employment',\n", " 'effective_date': '01st day of December 2022',\n", " 'parties': [{'name': 'Itobuz Technologies Private Limited',\n", " 'role': 'Employer',\n", " 'type': 'organization'},\n", " {'name': 'Satyaki Mitra',\n", " 'role': 'Employee',\n", " 'type': 'individual'}]}\n" ] } ], "source": [ "# Initialize LLM Manager\n", "print(\"\\n💬 STEP 4: Testing LLM Manager with Ollama\")\n", "print(\"=\" * 60)\n", "\n", "def test_llm_manager(text_snippet: str):\n", " \"\"\"\n", " Test LLM capabilities using Ollama\n", " \"\"\"\n", " print(\"🚀 Initializing LLM Manager...\")\n", " \n", " try:\n", " # Initialize LLM manager\n", " llm_manager = LLMManager(default_provider = LLMProvider.OLLAMA,\n", " ollama_base_url = CONFIG[\"ollama_base_url\"],\n", " )\n", " \n", " # Check available providers\n", " available_providers = llm_manager.get_available_providers()\n", " \n", " print(f\"✅ Available LLM Providers: {[p.value for p in available_providers]}\")\n", " \n", " if LLMProvider.OLLAMA not in available_providers:\n", " print(\"❌ Ollama not available. 
Please ensure Ollama is running.\")\n", " print(\" Start Ollama: ollama serve\")\n", " return None\n", " \n", " # Check available models\n", " ollama_models = llm_manager.list_ollama_models()\n", " print(f\"📚 Available Ollama Models: {ollama_models}\")\n", " \n", " if not ollama_models:\n", " print(\"⚠️ No Ollama models found. Pull a model: ollama pull llama2\")\n", " return None\n", " \n", " # Test 1: Basic completion\n", " print(\"\\n🧪 1. Testing Basic Completion:\")\n", " test_prompt = \"What are the key elements of an employment agreement?\"\n", " \n", " response = llm_manager.complete(prompt = test_prompt,\n", " provider = LLMProvider.OLLAMA,\n", " temperature = 0.1,\n", " max_tokens = 512,\n", " )\n", " \n", " print(f\" Prompt: {test_prompt}\")\n", " print(f\" Response: {response.text}\")\n", " print(f\" Success: {response.success}\")\n", " print(f\" Tokens: {response.tokens_used}\")\n", " print(f\" Latency: {response.latency_seconds:.2f}s\")\n", " \n", " # Test 2: Contract analysis\n", " print(\"\\n📑 2. Testing Contract Analysis:\")\n", " analysis_prompt = f\"\"\"\n", " Analyze this contract text and identify:\n", " 1. The parties involved\n", " 2. Main obligations \n", " 3. Key financial terms\n", " 4. Duration/term\n", " \n", " Contract text: {text_snippet[:2000]} # Limit to first 2000 chars\n", " \"\"\"\n", " \n", " analysis_response = llm_manager.complete(prompt = analysis_prompt,\n", " provider = LLMProvider.OLLAMA,\n", " temperature = 0.1,\n", " max_tokens = 500,\n", " )\n", " \n", " print(\"\\n Contract Analysis Results:\")\n", " print(\" \" + \"=\" * 40)\n", " print(analysis_response.text)\n", " print(\" \" + \"=\" * 40)\n", " \n", " # Test 3: FIXED JSON structured output\n", " print(\"\\n📋 3. Testing Structured Output:\")\n", " try:\n", " # Create a better prompt with the actual contract text\n", " json_prompt = f\"\"\"\n", " Extract the key parties and their roles from the following contract text. 
Return ONLY valid JSON with no additional text.\n", " \n", " Contract Text:\n", " {text_snippet[:1500]}\n", " \n", " Return JSON format:\n", " {{\n", " \"parties\": [\n", " {{\n", " \"name\": \"party_name\",\n", " \"role\": \"party_role\",\n", " \"type\": \"individual/organization\"\n", " }}\n", " ],\n", " \"agreement_type\": \"type_of_agreement\",\n", " \"effective_date\": \"date_if_mentioned\"\n", " }}\n", " \"\"\"\n", " \n", " # Use a more detailed schema description\n", " schema_description = \"\"\"\n", " JSON schema with:\n", " - parties: array of objects with name, role, and type\n", " - agreement_type: string describing the type of agreement\n", " - effective_date: string with the effective date if mentioned\n", " - compensation: object with salary/amount details if mentioned\n", " \"\"\"\n", " \n", " json_response = llm_manager.generate_structured_json(prompt = json_prompt,\n", " schema_description = schema_description,\n", " provider = LLMProvider.OLLAMA,\n", " max_tokens = 1024,\n", " temperature = 0.1,\n", " )\n", " \n", " print(\" Structured JSON Output:\")\n", " pprint(json_response)\n", " \n", " # Validate the response\n", " if ((json_response.get('parties') == ['Alice', 'Bob']) or (json_response.get('roles') == ['Seller', 'Buyer'])):\n", " print(\"\\n ⚠️ WARNING: Model generated generic placeholder data!\")\n", " print(\" This indicates the model didn't properly analyze the contract.\")\n", " \n", " except Exception as e:\n", " print(f\" JSON generation failed: {e}\")\n", " \n", " # Fallback: Try manual JSON parsing with a simpler approach\n", " print(\"\\n 🔧 Trying alternative JSON extraction...\")\n", " try:\n", " fallback_prompt = f\"\"\"\n", " Based on this contract text, extract the parties and their roles in JSON format:\n", " \n", " {text_snippet[:1000]}\n", " \n", " Return ONLY JSON, no other text. 
Example format:\n", " {{\n", " \"parties\": [\n", " {{\n", " \"name\": \"Company Name\", \n", " \"role\": \"Employer\"\n", " }},\n", " {{\n", " \"name\": \"Employee Name\", \n", " \"role\": \"Employee\" \n", " }}\n", " ]\n", " }}\n", " \"\"\"\n", " \n", " fallback_response = llm_manager.complete(prompt = fallback_prompt,\n", " provider = LLMProvider.OLLAMA,\n", " temperature = 0.1,\n", " max_tokens = 500,\n", " json_mode = True,\n", " )\n", " \n", " if fallback_response.success:\n", " # Try to parse the response as JSON\n", " try:\n", " # Clean the response\n", " json_text = fallback_response.text.strip()\n", " json_text = json_text.replace('```json', '').replace('```', '').strip()\n", " \n", " parsed_json = json.loads(json_text)\n", " print(\" Alternative JSON Output:\")\n", " pprint(parsed_json)\n", " \n", " except json.JSONDecodeError:\n", " print(\" Could not parse JSON from response:\")\n", " print(f\" Response: {fallback_response.text}\")\n", " \n", " except Exception as fallback_error:\n", " print(f\" Alternative approach also failed: {fallback_error}\")\n", " \n", " return llm_manager\n", " \n", " except Exception as e:\n", " print(f\"❌ LLM Manager test failed: {e}\")\n", " import traceback\n", " print(f\"Detailed error: {traceback.format_exc()}\")\n", " return None\n", "\n", "\n", "# Run LLM test with the extracted document text\n", "llm_manager = test_llm_manager(text_snippet = document_text)\n" ] }, { "cell_type": "markdown", "id": "33250119-a476-4600-a30d-494c22499a6c", "metadata": {}, "source": [ "## Contract Classification Testing" ] }, { "cell_type": "code", "execution_count": 7, "id": "89313d11-d495-483f-b714-9630c2adab45", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "🏷️ STEP 5: Testing Contract Classifier\n", "============================================================\n", "🎯 Initializing Contract Classifier...\n", "2025-11-13 19:18:17 - contract_analyzer.error - ERROR - {\n", " \"timestamp\": 
\"2025-11-13T19:18:17.410345\",\n", " \"error_type\": \"OSError\",\n", " \"error_message\": \"models/nlpaueb/legal-bert-base-uncased does not appear to have a file named config.json. Checkout 'https://huggingface.co/models/nlpaueb/legal-bert-base-uncased/tree/None' for available files.\",\n", " \"traceback\": \"Traceback (most recent call last):\\n File \\\"/Users/itobuz/projects/satyaki/contract_guard_ai/notebooks/../model_manager/model_loader.py\\\", line 74, in load_legal_bert\\n model = AutoModel.from_pretrained(pretrained_model_name_or_path = config[\\\"local_path\\\"])\\n File \\\"/Users/itobuz/.conda/envs/mvp_env/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py\\\", line 523, in from_pretrained\\n config, kwargs = AutoConfig.from_pretrained(\\n File \\\"/Users/itobuz/.conda/envs/mvp_env/lib/python3.10/site-packages/transformers/models/auto/configuration_auto.py\\\", line 928, in from_pretrained\\n config_dict, unused_kwargs = PretrainedConfig.get_config_dict(pretrained_model_name_or_path, **kwargs)\\n File \\\"/Users/itobuz/.conda/envs/mvp_env/lib/python3.10/site-packages/transformers/configuration_utils.py\\\", line 631, in get_config_dict\\n config_dict, kwargs = cls._get_config_dict(pretrained_model_name_or_path, **kwargs)\\n File \\\"/Users/itobuz/.conda/envs/mvp_env/lib/python3.10/site-packages/transformers/configuration_utils.py\\\", line 686, in _get_config_dict\\n resolved_config_file = cached_file(\\n File \\\"/Users/itobuz/.conda/envs/mvp_env/lib/python3.10/site-packages/transformers/utils/hub.py\\\", line 369, in cached_file\\n raise EnvironmentError(\\nOSError: models/nlpaueb/legal-bert-base-uncased does not appear to have a file named config.json. 
Checkout 'https://huggingface.co/models/nlpaueb/legal-bert-base-uncased/tree/None' for available files.\\n\",\n", " \"context\": {\n", " \"component\": \"ModelLoader\",\n", " \"operation\": \"load_legal_bert\",\n", " \"model_name\": \"nlpaueb/legal-bert-base-uncased\"\n", " }\n", "}\n", "2025-11-13 19:18:17 - contract_analyzer.error - ERROR - {\n", " \"timestamp\": \"2025-11-13T19:18:17.410345\",\n", " \"error_type\": \"OSError\",\n", " \"error_message\": \"models/nlpaueb/legal-bert-base-uncased does not appear to have a file named config.json. Checkout 'https://huggingface.co/models/nlpaueb/legal-bert-base-uncased/tree/None' for available files.\",\n", " \"traceback\": \"Traceback (most recent call last):\\n File \\\"/Users/itobuz/projects/satyaki/contract_guard_ai/notebooks/../model_manager/model_loader.py\\\", line 74, in load_legal_bert\\n model = AutoModel.from_pretrained(pretrained_model_name_or_path = config[\\\"local_path\\\"])\\n File \\\"/Users/itobuz/.conda/envs/mvp_env/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py\\\", line 523, in from_pretrained\\n config, kwargs = AutoConfig.from_pretrained(\\n File \\\"/Users/itobuz/.conda/envs/mvp_env/lib/python3.10/site-packages/transformers/models/auto/configuration_auto.py\\\", line 928, in from_pretrained\\n config_dict, unused_kwargs = PretrainedConfig.get_config_dict(pretrained_model_name_or_path, **kwargs)\\n File \\\"/Users/itobuz/.conda/envs/mvp_env/lib/python3.10/site-packages/transformers/configuration_utils.py\\\", line 631, in get_config_dict\\n config_dict, kwargs = cls._get_config_dict(pretrained_model_name_or_path, **kwargs)\\n File \\\"/Users/itobuz/.conda/envs/mvp_env/lib/python3.10/site-packages/transformers/configuration_utils.py\\\", line 686, in _get_config_dict\\n resolved_config_file = cached_file(\\n File \\\"/Users/itobuz/.conda/envs/mvp_env/lib/python3.10/site-packages/transformers/utils/hub.py\\\", line 369, in cached_file\\n raise EnvironmentError(\\nOSError: 
models/nlpaueb/legal-bert-base-uncased does not appear to have a file named config.json. Checkout 'https://huggingface.co/models/nlpaueb/legal-bert-base-uncased/tree/None' for available files.\\n\",\n", " \"context\": {\n", " \"component\": \"ModelLoader\",\n", " \"operation\": \"load_legal_bert\",\n", " \"model_name\": \"nlpaueb/legal-bert-base-uncased\"\n", " }\n", "}\n", "2025-11-13 19:18:17 - contract_analyzer.error - ERROR - {\n", " \"timestamp\": \"2025-11-13T19:18:17.413786\",\n", " \"error_type\": \"OSError\",\n", " \"error_message\": \"models/nlpaueb/legal-bert-base-uncased does not appear to have a file named config.json. Checkout 'https://huggingface.co/models/nlpaueb/legal-bert-base-uncased/tree/None' for available files.\",\n", " \"traceback\": \"Traceback (most recent call last):\\n File \\\"/Users/itobuz/projects/satyaki/contract_guard_ai/notebooks/../services/contract_classifier.py\\\", line 195, in _lazy_load\\n self.legal_bert_model, self.legal_bert_tokenizer = self.model_loader.load_legal_bert()\\n File \\\"/Users/itobuz/projects/satyaki/contract_guard_ai/notebooks/../model_manager/model_loader.py\\\", line 74, in load_legal_bert\\n model = AutoModel.from_pretrained(pretrained_model_name_or_path = config[\\\"local_path\\\"])\\n File \\\"/Users/itobuz/.conda/envs/mvp_env/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py\\\", line 523, in from_pretrained\\n config, kwargs = AutoConfig.from_pretrained(\\n File \\\"/Users/itobuz/.conda/envs/mvp_env/lib/python3.10/site-packages/transformers/models/auto/configuration_auto.py\\\", line 928, in from_pretrained\\n config_dict, unused_kwargs = PretrainedConfig.get_config_dict(pretrained_model_name_or_path, **kwargs)\\n File \\\"/Users/itobuz/.conda/envs/mvp_env/lib/python3.10/site-packages/transformers/configuration_utils.py\\\", line 631, in get_config_dict\\n config_dict, kwargs = cls._get_config_dict(pretrained_model_name_or_path, **kwargs)\\n File 
\\\"/Users/itobuz/.conda/envs/mvp_env/lib/python3.10/site-packages/transformers/configuration_utils.py\\\", line 686, in _get_config_dict\\n resolved_config_file = cached_file(\\n File \\\"/Users/itobuz/.conda/envs/mvp_env/lib/python3.10/site-packages/transformers/utils/hub.py\\\", line 369, in cached_file\\n raise EnvironmentError(\\nOSError: models/nlpaueb/legal-bert-base-uncased does not appear to have a file named config.json. Checkout 'https://huggingface.co/models/nlpaueb/legal-bert-base-uncased/tree/None' for available files.\\n\",\n", " \"context\": {\n", " \"component\": \"ContractClassifier\",\n", " \"operation\": \"model_loading\"\n", " }\n", "}\n", "2025-11-13 19:18:17 - contract_analyzer.error - ERROR - {\n", " \"timestamp\": \"2025-11-13T19:18:17.413786\",\n", " \"error_type\": \"OSError\",\n", " \"error_message\": \"models/nlpaueb/legal-bert-base-uncased does not appear to have a file named config.json. Checkout 'https://huggingface.co/models/nlpaueb/legal-bert-base-uncased/tree/None' for available files.\",\n", " \"traceback\": \"Traceback (most recent call last):\\n File \\\"/Users/itobuz/projects/satyaki/contract_guard_ai/notebooks/../services/contract_classifier.py\\\", line 195, in _lazy_load\\n self.legal_bert_model, self.legal_bert_tokenizer = self.model_loader.load_legal_bert()\\n File \\\"/Users/itobuz/projects/satyaki/contract_guard_ai/notebooks/../model_manager/model_loader.py\\\", line 74, in load_legal_bert\\n model = AutoModel.from_pretrained(pretrained_model_name_or_path = config[\\\"local_path\\\"])\\n File \\\"/Users/itobuz/.conda/envs/mvp_env/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py\\\", line 523, in from_pretrained\\n config, kwargs = AutoConfig.from_pretrained(\\n File \\\"/Users/itobuz/.conda/envs/mvp_env/lib/python3.10/site-packages/transformers/models/auto/configuration_auto.py\\\", line 928, in from_pretrained\\n config_dict, unused_kwargs = 
PretrainedConfig.get_config_dict(pretrained_model_name_or_path, **kwargs)\\n File \\\"/Users/itobuz/.conda/envs/mvp_env/lib/python3.10/site-packages/transformers/configuration_utils.py\\\", line 631, in get_config_dict\\n config_dict, kwargs = cls._get_config_dict(pretrained_model_name_or_path, **kwargs)\\n File \\\"/Users/itobuz/.conda/envs/mvp_env/lib/python3.10/site-packages/transformers/configuration_utils.py\\\", line 686, in _get_config_dict\\n resolved_config_file = cached_file(\\n File \\\"/Users/itobuz/.conda/envs/mvp_env/lib/python3.10/site-packages/transformers/utils/hub.py\\\", line 369, in cached_file\\n raise EnvironmentError(\\nOSError: models/nlpaueb/legal-bert-base-uncased does not appear to have a file named config.json. Checkout 'https://huggingface.co/models/nlpaueb/legal-bert-base-uncased/tree/None' for available files.\\n\",\n", " \"context\": {\n", " \"component\": \"ContractClassifier\",\n", " \"operation\": \"model_loading\"\n", " }\n", "}\n", "❌ Contract classification failed: models/nlpaueb/legal-bert-base-uncased does not appear to have a file named config.json. 
Checkout 'https://huggingface.co/models/nlpaueb/legal-bert-base-uncased/tree/None' for available files.\n", " This may be due to model download requirements.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Traceback (most recent call last):\n", " File \"/var/folders/jk/wxfv5xn16_b00bdt6v49v7640000gn/T/ipykernel_88401/1912900102.py\", line 13, in test_contract_classification\n", " classifier = ContractClassifier(model_loader)\n", " File \"/Users/itobuz/projects/satyaki/contract_guard_ai/notebooks/../services/contract_classifier.py\", line 180, in __init__\n", " self._lazy_load()\n", " File \"/Users/itobuz/projects/satyaki/contract_guard_ai/notebooks/../services/contract_classifier.py\", line 195, in _lazy_load\n", " self.legal_bert_model, self.legal_bert_tokenizer = self.model_loader.load_legal_bert()\n", " File \"/Users/itobuz/projects/satyaki/contract_guard_ai/notebooks/../model_manager/model_loader.py\", line 74, in load_legal_bert\n", " model = AutoModel.from_pretrained(pretrained_model_name_or_path = config[\"local_path\"])\n", " File \"/Users/itobuz/.conda/envs/mvp_env/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py\", line 523, in from_pretrained\n", " config, kwargs = AutoConfig.from_pretrained(\n", " File \"/Users/itobuz/.conda/envs/mvp_env/lib/python3.10/site-packages/transformers/models/auto/configuration_auto.py\", line 928, in from_pretrained\n", " config_dict, unused_kwargs = PretrainedConfig.get_config_dict(pretrained_model_name_or_path, **kwargs)\n", " File \"/Users/itobuz/.conda/envs/mvp_env/lib/python3.10/site-packages/transformers/configuration_utils.py\", line 631, in get_config_dict\n", " config_dict, kwargs = cls._get_config_dict(pretrained_model_name_or_path, **kwargs)\n", " File \"/Users/itobuz/.conda/envs/mvp_env/lib/python3.10/site-packages/transformers/configuration_utils.py\", line 686, in _get_config_dict\n", " resolved_config_file = cached_file(\n", " File 
\"/Users/itobuz/.conda/envs/mvp_env/lib/python3.10/site-packages/transformers/utils/hub.py\", line 369, in cached_file\n", " raise EnvironmentError(\n", "OSError: models/nlpaueb/legal-bert-base-uncased does not appear to have a file named config.json. Checkout 'https://huggingface.co/models/nlpaueb/legal-bert-base-uncased/tree/None' for available files.\n" ] } ], "source": [ "print(\"\\n🏷️ STEP 5: Testing Contract Classifier\")\n", "print(\"=\" * 60)\n", "\n", "def test_contract_classification(text):\n", " \"\"\"\n", " Test AI-powered contract classification\n", " \"\"\"\n", " print(\"🎯 Initializing Contract Classifier...\")\n", " \n", " try:\n", " # Initialize model loader and classifier\n", " model_loader = ModelLoader()\n", " classifier = ContractClassifier(model_loader)\n", " \n", " print(\"✅ Models loaded successfully!\")\n", " \n", " # Test 1: Single category classification\n", " print(\"\\n🔍 1. Single Category Classification:\")\n", " classification = classifier.classify_contract(contract_text = text)\n", " \n", " print(f\" Primary Category: {classification.category}\")\n", " print(f\" Subcategory: {classification.subcategory}\")\n", " print(f\" Confidence: {classification.confidence:.2f}\")\n", " \n", " print(f\" Reasoning:\")\n", " for reason in classification.reasoning:\n", " print(f\" - {reason}\")\n", " \n", " print(f\" Detected Keywords: {classification.detected_keywords}\")\n", " \n", " # Test 2: Multi-label classification\n", " print(\"\\n🏷️ 2. Multi-Label Classification:\")\n", " multi_categories = classifier.classify_multi_label(text = text, \n", " threshold = 0.5,\n", " )\n", " \n", " print(f\" Found {len(multi_categories)} relevant categories:\")\n", " for i, category in enumerate(multi_categories):\n", " print(f\" {i+1}. {category.category} (confidence: {category.confidence:.2f})\")\n", " if category.subcategory:\n", " print(f\" Subcategory: {category.subcategory}\")\n", " \n", " # Test 3: Category descriptions\n", " print(\"\\n📚 3. 
# Clause Extractor Testing (step 6)
print("\n🔍 STEP 6: Testing Clause Extractor")
print("=" * 60)


def test_clause_extraction(text, contract_category = None):
    """
    Test advanced clause extraction using Legal-BERT + structural patterns.

    Extracts up to 50 clauses, prints each one, then prints extraction
    statistics, the category distribution and the clauses that carry risk
    indicators.

    Args:
        text: Contract text to extract clauses from.
        contract_category: Optional category passed to ClauseExtractor. When
            omitted, the category of the notebook-level `classification_results`
            is used if that variable exists and holds a primary classification;
            otherwise the extractor is built with `contract_category = None`.

    Returns:
        dict with keys 'clauses', 'stats', 'distribution' and 'risky_clauses';
        None if any step raised (the error and traceback are printed).
    """
    print("🎯 Initializing Clause Extractor...")

    try:
        # Initialize model loader and clause extractor
        model_loader = ModelLoader()

        # No explicit category: reuse the classification cell's result when it is
        # available. globals().get() (instead of a bare name lookup) means a
        # classification cell that was never run no longer raises NameError here
        # and aborts the whole extraction.
        if contract_category is None:
            prior = globals().get('classification_results')
            if prior and 'primary_classification' in prior:
                contract_category = prior['primary_classification'].category

        extractor = ClauseExtractor(model_loader = model_loader,
                                    contract_category = contract_category,
                                    )

        print("✅ Clause extractor initialized successfully!")

        # Test 1: Basic clause extraction
        print("\n📄 1. Basic Clause Extraction:")
        clauses = extractor.extract_clauses(contract_text = text,
                                            max_clauses = 50,
                                            )

        print(f" Extracted {len(clauses)} clauses")

        # Show all clauses
        for number, clause in enumerate(clauses, start = 1):
            print(f" {number}. [{clause.category}] {clause.reference}")
            print(f" Confidence: {clause.confidence:.3f}")
            print(f" Method: {clause.extraction_method}")
            print(f" Text: {clause.text}")

            if clause.risk_indicators:
                print(f" ⚠️ Risks: {clause.risk_indicators}")

            print()

        # Test 2: Extraction statistics
        print("\n📊 2. Extraction Statistics:")
        stats = extractor.get_extraction_stats(clauses)

        for key, value in stats.items():
            if isinstance(value, dict):
                # Nested dicts are printed one level deep as bullet lists
                print(f" {key.replace('_', ' ').title()}:")
                for subkey, subvalue in value.items():
                    print(f" - {subkey}: {subvalue}")

            else:
                print(f" {key.replace('_', ' ').title()}: {value}")

        # Test 3: Category distribution
        print("\n🏷️ 3. Category Distribution:")
        distribution = extractor.get_category_distribution(clauses)

        for category, count in distribution.items():
            print(f" {category}: {count} clauses")

        # Test 4: High-risk clauses
        print("\n⚠️ 4. High-Risk Clauses:")
        risky_clauses = extractor.get_high_risk_clauses(clauses)

        print(f" Found {len(risky_clauses)} clauses with risk indicators")

        for number, clause in enumerate(risky_clauses, start = 1):
            print(f" {number}. {clause.reference} - {clause.category}")
            print(f" Risks: {clause.risk_indicators}")

        return {'clauses'       : clauses,
                'stats'         : stats,
                'distribution'  : distribution,
                'risky_clauses' : risky_clauses,
                }

    except Exception as e:
        # Best-effort test cell: report the failure and keep the notebook running
        print(f"❌ Clause extraction failed: {e}")
        import traceback
        traceback.print_exc()
        return None


# Run clause extraction test
clause_results = test_clause_extraction(text = document_text)

# Store clauses for use in subsequent tests
if (clause_results and ('clauses' in clause_results)):
    extracted_clauses = clause_results['clauses']
    print(f"\n✅ Successfully extracted {len(extracted_clauses)} clauses for further analysis")

else:
    extracted_clauses = []
    print(f"\n⚠️ No clauses extracted - creating empty list for testing")
# Risk Analyzer Testing (step 7)
print("\n📊 STEP 7: Testing Risk Analyzer")
print("-" * 60)


def test_risk_analyzer(contract_text, clauses, contract_type = ContractType.EMPLOYMENT):
    """
    Test multi-factor risk analysis.

    Builds a MultiFactorRiskAnalyzer for `contract_type`, runs it over the
    contract text and its extracted clauses, and prints the overall score,
    per-category scores, risk factors, the benchmark comparison (when present)
    and the first three entries of the detailed breakdown.

    Args:
        contract_text: Full contract text.
        clauses: Clauses passed through to `analyze_risk`.
        contract_type: ContractType the analyzer is configured for. Defaults to
            ContractType.EMPLOYMENT, which was previously hard-coded.

    Returns:
        The object returned by `analyze_risk`, or None if any step raised (the
        error and traceback are printed).
    """
    print("🎯 Initializing Risk Analyzer...")

    try:
        # Initialize risk analyzer for the requested contract type
        risk_analyzer = MultiFactorRiskAnalyzer(contract_type = contract_type)

        # Run comprehensive risk analysis
        print("🔍 Running multi-factor risk analysis...")
        risk_score = risk_analyzer.analyze_risk(contract_text = contract_text,
                                                clauses = clauses,
                                                )

        # Display results
        print(f"\n📈 RISK ANALYSIS RESULTS:")
        print(f" Overall Score: {risk_score.overall_score}/100")
        print(f" Risk Level: {risk_score.risk_level}")
        print(f" High-Risk Categories: {len(risk_score.risk_factors)}")

        print(f"\n📋 Category Scores:")
        for category, score in risk_score.category_scores.items():
            # Traffic-light marker: >= 70 red, >= 50 yellow, otherwise green
            if score >= 70:
                level = "🔴"
            elif score >= 50:
                level = "🟡"
            else:
                level = "🟢"

            print(f" {level} {category.replace('_', ' ').title()}: {score}/100")

        print(f"\n⚠️ Key Risk Factors:")
        for factor in risk_score.risk_factors:
            print(f" - {factor.replace('_', ' ').title()}")

        if risk_score.benchmark_comparison:
            print(f"\n📊 Benchmark Comparison:")
            for item, comparison in risk_score.benchmark_comparison.items():
                print(f" {item}: {comparison}")

        print(f"\n🔍 Detailed Breakdown:")
        for breakdown in risk_score.risk_breakdown[:3]:
            print(f" 📁 {breakdown.category}: {breakdown.score}/100")

            # Only the first finding of each breakdown entry is shown
            if breakdown.findings:
                print(f" Finding: {breakdown.findings[0]}")

        return risk_score

    except Exception as e:
        # Best-effort test cell: report the failure and keep the notebook running
        print(f"❌ Risk analysis failed: {e}")
        import traceback
        traceback.print_exc()
        return None


# Run risk analyzer test with actual clauses
risk_results = test_risk_analyzer(contract_text = document_text,
                                  clauses = extracted_clauses,
                                  )
" print(f\" High-Risk Categories: {len(risk_score.risk_factors)}\")\n", " \n", " print(f\"\\n📋 Category Scores:\")\n", " for category, score in risk_score.category_scores.items():\n", " level = \"🔴\" if (score >= 70) else \"🟡\" if (score >= 50) else \"🟢\"\n", " print(f\" {level} {category.replace('_', ' ').title()}: {score}/100\")\n", " \n", " print(f\"\\n⚠️ Key Risk Factors:\")\n", " for factor in risk_score.risk_factors:\n", " print(f\" - {factor.replace('_', ' ').title()}\")\n", " \n", " if risk_score.benchmark_comparison:\n", " print(f\"\\n📊 Benchmark Comparison:\")\n", " for item, comparison in risk_score.benchmark_comparison.items():\n", " print(f\" {item}: {comparison}\")\n", " \n", " print(f\"\\n🔍 Detailed Breakdown:\")\n", " for breakdown in risk_score.risk_breakdown[:3]:\n", " print(f\" 📁 {breakdown.category}: {breakdown.score}/100\")\n", " \n", " if breakdown.findings:\n", " print(f\" Finding: {breakdown.findings[0]}\")\n", " \n", " return risk_score\n", " \n", " except Exception as e:\n", " print(f\"❌ Risk analysis failed: {e}\")\n", " import traceback\n", " traceback.print_exc()\n", " return None\n", "\n", "\n", "# Run risk analyzer test with actual clauses\n", "risk_results = test_risk_analyzer(contract_text = document_text,\n", " clauses = extracted_clauses,\n", " )\n" ] }, { "cell_type": "markdown", "id": "eb281e66-b2e4-4492-b368-db9b84b6ffe2", "metadata": {}, "source": [ "## Term Analyzer Testing" ] }, { "cell_type": "code", "execution_count": null, "id": "92896727-1d49-4603-9e1a-1998ed4c242a", "metadata": {}, "outputs": [], "source": [ "print(\"\\n⚖️ STEP 8: Testing Term Analyzer\")\n", "print(\"-\" * 60)\n", "\n", "def test_term_analyzer(contract_text, clauses):\n", " \"\"\"\n", " Test unfavorable terms detection\n", " \"\"\"\n", " print(\"🎯 Initializing Term Analyzer...\")\n", " \n", " try:\n", " # Initialize term analyzer\n", " term_analyzer = TermAnalyzer()\n", " \n", " # Run unfavorable terms analysis\n", " print(\"🔍 Detecting unfavorable 
terms...\")\n", " unfavorable_terms = term_analyzer.analyze_unfavorable_terms(contract_text = contract_text,\n", " clauses = clauses,\n", " )\n", " \n", " # Display results\n", " print(f\"\\n📋 UNFAVORABLE TERMS ANALYSIS:\")\n", " print(f\" Total Unfavorable Terms Found: {len(unfavorable_terms)}\")\n", " \n", " # Severity distribution\n", " severity_dist = term_analyzer.get_severity_distribution(unfavorable_terms)\n", " \n", " print(f\"\\n📊 Severity Distribution:\")\n", " for severity, count in severity_dist.items():\n", " icon = \"🔴\" if (severity == \"critical\") else \"🟡\" if (severity == \"high\") else \"🟢\"\n", " print(f\" {icon} {severity.title()}: {count} terms\")\n", " \n", " # Category distribution\n", " category_dist = term_analyzer.get_category_distribution(unfavorable_terms)\n", " \n", " print(f\"\\n📁 Category Distribution:\")\n", " for category, count in category_dist.items():\n", " print(f\" 📂 {category}: {count} terms\")\n", " \n", " # Show top unfavorable terms\n", " print(f\"\\n🚨 TOP UNFAVORABLE TERMS:\")\n", " for i, term in enumerate(unfavorable_terms):\n", " print(f\"\\n {i+1}. 
# Protection Checker Testing (step 9)
print("\n🛡️ STEP 9: Testing Protection Checker")
print("-" * 60)


def test_protection_checker(contract_text, clauses):
    """
    Test missing protections detection.

    Runs ProtectionChecker over the contract, then prints the total count, the
    importance distribution, the first three critical missing protections and
    finally every missing protection grouped by category.

    Args:
        contract_text: Full contract text.
        clauses: Clauses passed through to `check_missing_protections`.

    Returns:
        The value returned by `check_missing_protections`, or None if any step
        raised (the error and traceback are printed).
    """
    print("🎯 Initializing Protection Checker...")

    try:
        # Initialize protection checker
        protection_checker = ProtectionChecker()

        # Run missing protections analysis
        print("🔍 Checking for missing protections...")
        missing_protections = protection_checker.check_missing_protections(contract_text = contract_text,
                                                                           clauses = clauses,
                                                                           )

        # Display results
        print(f"\n🛡️ MISSING PROTECTIONS ANALYSIS:")
        print(f" Total Missing Protections: {len(missing_protections)}")

        # Importance distribution
        importance_dist = protection_checker.get_importance_distribution(missing_protections)

        print(f"\n📊 Importance Distribution:")
        for importance, count in importance_dist.items():
            # critical -> red, high -> yellow, anything else -> green
            if importance == "critical":
                icon = "🔴"
            elif importance == "high":
                icon = "🟡"
            else:
                icon = "🟢"

            print(f" {icon} {importance.title()}: {count} protections")

        # Show critical missing protections (first three only)
        critical_protections = protection_checker.get_critical_missing(missing_protections)

        print(f"\n🚨 CRITICAL MISSING PROTECTIONS:")
        for number, protection in enumerate(critical_protections[:3], start = 1):
            print(f"\n {number}. {protection.protection}")
            print(f" Category: {protection.category}")
            print(f" Explanation: {protection.explanation}")
            print(f" 💡 Recommendation: {protection.recommendation}")

            if protection.examples:
                print(f" 📝 Example: {protection.examples[0]}")

        # Show all missing protections by category
        print(f"\n📁 ALL MISSING PROTECTIONS BY CATEGORY:")

        # De-duplicate while keeping first-seen order. A plain set() would make
        # the order of the printed categories vary from run to run.
        categories = list(dict.fromkeys(p.category for p in missing_protections))

        for category in categories:
            category_protections = protection_checker.get_by_category(missing_protections, category)

            print(f" 📂 {category}: {len(category_protections)} missing")
            for prot in category_protections:
                print(f" - {prot.protection} ({prot.importance})")

        return missing_protections

    except Exception as e:
        # Best-effort test cell: report the failure and keep the notebook running
        print(f"❌ Protection check failed: {e}")
        import traceback
        traceback.print_exc()
        return None


# Run protection checker test with actual clauses
protection_results = test_protection_checker(contract_text = document_text,
                                             clauses = extracted_clauses,
                                             )
# LLM Interpreter Testing (step 10)
print("\n💬 STEP 10: Testing LLM Interpreter")
print("-" * 60)


def test_llm_interpreter(clauses, llm_manager):
    """
    Interpret extracted clauses with LLMClauseInterpreter and print the results.

    Skips (returns None) when no LLM manager is supplied or when there are no
    clauses. Otherwise interprets the clauses (max_clauses = 50), prints each
    interpretation, and prints how many the interpreter reports as unfavorable
    and as high-risk.

    Args:
        clauses: Clauses to interpret; a falsy value is treated as "no clauses".
        llm_manager: Manager handed to LLMClauseInterpreter.

    Returns:
        The value returned by `interpret_clauses`, or None when skipped or when
        any step raised (the error and traceback are printed).
    """
    # Guard: nothing to do without an LLM backend
    if not llm_manager:
        print("⚠️ LLM Manager not available - skipping LLM Interpreter test")
        return None

    print("🎯 Initializing LLM Interpreter...")

    try:
        interpreter = LLMClauseInterpreter(llm_manager = llm_manager)

        # Normalise a falsy `clauses` to an empty batch
        batch = clauses or []

        if not batch:
            print("⚠️ No clauses available for interpretation")
            return None

        print(f"🔍 Interpreting {len(batch)} clauses with LLM...")
        readings = interpreter.interpret_clauses(clauses = batch,
                                                 max_clauses = 50,
                                                 )

        # Per-clause report
        print("\n💡 CLAUSE INTERPRETATIONS:")
        print(f" Successfully Interpreted: {len(readings)} clauses")

        # Optional bullet sections: (heading, attribute holding the items)
        bullet_sections = ((" 📋 Key Points:", "key_points"),
                           (" ⚠️ Potential Risks:", "potential_risks"),
                           )

        for position, reading in enumerate(readings, start = 1):
            print(f"\n {position}. [{reading.clause_reference}]")
            print(f" 📝 Summary: {reading.plain_english_summary}")
            print(f" ⚖️ Favorability: {reading.favorability}")
            print(f" 🎯 Confidence: {reading.confidence:.2f}")

            for heading, attribute in bullet_sections:
                entries = getattr(reading, attribute)
                if entries:
                    print(heading)
                    for entry in entries:
                        print(f" • {entry}")

        # Aggregate counts reported by the interpreter
        flagged_unfavorable = interpreter.get_unfavorable_interpretations(readings)
        print(f"\n🚨 Unfavorable Interpretations: {len(flagged_unfavorable)}")

        flagged_high_risk = interpreter.get_high_risk_interpretations(readings)
        print(f"⚠️ High-Risk Interpretations: {len(flagged_high_risk)}")

        return readings

    except Exception as exc:
        # Best-effort test cell: report the failure and keep the notebook running
        print(f"❌ LLM interpretation failed: {exc}")
        import traceback
        traceback.print_exc()
        return None


# Run LLM interpreter test with actual clauses
llm_interpretation_results = test_llm_interpreter(clauses = extracted_clauses,
                                                  llm_manager = llm_manager,
                                                  )
# Negotiation Engine Testing (step 11)
print("\n🤝 STEP 11: Testing Negotiation Engine")
print("-" * 60)


def test_negotiation_engine(risk_analysis, unfavorable_terms, missing_protections, clauses, llm_manager):
    """
    Test negotiation strategy generation.

    Asks NegotiationEngine for up to 25 negotiation points, prints them grouped
    by priority (ascending), prints the number of critical points, and previews
    the non-empty lines among the first 15 lines of the generated strategy
    document.

    Args:
        risk_analysis: Risk analysis result passed through to the engine.
        unfavorable_terms: Unfavorable terms passed through to the engine.
        missing_protections: Missing protections passed through to the engine.
        clauses: Extracted clauses passed through to the engine.
        llm_manager: Manager handed to NegotiationEngine.

    Returns:
        The value returned by `generate_negotiation_points`, or None if any
        step raised (the error and traceback are printed).
    """
    print("🎯 Initializing Negotiation Engine...")

    try:
        # Initialize negotiation engine
        negotiation_engine = NegotiationEngine(llm_manager = llm_manager)

        # Generate negotiation points
        print("💡 Generating negotiation strategy...")
        negotiation_points = negotiation_engine.generate_negotiation_points(risk_analysis = risk_analysis,
                                                                            unfavorable_terms = unfavorable_terms,
                                                                            missing_protections = missing_protections,
                                                                            clauses = clauses,
                                                                            max_points = 25,
                                                                            )

        # Display results
        print(f"\n🤝 NEGOTIATION STRATEGY:")
        print(f" Total Negotiation Points: {len(negotiation_points)}")

        # Group by priority
        priority_groups = dict()

        for point in negotiation_points:
            priority_groups.setdefault(point.priority, []).append(point)

        # Display names for the known priority levels (built once, outside the loop)
        priority_labels = {1 : "🔴 CRITICAL",
                           2 : "🟠 HIGH",
                           3 : "🟡 MEDIUM",
                           4 : "🟢 LOW",
                           }

        print(f"\n🎯 PRIORITIZED NEGOTIATION POINTS:")
        for priority in sorted(priority_groups):
            points = priority_groups[priority]

            # The heading below appends " PRIORITY:", so the fallback must not
            # contain the word itself (it used to render "PRIORITY 5 PRIORITY:").
            priority_label = priority_labels.get(priority, f"LEVEL {priority}")

            print(f"\n {priority_label} PRIORITY:")
            for number, point in enumerate(points, start = 1):
                print(f"\n {number}. {point.issue}")
                print(f" 📁 Category: {point.category}")
                print(f" 🎯 Difficulty: {point.estimated_difficulty}")
                print(f" 📝 Current: {point.current_language}")
                print(f" 💡 Proposed: {point.proposed_language}")
                print(f" 📚 Rationale: {point.rationale}")

                if point.fallback_position:
                    print(f" 🔄 Fallback: {point.fallback_position}")

        # Get critical points
        critical_points = negotiation_engine.get_critical_points(negotiation_points)

        print(f"\n🚨 CRITICAL NEGOTIATION POINTS: {len(critical_points)}")

        # Generate strategy document
        strategy_doc = negotiation_engine.generate_negotiation_strategy_document(negotiation_points)

        print(f"\n📄 Strategy Document Length: {len(strategy_doc)} characters")

        # Show document preview
        print(f"\n📋 STRATEGY DOCUMENT PREVIEW:")
        doc_lines = strategy_doc.split('\n')[:15]  # Show first 15 lines

        for line in doc_lines:
            # Only show non-empty lines
            if line.strip():
                print(f" {line}")

        return negotiation_points

    except Exception as e:
        # Best-effort test cell: report the failure and keep the notebook running
        print(f"❌ Negotiation engine failed: {e}")
        import traceback
        traceback.print_exc()
        return None


# Run negotiation engine test with actual data
negotiation_results = test_negotiation_engine(risk_analysis = risk_results,
                                              unfavorable_terms = term_results,
                                              missing_protections = protection_results,
                                              clauses = extracted_clauses,
                                              llm_manager = llm_manager,
                                              )
"execution_count": null, "id": "99c7ce85-1242-4b8a-9162-d682f5c0e6da", "metadata": {}, "outputs": [], "source": [ "print(\"\\n🌍 STEP 12: Testing Universal Market Comparator\")\n", "print(\"-\" * 60)\n", "\n", "def test_universal_market_comparator(clauses, contract_type=ContractType.EMPLOYMENT):\n", " \"\"\"\n", " Test universal market standards comparison for ANY contract type\n", " \"\"\"\n", " print(f\"🎯 Initializing Universal Market Comparator for {contract_type.value}...\")\n", " \n", " try:\n", " # Initialize universal market comparator\n", " model_loader = ModelLoader()\n", " market_comparator = UniversalMarketComparator(model_loader = model_loader,\n", " contract_type = contract_type,\n", " )\n", " \n", " # Run universal market comparison\n", " print(\"🔍 Comparing clauses to universal market standards...\")\n", " comparisons = market_comparator.compare_to_market(clauses = clauses,\n", " max_comparisons = 50,\n", " )\n", " \n", " # Display results\n", " print(f\"\\n📈 UNIVERSAL MARKET COMPARISON RESULTS:\")\n", " print(f\" Contract Type: {contract_type.value}\")\n", " print(f\" Total Comparisons: {len(comparisons)}\")\n", " \n", " if comparisons:\n", " # Assessment summary\n", " summary = market_comparator.get_assessment_summary(comparisons)\n", " \n", " print(f\"\\n📊 ASSESSMENT SUMMARY:\")\n", " print(f\" Aggressive Terms: {summary['assessments']['aggressive']}\")\n", " print(f\" Unfavorable Terms: {summary['assessments']['unfavorable']}\")\n", " print(f\" Standard Terms: {summary['assessments']['standard']}\")\n", " print(f\" Favorable Terms: {summary['assessments']['favorable']}\")\n", " print(f\" Average Similarity: {summary['average_similarity']:.3f}\")\n", " print(f\" Categories Analyzed: {', '.join(summary['categories_analyzed'][:5])}\")\n", " \n", " # Show high-risk comparisons\n", " high_risk = market_comparator.get_high_risk_comparisons(comparisons)\n", " \n", " print(f\"\\n🚨 HIGH-RISK MARKET COMPARISONS:\")\n", " for i, comparison in 
enumerate(high_risk[:5]):\n", " print(f\"\\n {i+1}. [{comparison.clause_category}] - {comparison.assessment.upper()}\")\n", " print(f\" Original Category: {comparison.original_category}\")\n", " print(f\" Similarity: {comparison.similarity_score:.3f}\")\n", " print(f\" Explanation: {comparison.explanation}\")\n", " \n", " if comparison.recommendation:\n", " print(f\" 💡 Recommendation: {comparison.recommendation}\")\n", " \n", " # Show sample comparisons\n", " print(f\"\\n🔍 SAMPLE COMPARISONS:\")\n", " for i, comparison in enumerate(comparisons[:3]):\n", " print(f\"\\n {i+1}. [{comparison.clause_category}] - {comparison.assessment}\")\n", " print(f\" Original: {comparison.original_category}\")\n", " print(f\" Your Clause: {comparison.user_clause[:80]}...\")\n", " print(f\" Market Standard: {comparison.market_standard[:80]}...\")\n", " print(f\" Similarity Score: {comparison.similarity_score:.3f}\")\n", " else:\n", " print(\"❌ No comparisons found. This might indicate:\")\n", " print(\" - Clause categories don't match any market standards\")\n", " print(\" - Embedding model issues\")\n", " print(\" - Very unique/unusual contract terms\")\n", " \n", " return comparisons\n", " \n", " except Exception as e:\n", " print(f\"❌ Universal market comparison failed: {e}\")\n", " import traceback\n", " traceback.print_exc()\n", " return None\n", "\n", "# Run universal market comparator test\n", "universal_market_results = test_universal_market_comparator(clauses = extracted_clauses,\n", " contract_type = ContractType.EMPLOYMENT, # This can be ANY contract type!\n", " )\n" ] }, { "cell_type": "markdown", "id": "8fa1d5d6-ed13-4a6c-ade7-d0f7ba727fe3", "metadata": {}, "source": [ "## Complete Service Integration Test" ] }, { "cell_type": "code", "execution_count": null, "id": "15a12461-03f1-4381-8750-f5697e06139e", "metadata": {}, "outputs": [], "source": [ "print(\"\\n🚀 STEP 13: Complete Analysis Pipeline\")\n", "print(\"=\" * 60)\n", "\n", "def 
def complete_contract_analysis(file_path, use_ai = True, file_type = "pdf", contract_type = None):
    """
    Complete end-to-end contract analysis

    Runs document reading, contract validation and text processing, then (when `use_ai` is set)
    the AI-backed stages: classification, clause extraction, risk analysis, term analysis,
    protection analysis, market comparison, LLM clause interpretation, negotiation strategy
    and an LLM-written executive summary.

    Arguments:
    ----------
        file_path     : path of the document, passed straight to `DocumentReader.read_file`

        use_ai        : when False, only the three non-AI stages run and every AI section stays `{}`

        file_type     : file-type argument forwarded to `DocumentReader.read_file` (default "pdf",
                        the value this function previously hard-coded)

        contract_type : contract type handed to `MultiFactorRiskAnalyzer`; None keeps the previous
                        hard-coded behaviour of `ContractType.EMPLOYMENT`

    Returns:
    --------
        dict with one entry per stage ('file_info', 'validation', 'processing', 'classification',
        'clause_extraction', 'risk_analysis', 'term_analysis', 'protection_analysis',
        'market_comparison', 'llm_interpretation', 'negotiation_strategy', 'llm_analysis').
        A stage that failed holds `{'error': <message>}`; a stage that never ran holds `{}`.
        If one of the non-AI stages raises, the whole result is just `{'error': <message>}`.
    """
    print("🎯 Starting Complete Contract Analysis Pipeline...")

    # Initialize logging
    ContractAnalyzerLogger.setup(log_dir  = CONFIG["log_directory"],
                                 app_name = "complete_analysis",
                                 )

    # Sections produced by the optional AI stage, in pipeline order
    ai_sections      = ('classification',
                        'clause_extraction',
                        'risk_analysis',
                        'term_analysis',
                        'protection_analysis',
                        'market_comparison',
                        'llm_interpretation',
                        'negotiation_strategy',
                        'llm_analysis',
                        )

    analysis_results = {'file_info'            : {},
                        'validation'           : {},
                        'processing'           : {},
                        'classification'       : {},
                        'clause_extraction'    : {},
                        'risk_analysis'        : {},
                        'term_analysis'        : {},
                        'protection_analysis'  : {},
                        'market_comparison'    : {},
                        'llm_interpretation'   : {},
                        'negotiation_strategy' : {},
                        'llm_analysis'         : {},
                        }

    try:
        # Step 1: Document Reading
        print("\n📄 1. Document Reading...")
        reader        = DocumentReader()
        file_contents = reader.read_file(file_path, file_type)
        # read_file may hand back a dict with a 'text' entry or something string-like
        text          = file_contents.get('text', '') if isinstance(file_contents, dict) else str(file_contents)

        analysis_results['file_info'] = {'text_length'        : len(text),
                                         'file_type'          : file_type,
                                         'extraction_success' : bool(text.strip()),
                                         }

        # Step 2: Contract Validation
        print("🔍 2. Contract Validation...")
        validator                           = ContractValidator()
        is_contract, val_type, val_message  = validator.is_valid_contract(text)
        val_report                          = validator.get_validation_report(text)

        analysis_results['validation'] = {'is_contract'        : is_contract,
                                          'confidence_level'   : val_type,
                                          'validation_message' : val_message,
                                          'score'              : val_report['scores']['total'],
                                          'key_indicators'     : val_report['found_indicators'],
                                          }

        # Step 3: Text Processing
        print("📝 3. Text Processing...")
        processor = TextProcessor(use_spacy = CONFIG["use_spacy"])
        stats     = processor.get_text_statistics(text)
        entities  = processor.extract_legal_entities(text)
        chunks    = processor.chunk_text_for_embedding(text)

        analysis_results['processing'] = {'statistics'      : stats,
                                          'entity_counts'   : {k: len(v) for k, v in entities.items()},
                                          'key_entities'    : {'parties' : entities.get('parties', []),
                                                               'dates'   : entities.get('dates', []),
                                                               'amounts' : entities.get('amounts', [])
                                                               },
                                          'analysis_chunks' : len(chunks),
                                          }

        # Step 4: AI-Powered Analysis (Optional)
        if use_ai:
            print("🤖 4. AI-Powered Analysis...")
            try:
                # Contract Classification
                model_loader   = ModelLoader()
                classifier     = ContractClassifier(model_loader)
                classification = classifier.classify_contract(text)

                analysis_results['classification'] = {'primary_category' : classification.category,
                                                      'subcategory'      : classification.subcategory,
                                                      'confidence'       : classification.confidence,
                                                      'reasoning'        : classification.reasoning,
                                                      }

                # Clause Extraction
                print("🔍 5. Clause Extraction...")
                clause_extractor = ClauseExtractor(model_loader      = model_loader,
                                                   contract_category = classification.category,
                                                   )
                clauses          = clause_extractor.extract_clauses(contract_text = text,
                                                                    max_clauses   = 50,
                                                                    )
                clause_stats     = clause_extractor.get_extraction_stats(clauses)
                risky_clauses    = clause_extractor.get_high_risk_clauses(clauses)

                analysis_results['clause_extraction'] = {'total_clauses'       : len(clauses),
                                                         'categories_found'    : list(set(c.category for c in clauses)),
                                                         'risky_clauses_count' : len(risky_clauses),
                                                         'avg_confidence'      : clause_stats['avg_confidence'],
                                                         'extraction_stats'    : clause_stats,
                                                         'sample_clauses'      : [{'reference'       : c.reference,
                                                                                   'category'        : c.category,
                                                                                   'confidence'      : c.confidence,
                                                                                   'risk_indicators' : c.risk_indicators,
                                                                                   } for c in clauses
                                                                                  ],
                                                         }

                # Risk Analysis
                print("📊 6. Risk Analysis...")
                risk_contract_type = ContractType.EMPLOYMENT if contract_type is None else contract_type
                risk_analyzer      = MultiFactorRiskAnalyzer(contract_type = risk_contract_type)
                risk_score         = risk_analyzer.analyze_risk(contract_text = text,
                                                                clauses       = clauses,
                                                                )

                analysis_results['risk_analysis'] = {'overall_score'     : risk_score.overall_score,
                                                     'risk_level'        : risk_score.risk_level,
                                                     'category_scores'   : risk_score.category_scores,
                                                     'risk_factors'      : risk_score.risk_factors,
                                                     'benchmark_results' : risk_score.benchmark_comparison,
                                                     }

                # Term Analysis
                print("⚖️ 7. Term Analysis...")
                term_analyzer     = TermAnalyzer()
                unfavorable_terms = term_analyzer.analyze_unfavorable_terms(contract_text = text,
                                                                            clauses       = clauses,
                                                                            )
                severity_dist     = term_analyzer.get_severity_distribution(unfavorable_terms)
                category_dist     = term_analyzer.get_category_distribution(unfavorable_terms)

                analysis_results['term_analysis'] = {'total_terms'    : len(unfavorable_terms),
                                                     'severity_dist'  : severity_dist,
                                                     'category_dist'  : category_dist,
                                                     'critical_terms' : [t for t in unfavorable_terms if t.severity == "critical"],
                                                     'sample_terms'   : unfavorable_terms[:5],
                                                     }

                # Protection Analysis
                print("🛡️ 8. Protection Analysis...")
                protection_checker   = ProtectionChecker()
                missing_protections  = protection_checker.check_missing_protections(contract_text = text,
                                                                                    clauses       = clauses,
                                                                                    )
                importance_dist      = protection_checker.get_importance_distribution(missing_protections)
                critical_protections = protection_checker.get_critical_missing(missing_protections)

                analysis_results['protection_analysis'] = {'total_missing'        : len(missing_protections),
                                                           'importance_dist'      : importance_dist,
                                                           'critical_protections' : critical_protections,
                                                           'sample_protections'   : missing_protections[:5],
                                                           }

                # Market Comparison
                print("📈 9. Market Comparison...")
                market_comparator  = UniversalMarketComparator(model_loader = model_loader)
                market_comparisons = market_comparator.compare_to_market(clauses         = clauses,
                                                                         max_comparisons = 15,
                                                                         )
                market_summary     = market_comparator.get_assessment_summary(market_comparisons)
                high_risk_market   = market_comparator.get_high_risk_comparisons(market_comparisons)

                analysis_results['market_comparison'] = {'total_comparisons'  : len(market_comparisons),
                                                         'assessment_summary' : market_summary,
                                                         'high_risk_count'    : len(high_risk_market),
                                                         'sample_comparisons' : market_comparisons[:3],
                                                         }

                # LLM Interpretation (if available)
                llm_manager = LLMManager(default_provider = LLMProvider.OLLAMA)

                if LLMProvider.OLLAMA in llm_manager.get_available_providers():
                    print("💬 10. LLM Interpretation...")
                    try:
                        llm_interpreter             = LLMClauseInterpreter(llm_manager = llm_manager)
                        interpretations             = llm_interpreter.interpret_clauses(clauses     = clauses[:5],
                                                                                        max_clauses = 5,
                                                                                        )
                        unfavorable_interpretations = llm_interpreter.get_unfavorable_interpretations(interpretations)
                        high_risk_interpretations   = llm_interpreter.get_high_risk_interpretations(interpretations)

                        analysis_results['llm_interpretation'] = {'total_interpreted'      : len(interpretations),
                                                                  'unfavorable_count'      : len(unfavorable_interpretations),
                                                                  'high_risk_count'        : len(high_risk_interpretations),
                                                                  'sample_interpretations' : interpretations[:3],
                                                                  }
                    except Exception as e:
                        print(f"⚠️ LLM interpretation failed: {e}")
                        analysis_results['llm_interpretation'] = {'error': str(e)}

                # Negotiation Strategy
                print("🤝 11. Negotiation Strategy...")
                try:
                    negotiation_engine = NegotiationEngine(llm_manager = llm_manager)
                    negotiation_points = negotiation_engine.generate_negotiation_points(risk_analysis       = risk_score,
                                                                                        unfavorable_terms   = unfavorable_terms,
                                                                                        missing_protections = missing_protections,
                                                                                        clauses             = clauses,
                                                                                        max_points          = 10,
                                                                                        )
                    critical_points    = negotiation_engine.get_critical_points(negotiation_points)
                    strategy_doc       = negotiation_engine.generate_negotiation_strategy_document(negotiation_points)

                    analysis_results['negotiation_strategy'] = {'total_points'     : len(negotiation_points),
                                                                'critical_points'  : len(critical_points),
                                                                'strategy_doc_len' : len(strategy_doc),
                                                                'sample_points'    : negotiation_points[:3],
                                                                }
                except Exception as e:
                    print(f"⚠️ Negotiation strategy failed: {e}")
                    analysis_results['negotiation_strategy'] = {'error': str(e)}

                # LLM Summary Analysis
                print("💬 12. LLM Summary Analysis...")
                if LLMProvider.OLLAMA in llm_manager.get_available_providers():
                    try:
                        # Create comprehensive summary using all analyses
                        # (prompt text is kept flush-left so the LLM sees no stray indentation)
                        summary_context = f"""
Contract Type: {classification.category}
Risk Score: {risk_score.overall_score}/100 ({risk_score.risk_level})
Unfavorable Terms: {len(unfavorable_terms)} (Critical: {severity_dist.get('critical', 0)})
Missing Protections: {len(missing_protections)} (Critical: {importance_dist.get('critical', 0)})
Market Comparison: {market_summary['assessments']['aggressive']} aggressive terms, {market_summary['assessments']['unfavorable']} unfavorable terms

Key Risk Factors: {', '.join(risk_score.risk_factors[:3])}
Top Unfavorable Terms: {', '.join([t.term for t in unfavorable_terms[:3]])}
Critical Missing Protections: {', '.join([p.protection for p in critical_protections[:2]])}
Aggressive Market Terms: {', '.join([c.clause_category for c in high_risk_market[:2]])}
"""

                        # The request previously said "3-4 bullet point" while listing five items;
                        # the wording now matches the list so the model is not forced to drop one
                        summary_prompt = f"""
Based on this comprehensive contract analysis, provide a concise executive summary:

{summary_context}

Provide a concise bullet point summary (one bullet per item) highlighting:
1. Overall risk level and key concerns
2. Most critical unfavorable terms to negotiate
3. Essential missing protections to add
4. Market deviations that need attention
5. Recommended negotiation priorities

Keep it business-friendly and actionable.
"""

                        summary_response = llm_manager.complete(prompt      = summary_prompt,
                                                                max_tokens  = 1024,
                                                                temperature = 0.1,
                                                                )

                        analysis_results['llm_analysis'] = {'summary'      : summary_response.text if summary_response.success else "LLM analysis failed",
                                                            'provider'     : 'ollama',
                                                            'context_used' : summary_context,
                                                            }
                    except Exception as e:
                        print(f"⚠️ LLM summary analysis failed: {e}")
                        analysis_results['llm_analysis'] = {'error': str(e)}

            except Exception as e:
                print(f"⚠️ AI analysis partially failed: {e}")
                # Only mark the stages that never produced anything: results (or stage-specific
                # errors) recorded before the failure are kept instead of being overwritten
                for section_name in ai_sections:
                    if not analysis_results[section_name]:
                        analysis_results[section_name] = {'error': str(e)}

        # Report success only when no stage recorded an error
        failed_sections = [name for name, section in analysis_results.items() if 'error' in section]

        if failed_sections:
            print(f"⚠️ Analysis completed with errors in: {', '.join(failed_sections)}")
        else:
            print("✅ Analysis completed successfully!")

        return analysis_results

    except Exception as e:
        print(f"❌ Analysis failed: {e}")
        return {'error': str(e)}
# Run complete analysis
print("🔧 Running complete analysis (this may take a few minutes)...")
complete_results = complete_contract_analysis(file_path = CONFIG["pdf_file_path"],
                                              use_ai    = True,
                                              )


def _section_ready(results, section_name, required_key):
    """
    Tell whether a result section can be displayed

    Returns True when `results[section_name]` is non-empty and contains `required_key`.
    Otherwise returns False, and if the section recorded an 'error' entry that error is
    printed so a failed stage is visible in the report instead of being skipped silently.
    """
    section = results.get(section_name) or {}

    if required_key in section:
        return True

    if 'error' in section:
        print(f"\n⚠️ {section_name} unavailable: {section['error']}")

    return False


# Display results
print("\n" + "=" * 60)
print("📊 COMPLETE ANALYSIS RESULTS")
print("=" * 60)

if ('error' in complete_results):
    # A top-level 'error' key means one of the non-AI stages raised and nothing else was returned
    print(f"❌ Error: {complete_results['error']}")

else:
    # File Info
    file_info = complete_results['file_info']
    print("📄 FILE INFO:")
    print(f" Text Length: {file_info['text_length']:,} characters")
    print(f" Extraction Success: {file_info['extraction_success']}")

    # Validation
    validation = complete_results['validation']
    print("\n🔍 VALIDATION:")
    print(f" Is Contract: {validation['is_contract']}")
    print(f" Confidence: {validation['confidence_level']}")
    print(f" Score: {validation['score']}")
    print(f" Key Indicators: {', '.join(validation['key_indicators'][:3])}")

    # Processing
    processing = complete_results['processing']
    print("\n📝 PROCESSING:")
    print(f" Sentences: {processing['statistics']['sentence_count']}")
    print(f" Words: {processing['statistics']['word_count']}")
    print(f" Language: {processing['statistics']['language']}")
    print(f" Parties Found: {processing['entity_counts'].get('parties', 0)}")
    print(f" Dates Found: {processing['entity_counts'].get('dates', 0)}")

    # Classification (if available)
    if _section_ready(complete_results, 'classification', 'primary_category'):
        classification = complete_results['classification']
        print("\n🏷️ CLASSIFICATION:")
        print(f" Category: {classification['primary_category']}")
        print(f" Subcategory: {classification['subcategory']}")
        print(f" Confidence: {classification['confidence']:.2f}")
        print(f" Key Reasoning: {classification['reasoning'][0] if classification['reasoning'] else 'N/A'}")

    # Clause Extraction (if available)
    if _section_ready(complete_results, 'clause_extraction', 'total_clauses'):
        clause_extraction = complete_results['clause_extraction']
        print("\n🔍 CLAUSE EXTRACTION:")
        print(f" Total Clauses: {clause_extraction['total_clauses']}")
        print(f" Categories Found: {', '.join(clause_extraction['categories_found'][:5])}")
        print(f" Risky Clauses: {clause_extraction['risky_clauses_count']}")
        print(f" Average Confidence: {clause_extraction['avg_confidence']:.3f}")

        # Show sample clauses
        if (clause_extraction['sample_clauses']):
            print(" Sample Clauses:")
            for i, clause in enumerate(clause_extraction['sample_clauses'][:2]):
                print(f" {i+1}. [{clause['category']}] {clause['reference']}")
                print(f" Confidence: {clause['confidence']:.3f}")
                if clause['risk_indicators']:
                    print(f" ⚠️ Risks: {clause['risk_indicators']}")

    # Risk Analysis (if available)
    if _section_ready(complete_results, 'risk_analysis', 'overall_score'):
        risk_analysis = complete_results['risk_analysis']
        print("\n📊 RISK ANALYSIS:")
        print(f" Overall Score: {risk_analysis['overall_score']}/100")
        print(f" Risk Level: {risk_analysis['risk_level']}")
        # NOTE(review): this counts entries of 'risk_factors'; confirm they are category names
        print(f" High-Risk Categories: {len(risk_analysis['risk_factors'])}")
        print(f" Key Risk Factors: {', '.join(risk_analysis['risk_factors'][:3])}")

    # Term Analysis (if available)
    if _section_ready(complete_results, 'term_analysis', 'total_terms'):
        term_analysis = complete_results['term_analysis']
        print("\n⚖️ TERM ANALYSIS:")
        print(f" Unfavorable Terms: {term_analysis['total_terms']}")
        print(f" Critical Terms: {term_analysis['severity_dist'].get('critical', 0)}")
        print(f" High Terms: {term_analysis['severity_dist'].get('high', 0)}")

        if term_analysis['sample_terms']:
            # 'sample_terms' is the first few unfavorable terms of any severity, so the
            # heading no longer claims they are all critical
            print(" Sample Unfavorable Terms:")
            for i, term in enumerate(term_analysis['sample_terms'][:2]):
                print(f" {i+1}. {term.term} ({term.severity})")

    # Protection Analysis (if available)
    if _section_ready(complete_results, 'protection_analysis', 'total_missing'):
        protection_analysis = complete_results['protection_analysis']
        print("\n🛡️ PROTECTION ANALYSIS:")
        print(f" Missing Protections: {protection_analysis['total_missing']}")
        print(f" Critical Missing: {protection_analysis['importance_dist'].get('critical', 0)}")
        print(f" High Missing: {protection_analysis['importance_dist'].get('high', 0)}")

        if protection_analysis['critical_protections']:
            print(" Critical Missing Protections:")
            for i, protection in enumerate(protection_analysis['critical_protections'][:2]):
                print(f" {i+1}. {protection.protection}")

    # Market Comparison (if available)
    if _section_ready(complete_results, 'market_comparison', 'total_comparisons'):
        market_comparison = complete_results['market_comparison']
        print("\n📈 MARKET COMPARISON:")
        print(f" Total Comparisons: {market_comparison['total_comparisons']}")
        if 'assessment_summary' in market_comparison:
            summary = market_comparison['assessment_summary']
            print(f" Aggressive Terms: {summary['assessments']['aggressive']}")
            print(f" Unfavorable Terms: {summary['assessments']['unfavorable']}")
            print(f" Standard Terms: {summary['assessments']['standard']}")
            print(f" Favorable Terms: {summary['assessments']['favorable']}")
            print(f" Average Similarity: {summary['average_similarity']:.3f}")

        if market_comparison['sample_comparisons']:
            print(" Sample Market Comparisons:")
            for i, comparison in enumerate(market_comparison['sample_comparisons'][:2]):
                print(f" {i+1}. [{comparison.clause_category}] - {comparison.assessment}")
                print(f" Similarity: {comparison.similarity_score:.3f}")

    # LLM Interpretation (if available) - collected by the pipeline but previously never shown
    if _section_ready(complete_results, 'llm_interpretation', 'total_interpreted'):
        llm_interpretation = complete_results['llm_interpretation']
        print("\n💬 LLM INTERPRETATION:")
        print(f" Clauses Interpreted: {llm_interpretation['total_interpreted']}")
        print(f" Unfavorable Interpretations: {llm_interpretation['unfavorable_count']}")
        print(f" High-Risk Interpretations: {llm_interpretation['high_risk_count']}")

    # Negotiation Strategy (if available)
    if _section_ready(complete_results, 'negotiation_strategy', 'total_points'):
        negotiation_strategy = complete_results['negotiation_strategy']
        print("\n🤝 NEGOTIATION STRATEGY:")
        print(f" Total Points: {negotiation_strategy['total_points']}")
        print(f" Critical Points: {negotiation_strategy['critical_points']}")

        if negotiation_strategy['sample_points']:
            print(" Sample Negotiation Points:")
            for i, point in enumerate(negotiation_strategy['sample_points'][:2]):
                print(f" {i+1}. {point.issue} (Priority: {point.priority})")

    # LLM Analysis (if available)
    if _section_ready(complete_results, 'llm_analysis', 'summary'):
        llm_analysis = complete_results['llm_analysis']
        print("\n💬 EXECUTIVE SUMMARY:")
        print(f" {llm_analysis['summary']}")