import streamlit as st
import requests
import asyncio
import pdfplumber # For PDF text extraction
import json # For parsing JSON response
# Function to split text into chunks based on token limit
def split_text_into_chunks(text, max_tokens=4000):
    """
    Split *text* into chunks of approximately ``max_tokens`` tokens.

    Uses the rule of thumb 1 token ≈ 0.75 words, so each chunk holds at
    most ``int(max_tokens * 0.75)`` words.

    Fix: the original compared the chunk's *character* length against the
    word-based threshold ``max_tokens * 0.75``, producing chunks roughly
    5x smaller than documented; the check now counts words, matching the
    stated approximation.

    Args:
        text: Arbitrary text to split; an empty string yields no chunks.
        max_tokens: Approximate token budget per chunk.

    Returns:
        A list of whitespace-normalized text chunks (possibly empty).
    """
    # Token -> word estimate; never below 1 so the loop always advances.
    max_words = max(1, int(max_tokens * 0.75))
    words = text.split()
    # Slice the word list directly instead of re-joining the growing chunk
    # after every word (the original was O(n^2) in characters).
    return [
        " ".join(words[start:start + max_words])
        for start in range(0, len(words), max_words)
    ]
# Function to generate chat completion using AI/ML API
async def generate_chat_completion(api_key, system_prompt, user_prompt, timeout=60):
    """
    Call the AI/ML chat-completions endpoint without blocking the event loop.

    Args:
        api_key: Bearer token for api.aimlapi.com.
        system_prompt: System message steering the model.
        user_prompt: User message (the contract chunk to analyze).
        timeout: Seconds to wait for the HTTP response (new parameter,
            backward-compatible default of 60).

    Returns:
        The assistant message content string, or None on any request
        failure (the error is shown in the UI via st.error).
    """
    base_url = "https://api.aimlapi.com/v1"
    try:
        # Define the payload for the API request
        payload = {
            "model": "gpt-3.5-turbo",  # Use GPT-3.5 Turbo (or your custom model)
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ],
            "max_tokens": 1000,  # Room for a detailed JSON analysis
            "temperature": 0.5   # Lower temperature for more focused responses
        }
        # requests is blocking, so run it in a worker thread to keep the
        # asyncio event loop responsive.
        response = await asyncio.to_thread(
            requests.post,
            f"{base_url}/chat/completions",
            json=payload,
            headers={
                'Authorization': f'Bearer {api_key}',
                'Content-Type': 'application/json'
            },
            # Fix: without a timeout a stalled server hangs this thread
            # (and the spinner in the UI) forever.
            timeout=timeout
        )
        response.raise_for_status()  # Raise an error for bad responses (4xx, 5xx)
        return response.json()["choices"][0]["message"]["content"]
    except requests.exceptions.HTTPError as e:
        st.error(f'HTTP Error: {e.response.status_code} - {e.response.text}')
    except requests.exceptions.RequestException as e:
        # Covers connection errors and the new Timeout as well.
        st.error(f'API Request Error: {e}')
    return None
# Function to extract text from PDFs
def extract_text_from_pdf(uploaded_file):
    """Return the concatenated text of every page of an uploaded PDF.

    Pages with no extractable text (e.g. scanned images) contribute an
    empty string.  Extraction failures are reported via st.error and an
    empty (or partial) result is returned rather than raising.
    """
    page_texts = []
    try:
        with pdfplumber.open(uploaded_file) as pdf:
            for page in pdf.pages:
                # extract_text() may return None for image-only pages
                page_texts.append(page.extract_text() or "")
    except Exception as e:
        st.error(f"Error extracting text from PDF: {e}")
    return "".join(page_texts)
# Function to merge multiple JSON responses into a single JSON object
def merge_json_responses(json_responses):
    """Fold per-chunk JSON analysis strings into one aggregate report.

    Risk-clause lists and key clauses are concatenated across chunks.
    Each compliance field starts at "Compliant" and is downgraded to
    "Non-compliant" as soon as any chunk reports it (strictest wins).
    Responses that fail to parse are reported via st.error and skipped.
    """
    RISK_BUCKETS = ("high_risk_clauses", "medium_risk_clauses", "low_risk_clauses")
    COMPLIANCE_KEYS = ("gdpr", "data_protection", "intellectual_property")
    merged = {
        "risk_analysis": {bucket: [] for bucket in RISK_BUCKETS},
        "compliance": {key: "Compliant" for key in COMPLIANCE_KEYS},
        "key_clauses": [],
    }
    for raw in json_responses:
        try:
            data = json.loads(raw)
        except json.JSONDecodeError:
            st.error(f"Failed to parse JSON response: {raw}")
            continue
        # Concatenate every risk bucket the chunk reported.
        if "risk_analysis" in data:
            buckets = data["risk_analysis"]
            for bucket in RISK_BUCKETS:
                if bucket in buckets:
                    merged["risk_analysis"][bucket].extend(buckets[bucket])
        # Keep the strictest compliance verdict seen so far.
        if "compliance" in data:
            verdicts = data["compliance"]
            for key in COMPLIANCE_KEYS:
                if key in verdicts and verdicts[key] == "Non-compliant":
                    merged["compliance"][key] = "Non-compliant"
        # Append the chunk's key clauses in order.
        if "key_clauses" in data:
            merged["key_clauses"].extend(data["key_clauses"])
    return merged
# Function to analyze the contract using the AI/ML API
async def analyze_contract(api_key, contract_text):
    """Analyze a full contract by chunking it and merging per-chunk results.

    The text is split into roughly 4000-token chunks, each chunk is sent
    to the API with the same system prompt, and the per-chunk JSON
    reports are merged into one aggregate dict via merge_json_responses.
    Chunks whose API call fails (generate_chat_completion returns None)
    are silently skipped.
    """
    # The system prompt doubles as the output schema: the model is told
    # to answer in exactly the JSON shape merge_json_responses expects.
    system_prompt = """
You are an AI-powered contract review assistant. Your task is to analyze contracts for the following aspects:
1. Clause extraction: Identify and extract key clauses.
2. Risk assessment: Evaluate the risk level of each clause.
3. Anomaly detection: Detect any unusual or non-standard clauses.
4. Compliance checking: Ensure the contract complies with relevant regulations (e.g., GDPR).
5. Provide a detailed analysis report in the following JSON format:
{
"risk_analysis": {
"high_risk_clauses": [],
"medium_risk_clauses": [],
"low_risk_clauses": []
},
"compliance": {
"gdpr": "Compliant/Non-compliant",
"data_protection": "Compliant/Non-compliant",
"intellectual_property": "Compliant/Non-compliant"
},
"key_clauses": [
{
"clause_name": "Termination Clause",
"description": "30 days' notice"
},
{
"clause_name": "Liability Limitation",
"description": "Limited to contract value"
},
{
"clause_name": "Confidentiality Agreement",
"description": "Standard clause"
}
]
}
"""
    # Split the contract text into smaller chunks so each request stays
    # within the model's context window.
    chunks = split_text_into_chunks(contract_text)
    analysis_results = []
    # Chunks are awaited one at a time (sequential, not concurrent).
    for chunk in chunks:
        user_prompt = f"""Analyze the following contract text and provide a detailed report in JSON format:
{chunk}
"""
        # Generate analysis using the AI/ML API
        analysis_result = await generate_chat_completion(api_key, system_prompt, user_prompt)
        # None signals a failed request; keep only successful chunk reports.
        if analysis_result:
            analysis_results.append(analysis_result)
    # Combine results from all chunks into a single JSON object
    return merge_json_responses(analysis_results)
# Function to parse and display the analysis result
def _write_clause_list(clauses):
    """Render one risk bucket; entries may be dicts or plain strings."""
    for clause in clauses:
        if isinstance(clause, dict):
            st.write(f"- {clause['clause_name']}: {clause['description']}")
        else:
            st.write(f"- {clause}")


def display_analysis_result(analysis_result):
    """Render the merged analysis report in the Streamlit UI.

    Expects the structure produced by merge_json_responses: a
    "risk_analysis" dict with three clause lists, a "compliance" dict,
    and a "key_clauses" list.  Missing keys are reported via st.error.
    """
    try:
        # Display Risk Analysis (one section per severity bucket)
        st.subheader("Risk Analysis")
        st.write("**High Risk Clauses:**")
        _write_clause_list(analysis_result["risk_analysis"]["high_risk_clauses"])
        st.write("**Medium Risk Clauses:**")
        _write_clause_list(analysis_result["risk_analysis"]["medium_risk_clauses"])
        st.write("**Low Risk Clauses:**")
        _write_clause_list(analysis_result["risk_analysis"]["low_risk_clauses"])
        # Display Compliance
        st.subheader("Compliance")
        st.write(f"**GDPR:** {analysis_result['compliance']['gdpr']}")
        st.write(f"**Data Protection:** {analysis_result['compliance']['data_protection']}")
        st.write(f"**Intellectual Property:** {analysis_result['compliance']['intellectual_property']}")
        # Display Key Clauses.  Fix: the risk lists tolerate plain-string
        # entries, but this loop indexed every entry as a dict and raised
        # an uncaught TypeError when the model returned a bare string.
        st.subheader("Key Clauses")
        for clause in analysis_result["key_clauses"]:
            if isinstance(clause, dict):
                st.write(f"**{clause['clause_name']}:** {clause['description']}")
            else:
                st.write(f"**{clause}**")
    except KeyError as e:
        st.error(f"Missing expected key in analysis result: {e}")
# Streamlit UI -- flat script body; Streamlit re-runs it top-to-bottom on
# every user interaction, so state that must survive reruns lives in
# st.session_state.
st.title("ContractIQ")

# Initialize session state for the API key (persists across reruns)
if "api_key" not in st.session_state:
    st.session_state.api_key = None

# Gate the rest of the page until the user supplies an API key.
if not st.session_state.api_key:
    st.header("Get Started")
    st.markdown("**Please enter your AI/ML API key to continue.**")
    api_key = st.text_input("Enter your API key:", type="password")
    if api_key:
        st.session_state.api_key = api_key
        st.success("API key saved successfully!")
    else:
        st.warning("Please enter a valid API key to proceed.")
        st.stop()  # halt this run; page resumes once a key is entered

# Display key metrics (hard-coded placeholders for now)
col1, col2, col3, col4 = st.columns(4)
col1.metric("Contracts Reviewed", "1,234")  # Replace with dynamic data
col2.metric("High Risk Contracts", "56")  # Replace with dynamic data
col3.metric("Approved Contracts", "987")  # Replace with dynamic data
col4.metric("Active Users", "42")  # Replace with dynamic data

# Upload Contract Section
st.header("Upload New Contract")
st.markdown("**Please upload a contract file (PDF, DOC, TXT) that is less than 100 KB.**")
uploaded_file = st.file_uploader("Drag and drop your contract file or click to browse", type=["pdf", "doc", "txt"])
if uploaded_file is not None:
    # Reject oversized uploads before doing any extraction work.
    if uploaded_file.size > 100 * 1024:  # 100 KB in bytes
        st.error("File size exceeds 100 KB. Please upload a smaller file.")
    else:
        try:
            # Extract text: pdfplumber for PDFs, UTF-8 decode otherwise.
            # NOTE(review): .doc files also take the UTF-8 path, which will
            # likely fail on real Word binaries -- confirm intended support.
            if uploaded_file.type == "application/pdf":
                contract_text = extract_text_from_pdf(uploaded_file)
            else:
                contract_text = uploaded_file.read().decode("utf-8")
            # Analyze the contract only when the user asks for it.
            if st.button("Start AI Review"):
                with st.spinner("Analyzing contract..."):
                    # Use the API key stored in session state above
                    api_key = st.session_state.api_key
                    # asyncio.run drives the async chunked analysis to completion
                    analysis_result = asyncio.run(analyze_contract(api_key, contract_text))
                    if analysis_result:
                        st.markdown("### Analysis Result")
                        display_analysis_result(analysis_result)  # Display the parsed analysis result
                    else:
                        st.error("Failed to analyze the contract.")
        except Exception as e:
            st.error(f"Error processing the file: {e}")

# Recent Contract Activity (can be dynamically updated based on backend data)
st.header("Recent Contract Activity")
st.write("Latest updates on contract reviews and approvals")
# Example dynamic data (replace with actual data from backend)
recent_activity = [
    {"Contract Name": "Service Agreement - TechCorp", "Status": "Approved", "Risk Level": "Low", "Last Updated": "2023-09-15"},
    {"Contract Name": "NDA - StartupX", "Status": "In Review", "Risk Level": "Medium", "Last Updated": "2023-09-14"},
    {"Contract Name": "Licensing Agreement - BigCo", "Status": "Needs Attention", "Risk Level": "High", "Last Updated": "2023-09-13"},
]
st.table(recent_activity)