# ContractIQ / app.py
# Ashar086's picture
# Update app.py
# 14d5b28 verified
import streamlit as st
import requests
import asyncio
import pdfplumber # For PDF text extraction
import json # For parsing JSON response
# Function to split text into chunks based on token limit
def split_text_into_chunks(text, max_tokens=4000):
    """
    Split `text` into whitespace-delimited chunks bounded by a character budget.

    Words are accumulated until the space-joined chunk becomes longer than
    `max_tokens * 0.75` characters; the word that crosses the budget stays in
    that chunk and a new chunk is started afterwards. Note that the budget is
    measured in characters, so with the rule of thumb of 1 token ~= 4
    characters every chunk stays far below `max_tokens` tokens, which leaves
    room for the prompt and the model's reply.

    Args:
        text: The text to split. Runs of whitespace (including newlines) are
            collapsed to single spaces in the output.
        max_tokens: Nominal limit the character budget is derived from.

    Returns:
        A list of chunk strings in original word order; an empty list when
        `text` contains no words.
    """
    char_budget = max_tokens * 0.75
    chunks = []
    current_chunk = []
    # Length of " ".join(current_chunk), maintained incrementally so the
    # chunk does not have to be re-joined for every word.
    current_length = 0
    for word in text.split():
        # Every word except the first one in a chunk is preceded by a space.
        current_length += len(word) + (1 if current_chunk else 0)
        current_chunk.append(word)
        if current_length > char_budget:
            chunks.append(" ".join(current_chunk))
            current_chunk = []
            current_length = 0
    if current_chunk:
        chunks.append(" ".join(current_chunk))
    return chunks
# Function to generate chat completion using AI/ML API
async def generate_chat_completion(api_key, system_prompt, user_prompt):
    """
    Request one chat completion from the AI/ML API and return its text.

    The blocking `requests.post` call is executed through `asyncio.to_thread`
    so the coroutine can be awaited. Failures are reported to the page with
    `st.error` instead of being raised.

    Args:
        api_key: Bearer token sent in the `Authorization` header.
        system_prompt: Content of the system message.
        user_prompt: Content of the user message.

    Returns:
        The `content` string of the first choice in the response, or `None`
        when the request fails, times out, or the response body does not have
        the expected shape.
    """
    base_url = "https://api.aimlapi.com/v1"
    # Define the payload for the API request
    payload = {
        "model": "gpt-3.5-turbo",  # Use GPT-3.5 Turbo (or your custom model)
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        "max_tokens": 1000,  # Increase tokens for detailed analysis
        "temperature": 0.5,  # Lower temperature for more focused responses
    }
    headers = {
        'Authorization': f'Bearer {api_key}',
        'Content-Type': 'application/json',
    }
    try:
        # Send the request to the API. Without a timeout `requests` waits
        # indefinitely on a stalled connection; (connect, read) seconds.
        response = await asyncio.to_thread(
            requests.post,
            f"{base_url}/chat/completions",
            json=payload,
            headers=headers,
            timeout=(10, 120),
        )
        response.raise_for_status()  # Raise an error for bad responses (4xx, 5xx)
        return response.json()["choices"][0]["message"]["content"]
    except requests.exceptions.HTTPError as e:
        st.error(f'HTTP Error: {e.response.status_code} - {e.response.text}')
    except requests.exceptions.RequestException as e:
        # Also covers connection errors and the timeouts configured above.
        st.error(f'API Request Error: {e}')
    except (KeyError, IndexError, TypeError, ValueError) as e:
        # A 2xx reply whose body is not JSON or lacks choices[0].message.content.
        st.error(f'Unexpected API response format: {e}')
    return None
# Function to extract text from PDFs
def extract_text_from_pdf(uploaded_file):
    """
    Extract the text of every page of a PDF opened with pdfplumber.

    Pages are joined with a newline so that the last word of one page does
    not fuse with the first word of the next. Pages for which
    `extract_text()` yields nothing are skipped.

    Args:
        uploaded_file: Whatever `pdfplumber.open` accepts; the caller in this
            file passes the object returned by `st.file_uploader`.

    Returns:
        The extracted text. If an error occurs it is reported with `st.error`
        and the text gathered from the pages processed so far is returned
        (an empty string when nothing was extracted).
    """
    page_texts = []
    try:
        with pdfplumber.open(uploaded_file) as pdf:
            for page in pdf.pages:
                page_text = page.extract_text()
                if page_text:  # Handle pages with no text
                    page_texts.append(page_text)
    except Exception as e:
        st.error(f"Error extracting text from PDF: {e}")
    return "\n".join(page_texts)
# Function to merge multiple JSON responses into a single JSON object
def _load_json_object(raw_response):
    """Decode a model reply into a dict, tolerating text around the JSON object.

    Replies are sometimes wrapped in Markdown code fences or surrounded by
    prose; when the reply as a whole is not valid JSON, the span from the
    first "{" to the last "}" is tried instead.

    Raises:
        json.JSONDecodeError: when no JSON object can be decoded.
    """
    try:
        data = json.loads(raw_response)
    except json.JSONDecodeError:
        start = raw_response.find("{")
        end = raw_response.rfind("}")
        if start == -1 or end < start:
            raise
        data = json.loads(raw_response[start:end + 1])
    if not isinstance(data, dict):
        raise json.JSONDecodeError("Expected a JSON object", raw_response, 0)
    return data


def _extend_clauses(target, value):
    """Add `value` to `target`: lists are extended, any other non-None value is appended."""
    if isinstance(value, list):
        target.extend(value)
    elif value is not None:
        # Appending keeps a lone string/dict intact instead of splitting it
        # into characters/keys the way list.extend would.
        target.append(value)


def _is_non_compliant(value):
    """Return True for "Non-compliant" in any letter case or hyphen/space spelling."""
    if not isinstance(value, str):
        return False
    normalized = value.lower().replace("-", "").replace("_", "").replace(" ", "")
    return normalized.startswith("noncompliant")


def merge_json_responses(json_responses):
    """
    Merge the per-chunk JSON replies into a single analysis dictionary.

    Args:
        json_responses: Iterable of reply strings, each expected to contain a
            JSON object with optional "risk_analysis", "compliance" and
            "key_clauses" entries.

    Returns:
        A dict with the keys "risk_analysis" (three clause lists, concatenated
        across replies), "compliance" (each entry starts as "Compliant" and
        becomes "Non-compliant" as soon as any reply says so) and
        "key_clauses" (concatenated across replies). Replies that cannot be
        decoded are reported with `st.error` and skipped.
    """
    merged_result = {
        "risk_analysis": {
            "high_risk_clauses": [],
            "medium_risk_clauses": [],
            "low_risk_clauses": [],
        },
        "compliance": {
            "gdpr": "Compliant",
            "data_protection": "Compliant",
            "intellectual_property": "Compliant",
        },
        "key_clauses": [],
    }
    for response in json_responses:
        try:
            data = _load_json_object(response)
        except json.JSONDecodeError:
            st.error(f"Failed to parse JSON response: {response}")
            continue

        # Merge risk analysis
        risk_analysis = data.get("risk_analysis")
        if isinstance(risk_analysis, dict):
            for risk_level, merged_clauses in merged_result["risk_analysis"].items():
                _extend_clauses(merged_clauses, risk_analysis.get(risk_level))

        # Merge compliance (take the strictest compliance)
        compliance = data.get("compliance")
        if isinstance(compliance, dict):
            for compliance_key in merged_result["compliance"]:
                if _is_non_compliant(compliance.get(compliance_key)):
                    merged_result["compliance"][compliance_key] = "Non-compliant"

        # Merge key clauses
        _extend_clauses(merged_result["key_clauses"], data.get("key_clauses"))
    return merged_result
# Function to analyze the contract using the AI/ML API
async def analyze_contract(api_key, contract_text):
    """
    Analyze a contract chunk by chunk and merge the per-chunk reports.

    The text is split with `split_text_into_chunks`, each chunk is sent to
    `generate_chat_completion` one after another, and the replies that came
    back are combined with `merge_json_responses`.

    Args:
        api_key: API key forwarded to `generate_chat_completion`.
        contract_text: Full text of the contract.

    Returns:
        The merged analysis dict, or `None` when no chunk produced a reply
        (empty text or every request failed). Returning `None` here keeps the
        caller from displaying the merge defaults ("Compliant" everywhere) as
        if they were a real result.
    """
    # Define comprehensive system prompt
    system_prompt = """
You are an AI-powered contract review assistant. Your task is to analyze contracts for the following aspects:
1. Clause extraction: Identify and extract key clauses.
2. Risk assessment: Evaluate the risk level of each clause.
3. Anomaly detection: Detect any unusual or non-standard clauses.
4. Compliance checking: Ensure the contract complies with relevant regulations (e.g., GDPR).
5. Provide a detailed analysis report in the following JSON format:
{
"risk_analysis": {
"high_risk_clauses": [],
"medium_risk_clauses": [],
"low_risk_clauses": []
},
"compliance": {
"gdpr": "Compliant/Non-compliant",
"data_protection": "Compliant/Non-compliant",
"intellectual_property": "Compliant/Non-compliant"
},
"key_clauses": [
{
"clause_name": "Termination Clause",
"description": "30 days' notice"
},
{
"clause_name": "Liability Limitation",
"description": "Limited to contract value"
},
{
"clause_name": "Confidentiality Agreement",
"description": "Standard clause"
}
]
}
"""
    # Split the contract text into smaller chunks
    chunks = split_text_into_chunks(contract_text)
    analysis_results = []
    for chunk in chunks:
        user_prompt = f"""Analyze the following contract text and provide a detailed report in JSON format:
{chunk}
"""
        # Generate analysis using the AI/ML API
        analysis_result = await generate_chat_completion(api_key, system_prompt, user_prompt)
        if analysis_result:
            analysis_results.append(analysis_result)

    # Nothing to merge: signal failure instead of returning untouched defaults.
    if not analysis_results:
        return None

    # Combine results from all chunks into a single JSON object
    return merge_json_responses(analysis_results)
# Function to parse and display the analysis result
def display_analysis_result(analysis_result):
    """
    Render a merged analysis dictionary on the Streamlit page.

    Three sections are written: "Risk Analysis" (high, medium and low risk
    clause lists), "Compliance" (gdpr, data_protection, intellectual_property)
    and "Key Clauses". A clause that is a dict is rendered from its
    "clause_name" and "description" entries; any other clause value is
    rendered as a plain bullet, in the risk lists and in the key-clause list
    alike.

    Args:
        analysis_result: Dict with "risk_analysis", "compliance" and
            "key_clauses" entries, as built by `merge_json_responses`.

    Returns:
        None. A missing key is reported with `st.error`, and rendering stops
        at that point.
    """
    risk_sections = (
        ("**High Risk Clauses:**", "high_risk_clauses"),
        ("**Medium Risk Clauses:**", "medium_risk_clauses"),
        ("**Low Risk Clauses:**", "low_risk_clauses"),
    )
    try:
        # Display Risk Analysis
        st.subheader("Risk Analysis")
        for heading, risk_level in risk_sections:
            st.write(heading)
            for clause in analysis_result["risk_analysis"][risk_level]:
                if isinstance(clause, dict):
                    st.write(f"- {clause['clause_name']}: {clause['description']}")
                else:
                    st.write(f"- {clause}")

        # Display Compliance
        compliance = analysis_result["compliance"]
        st.subheader("Compliance")
        st.write(f"**GDPR:** {compliance['gdpr']}")
        st.write(f"**Data Protection:** {compliance['data_protection']}")
        st.write(f"**Intellectual Property:** {compliance['intellectual_property']}")

        # Display Key Clauses
        st.subheader("Key Clauses")
        for clause in analysis_result["key_clauses"]:
            if isinstance(clause, dict):
                st.write(f"**{clause['clause_name']}:** {clause['description']}")
            else:
                # Same fallback as the risk lists: the model may return a
                # bare string instead of a {clause_name, description} object.
                st.write(f"- {clause}")
    except KeyError as e:
        st.error(f"Missing expected key in analysis result: {e}")
# Streamlit UI
st.title("ContractIQ")

# Make sure the session always carries an "api_key" entry
if "api_key" not in st.session_state:
    st.session_state["api_key"] = None

# Until a key has been stored, show the key prompt and halt the script
# before any of the sections below are rendered
if not st.session_state["api_key"]:
    st.header("Get Started")
    st.markdown("**Please enter your AI/ML API key to continue.**")
    api_key = st.text_input("Enter your API key:", type="password")
    if not api_key:
        st.warning("Please enter a valid API key to proceed.")
        st.stop()
    st.session_state["api_key"] = api_key
    st.success("API key saved successfully!")
# Display key metrics (can be dynamically updated based on backend data)
col1, col2, col3, col4 = st.columns(4)
# Placeholder figures, one per column, left to right (replace with dynamic data)
dashboard_metrics = (
    ("Contracts Reviewed", "1,234"),
    ("High Risk Contracts", "56"),
    ("Approved Contracts", "987"),
    ("Active Users", "42"),
)
for column, (label, value) in zip((col1, col2, col3, col4), dashboard_metrics):
    column.metric(label, value)
# Upload Contract Section
st.header("Upload New Contract")
st.markdown("**Please upload a contract file (PDF, DOC, TXT) that is less than 100 KB.**")
uploaded_file = st.file_uploader("Drag and drop your contract file or click to browse", type=["pdf", "doc", "txt"])
MAX_UPLOAD_BYTES = 100 * 1024  # 100 KB in bytes
if uploaded_file is not None:
    # Check file size
    if uploaded_file.size > MAX_UPLOAD_BYTES:
        st.error("File size exceeds 100 KB. Please upload a smaller file.")
    else:
        try:
            # Extract text from the uploaded file
            if uploaded_file.type == "application/pdf":
                contract_text = extract_text_from_pdf(uploaded_file)
            else:
                # getvalue() returns the complete buffer regardless of the
                # current read position, so the text is not lost if the
                # stream was already consumed on an earlier script run.
                # NOTE(review): legacy .doc files are generally not UTF-8
                # text, so they will likely end up in the error handler
                # below - confirm whether "doc" should stay an accepted type.
                contract_text = uploaded_file.getvalue().decode("utf-8")
            # Analyze the contract
            if st.button("Start AI Review"):
                with st.spinner("Analyzing contract..."):
                    # Use the API key from session state
                    api_key = st.session_state.api_key
                    # Analyze the contract using the AI/ML API
                    analysis_result = asyncio.run(analyze_contract(api_key, contract_text))
                    if analysis_result:
                        st.markdown("### Analysis Result")
                        display_analysis_result(analysis_result)  # Display the parsed analysis result
                    else:
                        st.error("Failed to analyze the contract.")
        except Exception as e:
            st.error(f"Error processing the file: {e}")
# Recent Contract Activity (can be dynamically updated based on backend data)
st.header("Recent Contract Activity")
st.write("Latest updates on contract reviews and approvals")
# Example rows (replace with actual data from backend); each row lists its
# values in the same order as the column names
activity_columns = ("Contract Name", "Status", "Risk Level", "Last Updated")
activity_rows = (
    ("Service Agreement - TechCorp", "Approved", "Low", "2023-09-15"),
    ("NDA - StartupX", "In Review", "Medium", "2023-09-14"),
    ("Licensing Agreement - BigCo", "Needs Attention", "High", "2023-09-13"),
)
recent_activity = [dict(zip(activity_columns, row)) for row in activity_rows]
st.table(recent_activity)