import streamlit as st
import requests
import asyncio
import pdfplumber # For PDF text extraction
import json # For parsing JSON response
# Function to split text into chunks based on token limit
def split_text_into_chunks(text, max_tokens=4000):
    """
    Split *text* into chunks of approximately ``max_tokens`` tokens.

    Uses the rule of thumb 1 token ≈ 0.75 words, so each chunk holds at
    most ``int(max_tokens * 0.75)`` words.

    Fix: the original compared the chunk's *character* length against the
    word-based threshold ``max_tokens * 0.75``, producing chunks roughly
    5x smaller than documented; the check now counts words, matching the
    stated approximation.

    Args:
        text: Arbitrary text to split; an empty string yields no chunks.
        max_tokens: Approximate token budget per chunk.

    Returns:
        A list of whitespace-normalized text chunks (possibly empty).
    """
    # Token -> word estimate; never below 1 so the loop always advances.
    max_words = max(1, int(max_tokens * 0.75))
    words = text.split()
    # Slice the word list directly instead of re-joining the growing chunk
    # after every word (the original was O(n^2) in characters).
    return [
        " ".join(words[start:start + max_words])
        for start in range(0, len(words), max_words)
    ]
# Function to generate chat completion using AI/ML API
async def generate_chat_completion(api_key, system_prompt, user_prompt, timeout=60):
    """
    Call the AI/ML chat-completions endpoint without blocking the event loop.

    Args:
        api_key: Bearer token for api.aimlapi.com.
        system_prompt: System message steering the model.
        user_prompt: User message (the contract chunk to analyze).
        timeout: Seconds to wait for the HTTP response (new parameter,
            backward-compatible default of 60).

    Returns:
        The assistant message content string, or None on any request
        failure (the error is shown in the UI via st.error).
    """
    base_url = "https://api.aimlapi.com/v1"
    try:
        # Define the payload for the API request
        payload = {
            "model": "gpt-3.5-turbo",  # Use GPT-3.5 Turbo (or your custom model)
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ],
            "max_tokens": 1000,  # Room for a detailed JSON analysis
            "temperature": 0.5   # Lower temperature for more focused responses
        }
        # requests is blocking, so run it in a worker thread to keep the
        # asyncio event loop responsive.
        response = await asyncio.to_thread(
            requests.post,
            f"{base_url}/chat/completions",
            json=payload,
            headers={
                'Authorization': f'Bearer {api_key}',
                'Content-Type': 'application/json'
            },
            # Fix: without a timeout a stalled server hangs this thread
            # (and the spinner in the UI) forever.
            timeout=timeout
        )
        response.raise_for_status()  # Raise an error for bad responses (4xx, 5xx)
        return response.json()["choices"][0]["message"]["content"]
    except requests.exceptions.HTTPError as e:
        st.error(f'HTTP Error: {e.response.status_code} - {e.response.text}')
    except requests.exceptions.RequestException as e:
        # Covers connection errors and the new Timeout as well.
        st.error(f'API Request Error: {e}')
    return None
# Function to extract text from PDFs
def extract_text_from_pdf(uploaded_file):
    """Return the concatenated text of every page of an uploaded PDF.

    Pages with no extractable text (e.g. scanned images) contribute an
    empty string.  Extraction failures are reported via st.error and an
    empty (or partial) result is returned rather than raising.
    """
    page_texts = []
    try:
        with pdfplumber.open(uploaded_file) as pdf:
            for page in pdf.pages:
                # extract_text() may return None for image-only pages
                page_texts.append(page.extract_text() or "")
    except Exception as e:
        st.error(f"Error extracting text from PDF: {e}")
    return "".join(page_texts)
# Function to merge multiple JSON responses into a single JSON object
def merge_json_responses(json_responses):
    """Fold per-chunk JSON analysis strings into one aggregate report.

    Risk-clause lists and key clauses are concatenated across chunks.
    Each compliance field starts at "Compliant" and is downgraded to
    "Non-compliant" as soon as any chunk reports it (strictest wins).
    Responses that fail to parse are reported via st.error and skipped.
    """
    RISK_BUCKETS = ("high_risk_clauses", "medium_risk_clauses", "low_risk_clauses")
    COMPLIANCE_KEYS = ("gdpr", "data_protection", "intellectual_property")
    merged = {
        "risk_analysis": {bucket: [] for bucket in RISK_BUCKETS},
        "compliance": {key: "Compliant" for key in COMPLIANCE_KEYS},
        "key_clauses": [],
    }
    for raw in json_responses:
        try:
            data = json.loads(raw)
        except json.JSONDecodeError:
            st.error(f"Failed to parse JSON response: {raw}")
            continue
        # Concatenate every risk bucket the chunk reported.
        if "risk_analysis" in data:
            buckets = data["risk_analysis"]
            for bucket in RISK_BUCKETS:
                if bucket in buckets:
                    merged["risk_analysis"][bucket].extend(buckets[bucket])
        # Keep the strictest compliance verdict seen so far.
        if "compliance" in data:
            verdicts = data["compliance"]
            for key in COMPLIANCE_KEYS:
                if key in verdicts and verdicts[key] == "Non-compliant":
                    merged["compliance"][key] = "Non-compliant"
        # Append the chunk's key clauses in order.
        if "key_clauses" in data:
            merged["key_clauses"].extend(data["key_clauses"])
    return merged
# Function to analyze the contract using the AI/ML API
async def analyze_contract(api_key, contract_text):
    """Analyze a full contract by chunking it and merging per-chunk results.

    The text is split into roughly 4000-token chunks, each chunk is sent
    to the API with the same system prompt, and the per-chunk JSON
    reports are merged into one aggregate dict via merge_json_responses.
    Chunks whose API call fails (generate_chat_completion returns None)
    are silently skipped.
    """
    # The system prompt doubles as the output schema: the model is told
    # to answer in exactly the JSON shape merge_json_responses expects.
    system_prompt = """
You are an AI-powered contract review assistant. Your task is to analyze contracts for the following aspects:
1. Clause extraction: Identify and extract key clauses.
2. Risk assessment: Evaluate the risk level of each clause.
3. Anomaly detection: Detect any unusual or non-standard clauses.
4. Compliance checking: Ensure the contract complies with relevant regulations (e.g., GDPR).
5. Provide a detailed analysis report in the following JSON format:
{
"risk_analysis": {
"high_risk_clauses": [],
"medium_risk_clauses": [],
"low_risk_clauses": []
},
"compliance": {
"gdpr": "Compliant/Non-compliant",
"data_protection": "Compliant/Non-compliant",
"intellectual_property": "Compliant/Non-compliant"
},
"key_clauses": [
{
"clause_name": "Termination Clause",
"description": "30 days' notice"
},
{
"clause_name": "Liability Limitation",
"description": "Limited to contract value"
},
{
"clause_name": "Confidentiality Agreement",
"description": "Standard clause"
}
]
}
"""
    # Split the contract text into smaller chunks so each request stays
    # within the model's context window.
    chunks = split_text_into_chunks(contract_text)
    analysis_results = []
    # Chunks are awaited one at a time (sequential, not concurrent).
    for chunk in chunks:
        user_prompt = f"""Analyze the following contract text and provide a detailed report in JSON format:
{chunk}
"""
        # Generate analysis using the AI/ML API
        analysis_result = await generate_chat_completion(api_key, system_prompt, user_prompt)
        # None signals a failed request; keep only successful chunk reports.
        if analysis_result:
            analysis_results.append(analysis_result)
    # Combine results from all chunks into a single JSON object
    return merge_json_responses(analysis_results)
# Function to parse and display the analysis result
def _write_clause_list(clauses):
    """Render one risk bucket; entries may be dicts or plain strings."""
    for clause in clauses:
        if isinstance(clause, dict):
            st.write(f"- {clause['clause_name']}: {clause['description']}")
        else:
            st.write(f"- {clause}")


def display_analysis_result(analysis_result):
    """Render the merged analysis report in the Streamlit UI.

    Expects the structure produced by merge_json_responses: a
    "risk_analysis" dict with three clause lists, a "compliance" dict,
    and a "key_clauses" list.  Missing keys are reported via st.error.
    """
    try:
        # Display Risk Analysis (one section per severity bucket)
        st.subheader("Risk Analysis")
        st.write("**High Risk Clauses:**")
        _write_clause_list(analysis_result["risk_analysis"]["high_risk_clauses"])
        st.write("**Medium Risk Clauses:**")
        _write_clause_list(analysis_result["risk_analysis"]["medium_risk_clauses"])
        st.write("**Low Risk Clauses:**")
        _write_clause_list(analysis_result["risk_analysis"]["low_risk_clauses"])
        # Display Compliance
        st.subheader("Compliance")
        st.write(f"**GDPR:** {analysis_result['compliance']['gdpr']}")
        st.write(f"**Data Protection:** {analysis_result['compliance']['data_protection']}")
        st.write(f"**Intellectual Property:** {analysis_result['compliance']['intellectual_property']}")
        # Display Key Clauses.  Fix: the risk lists tolerate plain-string
        # entries, but this loop indexed every entry as a dict and raised
        # an uncaught TypeError when the model returned a bare string.
        st.subheader("Key Clauses")
        for clause in analysis_result["key_clauses"]:
            if isinstance(clause, dict):
                st.write(f"**{clause['clause_name']}:** {clause['description']}")
            else:
                st.write(f"**{clause}**")
    except KeyError as e:
        st.error(f"Missing expected key in analysis result: {e}")
# Streamlit UI -- flat script body; Streamlit re-runs it top-to-bottom on
# every user interaction, so state that must survive reruns lives in
# st.session_state.
st.title("ContractIQ")

# Initialize session state for the API key (persists across reruns)
if "api_key" not in st.session_state:
    st.session_state.api_key = None

# Gate the rest of the page until the user supplies an API key.
if not st.session_state.api_key:
    st.header("Get Started")
    st.markdown("**Please enter your AI/ML API key to continue.**")
    api_key = st.text_input("Enter your API key:", type="password")
    if api_key:
        st.session_state.api_key = api_key
        st.success("API key saved successfully!")
    else:
        st.warning("Please enter a valid API key to proceed.")
        st.stop()  # halt this run; page resumes once a key is entered

# Display key metrics (hard-coded placeholders for now)
col1, col2, col3, col4 = st.columns(4)
col1.metric("Contracts Reviewed", "1,234")  # Replace with dynamic data
col2.metric("High Risk Contracts", "56")  # Replace with dynamic data
col3.metric("Approved Contracts", "987")  # Replace with dynamic data
col4.metric("Active Users", "42")  # Replace with dynamic data

# Upload Contract Section
st.header("Upload New Contract")
st.markdown("**Please upload a contract file (PDF, DOC, TXT) that is less than 100 KB.**")
uploaded_file = st.file_uploader("Drag and drop your contract file or click to browse", type=["pdf", "doc", "txt"])
if uploaded_file is not None:
    # Reject oversized uploads before doing any extraction work.
    if uploaded_file.size > 100 * 1024:  # 100 KB in bytes
        st.error("File size exceeds 100 KB. Please upload a smaller file.")
    else:
        try:
            # Extract text: pdfplumber for PDFs, UTF-8 decode otherwise.
            # NOTE(review): .doc files also take the UTF-8 path, which will
            # likely fail on real Word binaries -- confirm intended support.
            if uploaded_file.type == "application/pdf":
                contract_text = extract_text_from_pdf(uploaded_file)
            else:
                contract_text = uploaded_file.read().decode("utf-8")
            # Analyze the contract only when the user asks for it.
            if st.button("Start AI Review"):
                with st.spinner("Analyzing contract..."):
                    # Use the API key stored in session state above
                    api_key = st.session_state.api_key
                    # asyncio.run drives the async chunked analysis to completion
                    analysis_result = asyncio.run(analyze_contract(api_key, contract_text))
                    if analysis_result:
                        st.markdown("### Analysis Result")
                        display_analysis_result(analysis_result)  # Display the parsed analysis result
                    else:
                        st.error("Failed to analyze the contract.")
        except Exception as e:
            st.error(f"Error processing the file: {e}")

# Recent Contract Activity (can be dynamically updated based on backend data)
st.header("Recent Contract Activity")
st.write("Latest updates on contract reviews and approvals")
# Example dynamic data (replace with actual data from backend)
recent_activity = [
    {"Contract Name": "Service Agreement - TechCorp", "Status": "Approved", "Risk Level": "Low", "Last Updated": "2023-09-15"},
    {"Contract Name": "NDA - StartupX", "Status": "In Review", "Risk Level": "Medium", "Last Updated": "2023-09-14"},
    {"Contract Name": "Licensing Agreement - BigCo", "Status": "Needs Attention", "Risk Level": "High", "Last Updated": "2023-09-13"},
]
st.table(recent_activity)