import streamlit as st
import requests
import asyncio
import pdfplumber  # For PDF text extraction
import json  # For parsing JSON response


def split_text_into_chunks(text, max_tokens=4000):
    """
    Split ``text`` into chunks of approximately ``max_tokens`` tokens.

    Uses the heuristic that 1 token ~= 0.75 words, so each chunk holds at
    most ``max_tokens * 0.75`` words.

    NOTE(fix): the previous version compared the chunk's *character* length
    against the word budget ``max_tokens * 0.75`` (and re-joined the chunk
    on every iteration, which is quadratic), producing chunks of ~3000
    characters — far below the intended token budget.  This version counts
    words directly.

    Args:
        text: The full document text to split.
        max_tokens: Approximate token budget per chunk.

    Returns:
        list[str]: The chunks, in document order.  Empty input yields [].
    """
    max_words = max(1, int(max_tokens * 0.75))  # word budget per chunk
    words = text.split()
    chunks = []
    current_chunk = []
    for word in words:
        current_chunk.append(word)
        if len(current_chunk) >= max_words:
            chunks.append(" ".join(current_chunk))
            current_chunk = []
    if current_chunk:  # flush the trailing partial chunk
        chunks.append(" ".join(current_chunk))
    return chunks


async def generate_chat_completion(api_key, system_prompt, user_prompt):
    """
    Call the AI/ML API chat-completions endpoint and return the reply text.

    The blocking ``requests.post`` is pushed to a worker thread via
    ``asyncio.to_thread`` so the event loop is not blocked.

    Args:
        api_key: Bearer token for the AI/ML API.
        system_prompt: System-role instructions for the model.
        user_prompt: User-role message (the contract text to analyze).

    Returns:
        str | None: The model's message content, or None on any API error
        (the error is surfaced to the UI via ``st.error``).
    """
    base_url = "https://api.aimlapi.com/v1"
    payload = {
        "model": "gpt-3.5-turbo",  # Use GPT-3.5 Turbo (or your custom model)
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        "max_tokens": 1000,  # Increase tokens for detailed analysis
        "temperature": 0.5,  # Lower temperature for more focused responses
    }
    try:
        response = await asyncio.to_thread(
            requests.post,
            f"{base_url}/chat/completions",
            json=payload,
            headers={
                'Authorization': f'Bearer {api_key}',
                'Content-Type': 'application/json',
            },
            # requests has NO default timeout — without one a stalled
            # connection would hang the Streamlit app indefinitely.
            timeout=60,
        )
        response.raise_for_status()  # Raise an error for bad responses (4xx, 5xx)
        # Guard against malformed success payloads, not just HTTP errors.
        return response.json()["choices"][0]["message"]["content"]
    except requests.exceptions.HTTPError as e:
        st.error(f'HTTP Error: {e.response.status_code} - {e.response.text}')
    except requests.exceptions.RequestException as e:
        st.error(f'API Request Error: {e}')
    except (KeyError, IndexError, ValueError) as e:
        st.error(f'Unexpected API response format: {e}')
    return None


def extract_text_from_pdf(uploaded_file):
    """
    Extract all text from an uploaded PDF file.

    Args:
        uploaded_file: A file-like object (Streamlit UploadedFile).

    Returns:
        str: Concatenated text of every page; pages with no extractable
        text contribute an empty string.  Returns "" on extraction error
        (the error is shown in the UI).
    """
    pages = []
    try:
        with pdfplumber.open(uploaded_file) as pdf:
            for page in pdf.pages:
                # extract_text() may return None for image-only pages
                pages.append(page.extract_text() or "")
    except Exception as e:
        st.error(f"Error extracting text from PDF: {e}")
    # join once instead of quadratic += accumulation
    return "".join(pages)


def _strip_code_fences(response):
    """
    Strip a surrounding Markdown code fence (``` or ```json) from an LLM
    reply, if present, so the inner JSON can be parsed.  Replies without
    fences are returned unchanged.
    """
    text = response.strip()
    if text.startswith("```"):
        first_newline = text.find("\n")
        if first_newline != -1:
            text = text[first_newline + 1:]  # drop the ```json opener line
        if text.endswith("```"):
            text = text[:-3]
    return text.strip()


def merge_json_responses(json_responses):
    """
    Merge per-chunk JSON analysis replies into a single result dict.

    Merge rules:
      * risk_analysis lists are concatenated per risk level;
      * compliance takes the strictest verdict ("Non-compliant" wins);
      * key_clauses lists are concatenated.

    Args:
        json_responses: Iterable of JSON strings (one per analyzed chunk).

    Returns:
        dict: Merged result; replies that fail to parse are reported via
        ``st.error`` and skipped.
    """
    merged_result = {
        "risk_analysis": {
            "high_risk_clauses": [],
            "medium_risk_clauses": [],
            "low_risk_clauses": [],
        },
        "compliance": {
            "gdpr": "Compliant",
            "data_protection": "Compliant",
            "intellectual_property": "Compliant",
        },
        "key_clauses": [],
    }
    for response in json_responses:
        try:
            # Models frequently wrap JSON in ``` fences; tolerate that.
            data = json.loads(_strip_code_fences(response))
            # Merge risk analysis
            if "risk_analysis" in data:
                for risk_level in ["high_risk_clauses", "medium_risk_clauses", "low_risk_clauses"]:
                    if risk_level in data["risk_analysis"]:
                        merged_result["risk_analysis"][risk_level].extend(
                            data["risk_analysis"][risk_level]
                        )
            # Merge compliance (take the strictest compliance)
            if "compliance" in data:
                for compliance_key in ["gdpr", "data_protection", "intellectual_property"]:
                    if compliance_key in data["compliance"]:
                        if data["compliance"][compliance_key] == "Non-compliant":
                            merged_result["compliance"][compliance_key] = "Non-compliant"
            # Merge key clauses
            if "key_clauses" in data:
                merged_result["key_clauses"].extend(data["key_clauses"])
        except json.JSONDecodeError:
            st.error(f"Failed to parse JSON response: {response}")
    return merged_result


async def analyze_contract(api_key, contract_text):
    """
    Analyze a full contract with the AI/ML API, chunk by chunk.

    The contract is split into token-bounded chunks (to fit the model's
    context window), each chunk is analyzed independently, and the
    per-chunk JSON replies are merged into one report.

    Args:
        api_key: Bearer token for the AI/ML API.
        contract_text: Full plain-text contract.

    Returns:
        dict: Merged analysis report (see ``merge_json_responses``).
    """
    # Comprehensive system prompt describing the expected JSON report.
    system_prompt = """
    You are an AI-powered contract review assistant. Your task is to analyze contracts for the following aspects:
    1. Clause extraction: Identify and extract key clauses.
    2. Risk assessment: Evaluate the risk level of each clause.
    3. Anomaly detection: Detect any unusual or non-standard clauses.
    4. Compliance checking: Ensure the contract complies with relevant regulations (e.g., GDPR).
    5. Provide a detailed analysis report in the following JSON format:
    {
        "risk_analysis": {
            "high_risk_clauses": [],
            "medium_risk_clauses": [],
            "low_risk_clauses": []
        },
        "compliance": {
            "gdpr": "Compliant/Non-compliant",
            "data_protection": "Compliant/Non-compliant",
            "intellectual_property": "Compliant/Non-compliant"
        },
        "key_clauses": [
            { "clause_name": "Termination Clause", "description": "30 days' notice" },
            { "clause_name": "Liability Limitation", "description": "Limited to contract value" },
            { "clause_name": "Confidentiality Agreement", "description": "Standard clause" }
        ]
    }
    """
    # Split the contract text into smaller chunks
    chunks = split_text_into_chunks(contract_text)
    analysis_results = []
    for chunk in chunks:
        user_prompt = f"""Analyze the following contract text and provide a detailed report in JSON format:
        {chunk}
        """
        # Generate analysis using the AI/ML API
        analysis_result = await generate_chat_completion(api_key, system_prompt, user_prompt)
        if analysis_result:
            analysis_results.append(analysis_result)
    # Combine results from all chunks into a single JSON object
    return merge_json_responses(analysis_results)


def _write_clause_list(clauses):
    """Render a list of clauses; entries may be dicts or plain strings."""
    for clause in clauses:
        if isinstance(clause, dict):
            st.write(f"- {clause['clause_name']}: {clause['description']}")
        else:
            st.write(f"- {clause}")


def display_analysis_result(analysis_result):
    """
    Render the merged analysis report in the Streamlit UI.

    Shows the risk analysis (per level), the compliance verdicts, and the
    extracted key clauses.  A missing expected key is reported via
    ``st.error`` rather than crashing the app.

    Args:
        analysis_result: Dict as produced by ``merge_json_responses``.
    """
    try:
        # Display Risk Analysis — same rendering for all three levels,
        # previously three copy-pasted loops.
        st.subheader("Risk Analysis")
        st.write("**High Risk Clauses:**")
        _write_clause_list(analysis_result["risk_analysis"]["high_risk_clauses"])
        st.write("**Medium Risk Clauses:**")
        _write_clause_list(analysis_result["risk_analysis"]["medium_risk_clauses"])
        st.write("**Low Risk Clauses:**")
        _write_clause_list(analysis_result["risk_analysis"]["low_risk_clauses"])

        # Display Compliance
        st.subheader("Compliance")
        st.write(f"**GDPR:** {analysis_result['compliance']['gdpr']}")
        st.write(f"**Data Protection:** {analysis_result['compliance']['data_protection']}")
        st.write(f"**Intellectual Property:** {analysis_result['compliance']['intellectual_property']}")

        # Display Key Clauses
        st.subheader("Key Clauses")
        for clause in analysis_result["key_clauses"]:
            st.write(f"**{clause['clause_name']}:** {clause['description']}")
    except KeyError as e:
        st.error(f"Missing expected key in analysis result: {e}")


# ---------------------------------------------------------------------------
# Streamlit UI
# ---------------------------------------------------------------------------
st.title("ContractIQ")

# Initialize session state for API key
if "api_key" not in st.session_state:
    st.session_state.api_key = None

# Prompt the user to enter their API key; halt the script until provided.
if not st.session_state.api_key:
    st.header("Get Started")
    st.markdown("**Please enter your AI/ML API key to continue.**")
    api_key = st.text_input("Enter your API key:", type="password")
    if api_key:
        st.session_state.api_key = api_key
        st.success("API key saved successfully!")
    else:
        st.warning("Please enter a valid API key to proceed.")
        st.stop()  # stop rendering the rest of the page until a key exists

# Display key metrics (can be dynamically updated based on backend data)
col1, col2, col3, col4 = st.columns(4)
col1.metric("Contracts Reviewed", "1,234")  # Replace with dynamic data
col2.metric("High Risk Contracts", "56")  # Replace with dynamic data
col3.metric("Approved Contracts", "987")  # Replace with dynamic data
col4.metric("Active Users", "42")  # Replace with dynamic data

# Upload Contract Section
st.header("Upload New Contract")
st.markdown("**Please upload a contract file (PDF, DOC, TXT) that is less than 100 KB.**")
uploaded_file = st.file_uploader(
    "Drag and drop your contract file or click to browse",
    type=["pdf", "doc", "txt"],
)

if uploaded_file is not None:
    # Check file size
    if uploaded_file.size > 100 * 1024:  # 100 KB in bytes
        st.error("File size exceeds 100 KB. Please upload a smaller file.")
    else:
        try:
            # Extract text from the uploaded file
            if uploaded_file.type == "application/pdf":
                contract_text = extract_text_from_pdf(uploaded_file)
            else:
                contract_text = uploaded_file.read().decode("utf-8")

            # Analyze the contract
            if st.button("Start AI Review"):
                with st.spinner("Analyzing contract..."):
                    # Use the API key from session state
                    api_key = st.session_state.api_key
                    # Analyze the contract using the AI/ML API
                    analysis_result = asyncio.run(analyze_contract(api_key, contract_text))
                    if analysis_result:
                        st.markdown("### Analysis Result")
                        display_analysis_result(analysis_result)  # Display the parsed analysis result
                    else:
                        st.error("Failed to analyze the contract.")
        except Exception as e:
            st.error(f"Error processing the file: {e}")

# Recent Contract Activity (can be dynamically updated based on backend data)
st.header("Recent Contract Activity")
st.write("Latest updates on contract reviews and approvals")

# Example dynamic data (replace with actual data from backend)
recent_activity = [
    {"Contract Name": "Service Agreement - TechCorp", "Status": "Approved", "Risk Level": "Low", "Last Updated": "2023-09-15"},
    {"Contract Name": "NDA - StartupX", "Status": "In Review", "Risk Level": "Medium", "Last Updated": "2023-09-14"},
    {"Contract Name": "Licensing Agreement - BigCo", "Status": "Needs Attention", "Risk Level": "High", "Last Updated": "2023-09-13"},
]
st.table(recent_activity)