# Varsha Jeyaraj
# Final version of the AI Legal Explainer app
# 7b7ad6a
import os
import re
import tempfile

import streamlit as st
from langchain.chains import RetrievalQA
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_core.documents import Document
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_huggingface import HuggingFaceEmbeddings
def process_document(
    file_path,
    *,
    chunk_size=1000,
    chunk_overlap=200,
    model_name="sentence-transformers/all-MiniLM-L6-v2",
):
    """Load a PDF, split it into overlapping chunks and index them in FAISS.

    Args:
        file_path: Path of the PDF file to load.
        chunk_size: Maximum chunk length passed to the text splitter.
        chunk_overlap: Overlap between consecutive chunks passed to the
            text splitter.
        model_name: Hugging Face embedding model used to embed the chunks.

    Returns:
        The FAISS vector store built from the document's chunks.

    Raises:
        ValueError: If the PDF yields no text chunks (for example a scanned,
            image-only PDF), because an index cannot be built from nothing.
    """
    documents = PyPDFLoader(file_path).load()

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    texts = text_splitter.split_documents(documents)

    # FAISS.from_documents fails with an opaque IndexError on an empty list,
    # so report the real cause instead.
    if not texts:
        raise ValueError(
            "No extractable text was found in the PDF; it may be empty or "
            "contain only scanned images."
        )

    embeddings = HuggingFaceEmbeddings(model_name=model_name)
    return FAISS.from_documents(texts, embedding=embeddings)
def verify_legal_document(file_path, api_key):
    """Ask the Gemini model whether the PDF at *file_path* is a legal document.

    Only the first 3000 characters of the extracted text are sent to the model.

    Args:
        file_path: Path of the PDF file to classify.
        api_key: Google API key used for the Gemini call.

    Returns:
        True only when the model's reply contains the standalone word "LEGAL"
        and no negated form of it. False when the PDF has no pages, has fewer
        than 50 characters of text, is judged non-legal, or when any error
        occurs (the error is reported through ``st.error``).
    """
    try:
        loader = PyPDFLoader(file_path)
        documents = loader.load()
        if not documents:
            return False
        full_text = "\n".join(doc.page_content for doc in documents)
        if len(full_text.strip()) < 50:
            return False
        llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", google_api_key=api_key)
        verification_prompt = f"""
Analyze the following text carefully and determine if it is a legal document.
Legal documents include: contracts, agreements, terms of service, privacy policies,
legal notices, lease agreements, employment contracts, NDAs, legal forms, court documents, etc.
Non-legal documents include: research papers, books, articles, manuals, reports,
personal documents, educational materials, etc.
Respond with ONLY ONE WORD:
- "LEGAL" if this is a legal document
- "NON-LEGAL" if this is not a legal document
Text to analyze:
{full_text[:3000]}
"""
        response = llm.invoke(verification_prompt)
        response_text = response.content.strip().upper()
        # Match the whole word LEGAL together with an optional NON/NOT prefix.
        # A plain substring test would accept "NOT LEGAL", "NON LEGAL",
        # "NONLEGAL" and even "ILLEGAL" as a positive verdict.
        verdict_prefixes = re.findall(r"\b(NON|NOT)?[\W_]*LEGAL\b", response_text)
        return bool(verdict_prefixes) and not any(verdict_prefixes)
    except Exception as e:
        st.error(f"Error during verification: {str(e)}")
        return False
def generate_analysis(vectorstore, api_key):
    """Generate an automated summary and a risk analysis for the indexed document.

    Args:
        vectorstore: Vector store whose retriever supplies document context.
        api_key: Google API key used for the Gemini calls.

    Returns:
        A ``(summary, risks)`` tuple with the model's two answers, or
        ``(None, None)`` when any step fails (the error is reported through
        ``st.error``).
    """
    try:
        retriever = vectorstore.as_retriever()
        llm = ChatGoogleGenerativeAI(
            model="gemini-2.0-flash",
            google_api_key=api_key,
            temperature=0.3,
        )
        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=retriever,
        )

        # Generate summary
        summary_query = """
Provide a concise, three-bullet point summary of this document's main purpose,
key parties involved, and primary obligations. Use simple language.
"""
        # Chain.run is deprecated; invoke takes the question under "query"
        # and returns the answer under "result".
        summary = qa_chain.invoke({"query": summary_query})["result"]

        # Identify risks
        risks_query = """
Identify potential risks, red flags, or important clauses including:
- Financial obligations, penalties, or fees
- Auto-renewal clauses
- Termination conditions
- Liability limitations
- Unusual or potentially unfavorable terms
Format as bullet points.
"""
        risks = qa_chain.invoke({"query": risks_query})["result"]

        return summary, risks
    except Exception as e:
        st.error(f"Error during analysis: {str(e)}")
        return None, None
# Page setup: title, icon and layout of the Streamlit app
st.set_page_config(
    page_title="AI Legal Doc Explainer",
    page_icon="⚖️",
    layout="centered",
    initial_sidebar_state="auto",
)
st.title("⚖️ AI Legal Doc Explainer")
st.write("Upload your legal document (PDF) and get a simple, easy-to-understand explanation.")

# Custom styling for the question box and the primary (submit) button
_PAGE_CSS = """
<style>
/* Blue highlight for text input */
.stTextInput > div > div > input {
border-color: #0066cc !important;
box-shadow: 0 0 0 0.2rem rgba(0, 102, 204, 0.25) !important;
}
.stTextInput > div > div > input:focus {
border-color: #0066cc !important;
box-shadow: 0 0 0 0.2rem rgba(0, 102, 204, 0.5) !important;
}
/* Green submit button */
.stButton > button[kind="primary"] {
background-color: #28a745 !important;
border-color: #28a745 !important;
}
.stButton > button[kind="primary"]:hover {
background-color: #218838 !important;
border-color: #1e7e34 !important;
}
</style>
"""
st.markdown(_PAGE_CSS, unsafe_allow_html=True)

# Seed the session-state entries used by the Q&A flow, without overwriting
# values that already exist from an earlier run of the script.
for _state_key, _initial_value in (
    ("qa_history", []),
    ("vectorstore", None),
    ("document_processed", False),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value
# File uploader
uploaded_file = st.file_uploader("Upload a PDF document", type="pdf")

if uploaded_file is not None:
    # On-disk copy of the upload; stays None on reruns that do not need it.
    temp_file_path = None
    try:
        # Check if API key exists
        if "GOOGLE_API_KEY" not in st.secrets:
            st.error("Google API key not found in secrets. Please add your API key.")
            st.stop()
        api_key = st.secrets["GOOGLE_API_KEY"]

        # Streamlit re-executes this script on every interaction, so the
        # per-document results live in session state and are discarded only
        # when a different file (identified by name and size) is uploaded.
        file_signature = (uploaded_file.name, uploaded_file.size)
        if st.session_state.get("file_signature") != file_signature:
            st.session_state.file_signature = file_signature
            st.session_state.is_legal_doc = None
            st.session_state.proceed_anyway = False
            st.session_state.document_processed = False
            st.session_state.vectorstore = None
            st.session_state.qa_history = []

        # The PDF is only needed on disk until verification and indexing are
        # done. A generated temp name avoids building a path from the
        # client-supplied filename.
        if st.session_state.is_legal_doc is None or not st.session_state.document_processed:
            with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as temp_file:
                # Record the path first so the finally clause can clean up
                # even if the write fails.
                temp_file_path = temp_file.name
                temp_file.write(uploaded_file.getbuffer())

        # STEP 1: Verify document type (once per uploaded file, not per rerun).
        # NOTE: a failed verification returns False and is cached as such; the
        # user can still choose to continue below.
        if st.session_state.is_legal_doc is None:
            with st.spinner("Verifying document type..."):
                st.session_state.is_legal_doc = verify_legal_document(temp_file_path, api_key)
        is_legal_doc = st.session_state.is_legal_doc

        # STEP 2: Show immediate notification for non-legal documents
        if not is_legal_doc and not st.session_state.proceed_anyway:
            st.warning("This does not appear to be a legal document.")
            st.info("This tool is optimized for legal documents like contracts, agreements, terms of service, privacy policies, etc.")
            # Ask user what to do
            st.markdown("**What would you like to do?**")
            _, continue_column = st.columns(2)
            with continue_column:
                # A button is True only on the rerun triggered by the click,
                # so the choice is persisted for later reruns (e.g. Q&A).
                if st.button("▶️ Continue Anyway", use_container_width=True):
                    st.session_state.proceed_anyway = True
            if not st.session_state.proceed_anyway:
                st.stop()  # Stop here if user doesn't choose to continue

        # STEP 3: Process the document (either legal doc or user chose to continue)
        if not st.session_state.document_processed:
            if is_legal_doc:
                st.success("Legal document verified!")
            else:
                st.info("Proceeding with analysis as requested...")
            with st.spinner("Processing document..."):
                st.session_state.vectorstore = process_document(temp_file_path)

            # STEP 4: Generate analysis
            with st.spinner("Analyzing document for key points and risks..."):
                summary, risks = generate_analysis(st.session_state.vectorstore, api_key)
            if summary and risks:
                st.session_state.summary = summary
                st.session_state.risks = risks
                st.session_state.document_processed = True

        # Display analysis results if document is processed
        if st.session_state.document_processed:
            st.success("Document analysis complete!")
            # Display analysis results
            with st.expander("Document Summary", expanded=True):
                st.write(st.session_state.summary)
            with st.expander("🚩 Potential Red Flags & Important Clauses", expanded=True):
                st.write(st.session_state.risks)
            st.markdown("---")

            # STEP 5: Q&A Section with persistent chat
            st.header("Ask Questions About Your Document")
            st.write("Ask specific questions about the document content, terms, or anything you'd like clarified.")

            # Always show previous Q&A history section (even if empty)
            st.subheader("Previous Questions & Answers:")
            if st.session_state.qa_history:
                for i, qa in enumerate(st.session_state.qa_history, 1):
                    with st.expander(f"Q{i}: {qa['question'][:50]}...", expanded=False):
                        st.write(f"**Question:** {qa['question']}")
                        st.write(f"**Answer:** {qa['answer']}")
            else:
                st.write("*No questions asked yet*")
            st.markdown("---")

            # Always show the question input box. The key changes with the
            # history length so the box is empty again after each answer.
            user_question = st.text_input(
                "Enter your question:",
                placeholder="e.g., What are the termination conditions? What fees am I responsible for?",
                key=f"question_input_{len(st.session_state.qa_history)}"
            )
            if st.button("Submit Question", type="primary"):
                if user_question:
                    with st.spinner("Finding the answer..."):
                        try:
                            retriever = st.session_state.vectorstore.as_retriever()
                            llm = ChatGoogleGenerativeAI(
                                model="gemini-2.0-flash",
                                google_api_key=api_key,
                                temperature=0.2
                            )
                            qa_chain = RetrievalQA.from_chain_type(
                                llm=llm,
                                chain_type="stuff",
                                retriever=retriever
                            )
                            # Enhanced prompt for better answers
                            enhanced_question = f"""
Based on the document content, please answer this question clearly and concisely: {user_question}
If the answer involves specific terms, conditions, or clauses, please quote the relevant text.
If the information is not clearly stated in the document, please say so.
"""
                            # Chain.run is deprecated; invoke takes the question
                            # under "query" and returns the answer under "result".
                            answer = qa_chain.invoke({"query": enhanced_question})["result"]
                            # Add to history
                            st.session_state.qa_history.append({
                                'question': user_question,
                                'answer': answer
                            })
                        except Exception as e:
                            st.error(f"Error generating answer: {str(e)}")
                else:
                    st.warning("Please enter a question before submitting.")

            # Display the most recent answer if available
            if st.session_state.qa_history:
                st.markdown("### Answer")
                latest_qa = st.session_state.qa_history[-1]
                st.write(f"**Question:** {latest_qa['question']}")
                st.write(f"**Answer:** {latest_qa['answer']}")
                st.markdown("---")
                st.write("**Ask another question below:**")
    except Exception as e:
        st.error(f"An error occurred: {str(e)}")
    finally:
        # Clean up temporary file; this also runs when st.stop() ends the run.
        if temp_file_path and os.path.exists(temp_file_path):
            os.remove(temp_file_path)
else:
    st.info("Please upload a PDF document to get started.")
    # Add some helpful information
    with st.expander("ℹ️ What types of documents work best?"):
        st.write("""
This tool works best with legal documents such as:
- Contracts and agreements
- Terms of service
- Privacy policies
- Lease agreements
- Employment contracts
- Legal notices
- Service agreements
The AI will analyze the document and provide:
- A clear summary of the main points
- Identification of potential risks or red flags
- Answers to your specific questions about the content
""")