Spaces:

VarshaJeyaraj
/

WhyNotUs_AI_Legal_Doc_Explainer

No application file

WhyNotUs_AI_Legal_Doc_Explainer / app.py

Varsha Jeyaraj

Final version of the AI Legal Explainer app

7b7ad6a 5 months ago

12.6 kB

	import os
	import tempfile

	import streamlit as st
	from langchain.chains import RetrievalQA
	from langchain.text_splitter import RecursiveCharacterTextSplitter
	from langchain_community.document_loaders import PyPDFLoader
	from langchain_community.vectorstores import FAISS
	from langchain_core.documents import Document
	from langchain_google_genai import ChatGoogleGenerativeAI
	from langchain_huggingface import HuggingFaceEmbeddings


	def process_document(file_path):
	    """Build a FAISS vector store from the text of a PDF.

	    The PDF is loaded with ``PyPDFLoader``, split into chunks of 1000
	    characters with 200 characters of overlap, embedded with the
	    ``sentence-transformers/all-MiniLM-L6-v2`` model and indexed in FAISS.

	    Args:
	        file_path: Path of the PDF file to index.

	    Returns:
	        The FAISS vector store built from the document chunks.

	    Raises:
	        ValueError: If no text chunks could be produced from the PDF
	            (for example an empty or image-only file).
	    """
	    documents = PyPDFLoader(file_path).load()
	    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
	    texts = text_splitter.split_documents(documents)

	    # Fail early with a readable message: building an index from zero chunks
	    # would otherwise fail inside the vector store with a far less helpful
	    # error for the user.
	    if not texts:
	        raise ValueError(
	            "No extractable text was found in the PDF; it may be empty or "
	            "contain only scanned images."
	        )

	    model_name = "sentence-transformers/all-MiniLM-L6-v2"
	    embeddings = HuggingFaceEmbeddings(model_name=model_name)

	    return FAISS.from_documents(texts, embedding=embeddings)


	def _is_legal_verdict(response_text):
	    """Return True only for a reply containing an un-negated word "LEGAL".

	    Hyphens and underscores are treated as spaces and surrounding punctuation
	    is ignored, so "NON-LEGAL", "NON LEGAL", "NOT LEGAL", "NONLEGAL" and
	    "ILLEGAL" are all rejected, while "LEGAL" and "**Legal.**" are accepted.
	    """
	    normalized = response_text.upper().replace("-", " ").replace("_", " ")
	    words = [word.strip(".,:;!?\"'*`()[]") for word in normalized.split()]
	    if "LEGAL" not in words:
	        return False
	    # Any occurrence directly preceded by NON/NOT makes the verdict negative.
	    return not any(
	        word == "LEGAL" and previous in ("NON", "NOT")
	        for previous, word in zip(words, words[1:])
	    )


	def verify_legal_document(file_path, api_key):
	    """Ask the LLM whether the PDF at ``file_path`` is a legal document.

	    Only the first 3000 characters of the extracted text are sent to the
	    model, which is instructed to reply with "LEGAL" or "NON-LEGAL".

	    Args:
	        file_path: Path of the PDF file to classify.
	        api_key: Google API key passed to ``ChatGoogleGenerativeAI``.

	    Returns:
	        True if the model's reply is an un-negated "LEGAL". False if the PDF
	        yields no pages, has fewer than 50 characters of text, the reply is
	        anything else, or any exception occurs (the error is then reported
	        through ``st.error``).
	    """
	    try:
	        documents = PyPDFLoader(file_path).load()

	        if not documents:
	            return False

	        full_text = "\n".join(doc.page_content for doc in documents)

	        # Too little text to classify meaningfully.
	        if len(full_text.strip()) < 50:
	            return False

	        llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", google_api_key=api_key)
	        verification_prompt = f"""
	Analyze the following text carefully and determine if it is a legal document.

	Legal documents include: contracts, agreements, terms of service, privacy policies,
	legal notices, lease agreements, employment contracts, NDAs, legal forms, court documents, etc.

	Non-legal documents include: research papers, books, articles, manuals, reports,
	personal documents, educational materials, etc.

	Respond with ONLY ONE WORD:
	- "LEGAL" if this is a legal document
	- "NON-LEGAL" if this is not a legal document

	Text to analyze:
	{full_text[:3000]}
	"""

	        response = llm.invoke(verification_prompt)

	        # A plain substring test would accept replies such as "NON LEGAL" or
	        # "ILLEGAL"; compare whole words instead.
	        return _is_legal_verdict(response.content)

	    except Exception as e:
	        st.error(f"Error during verification: {str(e)}")
	        return False


	def generate_analysis(vectorstore, api_key):
	    """Generate an automated summary and risk analysis of the indexed document.

	    A ``RetrievalQA`` "stuff" chain is built on top of ``vectorstore`` and
	    queried twice: once for a three-bullet summary and once for potential
	    risks and red flags.

	    Args:
	        vectorstore: Vector store exposing ``as_retriever()`` for the document.
	        api_key: Google API key passed to ``ChatGoogleGenerativeAI``.

	    Returns:
	        A ``(summary, risks)`` tuple with the two answers, or ``(None, None)``
	        if any exception occurs (the error is then reported through
	        ``st.error``).
	    """
	    try:
	        llm = ChatGoogleGenerativeAI(
	            model="gemini-2.0-flash",
	            google_api_key=api_key,
	            temperature=0.3,
	        )
	        qa_chain = RetrievalQA.from_chain_type(
	            llm=llm,
	            chain_type="stuff",
	            retriever=vectorstore.as_retriever(),
	        )

	        # Generate summary
	        summary_query = """
	Provide a concise, three-bullet point summary of this document's main purpose,
	key parties involved, and primary obligations. Use simple language.
	"""
	        # ``Chain.run`` is deprecated; ``invoke`` takes the chain's "query"
	        # input and returns a dict whose "result" entry holds the answer.
	        summary = qa_chain.invoke({"query": summary_query})["result"]

	        # Identify risks
	        risks_query = """
	Identify potential risks, red flags, or important clauses including:
	- Financial obligations, penalties, or fees
	- Auto-renewal clauses
	- Termination conditions
	- Liability limitations
	- Unusual or potentially unfavorable terms
	Format as bullet points.
	"""
	        risks = qa_chain.invoke({"query": risks_query})["result"]

	        return summary, risks
	    except Exception as e:
	        st.error(f"Error during analysis: {str(e)}")
	        return None, None


	# Page-level settings; set_page_config is issued before any other Streamlit call here.
	_PAGE_SETTINGS = {
	    "page_title": "AI Legal Doc Explainer",
	    "page_icon": "⚖️",
	    "layout": "centered",
	    "initial_sidebar_state": "auto",
	}
	st.set_page_config(**_PAGE_SETTINGS)

	st.title("⚖️ AI Legal Doc Explainer")
	st.write("Upload your legal document (PDF) and get a simple, easy-to-understand explanation.")

	# Custom styling: blue outline on text inputs, green primary buttons.
	_CUSTOM_CSS = """
	<style>
	/* Blue highlight for text input */
	.stTextInput > div > div > input {
	border-color: #0066cc !important;
	box-shadow: 0 0 0 0.2rem rgba(0, 102, 204, 0.25) !important;
	}

	.stTextInput > div > div > input:focus {
	border-color: #0066cc !important;
	box-shadow: 0 0 0 0.2rem rgba(0, 102, 204, 0.5) !important;
	}

	/* Green submit button */
	.stButton > button[kind="primary"] {
	background-color: #28a745 !important;
	border-color: #28a745 !important;
	}

	.stButton > button[kind="primary"]:hover {
	background-color: #218838 !important;
	border-color: #1e7e34 !important;
	}
	</style>
	"""
	st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)

	# Seed the per-session values used by the analysis and Q&A sections; keys
	# that already exist (i.e. on reruns) are left untouched.
	for _state_key, _initial_value in (
	    ("qa_history", []),
	    ("vectorstore", None),
	    ("document_processed", False),
	):
	    if _state_key not in st.session_state:
	        st.session_state[_state_key] = _initial_value

	# PDF upload widget; yields None until the user provides a file.
	uploaded_file = st.file_uploader("Upload a PDF document", type="pdf")

	if uploaded_file is not None:
	    session = st.session_state

	    # Identify the current upload so that state derived from a previous
	    # document (verdict, vector store, analysis, chat history) is never shown
	    # for a different file.
	    upload_id = getattr(uploaded_file, "file_id", None) or (
	        uploaded_file.name,
	        uploaded_file.size,
	    )
	    if session.get("active_upload_id") != upload_id:
	        session.active_upload_id = upload_id
	        session.is_legal_doc = None
	        session.proceed_anyway = False
	        session.document_processed = False
	        session.vectorstore = None
	        session.qa_history = []

	    # Save the upload to a uniquely named temporary file. A fixed
	    # "temp_<name>" path in the working directory could collide between
	    # concurrent sessions and depends on the client-supplied file name.
	    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
	        temp_file.write(uploaded_file.getbuffer())
	        temp_file_path = temp_file.name

	    try:
	        # Check if API key exists
	        if "GOOGLE_API_KEY" not in st.secrets:
	            st.error("Google API key not found in secrets. Please add your API key.")
	            st.stop()
	        api_key = st.secrets["GOOGLE_API_KEY"]

	        # STEP 1: Verify document type. The verdict is cached per upload so
	        # that reruns (every button click) do not trigger another LLM call.
	        if session.is_legal_doc is None:
	            with st.spinner("Verifying document type..."):
	                session.is_legal_doc = verify_legal_document(temp_file_path, api_key)
	        is_legal_doc = session.is_legal_doc

	        # STEP 2: Show immediate notification for non-legal documents
	        if not is_legal_doc:
	            st.warning("This does not appear to be a legal document.")
	            st.info("This tool is optimized for legal documents like contracts, agreements, terms of service, privacy policies, etc.")

	            # A button is only True on the rerun right after the click, so the
	            # choice is remembered in session state; otherwise the next
	            # interaction (e.g. submitting a question) would stop here again.
	            if not session.proceed_anyway:
	                st.markdown("What would you like to do?")
	                _, col2 = st.columns(2)

	                with col2:
	                    if st.button("▶️ Continue Anyway", use_container_width=True):
	                        session.proceed_anyway = True

	                if not session.proceed_anyway:
	                    st.stop()  # Stop here if user doesn't choose to continue

	        # STEP 3: Process the document (either legal doc or user chose to continue)
	        if not session.document_processed:
	            if is_legal_doc:
	                st.success("Legal document verified!")
	            else:
	                st.info("Proceeding with analysis as requested...")

	            with st.spinner("Processing document..."):
	                session.vectorstore = process_document(temp_file_path)

	            # STEP 4: Generate analysis
	            with st.spinner("Analyzing document for key points and risks..."):
	                summary, risks = generate_analysis(session.vectorstore, api_key)

	            # Only mark the document as processed when both parts succeeded,
	            # so a failed analysis is retried on the next rerun.
	            if summary and risks:
	                session.summary = summary
	                session.risks = risks
	                session.document_processed = True

	        # Display analysis results if document is processed
	        if session.document_processed:
	            st.success("Document analysis complete!")

	            with st.expander("Document Summary", expanded=True):
	                st.write(session.summary)

	            with st.expander("🚩 Potential Red Flags & Important Clauses", expanded=True):
	                st.write(session.risks)

	            st.markdown("---")

	            # STEP 5: Q&A Section with persistent chat
	            st.header("Ask Questions About Your Document")
	            st.write("Ask specific questions about the document content, terms, or anything you'd like clarified.")

	            # Always show previous Q&A history section (even if empty)
	            st.subheader("Previous Questions & Answers:")
	            if session.qa_history:
	                for i, qa in enumerate(session.qa_history, 1):
	                    with st.expander(f"Q{i}: {qa['question'][:50]}...", expanded=False):
	                        st.write(f"Question: {qa['question']}")
	                        st.write(f"Answer: {qa['answer']}")
	            else:
	                st.write("No questions asked yet")

	            st.markdown("---")

	            # The widget key changes with the history length, which gives an
	            # empty input box after each answered question.
	            user_question = st.text_input(
	                "Enter your question:",
	                placeholder="e.g., What are the termination conditions? What fees am I responsible for?",
	                key=f"question_input_{len(session.qa_history)}",
	            )

	            if st.button("Submit Question", type="primary"):
	                if user_question:
	                    with st.spinner("Finding the answer..."):
	                        try:
	                            llm = ChatGoogleGenerativeAI(
	                                model="gemini-2.0-flash",
	                                google_api_key=api_key,
	                                temperature=0.2,
	                            )
	                            qa_chain = RetrievalQA.from_chain_type(
	                                llm=llm,
	                                chain_type="stuff",
	                                retriever=session.vectorstore.as_retriever(),
	                            )

	                            # Enhanced prompt for better answers
	                            enhanced_question = f"""
	Based on the document content, please answer this question clearly and concisely: {user_question}

	If the answer involves specific terms, conditions, or clauses, please quote the relevant text.
	If the information is not clearly stated in the document, please say so.
	"""

	                            # ``Chain.run`` is deprecated; ``invoke`` returns a
	                            # dict whose "result" entry holds the answer.
	                            answer = qa_chain.invoke({"query": enhanced_question})["result"]

	                            # Add to history
	                            session.qa_history.append({
	                                'question': user_question,
	                                'answer': answer
	                            })

	                        except Exception as e:
	                            st.error(f"Error generating answer: {str(e)}")
	                else:
	                    st.warning("Please enter a question before submitting.")

	            # Display the most recent answer if available
	            if session.qa_history:
	                st.markdown("### Answer")
	                latest_qa = session.qa_history[-1]
	                st.write(f"Question: {latest_qa['question']}")
	                st.write(f"Answer: {latest_qa['answer']}")

	                st.markdown("---")
	                st.write("Ask another question below:")

	    except Exception as e:
	        st.error(f"An error occurred: {str(e)}")

	    finally:
	        # Clean up temporary file (also runs when st.stop() ends the script run)
	        if os.path.exists(temp_file_path):
	            os.remove(temp_file_path)

	else:
	    st.info("Please upload a PDF document to get started.")

	    # Add some helpful information
	    # NOTE(review): the source's indentation was lost; this expander is assumed
	    # to belong to the "no file uploaded" branch - confirm against the repo.
	    with st.expander("ℹ️ What types of documents work best?"):
	        st.write("""
	This tool works best with legal documents such as:
	- Contracts and agreements
	- Terms of service
	- Privacy policies
	- Lease agreements
	- Employment contracts
	- Legal notices
	- Service agreements

	The AI will analyze the document and provide:
	- A clear summary of the main points
	- Identification of potential risks or red flags
	- Answers to your specific questions about the content
	""")