import streamlit as st from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain_community.document_loaders import PyPDFLoader import os from langchain_huggingface import HuggingFaceEmbeddings from langchain_community.vectorstores import FAISS from langchain.chains import RetrievalQA from langchain_google_genai import ChatGoogleGenerativeAI from langchain_core.documents import Document def process_document(file_path): """Process PDF document and create vector store for retrieval""" loader = PyPDFLoader(file_path) documents = loader.load() text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200) texts = text_splitter.split_documents(documents) model_name = "sentence-transformers/all-MiniLM-L6-v2" embeddings = HuggingFaceEmbeddings(model_name=model_name) vectorstore = FAISS.from_documents(texts, embedding=embeddings) return vectorstore def verify_legal_document(file_path, api_key): """Verify if the uploaded document is a legal document""" try: loader = PyPDFLoader(file_path) documents = loader.load() if not documents: return False full_text = "\n".join([doc.page_content for doc in documents]) if len(full_text.strip()) < 50: return False llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", google_api_key=api_key) verification_prompt = f""" Analyze the following text carefully and determine if it is a legal document. Legal documents include: contracts, agreements, terms of service, privacy policies, legal notices, lease agreements, employment contracts, NDAs, legal forms, court documents, etc. Non-legal documents include: research papers, books, articles, manuals, reports, personal documents, educational materials, etc. Respond with ONLY ONE WORD: - "LEGAL" if this is a legal document - "NON-LEGAL" if this is not a legal document Text to analyze: {full_text[:3000]} """ response = llm.invoke(verification_prompt) response_text = response.content.strip().upper() is_legal = "LEGAL" in response_text and "NON-LEGAL" not in response_text return is_legal except Exception as e: st.error(f"Error during verification: {str(e)}") return False def generate_analysis(vectorstore, api_key): """Generate automated summary and risk analysis""" try: retriever = vectorstore.as_retriever() llm = ChatGoogleGenerativeAI( model="gemini-2.0-flash", google_api_key=api_key, temperature=0.3 ) qa_chain = RetrievalQA.from_chain_type( llm=llm, chain_type="stuff", retriever=retriever ) # Generate summary summary_query = """ Provide a concise, three-bullet point summary of this document's main purpose, key parties involved, and primary obligations. Use simple language. """ summary = qa_chain.run(summary_query) # Identify risks risks_query = """ Identify potential risks, red flags, or important clauses including: - Financial obligations, penalties, or fees - Auto-renewal clauses - Termination conditions - Liability limitations - Unusual or potentially unfavorable terms Format as bullet points. """ risks = qa_chain.run(risks_query) return summary, risks except Exception as e: st.error(f"Error during analysis: {str(e)}") return None, None # Streamlit App Configuration st.set_page_config( page_title="AI Legal Doc Explainer", page_icon="⚖️", layout="centered", initial_sidebar_state="auto" ) st.title("⚖️ AI Legal Doc Explainer") st.write("Upload your legal document (PDF) and get a simple, easy-to-understand explanation.") st.markdown(""" """, unsafe_allow_html=True) # Initialize session state for Q&A if "qa_history" not in st.session_state: st.session_state.qa_history = [] if "vectorstore" not in st.session_state: st.session_state.vectorstore = None if "document_processed" not in st.session_state: st.session_state.document_processed = False # File uploader uploaded_file = st.file_uploader("Upload a PDF document", type="pdf") if uploaded_file is not None: # Save uploaded file temporarily temp_file_path = f"temp_{uploaded_file.name}" with open(temp_file_path, "wb") as f: f.write(uploaded_file.getbuffer()) try: # Check if API key exists if "GOOGLE_API_KEY" not in st.secrets: st.error("Google API key not found in secrets. Please add your API key.") st.stop() # STEP 1: Verify document type with st.spinner("Verifying document type..."): is_legal_doc = verify_legal_document(temp_file_path, st.secrets["GOOGLE_API_KEY"]) # STEP 2: Show immediate notification for non-legal documents if not is_legal_doc: #st.error("⚠️ Document Verification Failed") st.warning("This does not appear to be a legal document.") st.info("This tool is optimized for legal documents like contracts, agreements, terms of service, privacy policies, etc.") # Ask user what to do st.markdown("**What would you like to do?**") col1, col2 = st.columns(2) with col2: proceed_anyway = st.button("▶️ Continue Anyway", use_container_width=True) if not proceed_anyway: st.stop() # Stop here if user doesn't choose to continue # STEP 3: Process the document (either legal doc or user chose to continue) if not st.session_state.document_processed: if is_legal_doc: st.success("Legal document verified!") else: st.info("Proceeding with analysis as requested...") with st.spinner("Processing document..."): st.session_state.vectorstore = process_document(temp_file_path) # STEP 4: Generate analysis with st.spinner("Analyzing document for key points and risks..."): summary, risks = generate_analysis(st.session_state.vectorstore, st.secrets["GOOGLE_API_KEY"]) if summary and risks: st.session_state.summary = summary st.session_state.risks = risks st.session_state.document_processed = True # Display analysis results if document is processed if st.session_state.document_processed: st.success("Document analysis complete!") # Display analysis results with st.expander("Document Summary", expanded=True): st.write(st.session_state.summary) with st.expander("🚩 Potential Red Flags & Important Clauses", expanded=True): st.write(st.session_state.risks) st.markdown("---") # STEP 5: Q&A Section with persistent chat st.header("Ask Questions About Your Document") st.write("Ask specific questions about the document content, terms, or anything you'd like clarified.") # Always show previous Q&A history section (even if empty) st.subheader("Previous Questions & Answers:") if st.session_state.qa_history: for i, qa in enumerate(st.session_state.qa_history, 1): with st.expander(f"Q{i}: {qa['question'][:50]}...", expanded=False): st.write(f"**Question:** {qa['question']}") st.write(f"**Answer:** {qa['answer']}") else: st.write("*No questions asked yet*") st.markdown("---") # Always show the question input box user_question = st.text_input( "Enter your question:", placeholder="e.g., What are the termination conditions? What fees am I responsible for?", key=f"question_input_{len(st.session_state.qa_history)}" ) if st.button("Submit Question", type="primary"): if user_question: with st.spinner("Finding the answer..."): try: retriever = st.session_state.vectorstore.as_retriever() llm = ChatGoogleGenerativeAI( model="gemini-2.0-flash", google_api_key=st.secrets["GOOGLE_API_KEY"], temperature=0.2 ) qa_chain = RetrievalQA.from_chain_type( llm=llm, chain_type="stuff", retriever=retriever ) # Enhanced prompt for better answers enhanced_question = f""" Based on the document content, please answer this question clearly and concisely: {user_question} If the answer involves specific terms, conditions, or clauses, please quote the relevant text. If the information is not clearly stated in the document, please say so. """ answer = qa_chain.run(enhanced_question) # Add to history st.session_state.qa_history.append({ 'question': user_question, 'answer': answer }) except Exception as e: st.error(f"Error generating answer: {str(e)}") else: st.warning("Please enter a question before submitting.") # Display the most recent answer if available if st.session_state.qa_history: st.markdown("### Answer") latest_qa = st.session_state.qa_history[-1] st.write(f"**Question:** {latest_qa['question']}") st.write(f"**Answer:** {latest_qa['answer']}") st.markdown("---") st.write("**Ask another question below:**") except Exception as e: st.error(f"An error occurred: {str(e)}") finally: # Clean up temporary file if os.path.exists(temp_file_path): os.remove(temp_file_path) else: st.info("Please upload a PDF document to get started.") # Add some helpful information with st.expander("ℹ️ What types of documents work best?"): st.write(""" This tool works best with legal documents such as: - Contracts and agreements - Terms of service - Privacy policies - Lease agreements - Employment contracts - Legal notices - Service agreements The AI will analyze the document and provide: - A clear summary of the main points - Identification of potential risks or red flags - Answers to your specific questions about the content """)