Spaces:

VarshaJeyaraj
/

WhyNotUs_AI_Legal_Doc_Explainer

No application file

File size: 12,561 Bytes

7b7ad6a

import hashlib
import os
import re
import tempfile

import streamlit as st
from langchain.chains import RetrievalQA
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_core.documents import Document
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_huggingface import HuggingFaceEmbeddings


def process_document(file_path):
    """Build a FAISS vector store from the PDF located at ``file_path``.

    The PDF is loaded with ``PyPDFLoader``, split into 1000-character chunks
    that overlap by 200 characters, embedded with the
    ``sentence-transformers/all-MiniLM-L6-v2`` model, and indexed in FAISS.

    Args:
        file_path: Path of the PDF file to index.

    Returns:
        The FAISS vector store created from the document chunks.
    """
    pages = PyPDFLoader(file_path).load()

    # Overlapping chunks so that text cut at a chunk edge also appears whole
    # in the neighbouring chunk.
    chunks = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
    ).split_documents(pages)

    embedder = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    return FAISS.from_documents(chunks, embedding=embedder)


def verify_legal_document(file_path, api_key):
    """Ask the Gemini model whether the PDF at ``file_path`` is a legal document.

    The first 3000 characters of the extracted text are sent to the model
    together with instructions to answer ``LEGAL`` or ``NON-LEGAL``.

    Args:
        file_path: Path of the PDF file to classify.
        api_key: Google API key passed to ``ChatGoogleGenerativeAI``.

    Returns:
        ``True`` only when the model's reply contains the standalone word
        ``LEGAL`` and no negated form of it. ``False`` when the PDF yields no
        pages, fewer than 50 characters of text, a negative/unclear reply, or
        when any exception occurs (the error is shown via ``st.error``).
    """
    try:
        pages = PyPDFLoader(file_path).load()
        if not pages:
            return False

        full_text = "\n".join(page.page_content for page in pages)

        # Too little extractable text to classify.
        if len(full_text.strip()) < 50:
            return False

        llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", google_api_key=api_key)
        verification_prompt = f"""
        Analyze the following text carefully and determine if it is a legal document.
        
        Legal documents include: contracts, agreements, terms of service, privacy policies, 
        legal notices, lease agreements, employment contracts, NDAs, legal forms, court documents, etc.
        
        Non-legal documents include: research papers, books, articles, manuals, reports, 
        personal documents, educational materials, etc.
        
        Respond with ONLY ONE WORD:
        - "LEGAL" if this is a legal document
        - "NON-LEGAL" if this is not a legal document
        
        Text to analyze:
        {full_text[:3000]}
        """

        response = llm.invoke(verification_prompt)
        verdict = response.content.strip().upper()

        # Negated spellings must win. A plain substring test would accept
        # "NON LEGAL", "NONLEGAL" or "NOT LEGAL" because each contains "LEGAL"
        # without containing the exact token "NON-LEGAL".
        if re.search(r"\bNON[\W_]*LEGAL\b|\bNOT\s+(?:AN?\s+)?LEGAL\b", verdict):
            return False

        # Word boundaries keep "ILLEGAL", "PARALEGAL" or "LEGALLY" from being
        # read as a positive answer.
        return re.search(r"\bLEGAL\b", verdict) is not None

    except Exception as e:
        st.error(f"Error during verification: {str(e)}")
        return False


def generate_analysis(vectorstore, api_key):
    """Produce a plain-language summary and a risk list for an indexed document.

    A ``RetrievalQA`` chain ("stuff" type) is built over ``vectorstore`` with
    the ``gemini-2.0-flash`` model at temperature 0.3 and queried twice: once
    for a three-bullet summary and once for potential risks and red flags.

    Args:
        vectorstore: Vector store exposing ``as_retriever()``.
        api_key: Google API key passed to ``ChatGoogleGenerativeAI``.

    Returns:
        A ``(summary, risks)`` tuple of model answers, or ``(None, None)`` if
        any step raises (the error is shown via ``st.error``).
    """
    try:
        retriever = vectorstore.as_retriever()
        llm = ChatGoogleGenerativeAI(
            model="gemini-2.0-flash", 
            google_api_key=api_key, 
            temperature=0.3
        )
        qa_chain = RetrievalQA.from_chain_type(
            llm=llm, 
            chain_type="stuff", 
            retriever=retriever
        )

        # Generate summary
        summary_query = """
        Provide a concise, three-bullet point summary of this document's main purpose, 
        key parties involved, and primary obligations. Use simple language.
        """
        # Chain.run() is deprecated in LangChain; invoke() takes the chain's
        # "query" input and returns a dict whose "result" key holds the answer.
        summary = qa_chain.invoke({"query": summary_query})["result"]

        # Identify risks
        risks_query = """
        Identify potential risks, red flags, or important clauses including:
        - Financial obligations, penalties, or fees
        - Auto-renewal clauses
        - Termination conditions
        - Liability limitations
        - Unusual or potentially unfavorable terms
        Format as bullet points.
        """
        risks = qa_chain.invoke({"query": risks_query})["result"]

        return summary, risks
    except Exception as e:
        st.error(f"Error during analysis: {str(e)}")
        return None, None


# Page configuration, heading, and custom CSS for the app.
_PAGE_OPTIONS = {
    "page_title": "AI Legal Doc Explainer",
    "page_icon": "⚖️",
    "layout": "centered",
    "initial_sidebar_state": "auto",
}
st.set_page_config(**_PAGE_OPTIONS)

st.title("⚖️ AI Legal Doc Explainer")
st.write("Upload your legal document (PDF) and get a simple, easy-to-understand explanation.")

# Styling overrides: blue outline on text inputs, green primary buttons.
_CUSTOM_CSS = """
<style>
/* Blue highlight for text input */
.stTextInput > div > div > input {
    border-color: #0066cc !important;
    box-shadow: 0 0 0 0.2rem rgba(0, 102, 204, 0.25) !important;
}

.stTextInput > div > div > input:focus {
    border-color: #0066cc !important;
    box-shadow: 0 0 0 0.2rem rgba(0, 102, 204, 0.5) !important;
}

/* Green submit button */
.stButton > button[kind="primary"] {
    background-color: #28a745 !important;
    border-color: #28a745 !important;
}

.stButton > button[kind="primary"]:hover {
    background-color: #218838 !important;
    border-color: #1e7e34 !important;
}
</style>
"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)

# Seed the session-state keys used by the Q&A flow, leaving any value that an
# earlier run of this script already stored untouched.
_SESSION_DEFAULTS = {
    "qa_history": [],
    "vectorstore": None,
    "document_processed": False,
}
for _state_key, _state_default in _SESSION_DEFAULTS.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default

# PDF upload widget; the value is None until the user provides a file.
uploaded_file = st.file_uploader("Upload a PDF document", type="pdf")

if uploaded_file is not None:
    # Streamlit re-executes this script on every widget interaction, so
    # anything that must survive a rerun is kept in st.session_state. A hash
    # of the upload's bytes identifies the current document; when it changes,
    # all per-document state is discarded so a new file never shows the
    # previous file's analysis or chat history.
    upload_digest = hashlib.sha256(uploaded_file.getvalue()).hexdigest()
    if st.session_state.get("upload_digest") != upload_digest:
        st.session_state.upload_digest = upload_digest
        st.session_state.qa_history = []
        st.session_state.vectorstore = None
        st.session_state.document_processed = False
        st.session_state.is_legal_doc = None      # None = not verified yet
        st.session_state.proceed_anyway = False   # user override for non-legal docs

    # Set only if a temporary copy of the PDF is actually written below.
    temp_file_path = None

    try:
        # Check if API key exists
        if "GOOGLE_API_KEY" not in st.secrets:
            st.error("Google API key not found in secrets. Please add your API key.")
            st.stop()
        api_key = st.secrets["GOOGLE_API_KEY"]

        # verify_legal_document() and process_document() take a file path, so
        # the upload is copied to disk only while one of them still has to
        # run. A uniquely named temp file avoids collisions between sessions
        # that upload files with the same name.
        if st.session_state.is_legal_doc is None or st.session_state.vectorstore is None:
            with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp_pdf:
                tmp_pdf.write(uploaded_file.getbuffer())
                temp_file_path = tmp_pdf.name

        # STEP 1: Verify document type (once per upload; the verdict is cached
        # so later reruns do not repeat the LLM call)
        if st.session_state.is_legal_doc is None:
            with st.spinner("Verifying document type..."):
                st.session_state.is_legal_doc = verify_legal_document(temp_file_path, api_key)
        is_legal_doc = st.session_state.is_legal_doc

        # STEP 2: Show immediate notification for non-legal documents
        if not is_legal_doc and not st.session_state.proceed_anyway:
            st.warning("This does not appear to be a legal document.")
            st.info("This tool is optimized for legal documents like contracts, agreements, terms of service, privacy policies, etc.")

            # Ask user what to do
            st.markdown("**What would you like to do?**")
            _, col2 = st.columns(2)

            with col2:
                # A button is True only on the run in which it is clicked, so
                # the choice is stored; otherwise the next rerun (for example
                # submitting a question) would stop here again.
                if st.button("▶️ Continue Anyway", use_container_width=True):
                    st.session_state.proceed_anyway = True

            if not st.session_state.proceed_anyway:
                st.stop()  # Stop here if user doesn't choose to continue

        # STEP 3: Process the document (either legal doc or user chose to continue)
        if not st.session_state.document_processed:
            if is_legal_doc:
                st.success("Legal document verified!")
            else:
                st.info("Proceeding with analysis as requested...")

            # Build the index once per upload; if only the analysis step
            # failed on an earlier run, the existing index is reused.
            if st.session_state.vectorstore is None:
                with st.spinner("Processing document..."):
                    st.session_state.vectorstore = process_document(temp_file_path)

            # STEP 4: Generate analysis
            with st.spinner("Analyzing document for key points and risks..."):
                summary, risks = generate_analysis(st.session_state.vectorstore, api_key)

            if summary and risks:
                st.session_state.summary = summary
                st.session_state.risks = risks
                st.session_state.document_processed = True

        # Display analysis results if document is processed
        if st.session_state.document_processed:
            st.success("Document analysis complete!")

            # Display analysis results
            with st.expander("Document Summary", expanded=True):
                st.write(st.session_state.summary)

            with st.expander("🚩 Potential Red Flags & Important Clauses", expanded=True):
                st.write(st.session_state.risks)

            st.markdown("---")

            # STEP 5: Q&A Section with persistent chat
            st.header("Ask Questions About Your Document")
            st.write("Ask specific questions about the document content, terms, or anything you'd like clarified.")

            # Always show previous Q&A history section (even if empty)
            st.subheader("Previous Questions & Answers:")
            if st.session_state.qa_history:
                for i, qa in enumerate(st.session_state.qa_history, 1):
                    with st.expander(f"Q{i}: {qa['question'][:50]}...", expanded=False):
                        st.write(f"**Question:** {qa['question']}")
                        st.write(f"**Answer:** {qa['answer']}")
            else:
                st.write("*No questions asked yet*")

            st.markdown("---")

            # Always show the question input box. The key includes the history
            # length, so each answered question yields a new, empty input.
            user_question = st.text_input(
                "Enter your question:", 
                placeholder="e.g., What are the termination conditions? What fees am I responsible for?",
                key=f"question_input_{len(st.session_state.qa_history)}"
            )

            if st.button("Submit Question", type="primary"):
                if user_question:
                    with st.spinner("Finding the answer..."):
                        try:
                            retriever = st.session_state.vectorstore.as_retriever()
                            llm = ChatGoogleGenerativeAI(
                                model="gemini-2.0-flash", 
                                google_api_key=api_key,
                                temperature=0.2
                            )
                            qa_chain = RetrievalQA.from_chain_type(
                                llm=llm, 
                                chain_type="stuff", 
                                retriever=retriever
                            )

                            # Enhanced prompt for better answers
                            enhanced_question = f"""
                            Based on the document content, please answer this question clearly and concisely: {user_question}
                            
                            If the answer involves specific terms, conditions, or clauses, please quote the relevant text.
                            If the information is not clearly stated in the document, please say so.
                            """

                            # Chain.run() is deprecated in LangChain; invoke()
                            # returns a dict with the answer under "result".
                            answer = qa_chain.invoke({"query": enhanced_question})["result"]

                            # Add to history
                            st.session_state.qa_history.append({
                                'question': user_question,
                                'answer': answer
                            })

                        except Exception as e:
                            st.error(f"Error generating answer: {str(e)}")
                else:
                    st.warning("Please enter a question before submitting.")

            # Display the most recent answer if available
            if st.session_state.qa_history:
                st.markdown("### Answer")
                latest_qa = st.session_state.qa_history[-1]
                st.write(f"**Question:** {latest_qa['question']}")
                st.write(f"**Answer:** {latest_qa['answer']}")

                st.markdown("---")
                st.write("**Ask another question below:**")

    except Exception as e:
        st.error(f"An error occurred: {str(e)}")

    finally:
        # Clean up temporary file (only present if one was written this run)
        if temp_file_path and os.path.exists(temp_file_path):
            os.remove(temp_file_path)

else:
    st.info("Please upload a PDF document to get started.")
    
    # Add some helpful information
    with st.expander("ℹ️ What types of documents work best?"):
        st.write("""
        This tool works best with legal documents such as:
        - Contracts and agreements
        - Terms of service
        - Privacy policies
        - Lease agreements
        - Employment contracts
        - Legal notices
        - Service agreements
        
        The AI will analyze the document and provide:
        - A clear summary of the main points
        - Identification of potential risks or red flags
        - Answers to your specific questions about the content
        """)