Spaces:

VarshaJeyaraj
/

WhyNotUs_AI_Legal_Doc_Explainer

No application file

App Files Files Community

Varsha Jeyaraj committed on Aug 17, 2025

Commit

7b7ad6a

0 Parent(s):

Final version of the AI Legal Explainer app

Browse files

Files changed (4) hide show

.gitignore +15 -0
.streamlit/secrets.toml +2 -0
app.py +322 -0
requirements.txt +0 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,15 @@

+# Python virtual environment
+venv/
+# IDE and editor folders
+.vscode/
+# Python cache files
+__pycache__/
+*.pyc
+# User-specific history or session files
+.history/
+# Temporary files created by the app
+temp_*.pdf

.streamlit/secrets.toml ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ GOOGLE_API_KEY="YOUR_GOOGLE_API_KEY_GOES_HERE"
2	+ HUGGINGFACEHUB_API_TOKEN="YOUR_HF_TOKEN_GOES_HERE"

app.py ADDED Viewed

	@@ -0,0 +1,322 @@

+import hashlib
+import os
+import re
+import tempfile
+
+import streamlit as st
+from langchain.chains import RetrievalQA
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_community.document_loaders import PyPDFLoader
+from langchain_community.vectorstores import FAISS
+from langchain_core.documents import Document
+from langchain_google_genai import ChatGoogleGenerativeAI
+from langchain_huggingface import HuggingFaceEmbeddings
+def process_document(file_path):
+    """Process PDF document and create vector store for retrieval"""
+    loader = PyPDFLoader(file_path)
+    documents = loader.load()
+    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
+    texts = text_splitter.split_documents(documents)
+    model_name = "sentence-transformers/all-MiniLM-L6-v2"
+    embeddings = HuggingFaceEmbeddings(model_name=model_name)
+    vectorstore = FAISS.from_documents(texts, embedding=embeddings)
+    return vectorstore
+def verify_legal_document(file_path, api_key):
+    """Verify if the uploaded document is a legal document"""
+    try:
+        loader = PyPDFLoader(file_path)
+        documents = loader.load()
+        if not documents:
+            return False
+        full_text = "\n".join([doc.page_content for doc in documents])
+        if len(full_text.strip()) < 50:
+            return False
+        llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", google_api_key=api_key)
+        verification_prompt = f"""
+        Analyze the following text carefully and determine if it is a legal document.
+        Legal documents include: contracts, agreements, terms of service, privacy policies,
+        legal notices, lease agreements, employment contracts, NDAs, legal forms, court documents, etc.
+        Non-legal documents include: research papers, books, articles, manuals, reports,
+        personal documents, educational materials, etc.
+        Respond with ONLY ONE WORD:
+        - "LEGAL" if this is a legal document
+        - "NON-LEGAL" if this is not a legal document
+        Text to analyze:
+        {full_text[:3000]}
+        """
+        response = llm.invoke(verification_prompt)
+        response_text = response.content.strip().upper()
+        is_legal = "LEGAL" in response_text and "NON-LEGAL" not in response_text
+        return is_legal
+    except Exception as e:
+        st.error(f"Error during verification: {str(e)}")
+        return False
+def generate_analysis(vectorstore, api_key):
+    """Generate automated summary and risk analysis"""
+    try:
+        retriever = vectorstore.as_retriever()
+        llm = ChatGoogleGenerativeAI(
+            model="gemini-2.0-flash",
+            google_api_key=api_key,
+            temperature=0.3
+        )
+        qa_chain = RetrievalQA.from_chain_type(
+            llm=llm,
+            chain_type="stuff",
+            retriever=retriever
+        )
+        # Generate summary
+        summary_query = """
+        Provide a concise, three-bullet point summary of this document's main purpose,
+        key parties involved, and primary obligations. Use simple language.
+        """
+        summary = qa_chain.run(summary_query)
+        # Identify risks
+        risks_query = """
+        Identify potential risks, red flags, or important clauses including:
+        - Financial obligations, penalties, or fees
+        - Auto-renewal clauses
+        - Termination conditions
+        - Liability limitations
+        - Unusual or potentially unfavorable terms
+        Format as bullet points.
+        """
+        risks = qa_chain.run(risks_query)
+        return summary, risks
+    except Exception as e:
+        st.error(f"Error during analysis: {str(e)}")
+        return None, None
+# Streamlit App Configuration
+st.set_page_config(
+    page_title="AI Legal Doc Explainer",
+    page_icon="⚖️",
+    layout="centered",
+    initial_sidebar_state="auto"
+)
+st.title("⚖️ AI Legal Doc Explainer")
+st.write("Upload your legal document (PDF) and get a simple, easy-to-understand explanation.")
+st.markdown("""
+<style>
+/* Blue highlight for text input */
+.stTextInput > div > div > input {
+    border-color: #0066cc !important;
+    box-shadow: 0 0 0 0.2rem rgba(0, 102, 204, 0.25) !important;
+}
+.stTextInput > div > div > input:focus {
+    border-color: #0066cc !important;
+    box-shadow: 0 0 0 0.2rem rgba(0, 102, 204, 0.5) !important;
+}
+/* Green submit button */
+.stButton > button[kind="primary"] {
+    background-color: #28a745 !important;
+    border-color: #28a745 !important;
+}
+.stButton > button[kind="primary"]:hover {
+    background-color: #218838 !important;
+    border-color: #1e7e34 !important;
+}
+</style>
+""", unsafe_allow_html=True)
+# Initialize session state for Q&A
+if "qa_history" not in st.session_state:
+    st.session_state.qa_history = []
+if "vectorstore" not in st.session_state:
+    st.session_state.vectorstore = None
+if "document_processed" not in st.session_state:
+    st.session_state.document_processed = False
+# File uploader
+uploaded_file = st.file_uploader("Upload a PDF document", type="pdf")
+if uploaded_file is not None:
+    # Save uploaded file temporarily
+    temp_file_path = f"temp_{uploaded_file.name}"
+    with open(temp_file_path, "wb") as f:
+        f.write(uploaded_file.getbuffer())
+    try:
+        # Check if API key exists
+        if "GOOGLE_API_KEY" not in st.secrets:
+            st.error("Google API key not found in secrets. Please add your API key.")
+            st.stop()
+        # STEP 1: Verify document type
+        with st.spinner("Verifying document type..."):
+            is_legal_doc = verify_legal_document(temp_file_path, st.secrets["GOOGLE_API_KEY"])
+        # STEP 2: Show immediate notification for non-legal documents
+        if not is_legal_doc:
+            #st.error("⚠️ Document Verification Failed")
+            st.warning("This does not appear to be a legal document.")
+            st.info("This tool is optimized for legal documents like contracts, agreements, terms of service, privacy policies, etc.")
+            # Ask user what to do
+            st.markdown("**What would you like to do?**")
+            col1, col2 = st.columns(2)
+            with col2:
+                proceed_anyway = st.button("▶️ Continue Anyway", use_container_width=True)
+            if not proceed_anyway:
+                st.stop()  # Stop here if user doesn't choose to continue
+        # STEP 3: Process the document (either legal doc or user chose to continue)
+        if not st.session_state.document_processed:
+            if is_legal_doc:
+                st.success("Legal document verified!")
+            else:
+                st.info("Proceeding with analysis as requested...")
+            with st.spinner("Processing document..."):
+                st.session_state.vectorstore = process_document(temp_file_path)
+            # STEP 4: Generate analysis
+            with st.spinner("Analyzing document for key points and risks..."):
+                summary, risks = generate_analysis(st.session_state.vectorstore, st.secrets["GOOGLE_API_KEY"])
+            if summary and risks:
+                st.session_state.summary = summary
+                st.session_state.risks = risks
+                st.session_state.document_processed = True
+        # Display analysis results if document is processed
+        if st.session_state.document_processed:
+            st.success("Document analysis complete!")
+            # Display analysis results
+            with st.expander("Document Summary", expanded=True):
+                st.write(st.session_state.summary)
+            with st.expander("🚩 Potential Red Flags & Important Clauses", expanded=True):
+                st.write(st.session_state.risks)
+            st.markdown("---")
+            # STEP 5: Q&A Section with persistent chat
+            st.header("Ask Questions About Your Document")
+            st.write("Ask specific questions about the document content, terms, or anything you'd like clarified.")
+            # Always show previous Q&A history section (even if empty)
+            st.subheader("Previous Questions & Answers:")
+            if st.session_state.qa_history:
+                for i, qa in enumerate(st.session_state.qa_history, 1):
+                    with st.expander(f"Q{i}: {qa['question'][:50]}...", expanded=False):
+                        st.write(f"**Question:** {qa['question']}")
+                        st.write(f"**Answer:** {qa['answer']}")
+            else:
+                st.write("*No questions asked yet*")
+            st.markdown("---")
+            # Always show the question input box
+            user_question = st.text_input(
+                "Enter your question:",
+                placeholder="e.g., What are the termination conditions? What fees am I responsible for?",
+                key=f"question_input_{len(st.session_state.qa_history)}"
+            )
+            if st.button("Submit Question", type="primary"):
+                if user_question:
+                    with st.spinner("Finding the answer..."):
+                        try:
+                            retriever = st.session_state.vectorstore.as_retriever()
+                            llm = ChatGoogleGenerativeAI(
+                                model="gemini-2.0-flash",
+                                google_api_key=st.secrets["GOOGLE_API_KEY"],
+                                temperature=0.2
+                            )
+                            qa_chain = RetrievalQA.from_chain_type(
+                                llm=llm,
+                                chain_type="stuff",
+                                retriever=retriever
+                            )
+                            # Enhanced prompt for better answers
+                            enhanced_question = f"""
+                            Based on the document content, please answer this question clearly and concisely: {user_question}
+                            If the answer involves specific terms, conditions, or clauses, please quote the relevant text.
+                            If the information is not clearly stated in the document, please say so.
+                            """
+                            answer = qa_chain.run(enhanced_question)
+                            # Add to history
+                            st.session_state.qa_history.append({
+                                'question': user_question,
+                                'answer': answer
+                            })
+                        except Exception as e:
+                            st.error(f"Error generating answer: {str(e)}")
+                else:
+                    st.warning("Please enter a question before submitting.")
+            # Display the most recent answer if available
+            if st.session_state.qa_history:
+                st.markdown("### Answer")
+                latest_qa = st.session_state.qa_history[-1]
+                st.write(f"**Question:** {latest_qa['question']}")
+                st.write(f"**Answer:** {latest_qa['answer']}")
+                st.markdown("---")
+                st.write("**Ask another question below:**")
+    except Exception as e:
+        st.error(f"An error occurred: {str(e)}")
+    finally:
+        # Clean up temporary file
+        if os.path.exists(temp_file_path):
+            os.remove(temp_file_path)
+else:
+    st.info("Please upload a PDF document to get started.")
+    # Add some helpful information
+    with st.expander("ℹ️ What types of documents work best?"):
+        st.write("""
+        This tool works best with legal documents such as:
+        - Contracts and agreements
+        - Terms of service
+        - Privacy policies
+        - Lease agreements
+        - Employment contracts
+        - Legal notices
+        - Service agreements
+        The AI will analyze the document and provide:
+        - A clear summary of the main points
+        - Identification of potential risks or red flags
+        - Answers to your specific questions about the content
+        """)

requirements.txt ADDED Viewed

Binary file (4.31 kB). View file