File size: 12,561 Bytes
7b7ad6a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
import hashlib
import os
import tempfile

import streamlit as st
from langchain.chains import RetrievalQA
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_core.documents import Document
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_huggingface import HuggingFaceEmbeddings


def process_document(file_path):
    """Build a FAISS vector store from the text of a PDF file.

    The PDF is loaded with ``PyPDFLoader``, split into chunks of 1000
    characters with a 200-character overlap, embedded with the
    ``sentence-transformers/all-MiniLM-L6-v2`` model and indexed with FAISS.

    Args:
        file_path: Path of the PDF file to index.

    Returns:
        The FAISS vector store built from the document chunks.

    Raises:
        ValueError: If the PDF yields no text chunks (for example a scanned,
            image-only document), because there is nothing to index.
    """
    documents = PyPDFLoader(file_path).load()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    texts = text_splitter.split_documents(documents)

    # Fail early with an actionable message: an empty chunk list cannot be
    # indexed and would otherwise surface as an opaque error from the
    # vector-store constructor.
    if not texts:
        raise ValueError(
            "No extractable text was found in the PDF; "
            "scanned or image-only documents cannot be analyzed."
        )

    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    return FAISS.from_documents(texts, embedding=embeddings)


def verify_legal_document(file_path, api_key):
    """Ask the Gemini model whether the PDF at ``file_path`` is a legal document.

    The text of all pages is joined and its first 3000 characters are sent to
    the model with a prompt requesting a one-word verdict ("LEGAL" or
    "NON-LEGAL").

    Args:
        file_path: Path of the PDF file to inspect.
        api_key: Google API key passed to ``ChatGoogleGenerativeAI``.

    Returns:
        True when the model's reply contains "LEGAL" without a negation
        ("NON-LEGAL", "NON LEGAL", "NONLEGAL", "NOT LEGAL", "NOT A LEGAL").
        False when the PDF has no pages, has fewer than 50 characters of
        text after stripping, the verdict is negative, or any exception
        occurs (the exception is reported through ``st.error``).
    """
    try:
        documents = PyPDFLoader(file_path).load()
        if not documents:
            return False

        full_text = "\n".join(doc.page_content for doc in documents)

        # Too little text to classify meaningfully.
        if len(full_text.strip()) < 50:
            return False

        llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", google_api_key=api_key)
        verification_prompt = f"""
        Analyze the following text carefully and determine if it is a legal document.
        
        Legal documents include: contracts, agreements, terms of service, privacy policies, 
        legal notices, lease agreements, employment contracts, NDAs, legal forms, court documents, etc.
        
        Non-legal documents include: research papers, books, articles, manuals, reports, 
        personal documents, educational materials, etc.
        
        Respond with ONLY ONE WORD:
        - "LEGAL" if this is a legal document
        - "NON-LEGAL" if this is not a legal document
        
        Text to analyze:
        {full_text[:3000]}
        """

        response = llm.invoke(verification_prompt)

        # Message content may be a plain string or a list of parts
        # (strings or dicts carrying a "text" entry); flatten it to text.
        content = response.content
        if not isinstance(content, str):
            content = " ".join(
                part if isinstance(part, str) else str(part.get("text", ""))
                for part in content
            )

        # Drop punctuation and whitespace so every spelling of the negative
        # verdict ("NON-LEGAL", "NON LEGAL", "NON_LEGAL", "NONLEGAL") collapses
        # to the same token before matching.
        compact_verdict = "".join(ch for ch in content.upper() if ch.isalnum())
        negative_markers = ("NONLEGAL", "NOTLEGAL", "NOTALEGAL")
        if any(marker in compact_verdict for marker in negative_markers):
            return False
        return "LEGAL" in compact_verdict

    except Exception as e:
        st.error(f"Error during verification: {str(e)}")
        return False


def generate_analysis(vectorstore, api_key):
    """Produce a plain-language summary and a risk list for an indexed document.

    A ``RetrievalQA`` "stuff" chain is built over ``vectorstore`` with the
    Gemini chat model (temperature 0.3) and queried twice: once for a
    three-bullet summary and once for risks and red flags.

    Args:
        vectorstore: Vector store exposing ``as_retriever()``.
        api_key: Google API key passed to ``ChatGoogleGenerativeAI``.

    Returns:
        A ``(summary, risks)`` tuple of model answers, or ``(None, None)`` if
        any step raises; the error is reported through ``st.error``.
    """
    try:
        retriever = vectorstore.as_retriever()
        llm = ChatGoogleGenerativeAI(
            model="gemini-2.0-flash", 
            google_api_key=api_key, 
            temperature=0.3
        )
        qa_chain = RetrievalQA.from_chain_type(
            llm=llm, 
            chain_type="stuff", 
            retriever=retriever
        )
        
        # Generate summary
        summary_query = """
        Provide a concise, three-bullet point summary of this document's main purpose, 
        key parties involved, and primary obligations. Use simple language.
        """
        # ``invoke`` replaces the deprecated ``Chain.run``; RetrievalQA reads
        # the question from "query" and returns the answer under "result".
        summary = qa_chain.invoke({"query": summary_query})["result"]
        
        # Identify risks
        risks_query = """
        Identify potential risks, red flags, or important clauses including:
        - Financial obligations, penalties, or fees
        - Auto-renewal clauses
        - Termination conditions
        - Liability limitations
        - Unusual or potentially unfavorable terms
        Format as bullet points.
        """
        risks = qa_chain.invoke({"query": risks_query})["result"]
        
        return summary, risks
    except Exception as e:
        st.error(f"Error during analysis: {str(e)}")
        return None, None


# Page-level settings must be applied before any other Streamlit output.
PAGE_SETTINGS = {
    "page_title": "AI Legal Doc Explainer",
    "page_icon": "⚖️",
    "layout": "centered",
    "initial_sidebar_state": "auto",
}
st.set_page_config(**PAGE_SETTINGS)

st.title("⚖️ AI Legal Doc Explainer")
st.write("Upload your legal document (PDF) and get a simple, easy-to-understand explanation.")

# Custom styling: blue outline on text inputs, green primary buttons.
CUSTOM_CSS = """
<style>
/* Blue highlight for text input */
.stTextInput > div > div > input {
    border-color: #0066cc !important;
    box-shadow: 0 0 0 0.2rem rgba(0, 102, 204, 0.25) !important;
}

.stTextInput > div > div > input:focus {
    border-color: #0066cc !important;
    box-shadow: 0 0 0 0.2rem rgba(0, 102, 204, 0.5) !important;
}

/* Green submit button */
.stButton > button[kind="primary"] {
    background-color: #28a745 !important;
    border-color: #28a745 !important;
}

.stButton > button[kind="primary"]:hover {
    background-color: #218838 !important;
    border-color: #1e7e34 !important;
}
</style>
"""
st.markdown(CUSTOM_CSS, unsafe_allow_html=True)

# Seed the session-state entries used by the Q&A flow, leaving any value
# that already exists from an earlier rerun untouched.
for state_key, initial_value in (
    ("qa_history", []),
    ("vectorstore", None),
    ("document_processed", False),
):
    if state_key not in st.session_state:
        st.session_state[state_key] = initial_value

# PDF upload widget; the rest of the script branches on its result.
uploaded_file = st.file_uploader("Upload a PDF document", type="pdf")

# Main flow: verify the uploaded PDF, build the analysis once per document,
# then serve a persistent Q&A section. Streamlit re-executes this script on
# every interaction, so everything that must survive a rerun (verification
# verdict, the user's "continue anyway" choice, analysis results) lives in
# st.session_state and is keyed to the current upload.
if uploaded_file is not None:
    # Fingerprint the upload by name and content so that state belonging to a
    # previously uploaded document is never shown for a different file.
    pdf_bytes = uploaded_file.getvalue()
    document_id = (uploaded_file.name, hashlib.sha256(pdf_bytes).hexdigest())

    if st.session_state.get("document_id") != document_id:
        st.session_state.document_id = document_id
        st.session_state.qa_history = []
        st.session_state.vectorstore = None
        st.session_state.document_processed = False
        st.session_state.summary = None
        st.session_state.risks = None
        st.session_state.is_legal_doc = None      # None = not verified yet
        st.session_state.proceed_anyway = False   # user override for non-legal docs

    temp_file_path = None
    try:
        # Check if API key exists
        if "GOOGLE_API_KEY" not in st.secrets:
            st.error("Google API key not found in secrets. Please add your API key.")
            st.stop()
        api_key = st.secrets["GOOGLE_API_KEY"]

        if not st.session_state.document_processed:
            # The loaders need a real file path. Use a uniquely named temp file
            # rather than a path derived from the client-supplied filename, so
            # concurrent sessions cannot collide. It is only needed until the
            # document has been processed.
            with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as temp_file:
                temp_file_path = temp_file.name
                temp_file.write(pdf_bytes)

            # STEP 1: Verify document type (once per document, not per rerun)
            if st.session_state.is_legal_doc is None:
                with st.spinner("Verifying document type..."):
                    st.session_state.is_legal_doc = verify_legal_document(temp_file_path, api_key)

        is_legal_doc = bool(st.session_state.is_legal_doc)

        # STEP 2: Show notification for non-legal documents
        if not is_legal_doc:
            st.warning("This does not appear to be a legal document.")
            st.info("This tool is optimized for legal documents like contracts, agreements, terms of service, privacy policies, etc.")

            if not st.session_state.proceed_anyway:
                # Ask user what to do
                st.markdown("**What would you like to do?**")
                _, action_col = st.columns(2)

                with action_col:
                    # A button is only True on the rerun triggered by its click,
                    # so the choice is stored to keep later reruns (e.g. asking
                    # a question) from stopping here again.
                    if st.button("▶️ Continue Anyway", use_container_width=True):
                        st.session_state.proceed_anyway = True

                if not st.session_state.proceed_anyway:
                    st.stop()  # Stop here if user doesn't choose to continue

        # STEP 3: Process the document (either legal doc or user chose to continue)
        if not st.session_state.document_processed:
            if is_legal_doc:
                st.success("Legal document verified!")
            else:
                st.info("Proceeding with analysis as requested...")

            with st.spinner("Processing document..."):
                st.session_state.vectorstore = process_document(temp_file_path)

            # STEP 4: Generate analysis
            with st.spinner("Analyzing document for key points and risks..."):
                summary, risks = generate_analysis(st.session_state.vectorstore, api_key)

            # Only mark the document as processed when both answers exist, so
            # a failed analysis is retried on the next rerun.
            if summary and risks:
                st.session_state.summary = summary
                st.session_state.risks = risks
                st.session_state.document_processed = True

        # Display analysis results if document is processed
        if st.session_state.document_processed:
            st.success("Document analysis complete!")

            # Display analysis results
            with st.expander("Document Summary", expanded=True):
                st.write(st.session_state.summary)

            with st.expander("🚩 Potential Red Flags & Important Clauses", expanded=True):
                st.write(st.session_state.risks)

            st.markdown("---")

            # STEP 5: Q&A Section with persistent chat
            st.header("Ask Questions About Your Document")
            st.write("Ask specific questions about the document content, terms, or anything you'd like clarified.")

            # Always show previous Q&A history section (even if empty)
            st.subheader("Previous Questions & Answers:")
            if st.session_state.qa_history:
                for i, qa in enumerate(st.session_state.qa_history, 1):
                    with st.expander(f"Q{i}: {qa['question'][:50]}...", expanded=False):
                        st.write(f"**Question:** {qa['question']}")
                        st.write(f"**Answer:** {qa['answer']}")
            else:
                st.write("*No questions asked yet*")

            st.markdown("---")

            # Always show the question input box. The key changes with the
            # history length so the box starts empty after each answer.
            user_question = st.text_input(
                "Enter your question:", 
                placeholder="e.g., What are the termination conditions? What fees am I responsible for?",
                key=f"question_input_{len(st.session_state.qa_history)}"
            )

            if st.button("Submit Question", type="primary"):
                if user_question:
                    with st.spinner("Finding the answer..."):
                        try:
                            retriever = st.session_state.vectorstore.as_retriever()
                            llm = ChatGoogleGenerativeAI(
                                model="gemini-2.0-flash", 
                                google_api_key=api_key,
                                temperature=0.2
                            )
                            qa_chain = RetrievalQA.from_chain_type(
                                llm=llm, 
                                chain_type="stuff", 
                                retriever=retriever
                            )

                            # Enhanced prompt for better answers
                            enhanced_question = f"""
                            Based on the document content, please answer this question clearly and concisely: {user_question}
                            
                            If the answer involves specific terms, conditions, or clauses, please quote the relevant text.
                            If the information is not clearly stated in the document, please say so.
                            """

                            # ``invoke`` replaces the deprecated ``Chain.run``;
                            # RetrievalQA takes "query" and returns "result".
                            answer = qa_chain.invoke({"query": enhanced_question})["result"]

                            # Add to history
                            st.session_state.qa_history.append({
                                'question': user_question,
                                'answer': answer
                            })

                        except Exception as e:
                            st.error(f"Error generating answer: {str(e)}")
                else:
                    st.warning("Please enter a question before submitting.")

            # Display the most recent answer if available
            if st.session_state.qa_history:
                st.markdown("### Answer")
                latest_qa = st.session_state.qa_history[-1]
                st.write(f"**Question:** {latest_qa['question']}")
                st.write(f"**Answer:** {latest_qa['answer']}")

                st.markdown("---")
                st.write("**Ask another question below:**")

    except Exception as e:
        st.error(f"An error occurred: {str(e)}")

    finally:
        # Clean up the temporary file if one was created on this run. This
        # also runs when st.stop() ends the script early.
        if temp_file_path is not None and os.path.exists(temp_file_path):
            os.remove(temp_file_path)

else:
    st.info("Please upload a PDF document to get started.")
    
    # Add some helpful information
    with st.expander("ℹ️ What types of documents work best?"):
        st.write("""
        This tool works best with legal documents such as:
        - Contracts and agreements
        - Terms of service
        - Privacy policies
        - Lease agreements
        - Employment contracts
        - Legal notices
        - Service agreements
        
        The AI will analyze the document and provide:
        - A clear summary of the main points
        - Identification of potential risks or red flags
        - Answers to your specific questions about the content
        """)