Varsha Jeyaraj committed on
Commit
7b7ad6a
·
0 Parent(s):

Final version of the AI Legal Explainer app

Browse files
Files changed (4) hide show
  1. .gitignore +15 -0
  2. .streamlit/secrets.toml +2 -0
  3. app.py +322 -0
  4. requirements.txt +0 -0
.gitignore ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python virtual environment
2
+ venv/
3
+
4
+ # IDE and editor folders
5
+ .vscode/
6
+
7
+ # Python cache files
8
+ __pycache__/
9
+ *.pyc
10
+
11
+ # User-specific history or session files
12
+ .history/
13
+
14
+ # Temporary files created by the app
15
+ temp_*.pdf
.streamlit/secrets.toml ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ GOOGLE_API_KEY="YOUR_GOOGLE_API_KEY_GOES_HERE"
2
+ HUGGINGFACEHUB_API_TOKEN="YOUR_HF_TOKEN_GOES_HERE"
app.py ADDED
@@ -0,0 +1,322 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import tempfile

import streamlit as st
from langchain.chains import RetrievalQA
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_core.documents import Document
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_huggingface import HuggingFaceEmbeddings
10
+
11
+
12
def process_document(file_path):
    """Load a PDF, split it into chunks, and index the chunks for retrieval.

    Args:
        file_path: Path to the PDF file to index.

    Returns:
        A FAISS vector store built from the document's text chunks, embedded
        with the ``sentence-transformers/all-MiniLM-L6-v2`` model.

    Raises:
        ValueError: If the PDF yields no text chunks (for example a scanned,
            image-only file), because there is nothing to index.
    """
    documents = PyPDFLoader(file_path).load()

    # Overlapping chunks keep a clause that straddles a chunk boundary
    # retrievable from either side.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    texts = text_splitter.split_documents(documents)

    # Fail with an actionable message instead of letting index construction
    # blow up on an empty chunk list.
    if not texts:
        raise ValueError(
            "No readable text could be extracted from this PDF. "
            "Scanned or image-only documents are not supported."
        )

    model_name = "sentence-transformers/all-MiniLM-L6-v2"
    embeddings = HuggingFaceEmbeddings(model_name=model_name)

    return FAISS.from_documents(texts, embedding=embeddings)
24
+
25
+
26
def _is_legal_verdict(response_text):
    """Return True only when the model's reply is an un-negated LEGAL verdict.

    The reply is reduced to its alphabetic words, so surrounding quotes or
    punctuation do not matter. "NON-LEGAL" and "NOT LEGAL" are rejected because
    LEGAL is directly preceded by a negation; "NONLEGAL" and "ILLEGAL" are
    rejected because they never produce the standalone word LEGAL.
    """
    words = "".join(
        ch if ch.isalpha() else " " for ch in response_text.upper()
    ).split()

    found_legal = False
    for index, word in enumerate(words):
        if word != "LEGAL":
            continue
        if index > 0 and words[index - 1] in ("NON", "NOT"):
            return False
        found_legal = True
    return found_legal


def verify_legal_document(file_path, api_key):
    """Ask Gemini whether the PDF at ``file_path`` looks like a legal document.

    Args:
        file_path: Path to the PDF file to classify.
        api_key: Google API key passed to ``ChatGoogleGenerativeAI``.

    Returns:
        True if the model classifies the document as legal. False if the PDF
        has no pages, has fewer than 50 characters of text, is classified as
        non-legal, or if any error occurs (the error is shown via ``st.error``).
    """
    try:
        documents = PyPDFLoader(file_path).load()
        if not documents:
            return False

        full_text = "\n".join(doc.page_content for doc in documents)

        # Too little text to classify meaningfully (e.g. a scanned PDF).
        if len(full_text.strip()) < 50:
            return False

        llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", google_api_key=api_key)

        # Only the first 3000 characters are sent to the model.
        verification_prompt = (
            "\n"
            "Analyze the following text carefully and determine if it is a legal document.\n"
            "\n"
            "Legal documents include: contracts, agreements, terms of service, privacy policies,\n"
            "legal notices, lease agreements, employment contracts, NDAs, legal forms, court documents, etc.\n"
            "\n"
            "Non-legal documents include: research papers, books, articles, manuals, reports,\n"
            "personal documents, educational materials, etc.\n"
            "\n"
            "Respond with ONLY ONE WORD:\n"
            '- "LEGAL" if this is a legal document\n'
            '- "NON-LEGAL" if this is not a legal document\n'
            "\n"
            "Text to analyze:\n"
            f"{full_text[:3000]}\n"
        )

        response = llm.invoke(verification_prompt)
        return _is_legal_verdict(response.content)

    except Exception as e:
        # Best-effort check: report the problem in the UI and treat the
        # document as unverified rather than crashing the app.
        st.error(f"Error during verification: {str(e)}")
        return False
67
+
68
+
69
def generate_analysis(vectorstore, api_key):
    """Generate an automated summary and risk analysis for an indexed document.

    Runs two retrieval-augmented queries against ``vectorstore`` using a
    ``RetrievalQA`` "stuff" chain backed by Gemini.

    Args:
        vectorstore: Vector store exposing ``as_retriever()`` (as returned by
            ``process_document``).
        api_key: Google API key passed to ``ChatGoogleGenerativeAI``.

    Returns:
        A ``(summary, risks)`` tuple of answer strings, or ``(None, None)`` if
        any step fails (the error is shown via ``st.error``).
    """
    try:
        llm = ChatGoogleGenerativeAI(
            model="gemini-2.0-flash",
            google_api_key=api_key,
            temperature=0.3,
        )
        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=vectorstore.as_retriever(),
        )

        # Generate summary
        summary_query = (
            "\n"
            "Provide a concise, three-bullet point summary of this document's main purpose,\n"
            "key parties involved, and primary obligations. Use simple language.\n"
        )
        # ``Chain.run`` is deprecated; ``invoke`` takes the question under the
        # "query" key and returns the answer under "result".
        summary = qa_chain.invoke({"query": summary_query})["result"]

        # Identify risks
        risks_query = (
            "\n"
            "Identify potential risks, red flags, or important clauses including:\n"
            "- Financial obligations, penalties, or fees\n"
            "- Auto-renewal clauses\n"
            "- Termination conditions\n"
            "- Liability limitations\n"
            "- Unusual or potentially unfavorable terms\n"
            "Format as bullet points.\n"
        )
        risks = qa_chain.invoke({"query": risks_query})["result"]

        return summary, risks
    except Exception as e:
        # Surface the failure in the UI; callers treat (None, None) as
        # "analysis not available".
        st.error(f"Error during analysis: {str(e)}")
        return None, None
107
+
108
+
109
# Page setup: browser tab metadata, heading, and intro text.
st.set_page_config(
    page_title="AI Legal Doc Explainer",
    page_icon="⚖️",
    layout="centered",
    initial_sidebar_state="auto",
)

st.title("⚖️ AI Legal Doc Explainer")
st.write("Upload your legal document (PDF) and get a simple, easy-to-understand explanation.")

# Custom styling: blue highlight on text inputs, green primary buttons.
_PAGE_CSS = """
<style>
/* Blue highlight for text input */
.stTextInput > div > div > input {
border-color: #0066cc !important;
box-shadow: 0 0 0 0.2rem rgba(0, 102, 204, 0.25) !important;
}

.stTextInput > div > div > input:focus {
border-color: #0066cc !important;
box-shadow: 0 0 0 0.2rem rgba(0, 102, 204, 0.5) !important;
}

/* Green submit button */
.stButton > button[kind="primary"] {
background-color: #28a745 !important;
border-color: #28a745 !important;
}

.stButton > button[kind="primary"]:hover {
background-color: #218838 !important;
border-color: #1e7e34 !important;
}
</style>
"""
st.markdown(_PAGE_CSS, unsafe_allow_html=True)

# Seed the session-state entries used by the Q&A flow, leaving any value
# already stored from a previous run untouched.
for _state_key, _initial_value in (
    ("qa_history", []),
    ("vectorstore", None),
    ("document_processed", False),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value
153
+
154
# File uploader
uploaded_file = st.file_uploader("Upload a PDF document", type="pdf")

if uploaded_file is not None:
    # Set only once a temp copy has actually been written, so the cleanup in
    # ``finally`` knows whether there is anything to remove.
    temp_file_path = None

    try:
        # Check if API key exists
        if "GOOGLE_API_KEY" not in st.secrets:
            st.error("Google API key not found in secrets. Please add your API key.")
            st.stop()
        api_key = st.secrets["GOOGLE_API_KEY"]

        # Streamlit re-runs this script on every interaction, so anything that
        # must survive a rerun lives in session state. The state is tied to the
        # current upload and reset when a different file arrives, so results
        # from a previous document are never shown for a new one.
        upload_key = (uploaded_file.name, uploaded_file.size)
        if st.session_state.get("active_upload") != upload_key:
            st.session_state.active_upload = upload_key
            st.session_state.qa_history = []
            st.session_state.vectorstore = None
            st.session_state.document_processed = False
            st.session_state.is_legal_doc = None
            st.session_state.proceed_anyway = False

        if not st.session_state.document_processed:
            # Write the upload to a uniquely named temp file. The name is not
            # derived from the user-supplied filename, so concurrent sessions
            # cannot overwrite or delete each other's copy.
            with tempfile.NamedTemporaryFile(
                delete=False, prefix="temp_", suffix=".pdf"
            ) as temp_file:
                temp_file.write(uploaded_file.getbuffer())
                temp_file_path = temp_file.name

            # STEP 1: Verify document type (once per upload, not per rerun)
            if st.session_state.is_legal_doc is None:
                with st.spinner("Verifying document type..."):
                    st.session_state.is_legal_doc = verify_legal_document(
                        temp_file_path, api_key
                    )
            is_legal_doc = st.session_state.is_legal_doc

            # STEP 2: Show immediate notification for non-legal documents
            if not is_legal_doc and not st.session_state.proceed_anyway:
                st.warning("This does not appear to be a legal document.")
                st.info("This tool is optimized for legal documents like contracts, agreements, terms of service, privacy policies, etc.")

                # Ask user what to do
                st.markdown("**What would you like to do?**")
                _, continue_col = st.columns(2)

                with continue_col:
                    # A button is True only for the single run triggered by the
                    # click, so the choice is persisted in session state.
                    if st.button("▶️ Continue Anyway", use_container_width=True):
                        st.session_state.proceed_anyway = True

                if not st.session_state.proceed_anyway:
                    st.stop()  # Stop here if user doesn't choose to continue

            # STEP 3: Process the document (either legal doc or user chose to continue)
            if is_legal_doc:
                st.success("Legal document verified!")
            else:
                st.info("Proceeding with analysis as requested...")

            # Reuse the index if an earlier run built it but analysis failed.
            if st.session_state.vectorstore is None:
                with st.spinner("Processing document..."):
                    st.session_state.vectorstore = process_document(temp_file_path)

            # STEP 4: Generate analysis
            with st.spinner("Analyzing document for key points and risks..."):
                summary, risks = generate_analysis(st.session_state.vectorstore, api_key)

            if summary and risks:
                st.session_state.summary = summary
                st.session_state.risks = risks
                st.session_state.document_processed = True

        # Display analysis results if document is processed
        if st.session_state.document_processed:
            st.success("Document analysis complete!")

            # Display analysis results
            with st.expander("Document Summary", expanded=True):
                st.write(st.session_state.summary)

            with st.expander("🚩 Potential Red Flags & Important Clauses", expanded=True):
                st.write(st.session_state.risks)

            st.markdown("---")

            # STEP 5: Q&A Section with persistent chat
            st.header("Ask Questions About Your Document")
            st.write("Ask specific questions about the document content, terms, or anything you'd like clarified.")

            # Always show previous Q&A history section (even if empty)
            st.subheader("Previous Questions & Answers:")
            if st.session_state.qa_history:
                for i, qa in enumerate(st.session_state.qa_history, 1):
                    with st.expander(f"Q{i}: {qa['question'][:50]}...", expanded=False):
                        st.write(f"**Question:** {qa['question']}")
                        st.write(f"**Answer:** {qa['answer']}")
            else:
                st.write("*No questions asked yet*")

            st.markdown("---")

            # Always show the question input box. The key changes with the
            # history length, which yields a fresh, empty box after each
            # answered question.
            user_question = st.text_input(
                "Enter your question:",
                placeholder="e.g., What are the termination conditions? What fees am I responsible for?",
                key=f"question_input_{len(st.session_state.qa_history)}",
            )

            if st.button("Submit Question", type="primary"):
                if user_question:
                    with st.spinner("Finding the answer..."):
                        try:
                            llm = ChatGoogleGenerativeAI(
                                model="gemini-2.0-flash",
                                google_api_key=api_key,
                                temperature=0.2,
                            )
                            qa_chain = RetrievalQA.from_chain_type(
                                llm=llm,
                                chain_type="stuff",
                                retriever=st.session_state.vectorstore.as_retriever(),
                            )

                            # Enhanced prompt for better answers
                            enhanced_question = (
                                "\n"
                                f"Based on the document content, please answer this question clearly and concisely: {user_question}\n"
                                "\n"
                                "If the answer involves specific terms, conditions, or clauses, please quote the relevant text.\n"
                                "If the information is not clearly stated in the document, please say so.\n"
                            )

                            # ``Chain.run`` is deprecated; ``invoke`` takes the
                            # question under "query" and returns "result".
                            answer = qa_chain.invoke({"query": enhanced_question})["result"]

                            # Add to history
                            st.session_state.qa_history.append({
                                'question': user_question,
                                'answer': answer,
                            })

                        except Exception as e:
                            st.error(f"Error generating answer: {str(e)}")
                else:
                    st.warning("Please enter a question before submitting.")

            # Display the most recent answer if available
            if st.session_state.qa_history:
                st.markdown("### Answer")
                latest_qa = st.session_state.qa_history[-1]
                st.write(f"**Question:** {latest_qa['question']}")
                st.write(f"**Answer:** {latest_qa['answer']}")

                st.markdown("---")
                st.write("**Ask another question below:**")

    except Exception as e:
        st.error(f"An error occurred: {str(e)}")

    finally:
        # Clean up temporary file (also runs when st.stop() ends the script).
        if temp_file_path and os.path.exists(temp_file_path):
            os.remove(temp_file_path)

else:
    st.info("Please upload a PDF document to get started.")

    # Add some helpful information
    with st.expander("ℹ️ What types of documents work best?"):
        st.write(
            "\n"
            "This tool works best with legal documents such as:\n"
            "- Contracts and agreements\n"
            "- Terms of service\n"
            "- Privacy policies\n"
            "- Lease agreements\n"
            "- Employment contracts\n"
            "- Legal notices\n"
            "- Service agreements\n"
            "\n"
            "The AI will analyze the document and provide:\n"
            "- A clear summary of the main points\n"
            "- Identification of potential risks or red flags\n"
            "- Answers to your specific questions about the content\n"
        )
requirements.txt ADDED
Binary file (4.31 kB). View file