# Spaces:
# No application file
# No application file
import os
import re
import tempfile

import streamlit as st
from langchain.chains import RetrievalQA
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_core.documents import Document
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_huggingface import HuggingFaceEmbeddings
def process_document(file_path):
    """Process PDF document and create vector store for retrieval.

    The PDF at ``file_path`` is loaded page by page, split into overlapping
    chunks (1000 characters, 200 overlap), embedded with the
    ``sentence-transformers/all-MiniLM-L6-v2`` model and indexed in FAISS.

    Args:
        file_path: Path to a PDF file on disk.

    Returns:
        The FAISS vector store built from the document's chunks.

    Raises:
        ValueError: If the PDF yields no text chunks (for example an empty
            file or a scan without a text layer).
    """
    pages = PyPDFLoader(file_path).load()

    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = splitter.split_documents(pages)

    # An empty chunk list would otherwise fail inside the FAISS index
    # construction with an error that says nothing about the actual cause.
    if not chunks:
        raise ValueError(
            "No extractable text was found in the PDF. "
            "It may be empty or contain only scanned images."
        )

    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    return FAISS.from_documents(chunks, embedding=embeddings)
# Matches negated verdicts such as "NON-LEGAL", "NON LEGAL", "NONLEGAL",
# "NON‑LEGAL" (Unicode hyphen), "NOT LEGAL" and "NOT A LEGAL ...".
_NEGATED_LEGAL_RE = re.compile(r"\bNON[\W_]*LEGAL\b|\bNOT\s+(?:AN?\s+)?LEGAL\b")
# Matches "LEGAL" only as a whole word, so "ILLEGAL" does not count.
_LEGAL_RE = re.compile(r"\bLEGAL\b")


def _is_legal_verdict(response_text):
    """Return True only if *response_text* is an un-negated "LEGAL" verdict."""
    normalized = response_text.strip().upper()
    if _NEGATED_LEGAL_RE.search(normalized):
        return False
    return _LEGAL_RE.search(normalized) is not None


def verify_legal_document(file_path, api_key):
    """Verify if the uploaded document is a legal document.

    The first 3000 characters of the PDF's text are sent to Gemini, which is
    asked to answer with the single word "LEGAL" or "NON-LEGAL".

    Args:
        file_path: Path to the PDF file to classify.
        api_key: Google API key passed to ``ChatGoogleGenerativeAI``.

    Returns:
        True if the model's answer is an un-negated "LEGAL" verdict. False if
        the PDF has no pages, has fewer than 50 characters of text, the model
        answers otherwise, or any error occurs (the error is also reported
        through ``st.error``).
    """
    try:
        documents = PyPDFLoader(file_path).load()
        if not documents:
            return False

        full_text = "\n".join(doc.page_content for doc in documents)
        # Too little text to classify meaningfully.
        if len(full_text.strip()) < 50:
            return False

        llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", google_api_key=api_key)
        verification_prompt = f"""
        Analyze the following text carefully and determine if it is a legal document.
        Legal documents include: contracts, agreements, terms of service, privacy policies,
        legal notices, lease agreements, employment contracts, NDAs, legal forms, court documents, etc.
        Non-legal documents include: research papers, books, articles, manuals, reports,
        personal documents, educational materials, etc.
        Respond with ONLY ONE WORD:
        - "LEGAL" if this is a legal document
        - "NON-LEGAL" if this is not a legal document
        Text to analyze:
        {full_text[:3000]}
        """
        response = llm.invoke(verification_prompt)
        # A plain substring test would treat "NON LEGAL", "NOT LEGAL" or
        # "ILLEGAL" as a positive verdict; the helper matches whole words.
        return _is_legal_verdict(response.content)
    except Exception as e:
        st.error(f"Error during verification: {str(e)}")
        return False
def generate_analysis(vectorstore, api_key):
    """Generate automated summary and risk analysis.

    Builds a "stuff" ``RetrievalQA`` chain over ``vectorstore`` backed by
    Gemini (temperature 0.3) and asks it two fixed questions: a three-bullet
    summary and a list of risks / red flags.

    Args:
        vectorstore: Vector store exposing ``as_retriever()``.
        api_key: Google API key passed to ``ChatGoogleGenerativeAI``.

    Returns:
        A ``(summary, risks)`` tuple of model answers, or ``(None, None)`` if
        anything fails (the error is also reported through ``st.error``).
    """
    try:
        llm = ChatGoogleGenerativeAI(
            model="gemini-2.0-flash",
            google_api_key=api_key,
            temperature=0.3,
        )
        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=vectorstore.as_retriever(),
        )

        # Generate summary
        summary_query = """
        Provide a concise, three-bullet point summary of this document's main purpose,
        key parties involved, and primary obligations. Use simple language.
        """
        # Identify risks
        risks_query = """
        Identify potential risks, red flags, or important clauses including:
        - Financial obligations, penalties, or fees
        - Auto-renewal clauses
        - Termination conditions
        - Liability limitations
        - Unusual or potentially unfavorable terms
        Format as bullet points.
        """

        # `Chain.run` is deprecated; `invoke` takes the chain's "query" input
        # and returns a dict whose "result" entry holds the answer text.
        summary = qa_chain.invoke({"query": summary_query})["result"]
        risks = qa_chain.invoke({"query": risks_query})["result"]
        return summary, risks
    except Exception as e:
        st.error(f"Error during analysis: {str(e)}")
        return None, None
# Streamlit App Configuration
_PAGE_SETTINGS = {
    "page_title": "AI Legal Doc Explainer",
    "page_icon": "⚖️",
    "layout": "centered",
    "initial_sidebar_state": "auto",
}
st.set_page_config(**_PAGE_SETTINGS)

st.title("⚖️ AI Legal Doc Explainer")
st.write("Upload your legal document (PDF) and get a simple, easy-to-understand explanation.")

# Custom styling injected as raw HTML: blue outline on text inputs and a
# green primary (submit) button.
_CUSTOM_CSS = """
<style>
/* Blue highlight for text input */
.stTextInput > div > div > input {
border-color: #0066cc !important;
box-shadow: 0 0 0 0.2rem rgba(0, 102, 204, 0.25) !important;
}
.stTextInput > div > div > input:focus {
border-color: #0066cc !important;
box-shadow: 0 0 0 0.2rem rgba(0, 102, 204, 0.5) !important;
}
/* Green submit button */
.stButton > button[kind="primary"] {
background-color: #28a745 !important;
border-color: #28a745 !important;
}
.stButton > button[kind="primary"]:hover {
background-color: #218838 !important;
border-color: #1e7e34 !important;
}
</style>
"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
# Initialize session state.
# Streamlit re-executes this script on every widget interaction, so everything
# derived from the uploaded document is cached here instead of being recomputed
# (or lost) on each rerun.
for _state_key, _state_default in (
    ("qa_history", []),
    ("vectorstore", None),
    ("document_processed", False),
    ("summary", None),
    ("risks", None),
    ("active_file_key", None),   # identifies the upload the cached state belongs to
    ("is_legal_doc", None),      # None = not verified yet
    ("proceed_anyway", False),   # user chose to analyse a non-legal document
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default


def _reset_document_state(file_key):
    """Drop all state derived from the previous upload and remember *file_key*."""
    st.session_state.active_file_key = file_key
    st.session_state.is_legal_doc = None
    st.session_state.proceed_anyway = False
    st.session_state.document_processed = False
    st.session_state.vectorstore = None
    st.session_state.summary = None
    st.session_state.risks = None
    st.session_state.qa_history = []


def _save_upload_to_temp(uploaded_pdf):
    """Write the uploaded bytes to a uniquely named temp file and return its path."""
    # A generated name avoids collisions between sessions and keeps the
    # user-supplied file name out of the file-system path.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
        tmp_file.write(uploaded_pdf.getbuffer())
    return tmp_file.name


def _answer_question(user_question, api_key):
    """Answer *user_question* from the cached vector store and return the text."""
    retriever = st.session_state.vectorstore.as_retriever()
    llm = ChatGoogleGenerativeAI(
        model="gemini-2.0-flash",
        google_api_key=api_key,
        temperature=0.2,
    )
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
    )
    # Enhanced prompt for better answers
    enhanced_question = f"""
    Based on the document content, please answer this question clearly and concisely: {user_question}
    If the answer involves specific terms, conditions, or clauses, please quote the relevant text.
    If the information is not clearly stated in the document, please say so.
    """
    # `Chain.run` is deprecated; `invoke` returns a dict with the answer
    # under "result".
    return qa_chain.invoke({"query": enhanced_question})["result"]


def _render_qa_section(api_key):
    """Render the Q&A history, the question input and the latest answer."""
    st.header("Ask Questions About Your Document")
    st.write("Ask specific questions about the document content, terms, or anything you'd like clarified.")

    # Always show previous Q&A history section (even if empty)
    st.subheader("Previous Questions & Answers:")
    if st.session_state.qa_history:
        for i, qa in enumerate(st.session_state.qa_history, 1):
            with st.expander(f"Q{i}: {qa['question'][:50]}...", expanded=False):
                st.write(f"**Question:** {qa['question']}")
                st.write(f"**Answer:** {qa['answer']}")
    else:
        st.write("*No questions asked yet*")
    st.markdown("---")

    # Always show the question input box. The key changes with the history
    # length, so the box starts empty after each answered question.
    user_question = st.text_input(
        "Enter your question:",
        placeholder="e.g., What are the termination conditions? What fees am I responsible for?",
        key=f"question_input_{len(st.session_state.qa_history)}",
    )

    if st.button("Submit Question", type="primary"):
        if user_question:
            with st.spinner("Finding the answer..."):
                try:
                    answer = _answer_question(user_question, api_key)
                    # Add to history
                    st.session_state.qa_history.append({
                        'question': user_question,
                        'answer': answer,
                    })
                except Exception as e:
                    st.error(f"Error generating answer: {str(e)}")
        else:
            st.warning("Please enter a question before submitting.")

    # Display the most recent answer if available
    if st.session_state.qa_history:
        st.markdown("### Answer")
        latest_qa = st.session_state.qa_history[-1]
        st.write(f"**Question:** {latest_qa['question']}")
        st.write(f"**Answer:** {latest_qa['answer']}")
        st.markdown("---")
        st.write("**Ask another question below:**")


# File uploader
uploaded_file = st.file_uploader("Upload a PDF document", type="pdf")

if uploaded_file is not None:
    # A different upload invalidates the cached verification, vector store,
    # analysis and Q&A history. Name + size is used as a cheap identity;
    # NOTE(review): two different files sharing both would not be told apart.
    file_key = f"{uploaded_file.name}:{uploaded_file.size}"
    if st.session_state.active_file_key != file_key:
        _reset_document_state(file_key)

    temp_file_path = None
    try:
        # Check if API key exists
        if "GOOGLE_API_KEY" not in st.secrets:
            st.error("Google API key not found in secrets. Please add your API key.")
            st.stop()
        api_key = st.secrets["GOOGLE_API_KEY"]

        # The PDF is only needed on disk while verification or embedding is
        # still outstanding; later reruns work from session state alone.
        if st.session_state.is_legal_doc is None or st.session_state.vectorstore is None:
            temp_file_path = _save_upload_to_temp(uploaded_file)

        # STEP 1: Verify document type (once per upload, not once per rerun)
        if st.session_state.is_legal_doc is None:
            with st.spinner("Verifying document type..."):
                st.session_state.is_legal_doc = verify_legal_document(temp_file_path, api_key)
        is_legal_doc = st.session_state.is_legal_doc

        # STEP 2: Show immediate notification for non-legal documents
        if not is_legal_doc and not st.session_state.proceed_anyway:
            st.warning("This does not appear to be a legal document.")
            st.info("This tool is optimized for legal documents like contracts, agreements, terms of service, privacy policies, etc.")
            # Ask user what to do
            st.markdown("**What would you like to do?**")
            _, continue_col = st.columns(2)
            with continue_col:
                # st.button is True only for the rerun right after the click,
                # so the decision is persisted in session state.
                if st.button("▶️ Continue Anyway", use_container_width=True):
                    st.session_state.proceed_anyway = True
            if not st.session_state.proceed_anyway:
                st.stop()  # Stop here if user doesn't choose to continue

        # STEP 3: Process the document (either legal doc or user chose to continue)
        if not st.session_state.document_processed:
            if is_legal_doc:
                st.success("Legal document verified!")
            else:
                st.info("Proceeding with analysis as requested...")

            # Keep the vector store across reruns so a failed analysis can be
            # retried without embedding the document again.
            if st.session_state.vectorstore is None:
                with st.spinner("Processing document..."):
                    st.session_state.vectorstore = process_document(temp_file_path)

            # STEP 4: Generate analysis
            with st.spinner("Analyzing document for key points and risks..."):
                summary, risks = generate_analysis(st.session_state.vectorstore, api_key)
            if summary and risks:
                st.session_state.summary = summary
                st.session_state.risks = risks
                st.session_state.document_processed = True

        # Display analysis results if document is processed
        if st.session_state.document_processed:
            st.success("Document analysis complete!")
            with st.expander("Document Summary", expanded=True):
                st.write(st.session_state.summary)
            with st.expander("🚩 Potential Red Flags & Important Clauses", expanded=True):
                st.write(st.session_state.risks)
            st.markdown("---")

            # STEP 5: Q&A Section with persistent chat
            _render_qa_section(api_key)
    except Exception as e:
        st.error(f"An error occurred: {str(e)}")
    finally:
        # Clean up temporary file (also runs when st.stop() ends the script)
        if temp_file_path and os.path.exists(temp_file_path):
            os.remove(temp_file_path)
else:
    st.info("Please upload a PDF document to get started.")
    # Add some helpful information
    with st.expander("ℹ️ What types of documents work best?"):
        st.write("""
        This tool works best with legal documents such as:
        - Contracts and agreements
        - Terms of service
        - Privacy policies
        - Lease agreements
        - Employment contracts
        - Legal notices
        - Service agreements
        The AI will analyze the document and provide:
        - A clear summary of the main points
        - Identification of potential risks or red flags
        - Answers to your specific questions about the content
        """)