"""Streamlit app that summarizes an uploaded legal document with LlamaIndex and Gemini."""

import logging
import os
from io import BytesIO

import google.generativeai as genai
import PyPDF2
import streamlit as st
from llama_index.core import (
    Document,
    Settings,
    SimpleDirectoryReader,
    StorageContext,
    VectorStoreIndex,
)
from llama_index.embeddings.fastembed import FastEmbedEmbedding
from llama_index.embeddings.gemini import GeminiEmbedding
from llama_index.llms.gemini import Gemini

logger = logging.getLogger(__name__)

# Directory where the uploaded document is staged before ingestion.
UPLOAD_DIR = "./files/"

# Configure LlamaIndex globally: FastEmbed produces the embeddings, while
# Google Gemini (keyed by the GOOGLE_API_KEY environment variable) is the LLM.
Settings.embed_model = FastEmbedEmbedding(model_name="BAAI/bge-small-en-v1.5")
Settings.llm = Gemini(
    api_key=os.getenv("GOOGLE_API_KEY"),
    temperature=0.1,
    model_name="models/gemini-pro",
)


def write_to_file(content, filename="./files/test.pdf"):
    """Write raw bytes to *filename*, creating its parent directory if needed.

    Args:
        content: Bytes to write.
        filename: Destination path; any existing file is overwritten.
    """
    parent_dir = os.path.dirname(filename)
    if parent_dir:
        # Without this, the first run fails when ./files/ does not exist yet.
        os.makedirs(parent_dir, exist_ok=True)
    with open(filename, "wb") as f:
        f.write(content)


def ingest_documents(input_files=None):
    """Load documents from disk with ``SimpleDirectoryReader``.

    Args:
        input_files: Optional iterable of file paths to load. When omitted
            (the original behaviour), every file in ``UPLOAD_DIR`` is loaded.

    Returns:
        The list of documents produced by the reader.
    """
    if input_files:
        reader = SimpleDirectoryReader(input_files=list(input_files))
    else:
        reader = SimpleDirectoryReader(UPLOAD_DIR)
    documents = reader.load_data()
    # Log a count rather than printing the documents themselves, which would
    # dump the full text of the uploaded file to stdout.
    logger.debug("Loaded %d document(s)", len(documents))
    return documents


def load_data(documents):
    """Build and return a ``VectorStoreIndex`` over *documents*."""
    return VectorStoreIndex.from_documents(documents)


def generate_summary(index, document_text):
    """Ask the index's query engine for a structured legal summary.

    Args:
        index: Index exposing ``as_query_engine()``.
        document_text: Plain text of the document, embedded in the prompt.

    Returns:
        The ``response`` attribute of the query result.
    """
    prompt = (
        "You are a skilled legal analyst. Your task is to provide a "
        "comprehensive summary of the given legal document.\n"
        "Analyze the following document and summarize it:\n"
        f"{document_text}\n"
        "Please cover the following aspects:\n"
        "1. Document type and purpose\n"
        "2. Key parties involved\n"
        "3. Main clauses and provisions\n"
        "4. Important dates and deadlines\n"
        "5. Potential legal implications\n"
        "6. Any notable or unusual elements\n"
        "Provide a clear, concise, and professional summary\n"
    )
    query_engine = index.as_query_engine()
    response = query_engine.query(prompt)
    return response.response


def _extract_pdf_text(pdf_bytes):
    """Return the concatenated text of every page in a PDF given as bytes."""
    pdf_reader = PyPDF2.PdfReader(BytesIO(pdf_bytes))
    # `or ""` guards against pages for which no text could be extracted.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)


def main():
    """Render the upload UI, then index and summarize the uploaded document."""
    st.title("Legal Document Summarizer")
    st.write("Upload a legal document, and let our AI summarize it!")

    uploaded_file = st.file_uploader(
        "Choose a legal document file", type=["txt", "pdf"]
    )
    if uploaded_file is None:
        return

    raw_bytes = uploaded_file.getvalue()
    is_pdf = uploaded_file.type == "application/pdf"

    if is_pdf:
        document_text = _extract_pdf_text(raw_bytes)
    else:
        # Replace undecodable bytes instead of crashing on non-UTF-8 text files.
        document_text = raw_bytes.decode("utf-8", errors="replace")

    if not document_text.strip():
        st.warning("No readable text could be extracted from the uploaded file.")
        return

    # Save under an extension matching the real content so the reader does not
    # try to parse a plain-text upload as a PDF.
    staged_path = os.path.join(UPLOAD_DIR, "test.pdf" if is_pdf else "test.txt")
    write_to_file(raw_bytes, staged_path)

    st.write("Analyzing legal document...")

    # Ingest only the file just written, so a leftover file from an earlier
    # upload of the other type never leaks into this index.
    documents = ingest_documents(input_files=[staged_path])

    index = load_data(documents)
    summary = generate_summary(index, document_text)

    st.write("## Legal Document Summary")
    st.write(summary)


if __name__ == "__main__":
    main()