Hidayatmahar committed on
Commit
5f3a3e9
·
verified ·
1 Parent(s): 202d4a1

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +101 -0
app.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import streamlit as st
3
+ import faiss
4
+ import openai
5
+ from PyPDF2 import PdfReader
6
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
7
+ from langchain.embeddings.openai import OpenAIEmbeddings
8
+ from langchain.vectorstores import FAISS
9
+ from langchain.prompts import PromptTemplate
10
+ from langchain.chains import RetrievalQA
11
+ from langchain.document_loaders import GoogleDriveLoader
12
+
13
# Configure the OpenAI client from the environment.
# The key is None when OPENAI_API_KEY is not set.
openai.api_key = os.environ.get("OPENAI_API_KEY")
15
+
16
# Google Drive loader setup
def load_documents_from_drive(drive_folder_id):
    """Load the documents stored in a Google Drive folder.

    Args:
        drive_folder_id: ID of the Google Drive folder, passed to
            ``GoogleDriveLoader`` as ``folder_id``.

    Returns:
        Whatever ``GoogleDriveLoader.load()`` returns for that folder.
    """
    return GoogleDriveLoader(folder_id=drive_folder_id).load()
20
+
21
# Helper function to process documents into chunks
def process_documents(documents):
    """Split the text of each document into overlapping chunks.

    Documents that expose a ``page_content`` attribute (as LangChain
    ``Document`` objects do) are split directly from that text. Objects
    without ``page_content`` are treated as PDFs and read from their
    ``file_path`` attribute with ``PdfReader``.

    Args:
        documents: Iterable of document objects, each with either
            ``page_content`` or ``file_path``.

    Returns:
        A flat list of text chunks (1000 characters, 200 overlap).
    """
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = []
    for doc in documents:
        # LangChain loaders return Document objects, which carry their text
        # in page_content and have no file_path attribute.
        text = getattr(doc, "page_content", None)
        if text is None:
            reader = PdfReader(doc.file_path)
            # extract_text() may yield None/empty for image-only pages.
            text = "".join(page.extract_text() or "" for page in reader.pages)
        chunks.extend(text_splitter.split_text(text))
    return chunks
30
+
31
# Function to build FAISS index
def build_faiss_index(chunks):
    """Embed the text chunks with OpenAI and index them in FAISS.

    Args:
        chunks: Non-empty list of text chunks to embed.

    Returns:
        The FAISS vector store built by ``FAISS.from_texts``.

    Raises:
        ValueError: If ``chunks`` is empty. Without this check the failure
            surfaces later as an opaque error from index construction,
            after an embeddings client has already been created.
    """
    if not chunks:
        raise ValueError("No text chunks to index; the documents contained no extractable text.")
    embeddings = OpenAIEmbeddings()
    return FAISS.from_texts(chunks, embeddings)
36
+
37
def _answer_query(vectorstore, query):
    """Run the retrieval QA chain for *query* and return the answer."""
    # Imported lazily (like fpdf in _build_pdf_bytes) so the chat model is
    # only required once a query is actually run.
    from langchain.chat_models import ChatOpenAI

    prompt_template = PromptTemplate(
        input_variables=["context", "question"],
        template="You are a legal assistant. Given the context: {context}, answer the question: {question} succinctly.",
    )
    # RetrievalQA cannot be constructed from a retriever and a prompt alone:
    # it needs an LLM-backed combine-documents chain, which from_chain_type
    # builds. The custom prompt is passed through chain_type_kwargs.
    qa_chain = RetrievalQA.from_chain_type(
        llm=ChatOpenAI(temperature=0),
        chain_type="stuff",
        retriever=vectorstore.as_retriever(),
        chain_type_kwargs={"prompt": prompt_template},
    )
    return qa_chain.run(query)


def _build_pdf_bytes(query, response):
    """Render the query and response into a one-page PDF, returned as bytes."""
    from fpdf import FPDF

    body = f"Query: {query}\n\nResponse: {response}"
    # The built-in FPDF fonts only cover latin-1; replace anything else so an
    # answer containing e.g. curly quotes does not abort PDF generation.
    body = body.encode("latin-1", "replace").decode("latin-1")

    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)
    pdf.multi_cell(0, 10, body)

    # Render in memory rather than to a shared "response.pdf" on disk, which
    # concurrent sessions would overwrite. Depending on the installed fpdf
    # flavour this returns a latin-1 str or a bytearray.
    rendered = pdf.output(dest="S")
    if isinstance(rendered, str):
        return rendered.encode("latin-1")
    return bytes(rendered)


# Streamlit app setup
def main():
    """Streamlit entry point: index Drive documents and answer legal queries.

    The sidebar takes a Google Drive folder ID and a button that loads the
    documents, chunks them, and stores a FAISS index in the session state.
    The main area takes a query, shows the generated answer, and offers it
    as a downloadable PDF.
    """
    st.title("Legal Document Assistance")
    st.sidebar.title("Settings")

    # Input for Google Drive folder ID
    drive_folder_id = st.sidebar.text_input("Google Drive Folder ID", "")

    # Initialize FAISS index
    if st.sidebar.button("Load and Process Documents"):
        folder_id = drive_folder_id.strip()
        if not folder_id:
            st.error("Error: please enter a Google Drive folder ID.")
        else:
            st.write("Loading documents...")
            try:
                documents = load_documents_from_drive(folder_id)
                st.write(f"Loaded {len(documents)} documents.")

                chunks = process_documents(documents)
                st.write(f"Processed into {len(chunks)} chunks.")

                st.session_state.vectorstore = build_faiss_index(chunks)
                # A cached answer belongs to the previous index; drop it.
                if "qa_cache" in st.session_state:
                    del st.session_state["qa_cache"]

                st.write("FAISS index built successfully!")
            except Exception as e:  # UI boundary: report instead of crashing
                st.error(f"Error: {str(e)}")

    # User query input
    query = st.text_input("Enter your legal query:")
    if not query:
        return
    if "vectorstore" not in st.session_state:
        st.info("Load and process documents from the sidebar before asking a question.")
        return

    # Streamlit reruns this script on every widget interaction (including
    # the "Generate PDF" click). Reuse the answer for an unchanged query so
    # the model is not called again and the PDF matches what was displayed.
    cached = st.session_state["qa_cache"] if "qa_cache" in st.session_state else None
    if cached is not None and cached[0] == query:
        response = cached[1]
    else:
        try:
            response = _answer_query(st.session_state.vectorstore, query)
        except Exception as e:  # UI boundary: report instead of crashing
            st.error(f"Error: {str(e)}")
            return
        st.session_state["qa_cache"] = (query, response)

    st.write("Generated Response:")
    st.write(response)

    # Generate and display downloadable PDF
    if st.button("Generate PDF"):
        st.download_button(
            label="Download PDF",
            data=_build_pdf_bytes(query, response),
            file_name="response.pdf",
            mime="application/pdf",
        )


if __name__ == "__main__":
    main()