Parthiban97 committed on
Commit
61b0cb3
·
verified ·
1 Parent(s): 34ab1a2

Upload 12 files

Browse files
.env ADDED
@@ -0,0 +1 @@
 
 
1
+ LANGCHAIN_PROJECT="RAG_Demo"
.gitattributes CHANGED
@@ -33,3 +33,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ us_census/2306.09782v1.pdf filter=lfs diff=lfs merge=lfs -text
37
+ us_census/acsbr-016.pdf filter=lfs diff=lfs merge=lfs -text
38
+ us_census/acsbr-017.pdf filter=lfs diff=lfs merge=lfs -text
39
+ us_census/attention.pdf filter=lfs diff=lfs merge=lfs -text
40
+ us_census/uk_budget.pdf filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+ import tempfile
4
+ import time
5
+ from langchain_groq import ChatGroq
6
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
7
+ from langchain.chains.combine_documents import create_stuff_documents_chain
8
+ from langchain_core.prompts import ChatPromptTemplate
9
+ from langchain.chains import create_retrieval_chain
10
+ from langchain_community.vectorstores import FAISS
11
+ from langchain_community.document_loaders import PyPDFLoader
12
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings
13
+ from dotenv import load_dotenv
14
+
15
+
16
# Pull environment variables (e.g. LANGCHAIN_PROJECT) from a local .env file.
load_dotenv()

# Enable LangSmith tracing; authentication additionally needs
# LANGCHAIN_API_KEY to be present in the environment.
os.environ["LANGCHAIN_TRACING_V2"] = "true"

# Page chrome — set_page_config must be the first Streamlit call.
st.set_page_config(page_title="Chat with PDFs", page_icon=":books:")
st.title("Chat Groq Document Q&A")

# Suffix appended to a user-supplied prompt so it still receives the
# retrieved {context} and the user's {input} question.
custom_context_input = """
<context>
{context}
<context>
Questions:{input}
"""

# Template used when the user does not provide a custom prompt.
default_prompt_template = """
Answer the questions based on the provided context only.
Please provide the most accurate response based on the question
<context>
{context}
<context>
Questions:{input}
"""
43
+
44
def vector_embedding(pdf_files):
    """Build a FAISS vector store from the uploaded PDF files.

    The embeddings model, text splitter, split documents, and the FAISS
    index are all cached in ``st.session_state``. If ``"vectors"`` is
    already cached the whole function is a no-op, so uploading new files
    after the first run does not re-embed — clear session state to rebuild.

    Args:
        pdf_files: Streamlit ``UploadedFile`` objects from ``st.file_uploader``.
    """
    if "vectors" not in st.session_state:
        st.session_state.embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

        documents = []
        for pdf_file in pdf_files:
            # PyPDFLoader needs a real filesystem path, so spill the
            # in-memory upload to a temporary file first.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
                tmp_file.write(pdf_file.getvalue())
                tmp_file_path = tmp_file.name

            # FIX: delete the temp file even when PDF parsing raises;
            # previously a corrupt upload leaked one temp file per attempt.
            try:
                loader = PyPDFLoader(tmp_file_path)
                documents.extend(loader.load())  # one Document per page
            finally:
                os.remove(tmp_file_path)

        st.session_state.text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
        st.session_state.final_documents = st.session_state.text_splitter.split_documents(documents)
        st.session_state.vectors = FAISS.from_documents(st.session_state.final_documents, st.session_state.embeddings)

        # NOTE(review): placement inside the cache-miss branch is assumed
        # from the diff rendering — confirm against the original indentation.
        st.success("Document embedding is completed!")
67
+
68
+
69
# Groq chat models the user can pick from in the sidebar.
model_options = [
    "llama3-8b-8192",
    "llama3-70b-8192",
    "mixtral-8x7b-32768",
    "gemma-7b-it"
]

# Sidebar: API keys, model selection, PDF upload, optional custom prompt.
with st.sidebar:
    st.header("Configuration")
    st.markdown("Enter your API keys below:")
    groq_api_key = st.text_input("Enter your GROQ API Key", type="password", help="Get your API key from [GROQ Console](https://console.groq.com/keys)")
    google_api_key = st.text_input("Enter your Google API Key", type="password", help="Get your API key from [Google AI Studio](https://aistudio.google.com/app/apikey)")
    langsmith_api_key = st.text_input("Enter your Langsmith API Key", type="password", help="Get your API key from [Langsmith Console](https://smith.langchain.com/o/2a79134f-7562-5c92-a437-96b080547a1e/settings)")
    selected_model = st.selectbox("Select any Groq Model", model_options)
    # FIX: only export keys the user actually entered. The previous
    # unconditional os.environ["GOOGLE_API_KEY"] = str(google_api_key)
    # wrote "" on every rerun, clobbering a key loaded from .env.
    if google_api_key:
        os.environ["GOOGLE_API_KEY"] = google_api_key
    # FIX: the LangSmith key was collected but never exported, so the
    # tracing forced on at startup (LANGCHAIN_TRACING_V2) could never
    # authenticate against LangSmith.
    if langsmith_api_key:
        os.environ["LANGCHAIN_API_KEY"] = langsmith_api_key
    st.markdown("Upload your PDF files:")
    uploaded_files = st.file_uploader("Choose PDF files", accept_multiple_files=True, type="pdf")

    # Optional prompt prefix; the context/question suffix is appended later.
    st.markdown("Enter a custom prompt template (optional):")
    custom_prompt_template = st.text_area("Custom Prompt Template", placeholder="Enter your custom prompt here...")

    # NOTE(review): the embedding button is assumed to live inside the
    # sidebar — the diff rendering loses indentation; confirm layout.
    if st.button("Start Document Embedding"):
        if uploaded_files:
            vector_embedding(uploaded_files)
            st.success("Vector Store DB is Ready")
        else:
            st.warning("Please upload at least one PDF file.")

# Main section: question input and answer display.
prompt1 = st.text_area("Enter Your Question From Documents")

if prompt1 and "vectors" in st.session_state:
    # Build the prompt: user template (if any) + context/question suffix.
    if custom_prompt_template:
        custom_prompt = custom_prompt_template + custom_context_input
        prompt = ChatPromptTemplate.from_template(custom_prompt)
    else:
        prompt = ChatPromptTemplate.from_template(default_prompt_template)

    llm = ChatGroq(groq_api_key=groq_api_key, model_name=selected_model)
    document_chain = create_stuff_documents_chain(llm, prompt)
    retriever = st.session_state.vectors.as_retriever()
    retrieval_chain = create_retrieval_chain(retriever, document_chain)
    # FIX: perf_counter measures wall-clock time; process_time counts CPU
    # time only and excludes the network wait for the LLM round trip, so
    # the old timing was near-zero regardless of actual latency.
    start = time.perf_counter()
    response = retrieval_chain.invoke({'input': prompt1})
    st.write("Response time:", time.perf_counter() - start)
    st.write(response['answer'])

    # Show the retrieved chunks that grounded the answer.
    with st.expander("Document Similarity Search"):
        for doc in response["context"]:
            st.write(doc.page_content)
            st.write("--------------------------------")
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ faiss-cpu
2
+ groq
3
+ PyPDF2
4
+ langchain_google_genai
5
+ langchain
6
+ streamlit
7
+ langchain_community
8
+ python-dotenv
9
+ pypdf
10
+
us_census/2005.11401v4.pdf ADDED
Binary file (885 kB). View file
 
us_census/2306.09782v1.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c05b75d95337918cf53899c614cfa0468ac0cccd458c0b285d8ced780a565a27
3
+ size 1051977
us_census/acsbr-015.pdf ADDED
Binary file (872 kB). View file
 
us_census/acsbr-016.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:efdd4140ab4bfd3801771525f4c784dedeaec7c4f83aaa382517aae37ea05eed
3
+ size 2286774
us_census/acsbr-017.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4cacfe8c64d32bf3a5a7729a271cbf7a526c3bea798c866e075af033f50d5d81
3
+ size 1389492
us_census/ahaSENSESinSensationalOrganisaton.pdf ADDED
The diff for this file is too large to render. See raw diff
 
us_census/attention.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7d72988fd8107d07f7d278bf0ba6621adb6ed47df74be4014fa4a01f03aff6a
3
+ size 2215244
us_census/p70-178.pdf ADDED
Binary file (419 kB). View file
 
us_census/uk_budget.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:afc978ee917571f6f11ab8f644162e19db9b76d8df82571b91465356f9c92c13
3
+ size 1148980