Spaces:

Parthiban97
/

Chat_Groq_Document_Q_A

Sleeping

App Files Community

Parthiban97 committed on May 27, 2024

Commit

61b0cb3

verified ·

1 Parent(s): 34ab1a2

Upload 12 files

Browse files

Files changed (13) hide show

.env +1 -0
.gitattributes +5 -0
app.py +125 -0
requirements.txt +10 -0
us_census/2005.11401v4.pdf +0 -0
us_census/2306.09782v1.pdf +3 -0
us_census/acsbr-015.pdf +0 -0
us_census/acsbr-016.pdf +3 -0
us_census/acsbr-017.pdf +3 -0
us_census/ahaSENSESinSensationalOrganisaton.pdf +0 -0
us_census/attention.pdf +3 -0
us_census/p70-178.pdf +0 -0
us_census/uk_budget.pdf +3 -0

.env ADDED Viewed

	@@ -0,0 +1 @@


1	+ LANGCHAIN_PROJECT="RAG_Demo"

.gitattributes CHANGED Viewed

@@ -33,3 +33,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+us_census/2306.09782v1.pdf filter=lfs diff=lfs merge=lfs -text
+us_census/acsbr-016.pdf filter=lfs diff=lfs merge=lfs -text
+us_census/acsbr-017.pdf filter=lfs diff=lfs merge=lfs -text
+us_census/attention.pdf filter=lfs diff=lfs merge=lfs -text
+us_census/uk_budget.pdf filter=lfs diff=lfs merge=lfs -text

app.py ADDED Viewed

	@@ -0,0 +1,125 @@

+import streamlit as st
+import os
+import tempfile
+import time
+from langchain_groq import ChatGroq
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.chains.combine_documents import create_stuff_documents_chain
+from langchain_core.prompts import ChatPromptTemplate
+from langchain.chains import create_retrieval_chain
+from langchain_community.vectorstores import FAISS
+from langchain_community.document_loaders import PyPDFLoader
+from langchain_google_genai import GoogleGenerativeAIEmbeddings
+from dotenv import load_dotenv
+load_dotenv()
+# Langmith tracking
+os.environ["LANGCHAIN_TRACING_V2"] = "true"
+st.set_page_config(page_title="Chat with PDFs", page_icon=":books:")
+st.title("Chat Groq Document Q&A")
+# Custom prompt template
+custom_context_input = """
+<context>
+{context}
+<context>
+Questions:{input}
+"""
+# Default prompt template
+default_prompt_template = """
+Answer the questions based on the provided context only.
+Please provide the most accurate response based on the question
+<context>
+{context}
+<context>
+Questions:{input}
+"""
+def vector_embedding(pdf_files):
+    if "vectors" not in st.session_state:
+        st.session_state.embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
+    documents = []
+    for pdf_file in pdf_files:
+        # Save the uploaded file to a temporary location
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
+            tmp_file.write(pdf_file.getvalue())
+            tmp_file_path = tmp_file.name
+        # Load the PDF from the temporary file path
+        loader = PyPDFLoader(tmp_file_path)
+        documents.extend(loader.load()) ## append the files
+        # Remove the temporary file
+        os.remove(tmp_file_path)
+        st.session_state.text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
+        st.session_state.final_documents = st.session_state.text_splitter.split_documents(documents)
+        st.session_state.vectors = FAISS.from_documents(st.session_state.final_documents, st.session_state.embeddings)
+    st.success("Document embedding is completed!")
+# Define model options
+model_options = [
+    "llama3-8b-8192",
+    "llama3-70b-8192",
+    "mixtral-8x7b-32768",
+    "gemma-7b-it"
+]
+# Sidebar elements
+with st.sidebar:
+    st.header("Configuration")
+    st.markdown("Enter your API keys below:")
+    groq_api_key = st.text_input("Enter your GROQ API Key", type="password", help="Get your API key from [GROQ Console](https://console.groq.com/keys)")
+    google_api_key = st.text_input("Enter your Google API Key", type="password", help="Get your API key from [Google AI Studio](https://aistudio.google.com/app/apikey)")
+    langsmith_api_key = st.text_input("Enter your Langsmith API Key", type="password", help="Get your API key from [Langsmith Console](https://smith.langchain.com/o/2a79134f-7562-5c92-a437-96b080547a1e/settings)")
+    selected_model = st.selectbox("Select any Groq Model", model_options)
+    os.environ["GOOGLE_API_KEY"]=str(google_api_key)
+    st.markdown("Upload your PDF files:")
+    uploaded_files = st.file_uploader("Choose PDF files", accept_multiple_files=True, type="pdf")
+    # Custom prompt text areas
+    st.markdown("Enter a custom prompt template (optional):")
+    custom_prompt_template = st.text_area("Custom Prompt Template", placeholder="Enter your custom prompt here...")
+    if st.button("Start Document Embedding"):
+        if uploaded_files:
+            vector_embedding(uploaded_files)
+            st.success("Vector Store DB is Ready")
+        else:
+            st.warning("Please upload at least one PDF file.")
+# Main section for question input and results
+prompt1 = st.text_area("Enter Your Question From Documents")
+if prompt1 and "vectors" in st.session_state:
+    if custom_prompt_template:
+        custom_prompt = custom_prompt_template + custom_context_input
+        prompt = ChatPromptTemplate.from_template(custom_prompt)
+    else:
+        prompt = ChatPromptTemplate.from_template(default_prompt_template)
+    llm = ChatGroq(groq_api_key=groq_api_key, model_name=selected_model)
+    document_chain = create_stuff_documents_chain(llm, prompt)
+    retriever = st.session_state.vectors.as_retriever()
+    retrieval_chain = create_retrieval_chain(retriever, document_chain)
+    start = time.process_time()
+    response = retrieval_chain.invoke({'input': prompt1})
+    st.write("Response time:", time.process_time() - start)
+    st.write(response['answer'])
+    # With a Streamlit expander
+    with st.expander("Document Similarity Search"):
+        # Find the relevant chunks
+        for i, doc in enumerate(response["context"]):
+            st.write(doc.page_content)
+            st.write("--------------------------------")

requirements.txt ADDED Viewed

	@@ -0,0 +1,10 @@

+faiss-cpu
+groq
+PyPDF2
+langchain_google_genai
+langchain
+streamlit
+langchain_community
+python-dotenv
+pypdf

us_census/2005.11401v4.pdf ADDED Viewed

Binary file (885 kB). View file

us_census/2306.09782v1.pdf ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c05b75d95337918cf53899c614cfa0468ac0cccd458c0b285d8ced780a565a27
+size 1051977

us_census/acsbr-015.pdf ADDED Viewed

Binary file (872 kB). View file

us_census/acsbr-016.pdf ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:efdd4140ab4bfd3801771525f4c784dedeaec7c4f83aaa382517aae37ea05eed
+size 2286774

us_census/acsbr-017.pdf ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4cacfe8c64d32bf3a5a7729a271cbf7a526c3bea798c866e075af033f50d5d81
+size 1389492

us_census/ahaSENSESinSensationalOrganisaton.pdf ADDED Viewed

The diff for this file is too large to render. See raw diff

us_census/attention.pdf ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b7d72988fd8107d07f7d278bf0ba6621adb6ed47df74be4014fa4a01f03aff6a
+size 2215244

us_census/p70-178.pdf ADDED Viewed

Binary file (419 kB). View file

us_census/uk_budget.pdf ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:afc978ee917571f6f11ab8f644162e19db9b76d8df82571b91465356f9c92c13
+size 1148980