Spaces:

Chinar-Q-AI
/

pdf-Interactor

Sleeping

App Files Files Community

ChinarQ-AI committed on May 13, 2025

Commit

fa3ff28

verified ·

1 Parent(s): c8db483

Upload 3 files

Browse files

Files changed (3) hide show

src/CSS/style.css +85 -0
src/PDFprocess_sample.py +49 -0
src/app.py +132 -0

src/CSS/style.css ADDED Viewed

	@@ -0,0 +1,85 @@

+/*
+ * Custom theme overrides for the app.
+ * NOTE(review): the .st-emotion-cache-* and .st-cr selectors look like
+ * auto-generated Streamlit class names; they presumably change between
+ * Streamlit releases - confirm they still match after any upgrade.
+ */
+
+/* Green (#28a745) / blue (#007bff) background overrides */
+.st-emotion-cache-bm2z3a{
+    background-color: #28a745;
+}
+.st-emotion-cache-12fmjuu {
+    background-color: #28a745;
+}
+.st-emotion-cache-6qob1r {
+    background-color: #007bff;
+}
+.st-emotion-cache-1iqhbn7 {
+    background-color: #28a745;
+}
+.st-emotion-cache-1jfa4hj {
+    background-color: #28a745;
+}
+.st-emotion-cache-taue2i {
+    background-color: #007bff;
+}
+
+/* Rounded element with a green highlight on hover */
+.st-emotion-cache-n5r31u {
+    border-radius: 25px;
+}
+.st-emotion-cache-n5r31u:hover {
+    border-color: #28a745;
+    color: #28a745;
+}
+.st-emotion-cache-1bps1dx:hover {
+    background-color: #007bff;
+}
+
+/* Page titles: centred, thin, light text */
+h1 {
+    color: aliceblue;
+    display: flex;
+    justify-content: center;
+    font-weight: 100;
+}
+.st-emotion-cache-1v6glgu > ul[role="listbox"]:not(:last-child) {
+    background-color: #28a745;
+}
+.st-emotion-cache-1iqhbn7:hover {
+    background-color: #28a745;
+}
+.st-cr {
+    border: 1px solid #28a745;
+}
+
+/* Body text defaults */
+p, ol, ul, dl {
+    margin: 0px 0px 1rem;
+    padding: 0px;
+    /* was "1 rem": a space between number and unit makes the declaration invalid */
+    font-size: 1rem;
+    font-weight: 400;
+    color: rgb(0, 0, 0);
+}
+.st-emotion-cache-13ln4jf {
+    width: 100%;
+    padding: 2rem 1rem 10rem;
+    max-width: 68rem;
+}
+
+/* Answer card emitted by app.py (<div class="card"><div class="response">) */
+.card {
+    background-color: #f8f9fa;
+    border-radius: 10px;
+    box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
+    padding: 20px;
+    margin-bottom: 20px;
+}
+.response {
+    font-size: 18px;
+    font-weight: bold;
+    margin-bottom: 10px;
+    color: #333333;
+}

src/PDFprocess_sample.py ADDED Viewed

	@@ -0,0 +1,49 @@

+import tempfile
+import streamlit as st
+import pickle
+from langchain_google_genai import GoogleGenerativeAIEmbeddings
+from langchain_community.document_loaders import PyPDFLoader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_community.vectorstores import FAISS
+import faiss
+def process_pdf(uploaded_file):
+    all_documents = []
+    st.session_state.embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
+    main_placeholder = st.empty()
+    # Creating  a temporary file to store the uploaded PDF's
+    main_placeholder.text("Data Loading...Started...✅✅✅")
+    for uploaded_file in uploaded_file:
+        with tempfile.NamedTemporaryFile(delete=False , suffix='.pdf') as temp_file:
+            temp_file.write(uploaded_file.read()) ## write file to temporary
+            temp_file_path = temp_file.name  # Get the temporary file path
+            # Load the PDF's from the temporary file path
+        loader = PyPDFLoader(temp_file_path) # Document loader
+        doc= loader.load() # load Document
+        main_placeholder.text("Text Splitter...Started...✅✅✅")
+        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200) # Recursive Character String
+        #final_documents = text_splitter.split_documents(doc)# splitting
+        final_documents = text_splitter.split_documents(doc)
+        all_documents.extend(final_documents)
+        if all_documents:
+            main_placeholder.text("Embedding Vector Started Building...✅✅✅")
+            st.session_state.vectors = FAISS.from_documents(all_documents,st.session_state.embeddings)
+            st.session_state.docs = all_documents
+            # Save FAISS vector store to disk
+            faiss_index = st.session_state.vectors.index  # Extract FAISS index
+            faiss.write_index(faiss_index, "faiss_index.bin")  # Save index to a binary file
+            main_placeholder.text("Vector database created!...✅✅✅")
+        else:
+            st.error("No documents found after processing the uploaded files or the pdf is corrupted / unsupported.")

src/app.py ADDED Viewed

	@@ -0,0 +1,132 @@

+import streamlit as st
+import os
+from langchain_groq import ChatGroq
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.chains.combine_documents import create_stuff_documents_chain
+from langchain_core.prompts import ChatPromptTemplate
+from langchain.chains import create_retrieval_chain
+from langchain_community.vectorstores import FAISS
+from langchain_community.document_loaders import PyPDFDirectoryLoader
+from langchain_google_genai import GoogleGenerativeAIEmbeddings
+from dotenv import load_dotenv
+from  PDFprocess_sample import process_pdf
+# Loading GROQ and Google API
+load_dotenv()
+GROQ_API_KEY = os.getenv('GROQ_API_KEY')
+os.environ["GOOGLE_API_KEY"]= os.getenv('GOOGLE_API_KEY')
+#Loading CSS files
+def load_css(file_name):
+    with open(file_name) as f:
+        css = f.read()
+    st.markdown(f"<style>{css}</style>", unsafe_allow_html=True)
+load_css('CSS/style.css')
+#setting up LLM
+llm = ChatGroq(
+    api_key=GROQ_API_KEY,
+    model_name="Llama3-8b-8192"
+)
+prompt = ChatPromptTemplate.from_template(
+    """
+    Answer the questions based on the provided context only.
+    Please provide the most accurate response based on the question. Try to answer in detail in 1500 words
+    <context>
+    {context}
+    <context>
+    Questions: {input}
+    """
+)
+input_method = st.sidebar.selectbox("Choose a method" , ["Choose input method...","Interact with Doc", "Get Ques from Doc"])
+st.sidebar.title("Upload your pdf")
+main_placeholder = st.empty()
+#Document upload
+uploaded_file = st.sidebar.file_uploader("_____________________________________", type="pdf", accept_multiple_files=True)
+st.sidebar.write("Press Submit to process:")
+process = st.sidebar.button("Submit")
+#Document processing to convert it into vectors
+if process:
+    if uploaded_file:
+        # Process the uploaded PDF file
+        process_pdf(uploaded_file)
+    else:
+        st.warning("Please upload a PDF file.")
+if input_method == "Choose input method...":
+    st.title(f"Welcome You all!")
+    st.title("Choose an option in the sidebar")
+    st.title("Now, let's get started!")
+#If User wants to interact with the document
+elif input_method == "Interact with Doc":
+    st.title(f"let's Interact with pdf's")
+    prompt1 = st.text_input("______", placeholder="Enter your Question")
+    # Generate response if question is entered
+    if prompt1 and "vectors" in st.session_state:
+        document_chain = create_stuff_documents_chain(llm, prompt)
+        retriever = st.session_state.vectors.as_retriever()
+        retrieval_chain = create_retrieval_chain(retriever, document_chain)
+        response = retrieval_chain.invoke({'input': prompt1})
+        # st.write(response['answer'])
+        #Get the respose in the card
+        st.markdown(
+        f"""
+        <div class="card">
+            <div class="response">{response['answer']}</div>
+        </div>
+        """,
+        unsafe_allow_html=True,
+    )
+#When User wants to get questions from the doc based on certain topic
+elif input_method == "Get Ques from Doc":
+    st.title(f"Let's Get Ques from Document")
+    prompt2 = """Based on the topic of {topic},
+                kindly provide a comprehensive list of all possible questions that could arise.
+                For each question, provide detailed and explanatory answers in atleast 1000 words detail based on the context,
+                ensuring that the responses are as informative as possible.
+                make sure you strictly follow the {topic}"""
+    topic = st.text_input("Enter a topic", placeholder="What is your topic")
+    # Generate response if question is entered
+    if topic and "vectors" in st.session_state:
+        document_chain = create_stuff_documents_chain(llm, prompt)
+        retriever = st.session_state.vectors.as_retriever()
+        retrieval_chain = create_retrieval_chain(retriever, document_chain)
+        response = retrieval_chain.invoke({'input': prompt2})
+        #Get the respose in the card
+        st.markdown(
+        f"""
+        <div class="card">
+            <div class="response">{response['answer']}</div>
+        </div>
+        """,
+        unsafe_allow_html=True,
+    )