Update app.py
app.py
CHANGED
@@ -10,16 +10,22 @@ from langchain.memory import ConversationBufferMemory
 from langchain.chains import ConversationalRetrievalChain
 from langchain_groq import ChatGroq
 
+# --------------------------
 # Load environment variables
+# --------------------------
 load_dotenv()
 
-#
+# --------------------------
+# Logging configuration
+# --------------------------
 logging.basicConfig(
     level=logging.INFO,
     format='%(asctime)s - %(levelname)s - %(message)s'
 )
 
-#
+# --------------------------
+# PDF text extraction
+# --------------------------
 def get_pdf_text(pdf_docs):
     text = ""
     for pdf in pdf_docs:
@@ -30,7 +36,9 @@ def get_pdf_text(pdf_docs):
             text += extracted_text + "\n"
     return text
 
-#
+# --------------------------
+# Text chunking
+# --------------------------
 def get_text_chunks(text):
     text_splitter = CharacterTextSplitter(
         separator="\n",
@@ -40,70 +48,96 @@ def get_text_chunks(text):
     )
     return text_splitter.split_text(text)
 
-#
+# --------------------------
+# FAISS VectorStore creation
+# --------------------------
 def get_vectorstore(text_chunks):
     embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
-
-    return vectorstore
+    return FAISS.from_texts(texts=text_chunks, embedding=embeddings)
 
-#
+# --------------------------
+# Conversation chain
+# --------------------------
 def get_conversation_chain(vectorstore):
     try:
         llm = ChatGroq(model="llama-3.3-70b-versatile", temperature=0.5)
         memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
-
         conversation_chain = ConversationalRetrievalChain.from_llm(
             llm=llm,
             retriever=vectorstore.as_retriever(),
             memory=memory
         )
-
-        logging.info("Conversation chain created successfully.")
+        logging.info("✅ Conversation chain created successfully.")
         return conversation_chain
     except Exception as e:
         logging.error(f"Error creating conversation chain: {e}")
         st.error("An error occurred while setting up the conversation chain.")
 
+# --------------------------
 # Handle user input
+# --------------------------
 def handle_userinput(user_question):
     if st.session_state.conversation is not None:
-
-
+        with st.spinner("🤖 Thinking..."):
+            response = st.session_state.conversation({'question': user_question})
+            st.session_state.chat_history = response['chat_history']
 
+        # Display chat history in a chat-like format
         for i, message in enumerate(st.session_state.chat_history):
-
-
+            if i % 2 == 0:
+                st.markdown(f"🧑 **You:** {message.content}")
+            else:
+                st.markdown(f"🤖 **Bot:** {message.content}")
     else:
-        st.warning("Please process the documents first.")
+        st.warning("⚠️ Please process the documents first.")
 
-#
+# --------------------------
+# Main Streamlit App
+# --------------------------
 def main():
-
-    st.
+    st.set_page_config(page_title="AI PDF Chatbot", page_icon="📄", layout="wide")
+    st.title("📄 AI-Powered PDF Chatbot")
+    st.markdown("Chat with your documents using **LLaMA 3.3** and **Groq AI**. Perfect for research, learning, and exhibitions!")
 
+    # Session state initialization
     if "conversation" not in st.session_state:
         st.session_state.conversation = None
     if "chat_history" not in st.session_state:
         st.session_state.chat_history = None
 
-
-    user_question = st.text_input("Ask a question about your documents:")
-    if user_question:
-        handle_userinput(user_question)
-
+    # Sidebar - Upload PDFs
     with st.sidebar:
-        st.
-        pdf_docs = st.file_uploader("Upload PDFs
-        if st.button("Process"):
-
-
-
-
-
-
-
-
-
+        st.header("📁 Upload & Process")
+        pdf_docs = st.file_uploader("Upload PDFs", accept_multiple_files=True, type=["pdf"])
+        if st.button("🔄 Process Documents"):
+            if pdf_docs:
+                with st.spinner("📖 Reading & Processing..."):
+                    raw_text = get_pdf_text(pdf_docs)
+                    if raw_text.strip():
+                        text_chunks = get_text_chunks(raw_text)
+                        vectorstore = get_vectorstore(text_chunks)
+                        st.session_state.conversation = get_conversation_chain(vectorstore)
+                        st.success("✅ Documents processed! You can now ask questions.")
+                    else:
+                        st.error("No valid text found in PDFs.")
+            else:
+                st.warning("Please upload at least one PDF.")
+
+    # Main Chat Section
+    st.subheader("💬 Ask a Question")
+    user_question = st.text_input("Type your question here...")
+    if st.button("Submit Question"):
+        if user_question.strip():
+            handle_userinput(user_question)
+        else:
+            st.warning("Please enter a question before submitting.")
+
+    # Chat History
+    if st.session_state.chat_history:
+        st.subheader("📋 Chat History")
+        for i, message in enumerate(st.session_state.chat_history):
+            role = "User" if i % 2 == 0 else "Bot"
+            st.write(f"**{role}:** {message.content}")
 
 if __name__ == '__main__':
-    main()
+    main()
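Two notes on the new version follow. First, the diff begins at line 10 of app.py, so the import block above it never appears. Below is a minimal sketch, assuming standard module paths, of what those first lines plausibly contain, inferred from the names the diff uses (PdfReader and the langchain_community paths are assumptions; the ConversationBufferMemory import is confirmed by the first hunk header):

# --------------------------
# Plausible app.py lines 1-9 (a sketch, not part of this commit)
# --------------------------
import logging

import streamlit as st
from dotenv import load_dotenv
from PyPDF2 import PdfReader  # assumed PDF backend used inside get_pdf_text
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings  # path is an assumption
from langchain_community.vectorstores import FAISS                # path is an assumption
from langchain.memory import ConversationBufferMemory  # confirmed by the first hunk header

Since ChatGroq is constructed without an explicit key, it reads the GROQ_API_KEY environment variable; load_dotenv() fills it from a local .env file, and on a Hugging Face Space it would come from the Space's secrets.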
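Second, the new handle_userinput reads response['chat_history'] rather than response['answer']. That works because a LangChain chain invoked as conversation({'question': ...}) returns its inputs, including the chat history loaded from memory, merged with its outputs. A short sketch of the response shape the code relies on (the question string is a placeholder):

# --------------------------
# Response shape assumed by handle_userinput (sketch)
# --------------------------
response = conversation_chain({'question': 'What is this PDF about?'})
print(response['answer'])            # the model's latest reply
for msg in response['chat_history']:
    # alternating HumanMessage / AIMessage objects, since return_messages=True
    print(type(msg).__name__, msg.content)

This alternation is also why the display loop treats even indices as the user and odd indices as the bot.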