Spaces:

meesamraza
/

document_gpt

Sleeping

App Files Files Community

meesamraza committed on Aug 11, 2025

Commit

f4cfcfd

verified ·

1 Parent(s): e5f5057

Update app.py

Browse files

Files changed (1) hide show

app.py +43 -5

app.py CHANGED Viewed

@@ -9,6 +9,8 @@ from langchain_community.vectorstores import FAISS
 from langchain.memory import ConversationBufferMemory
 from langchain.chains import ConversationalRetrievalChain
 from langchain_groq import ChatGroq
 # --------------------------
 # Load environment variables
@@ -28,13 +30,15 @@ logging.basicConfig(
 # --------------------------
 def get_pdf_text(pdf_docs):
     text = ""
     for pdf in pdf_docs:
         pdf_reader = PdfReader(pdf)
         for page in pdf_reader.pages:
             extracted_text = page.extract_text()
             if extracted_text:
                 text += extracted_text + "\n"
-    return text
 # --------------------------
 # Text chunking
@@ -78,9 +82,11 @@ def get_conversation_chain(vectorstore):
 # --------------------------
 def handle_userinput(user_question):
     if st.session_state.conversation is not None:
         with st.spinner("🤖 Thinking..."):
             response = st.session_state.conversation({'question': user_question})
             st.session_state.chat_history = response['chat_history']
         # Display chat history in a chat-like format
         for i, message in enumerate(st.session_state.chat_history):
@@ -88,22 +94,41 @@ def handle_userinput(user_question):
                 st.markdown(f"🧑 **You:** {message.content}")
             else:
                 st.markdown(f"🤖 **Bot:** {message.content}")
     else:
         st.warning("⚠ Please process the documents first.")
 # --------------------------
 # Main Streamlit App
 # --------------------------
 def main():
     st.set_page_config(page_title="AI PDF Chatbot", page_icon="📚", layout="wide")
     st.title("📚 AI-Powered PDF Chatbot")
-    st.markdown("Chat with your documents using **LLaMA 3.3** and **Groq AI**. Perfect for research, learning, and exhibitions!")
     # Session state initialization
     if "conversation" not in st.session_state:
         st.session_state.conversation = None
     if "chat_history" not in st.session_state:
         st.session_state.chat_history = None
     # Sidebar - Upload PDFs
     with st.sidebar:
@@ -112,17 +137,23 @@ def main():
         if st.button("🚀 Process Documents"):
             if pdf_docs:
                 with st.spinner("📖 Reading & Processing..."):
-                    raw_text = get_pdf_text(pdf_docs)
                     if raw_text.strip():
                         text_chunks = get_text_chunks(raw_text)
                         vectorstore = get_vectorstore(text_chunks)
                         st.session_state.conversation = get_conversation_chain(vectorstore)
-                        st.success("✅ Documents processed! You can now ask questions.")
                     else:
                         st.error("No valid text found in PDFs.")
             else:
                 st.warning("Please upload at least one PDF.")
     # Main Chat Section
     st.subheader("💬 Ask a Question")
     user_question = st.text_input("Type your question here...")
@@ -137,7 +168,14 @@ def main():
         st.subheader("📝 Chat History")
         for i, message in enumerate(st.session_state.chat_history):
             role = "User" if i % 2 == 0 else "Bot"
-            st.write(f"**{role}:** {message.content}")
 if __name__ == '__main__':
     main()

 from langchain.memory import ConversationBufferMemory
 from langchain.chains import ConversationalRetrievalChain
 from langchain_groq import ChatGroq
+import time
+import io
 # --------------------------
 # Load environment variables
 # --------------------------
 def get_pdf_text(pdf_docs):
     text = ""
+    page_count = 0
     for pdf in pdf_docs:
         pdf_reader = PdfReader(pdf)
+        page_count += len(pdf_reader.pages)
         for page in pdf_reader.pages:
             extracted_text = page.extract_text()
             if extracted_text:
                 text += extracted_text + "\n"
+    return text, page_count
 # --------------------------
 # Text chunking
 # --------------------------
 def handle_userinput(user_question):
     if st.session_state.conversation is not None:
+        start_time = time.time()
         with st.spinner("🤖 Thinking..."):
             response = st.session_state.conversation({'question': user_question})
             st.session_state.chat_history = response['chat_history']
+        elapsed_time = round(time.time() - start_time, 2)
         # Display chat history in a chat-like format
         for i, message in enumerate(st.session_state.chat_history):
                 st.markdown(f"🧑 **You:** {message.content}")
             else:
                 st.markdown(f"🤖 **Bot:** {message.content}")
+        # Stats
+        st.info(f"⏱ Response Time: {elapsed_time}s | 📄 Words: {len(response['answer'].split())}")
     else:
         st.warning("⚠ Please process the documents first.")
+# --------------------------
+# Export chat
+# --------------------------
+def export_chat():
+    if st.session_state.chat_history:
+        chat_text = "\n".join([f"{'User' if i % 2 == 0 else 'Bot'}: {m.content}" for i, m in enumerate(st.session_state.chat_history)])
+        buffer = io.BytesIO(chat_text.encode())
+        st.download_button(
+            label="💾 Download Chat",
+            data=buffer,
+            file_name="chat_history.txt",
+            mime="text/plain"
+        )
 # --------------------------
 # Main Streamlit App
 # --------------------------
 def main():
     st.set_page_config(page_title="AI PDF Chatbot", page_icon="📚", layout="wide")
     st.title("📚 AI-Powered PDF Chatbot")
+    st.markdown("Chat with your documents using **LLaMA 3.3** and **Groq AI**. Perfect for research, learning, and exhibitions! 🚀")
     # Session state initialization
     if "conversation" not in st.session_state:
         st.session_state.conversation = None
     if "chat_history" not in st.session_state:
         st.session_state.chat_history = None
+    if "pages_processed" not in st.session_state:
+        st.session_state.pages_processed = 0
     # Sidebar - Upload PDFs
     with st.sidebar:
         if st.button("🚀 Process Documents"):
             if pdf_docs:
                 with st.spinner("📖 Reading & Processing..."):
+                    raw_text, page_count = get_pdf_text(pdf_docs)
+                    st.session_state.pages_processed = page_count
                     if raw_text.strip():
                         text_chunks = get_text_chunks(raw_text)
                         vectorstore = get_vectorstore(text_chunks)
                         st.session_state.conversation = get_conversation_chain(vectorstore)
+                        st.success(f"✅ {len(pdf_docs)} file(s) processed | 📄 {page_count} pages")
                     else:
                         st.error("No valid text found in PDFs.")
             else:
                 st.warning("Please upload at least one PDF.")
+        # Clear chat
+        if st.button("🗑 Clear Chat"):
+            st.session_state.chat_history = None
+            st.success("Chat cleared.")
     # Main Chat Section
     st.subheader("💬 Ask a Question")
     user_question = st.text_input("Type your question here...")
         st.subheader("📝 Chat History")
         for i, message in enumerate(st.session_state.chat_history):
             role = "User" if i % 2 == 0 else "Bot"
+            st.markdown(f"**{role}:** {message.content}")
+        # Export chat
+        export_chat()
+    # Footer Branding
+    st.markdown("---")
+    st.markdown("**Made with ❤️ by Meesam Raza | Powered by LLaMA 3.3 & Groq AI**")
 if __name__ == '__main__':
     main()