Update app.py
app.py
CHANGED
@@ -1,82 +1,96 @@
-import streamlit as st
 from PyPDF2 import PdfReader
 from langchain.text_splitter import CharacterTextSplitter
-from
-from
 from langchain.chains import ConversationalRetrievalChain
 from langchain.memory import ConversationBufferMemory
-from langchain_community.llms import HuggingFacePipeline
 from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline
-from

-# -----------------

-# ----------------- Main function -----------------
-def main():
-    st.set_page_config(page_title="PDF Chatbot", layout="wide")
-    st.title("📑 Chat with Multiple PDFs")

     )

-    if user_question:
-        response = st.session_state.conversation({"question": user_question})
-        answer = response["answer"]
-        st.session_state.chat_history.append((user_question, answer))

-        # Display chat
-        for i, (q, a) in enumerate(st.session_state.chat_history):
-            message(q, is_user=True, key=f"user_{i}")
-            message(a, is_user=False, key=f"bot_{i}")

-if __name__ == "__main__":
-    main()
+import gradio as gr
 from PyPDF2 import PdfReader
 from langchain.text_splitter import CharacterTextSplitter
+from langchain.embeddings import HuggingFaceEmbeddings
+from langchain.vectorstores import FAISS
 from langchain.chains import ConversationalRetrievalChain
 from langchain.memory import ConversationBufferMemory
 from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline
+from langchain.llms import HuggingFacePipeline

+# -----------------------
+# Load LLM model locally
+# -----------------------
+model_name = "google/flan-t5-small"  # keep small for Spaces, you can change
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

+pipe = pipeline("text2text-generation", model=model, tokenizer=tokenizer, max_length=512)
+llm = HuggingFacePipeline(pipeline=pipe)

+# Global variables
+db = None
+conversation = None
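Note: depending on the pinned LangChain version, the embedding, vector-store, and pipeline wrappers imported above may instead live in the langchain_community package (the previous version of this file already used langchain_community.llms). A sketch of the equivalent imports, assuming langchain-community is installed:

    from langchain_community.embeddings import HuggingFaceEmbeddings
    from langchain_community.vectorstores import FAISS
    from langchain_community.llms import HuggingFacePipeline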
+# -----------------------
+# Step 1: Process PDFs
+# -----------------------
+def process_pdfs(files):
+    global db, conversation
+
+    text = ""
+    for file in files:
+        pdf = PdfReader(file.name)
+        for page in pdf.pages:
+            text += page.extract_text() or ""
+
+    # Split text
+    splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
+    chunks = splitter.split_text(text)
+
+    # Embeddings
+    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+
+    # Vector DB
+    db = FAISS.from_texts(chunks, embeddings)
+
+    # Memory
+    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
+
+    # Conversation Chain
+    conversation = ConversationalRetrievalChain.from_llm(
+        llm=llm,
+        retriever=db.as_retriever(),
+        memory=memory
     )
+
+    return "✅ PDFs processed! You can now start chatting."
+
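process_pdfs reads each upload via file.name. Depending on the Gradio version, gr.File can hand the callback either temp-file objects (which expose a .name path) or plain path strings; if file.name raises AttributeError, a small helper (the name _to_path is illustrative) keeps the loop working either way:

    def _to_path(f):
        # Gradio may pass a tempfile wrapper or a plain str path, depending on version (assumption)
        return f if isinstance(f, str) else f.name

    # then inside the loop: pdf = PdfReader(_to_path(file))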
+# -----------------------
+# Step 2: Chat Function
+# -----------------------
+def chat(user_input):
+    global conversation, db
+
+    if conversation is None or db is None:
+        return "⚠️ Please upload and process PDFs first.", []
+
+    result = conversation({"question": user_input})
+    answer = result["answer"]
+
+    return answer, result["chat_history"]
+
+
+# -----------------------
+# Gradio UI
+# -----------------------
+with gr.Blocks() as demo:
+    gr.Markdown("## 📚 Multiple PDF Chatbot")
+
+    with gr.Row():
+        pdfs = gr.File(file_types=[".pdf"], file_count="multiple", label="Upload PDFs")
+        process_btn = gr.Button("Process PDFs")
+
+    status = gr.Textbox(label="Status", interactive=False)
+
+    chatbot = gr.Chatbot()
+    user_msg = gr.Textbox(label="Ask a question about your PDFs")
+    send_btn = gr.Button("Send")
+
+    # Actions
+    process_btn.click(process_pdfs, inputs=[pdfs], outputs=[status])
+    send_btn.click(chat, inputs=[user_msg], outputs=[chatbot, chatbot])
+
+demo.launch()
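One caveat with the wiring above: chat returns a plain answer string plus LangChain's chat_history message objects, while gr.Chatbot expects a list of (user, assistant) pairs, and the same chatbot component is listed twice in outputs. A minimal alternative sketch, assuming the tuple-style Chatbot history, that appends each turn and clears the textbox:

    def chat(user_input, history):
        history = history or []
        if conversation is None or db is None:
            history.append((user_input, "⚠️ Please upload and process PDFs first."))
            return history, ""
        result = conversation({"question": user_input})
        history.append((user_input, result["answer"]))
        return history, ""  # second output clears the textbox

    send_btn.click(chat, inputs=[user_msg, chatbot], outputs=[chatbot, user_msg])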
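For the Space to build, the imports above imply roughly the following dependencies; a requirements.txt sketch (package list inferred from the code, version pins omitted, add langchain-community if the community import paths are used):

    gradio
    PyPDF2
    langchain
    transformers
    torch
    sentence-transformers
    faiss-cpu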