Files changed (1) hide show
  1. app.py +161 -67
app.py CHANGED
@@ -9,8 +9,9 @@ from langchain_community.vectorstores import FAISS
9
  from langchain_huggingface import HuggingFacePipeline
10
  from langchain_classic.prompts import PromptTemplate
11
  from langchain_classic.chains import RetrievalQA
12
- from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
13
  from huggingface_hub import login
 
14
 
15
 
16
  # --- Page Config & Styling ---
@@ -52,6 +53,49 @@ st.markdown("""
52
  [data-testid="stSidebar"] {
53
  padding-bottom: 50px;
54
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  </style>
56
  """, unsafe_allow_html=True)
57
 
@@ -59,18 +103,25 @@ st.markdown("""
59
  if 'qa_chain' not in st.session_state: st.session_state.qa_chain = None
60
  if 'messages' not in st.session_state: st.session_state.messages = []
61
  if 'processing_done' not in st.session_state: st.session_state.processing_done = False
 
 
 
62
 
63
  # --- Authentication (Secrets Only) ---
64
  hf_token = os.environ.get("HF_TOKEN")
65
 
66
- # --- Model Loading (Cached & CPU Optimized) ---
67
 
68
  @st.cache_resource
69
  def load_embedding_model():
70
  """Load the embedding model once to save time."""
71
  try:
72
  # Using a lightweight, fast embedding model
73
- embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
 
 
 
 
74
  return embeddings
75
  except Exception as e:
76
  st.error(f"Error loading embedding model: {e}")
@@ -78,73 +129,93 @@ def load_embedding_model():
78
 
79
  @st.cache_resource
80
  def load_llm_model(token):
81
- """Load the Gemma LLM once."""
82
  try:
83
  login(token=token)
84
- model_id = "google/gemma-2-2b-it"
85
 
86
  tokenizer = AutoTokenizer.from_pretrained(model_id, token=token)
87
 
88
- # Load model to CPU (float32 is safe for CPU stability)
89
  model = AutoModelForCausalLM.from_pretrained(
90
  model_id,
91
  device_map="cpu",
92
- torch_dtype=torch.float32,
 
93
  token=token
94
  )
95
-
96
- pipe = pipeline(
97
- "text-generation",
98
- model=model,
99
- tokenizer=tokenizer,
100
- max_new_tokens=512,
101
- temperature=0.1,
102
- repetition_penalty=1.1,
103
- return_full_text=False
104
- )
105
- return pipe
106
  except Exception as e:
107
- return None
 
108
 
109
- # --- PDF Processing ---
110
- def process_document(uploaded_file, model_pipeline, embedding_model):
111
  try:
112
  # Save temp file
113
  with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp:
114
  tmp.write(uploaded_file.getvalue())
115
  tmp_path = tmp.name
116
 
117
- # Load & Split
118
  loader = PyPDFLoader(tmp_path)
119
  docs = loader.load()
120
- splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
 
 
 
 
 
121
  chunks = splitter.split_documents(docs)
122
 
123
  # Vector Store (FAISS is faster for in-memory)
124
  vector_store = FAISS.from_documents(chunks, embedding_model)
125
 
126
- # Chain Setup
127
- llm = HuggingFacePipeline(pipeline=model_pipeline)
128
-
129
- template = """<start_of_turn>user
130
- Answer the question based strictly on the context below. Keep answers concise.
131
- Context: {context}
132
- Question: {question}<end_of_turn>
133
- <start_of_turn>model
134
- """
135
- prompt = PromptTemplate(template=template, input_variables=["context", "question"])
136
 
137
- qa_chain = RetrievalQA.from_chain_type(
138
- llm=llm,
139
- retriever=vector_store.as_retriever(search_kwargs={"k": 3}),
140
- chain_type_kwargs={"prompt": prompt},
141
- return_source_documents=True
142
- )
143
- return qa_chain
144
  except Exception as e:
145
  st.error(f"Error processing PDF: {e}")
146
  return None
147
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
148
  # --- Main Layout ---
149
 
150
  # 1. Sidebar Configuration
@@ -154,7 +225,7 @@ with st.sidebar:
154
 
155
  if not hf_token:
156
  st.error("🚨 **HF_TOKEN missing!**")
157
- st.info("Go to Space Settings -> Repository Secrets and add your Hugging Face Access Token as `HF_TOKEN`.")
158
  st.stop()
159
  else:
160
  st.success("βœ… Huggingface Active")
@@ -166,17 +237,20 @@ with st.sidebar:
166
  process_btn = st.button("πŸš€ Process Document", type="primary", use_container_width=True)
167
 
168
  if process_btn:
169
- with st.spinner("🧠 Analyzing PDF"):
170
  # Load models (cached)
171
- llm_pipeline = load_llm_model(hf_token)
172
  embed_model = load_embedding_model()
173
 
174
- if llm_pipeline and embed_model:
175
- qa_chain = process_document(uploaded_file, llm_pipeline, embed_model)
176
- if qa_chain:
177
- st.session_state.qa_chain = qa_chain
 
 
178
  st.session_state.processing_done = True
179
- st.success("Done! You can now chat.")
 
180
  else:
181
  st.error("Failed to process document.")
182
  else:
@@ -184,13 +258,14 @@ with st.sidebar:
184
 
185
  if st.session_state.processing_done:
186
  st.markdown("---")
 
 
187
  if st.button("πŸ—‘οΈ Clear Chat History", use_container_width=True):
188
  st.session_state.messages = []
189
  st.rerun()
190
 
191
  # 2. Main Chat Area
192
  st.title("πŸ“—πŸ’¬ DocTalk - Chat With PDF")
193
- #st.caption("Powered by Google Gemma-2-2B-IT")
194
 
195
  if st.session_state.processing_done:
196
  # Display History
@@ -205,29 +280,48 @@ if st.session_state.processing_done:
205
  st.markdown(user_input)
206
 
207
  with st.chat_message("assistant"):
208
- with st.spinner("Thinking..."):
209
- try:
210
- response = st.session_state.qa_chain.invoke({"query": user_input})
211
- answer = response['result']
212
-
213
- st.markdown(answer)
214
- st.session_state.messages.append({"role": "assistant", "content": answer})
215
-
216
- # Optional: Show sources
217
- with st.expander("πŸ”Ž View Source Context"):
218
- for doc in response['source_documents']:
219
- st.caption(f"Page {doc.metadata.get('page', '?')}: {doc.page_content[:200]}...")
220
-
221
- except Exception as e:
222
- st.error(f"An error occurred: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
223
  else:
224
  # Empty State
225
  st.info("πŸ‘‹ **Welcome!** Please upload a PDF in the sidebar to begin chatting.")
226
  st.markdown("""
227
  **How it works:**
228
- 1. Upload a PDF document.
229
- 2. Click 'Process Document'.
230
- 3. Ask questions and get answers based strictly on your file.
231
  """)
232
 
233
  # --- Footer ---
 
9
  from langchain_huggingface import HuggingFacePipeline
10
  from langchain_classic.prompts import PromptTemplate
11
  from langchain_classic.chains import RetrievalQA
12
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, TextIteratorStreamer
13
  from huggingface_hub import login
14
+ from threading import Thread
15
 
16
 
17
  # --- Page Config & Styling ---
 
53
  [data-testid="stSidebar"] {
54
  padding-bottom: 50px;
55
  }
56
+ /* Responsive Design */
57
+ @media (max-width: 768px) {
58
+ /* Make sidebar collapsible on mobile */
59
+ [data-testid="stSidebar"] {
60
+ width: 100% !important;
61
+ }
62
+
63
+ /* Adjust chat input for mobile */
64
+ .stChatInput {
65
+ font-size: 16px !important;
66
+ }
67
+
68
+ /* Better spacing on mobile */
69
+ .block-container {
70
+ padding: 1rem !important;
71
+ }
72
+
73
+ /* Footer text smaller on mobile */
74
+ .footer {
75
+ font-size: 12px;
76
+ padding: 8px;
77
+ }
78
+ }
79
+ @media (max-width: 480px) {
80
+ /* Extra small devices */
81
+ h1 {
82
+ font-size: 1.5rem !important;
83
+ }
84
+
85
+ .stButton button {
86
+ font-size: 14px !important;
87
+ }
88
+ }
89
+ /* Touch-friendly buttons */
90
+ .stButton button {
91
+ min-height: 44px;
92
+ padding: 0.5rem 1rem;
93
+ }
94
+ /* Better chat message display on mobile */
95
+ [data-testid="stChatMessage"] {
96
+ max-width: 100%;
97
+ padding: 0.5rem;
98
+ }
99
  </style>
100
  """, unsafe_allow_html=True)
101
 
 
103
  if 'qa_chain' not in st.session_state: st.session_state.qa_chain = None
104
  if 'messages' not in st.session_state: st.session_state.messages = []
105
  if 'processing_done' not in st.session_state: st.session_state.processing_done = False
106
+ if 'vector_store' not in st.session_state: st.session_state.vector_store = None
107
+ if 'model' not in st.session_state: st.session_state.model = None
108
+ if 'tokenizer' not in st.session_state: st.session_state.tokenizer = None
109
 
110
  # --- Authentication (Secrets Only) ---
111
  hf_token = os.environ.get("HF_TOKEN")
112
 
113
+ # --- Model Loading (Cached & Optimized) ---
114
 
115
@st.cache_resource
def load_embedding_model():
    """Build and cache the sentence-embedding model used to index PDF chunks.

    Returns the embeddings object, or falls through to None if loading fails
    (the error is surfaced in the Streamlit UI).
    """
    try:
        # Lightweight MiniLM encoder on CPU; normalized vectors play well
        # with FAISS similarity search.
        return HuggingFaceEmbeddings(
            model_name="all-MiniLM-L6-v2",
            model_kwargs={'device': 'cpu'},
            encode_kwargs={'normalize_embeddings': True},
        )
    except Exception as e:
        st.error(f"Error loading embedding model: {e}")
 
129
 
130
@st.cache_resource
def load_llm_model(token):
    """Authenticate with the Hugging Face Hub and load Gemma for streaming.

    Args:
        token: Hugging Face access token (from the HF_TOKEN secret).

    Returns:
        A (model, tokenizer) pair on success, or (None, None) on failure
        (the error is shown in the Streamlit UI).
    """
    repo_id = "google/gemma-2-2b-it"
    try:
        login(token=token)
        gemma_tokenizer = AutoTokenizer.from_pretrained(repo_id, token=token)
        # CPU-only load: float32 for numeric stability on CPU, and
        # low_cpu_mem_usage to cap peak RAM while weights stream in.
        gemma_model = AutoModelForCausalLM.from_pretrained(
            repo_id,
            device_map="cpu",
            torch_dtype=torch.float32,
            low_cpu_mem_usage=True,
            token=token,
        )
    except Exception as e:
        st.error(f"Error loading LLM: {e}")
        return None, None
    return gemma_model, gemma_tokenizer
152
 
153
+ # --- PDF Processing (Optimized) ---
154
def process_document(uploaded_file, embedding_model):
    """Index an uploaded PDF into an in-memory FAISS vector store.

    Args:
        uploaded_file: Streamlit UploadedFile holding the raw PDF bytes.
        embedding_model: Embedding model used to vectorize the chunks.

    Returns:
        A FAISS vector store built over the document chunks, or None if
        any step fails (the error is shown in the Streamlit UI).
    """
    tmp_path = None
    try:
        # PyPDFLoader needs a real filesystem path, so spill the upload
        # to a temp file first.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp:
            tmp.write(uploaded_file.getvalue())
            tmp_path = tmp.name

        loader = PyPDFLoader(tmp_path)
        docs = loader.load()

        # Larger chunks with less overlap reduce the chunk count (and
        # therefore embedding time) while keeping enough local context.
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=1500,
            chunk_overlap=50
        )
        chunks = splitter.split_documents(docs)

        # Vector Store (FAISS is faster for in-memory)
        return FAISS.from_documents(chunks, embedding_model)
    except Exception as e:
        st.error(f"Error processing PDF: {e}")
        return None
    finally:
        # Fix: the original only unlinked on the happy path, leaking the
        # temp file whenever loading/splitting/embedding raised. Clean up
        # on success AND failure.
        if tmp_path and os.path.exists(tmp_path):
            os.unlink(tmp_path)
182
 
183
def get_relevant_context(vector_store, question):
    """Fetch the chunks most relevant to *question* from the vector store.

    Returns:
        A (context, docs) pair: the retrieved chunk texts joined with blank
        lines, plus the raw documents (used by the source-citation expander).
    """
    # k=2 keeps the assembled prompt short enough for fast CPU generation.
    matches = vector_store.as_retriever(search_kwargs={"k": 2}).invoke(question)
    joined = "\n\n".join(d.page_content for d in matches)
    return joined, matches
189
+
190
def stream_response(model, tokenizer, prompt):
    """Yield decoded text chunks as the model generates them.

    Runs model.generate on a background thread and streams tokens through
    a TextIteratorStreamer so the UI can render the answer incrementally.
    """
    encoded = tokenizer(prompt, return_tensors="pt")

    # skip_prompt drops the echoed input; skip_special_tokens strips
    # control tokens from the decoded output.
    streamer = TextIteratorStreamer(
        tokenizer, skip_prompt=True, skip_special_tokens=True
    )

    # Sampling config: low temperature + repetition penalty for focused,
    # non-repetitive answers; 200 new tokens caps CPU latency.
    gen_args = {
        **encoded,
        "streamer": streamer,
        "max_new_tokens": 200,
        "temperature": 0.2,
        "top_p": 0.9,
        "repetition_penalty": 1.15,
        "do_sample": True,
    }

    # Generation must run off-thread so this generator can consume the
    # streamer concurrently.
    worker = Thread(target=model.generate, kwargs=gen_args)
    worker.start()

    # The streamer blocks until tokens arrive and stops at end-of-sequence.
    yield from streamer

    worker.join()
218
+
219
  # --- Main Layout ---
220
 
221
  # 1. Sidebar Configuration
 
225
 
226
  if not hf_token:
227
  st.error("🚨 **HF_TOKEN missing!**")
228
+ st.info("Go to Space Settings β†’ Repository Secrets and add your Hugging Face Access Token as `HF_TOKEN`.")
229
  st.stop()
230
  else:
231
  st.success("βœ… Huggingface Active")
 
237
  process_btn = st.button("πŸš€ Process Document", type="primary", use_container_width=True)
238
 
239
  if process_btn:
240
+ with st.spinner("🧠 Analyzing PDF ..."):
241
  # Load models (cached)
242
+ model, tokenizer = load_llm_model(hf_token)
243
  embed_model = load_embedding_model()
244
 
245
+ if model and tokenizer and embed_model:
246
+ vector_store = process_document(uploaded_file, embed_model)
247
+ if vector_store:
248
+ st.session_state.vector_store = vector_store
249
+ st.session_state.model = model
250
+ st.session_state.tokenizer = tokenizer
251
  st.session_state.processing_done = True
252
+ st.success("βœ… Done! You can now chat with streaming responses.")
253
+ st.rerun()
254
  else:
255
  st.error("Failed to process document.")
256
  else:
 
258
 
259
  if st.session_state.processing_done:
260
  st.markdown("---")
261
+ st.info("βœ… Document Processed")
262
+
263
  if st.button("πŸ—‘οΈ Clear Chat History", use_container_width=True):
264
  st.session_state.messages = []
265
  st.rerun()
266
 
267
  # 2. Main Chat Area
268
  st.title("πŸ“—πŸ’¬ DocTalk - Chat With PDF")
 
269
 
270
  if st.session_state.processing_done:
271
  # Display History
 
280
  st.markdown(user_input)
281
 
282
  with st.chat_message("assistant"):
283
+ try:
284
+ # Get relevant context
285
+ context, source_docs = get_relevant_context(st.session_state.vector_store, user_input)
286
+
287
+ # Build prompt
288
+ prompt = f"""<|system|>
289
+ You are a helpful assistant. Answer based only on the context provided. Be concise.</s>
290
+ <|user|>
291
+ Context: {context}
292
+ Question: {user_input}</s>
293
+ <|assistant|>
294
+ """
295
+
296
+ # Stream the response
297
+ response_placeholder = st.empty()
298
+ full_response = ""
299
+
300
+ for chunk in stream_response(st.session_state.model, st.session_state.tokenizer, prompt):
301
+ full_response += chunk
302
+ response_placeholder.markdown(full_response + "β–Œ")
303
+
304
+ # Final update without cursor
305
+ response_placeholder.markdown(full_response)
306
+
307
+ # Save to history
308
+ st.session_state.messages.append({"role": "assistant", "content": full_response})
309
+
310
+ # Optional: Show sources
311
+ with st.expander("πŸ”Ž View Source Context"):
312
+ for i, doc in enumerate(source_docs):
313
+ st.caption(f"**Source {i+1}** (Page {doc.metadata.get('page', '?')}): {doc.page_content[:150]}...")
314
+
315
+ except Exception as e:
316
+ st.error(f"An error occurred: {e}")
317
  else:
318
  # Empty State
319
  st.info("πŸ‘‹ **Welcome!** Please upload a PDF in the sidebar to begin chatting.")
320
  st.markdown("""
321
  **How it works:**
322
+ 1. Upload a PDF document
323
+ 2. Click 'Process Document'
324
+ 3. Ask questions and get **live streaming answers**
325
  """)
326
 
327
  # --- Footer ---