Himanshu kumar Vishwakrma committed on
Commit
968023b
·
1 Parent(s): 462faf7

HF Spaces compatible version

Browse files
Files changed (3) hide show
  1. Dockerfile +10 -16
  2. requirements.txt +11 -6
  3. src/streamlit_app.py +52 -229
Dockerfile CHANGED
@@ -1,21 +1,15 @@
1
- FROM python:3.9-slim
2
 
3
  WORKDIR /app
 
4
 
5
- RUN apt-get update && apt-get install -y \
6
- build-essential \
7
- curl \
8
- software-properties-common \
9
- git \
10
- && rm -rf /var/lib/apt/lists/*
11
 
12
- COPY requirements.txt ./
13
- COPY src/ ./src/
14
 
15
- RUN pip3 install -r requirements.txt
16
-
17
- EXPOSE 8501
18
-
19
- HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
20
-
21
- ENTRYPOINT ["streamlit", "run", "src/streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0"]
 
1
# Hugging Face Spaces currently supports up to Python 3.10.
# NOTE: Dockerfile comments must start at the beginning of a line — an inline
# "# ..." after FROM is parsed as extra instruction arguments and fails the build.
FROM python:3.10-slim

WORKDIR /app
COPY . .

# gcc/python3-dev are needed to build native wheels; drop the apt cache
# afterwards to keep the image small.
RUN apt-get update && apt-get install -y --no-install-recommends gcc python3-dev && \
    rm -rf /var/lib/apt/lists/* && \
    pip install --upgrade pip && \
    pip install -r requirements.txt --no-cache-dir && \
    python -m spacy download en_core_web_sm && \
    python -m nltk.downloader punkt wordnet

# HF Spaces expects the app on port 7860.
ENV STREAMLIT_SERVER_PORT=7860
EXPOSE 7860

CMD ["streamlit", "run", "src/streamlit_app.py", "--server.port=7860", "--server.address=0.0.0.0"]
 
 
 
 
 
 
requirements.txt CHANGED
@@ -1,6 +1,11 @@
1
- altair
2
- pandas
3
- streamlit
4
- pypdf
5
- docx
6
- chromadb
 
 
 
 
 
 
1
+ streamlit==1.31.0
2
+ pypdf==4.2.0
3
+ python-docx==1.1.0 # Replaces 'docx' which causes the exceptions error
4
+ chromadb==0.4.24
5
+ sentence-transformers==2.6.0
6
+ transformers==4.38.2
7
+ torch==2.2.1
8
+ accelerate==0.29.3
9
+ huggingface-hub==0.22.2
10
+ spacy==3.7.4
11
+ nltk==3.8.1
src/streamlit_app.py CHANGED
@@ -1,293 +1,116 @@
1
  import streamlit as st
2
  from pypdf import PdfReader
3
  from docx import Document
4
- import os
5
- import time
6
  import chromadb
7
  from chromadb.utils import embedding_functions
8
- from typing import List, Tuple
9
- from transformers import AutoTokenizer, AutoModelForCausalLM
10
- import torch
11
-
12
- # Initialize ChromaDB
13
- client = chromadb.PersistentClient(path="./chroma_db")
14
- sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name="all-MiniLM-L6-v2")
15
-
16
- try:
17
- collection = client.get_collection(name="documents", embedding_function=sentence_transformer_ef)
18
- except:
19
- collection = client.create_collection(name="documents", embedding_function=sentence_transformer_ef)
20
 
21
- # Initialize Hugging Face model and tokenizer
22
- @st.cache_resource
23
- def load_model():
24
- model_name = "google/gemma-1.1-7b-it" # Using the 7B instruct-tuned version
25
- tokenizer = AutoTokenizer.from_pretrained(model_name)
26
- model = AutoModelForCausalLM.from_pretrained(
27
- model_name,
28
- device_map="auto",
29
- torch_dtype=torch.float16
30
- )
31
- return model, tokenizer
32
 
33
- model, tokenizer = load_model()
 
34
 
35
  def chunk_text(text: str, chunk_size: int = 1000) -> List[str]:
36
- """Split text into chunks of approximately chunk_size characters"""
37
  chunks = []
38
  start = 0
39
  while start < len(text):
40
  end = min(start + chunk_size, len(text))
41
- # Try to split at sentence boundary
42
  if end < len(text):
43
  while end > start and text[end] not in {'.', '!', '?', '\n'}:
44
  end -= 1
45
- if end == start: # No sentence boundary found
46
  end = start + chunk_size
47
  chunks.append(text[start:end].strip())
48
  start = end
49
  return chunks
50
 
51
- def process_document(uploaded_file, progress_bar=None, status_text=None):
52
- """Extract text from document and store in ChromaDB with progress tracking"""
53
  text = ""
54
-
55
- # Update status
56
- if status_text:
57
- status_text.text(f"Extracting text from {uploaded_file.name}...")
58
-
59
  if uploaded_file.type == "application/pdf":
60
  reader = PdfReader(uploaded_file)
61
- total_pages = len(reader.pages)
62
- for i, page in enumerate(reader.pages):
63
- text += page.extract_text()
64
- if progress_bar:
65
- progress_bar.progress((i + 1) / (total_pages * 2)) # First half is for extraction
66
-
67
  elif uploaded_file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
68
  doc = Document(uploaded_file)
69
- total_paras = len(doc.paragraphs)
70
- for i, para in enumerate(doc.paragraphs):
71
- text += para.text + "\n"
72
- if progress_bar:
73
- progress_bar.progress((i + 1) / (total_paras * 2)) # First half is for extraction
74
-
75
  elif uploaded_file.type == "text/plain":
76
  text = str(uploaded_file.read(), "utf-8")
77
- if progress_bar:
78
- progress_bar.progress(0.5) # Mark extraction as 50% complete
79
-
80
- # Update status
81
- if status_text:
82
- status_text.text(f"Chunking and storing {uploaded_file.name} in database...")
83
 
84
- # Split text into chunks
85
  chunks = chunk_text(text)
86
-
87
- # Store in ChromaDB
88
  ids = [f"{uploaded_file.name}-{i}" for i in range(len(chunks))]
89
-
90
- # Add chunks in batches for smoother progress updates
91
- batch_size = max(1, len(chunks) // 10) # Create 10 progress updates
92
- for i in range(0, len(chunks), batch_size):
93
- end_idx = min(i + batch_size, len(chunks))
94
- collection.add(
95
- documents=chunks[i:end_idx],
96
- ids=ids[i:end_idx],
97
- metadatas=[{"source": uploaded_file.name} for _ in range(i, end_idx)]
98
- )
99
- if progress_bar:
100
- # Calculate progress for second half (storage)
101
- extraction_half = 0.5 # First 50% was for extraction
102
- storage_progress = (end_idx / len(chunks)) * 0.5 # Second 50% for storage
103
- progress_bar.progress(extraction_half + storage_progress)
104
-
105
- # Complete the progress
106
- if progress_bar:
107
- progress_bar.progress(1.0)
108
- if status_text:
109
- status_text.text(f"Completed processing {uploaded_file.name}")
110
-
111
  return len(chunks)
112
 
113
- @st.cache_data(ttl=300) # Cache results for 5 minutes
114
- def retrieve_relevant_chunks(query: str, k: int = 5) -> Tuple[List[str], List[str]]:
115
- """Retrieve relevant document chunks from ChromaDB with caching for performance"""
116
  results = collection.query(
117
  query_texts=[query],
118
  n_results=k
119
  )
120
  return results['documents'][0], results['metadatas'][0]
121
 
122
- @st.cache_data(ttl=60, show_spinner=False) # Cache for 1 minute
123
- def generate_response(query: str, context: str, temp: float = 0.7) -> str:
124
- """Generate response using Hugging Face Gemma with RAG context and caching"""
125
- prompt = f"""Use the following context to answer the question. If you don't know the answer, say you don't know.
126
-
127
- Context:
128
- {context}
129
-
130
- Question: {query}
131
-
132
- Answer:"""
133
-
134
- # Tokenize the input
135
- input_ids = tokenizer(prompt, return_tensors="pt").to(model.device)
136
-
137
- # Generate response
138
- with torch.no_grad():
139
- outputs = model.generate(
140
- **input_ids,
141
- max_new_tokens=512,
142
- temperature=temp,
143
- do_sample=True if temp > 0 else False,
144
- top_k=50,
145
- top_p=0.95
146
- )
147
-
148
- # Decode the response
149
- response = tokenizer.decode(outputs[0], skip_special_tokens=True)
150
-
151
- # Remove the input prompt from the response
152
- response = response[len(prompt):].strip()
153
- return response
154
-
155
- # Initialize session states
156
- if "messages" not in st.session_state:
157
- st.session_state.messages = []
158
- if "uploaded_files" not in st.session_state:
159
- st.session_state.uploaded_files = []
160
-
161
- # Initialize performance tracking
162
- if "performance_metrics" not in st.session_state:
163
- st.session_state.performance_metrics = {
164
- "total_queries": 0,
165
- "avg_response_time": 0,
166
- "last_response_time": 0
167
- }
168
 
169
- # App title
170
  st.title("📄 Document Q&A Assistant")
171
 
172
- # Sidebar for document upload
173
  with st.sidebar:
174
- st.header("Document Management")
175
  uploaded_files = st.file_uploader(
176
- "Upload documents",
177
  type=["pdf", "docx", "txt"],
178
  accept_multiple_files=True
179
  )
180
 
181
- st.markdown("---")
182
- st.header("Settings")
183
- temperature = st.slider("Temperature", 0.0, 1.0, 0.7)
184
- st.markdown("ℹ️ All processing happens locally")
185
-
186
- if uploaded_files and st.button("Process Documents", use_container_width=True):
187
- progress_container = st.container()
188
-
189
- with progress_container:
190
- st.markdown("### Processing Documents")
191
- progress_bar = st.progress(0)
192
- status_text = st.empty()
193
-
194
- st.markdown("**Progress Metrics**")
195
- metric_col1, metric_col2 = st.columns(2)
196
- total_chunks_metric = metric_col1.empty()
197
- eta_metric = metric_col2.empty()
198
-
199
- start_time = time.time()
200
- total_chunks = 0
201
- files_processed = 0
202
-
203
- for uploaded_file in uploaded_files:
204
- if uploaded_file.name not in st.session_state.uploaded_files:
205
- status_text.text(f"Starting to process {uploaded_file.name}...")
206
-
207
- chunks_count = process_document(uploaded_file, progress_bar, status_text)
208
- total_chunks += chunks_count
209
- files_processed += 1
210
-
211
- elapsed = time.time() - start_time
212
- eta = (elapsed / files_processed) * (len(uploaded_files) - files_processed) if files_processed > 0 else 0
213
- total_chunks_metric.metric("Chunks Created", f"{total_chunks}")
214
- eta_metric.metric("Time Remaining", f"{eta:.1f}s")
215
-
216
- st.session_state.uploaded_files.append(uploaded_file.name)
217
-
218
- progress_bar.progress(1.0)
219
- status_text.text("✅ Processing completed!")
220
-
221
- st.success(f"Successfully processed {files_processed} document(s) into {total_chunks} searchable chunks.")
222
- st.balloons()
223
- st.markdown("### 🎉 Your documents are now ready!")
224
- st.markdown("You can start asking questions about your documents in the chat below.")
225
 
226
- # Display chat messages
227
  for message in st.session_state.messages:
228
  with st.chat_message(message["role"]):
229
  st.markdown(message["content"])
230
 
231
- # Optional: Display performance metrics in an expandable section
232
- with st.sidebar:
233
- if st.session_state.performance_metrics["total_queries"] > 0:
234
- with st.expander("Performance Metrics"):
235
- st.metric("Average Response Time", f"{st.session_state.performance_metrics['avg_response_time']:.2f} seconds")
236
- st.metric("Last Response Time", f"{st.session_state.performance_metrics['last_response_time']:.2f} seconds")
237
- st.metric("Total Queries", f"{st.session_state.performance_metrics['total_queries']}")
238
-
239
- # Chat input
240
- if prompt := st.chat_input("Ask about your documents..."):
241
- query_start_time = time.time()
242
-
243
  st.session_state.messages.append({"role": "user", "content": prompt})
244
 
245
  with st.chat_message("user"):
246
  st.markdown(prompt)
247
 
248
  with st.chat_message("assistant"):
249
- message_placeholder = st.empty()
250
- full_response = ""
251
-
252
- with st.status("Searching documents for relevant information...", expanded=True) as status:
253
- st.write("🔍 Finding relevant information...")
254
- chunks, metadata = retrieve_relevant_chunks(prompt)
255
  context = "\n\n".join(chunks)
256
 
 
 
257
  sources = list(set([m['source'] for m in metadata]))
258
- st.write(f"📚 Found information in {len(sources)} document(s)")
259
 
260
- st.write("💭 Generating response...")
261
- response = generate_response(prompt, context, temp=temperature)
262
- status.update(label="✅ Answer ready!", state="complete", expanded=False)
263
-
264
- words = response.split()
265
- total_words = len(words)
266
- update_frequency = max(1, total_words // 20)
267
-
268
- for i in range(0, total_words, update_frequency):
269
- end_idx = min(i + update_frequency, total_words)
270
- full_response += " ".join(words[i:end_idx]) + " "
271
- message_placeholder.markdown(full_response + "▌")
272
- time.sleep(0.01)
273
-
274
- if sources:
275
- full_response += f"\n\nSources: {', '.join(sources)}"
276
-
277
- message_placeholder.markdown(full_response)
278
-
279
- st.session_state.messages.append({"role": "assistant", "content": full_response})
280
-
281
- end_time = time.time()
282
- query_time = end_time - query_start_time
283
-
284
- st.session_state.performance_metrics["total_queries"] += 1
285
- st.session_state.performance_metrics["last_response_time"] = query_time
286
-
287
- prev_avg = st.session_state.performance_metrics["avg_response_time"]
288
- prev_count = st.session_state.performance_metrics["total_queries"] - 1
289
 
290
- if prev_count > 0:
291
- st.session_state.performance_metrics["avg_response_time"] = (prev_avg * prev_count + query_time) / st.session_state.performance_metrics["total_queries"]
292
- else:
293
- st.session_state.performance_metrics["avg_response_time"] = query_time
 
1
import os
import time
from typing import List, Tuple

import chromadb
import streamlit as st
from chromadb.utils import embedding_functions
from docx import Document
from huggingface_hub import InferenceClient
from pypdf import PdfReader
 
 
 
 
 
 
 
 
 
9
 
10
# Initialize ChromaDB (ephemeral for HF Spaces — Spaces containers have no
# persistent disk, so an in-memory client avoids write-permission errors;
# the index is rebuilt from uploads on every restart).
client = chromadb.EphemeralClient()
# Embedding function used for both indexing and querying; must be the same
# model for both or similarity search is meaningless.
sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(
    model_name="all-MiniLM-L6-v2"
)
# get_or_create avoids the try/except create-vs-get dance on reruns.
collection = client.get_or_create_collection(
    name="documents",
    embedding_function=sentence_transformer_ef
)

# Initialize HF Inference Client — generation runs on the hosted Inference
# API instead of loading the model locally (no GPU needed in the Space).
hf_client = InferenceClient(model="google/gemma-2b-it")
22
 
23
def chunk_text(text: str, chunk_size: int = 1000) -> list[str]:
    """Split *text* into chunks of at most ``chunk_size`` characters.

    Chunks are cut at the nearest sentence boundary (., !, ?, or newline)
    at or before the size limit; if no boundary exists in the window, a
    hard cut at ``chunk_size`` is made. Whitespace is stripped from each
    chunk and empty chunks are dropped.

    Note: builtin generic ``list[str]`` is used for the return annotation
    (PEP 585, fine on Python >= 3.9) so no ``typing`` import is required.
    """
    chunks = []
    start = 0
    while start < len(text):
        end = min(start + chunk_size, len(text))
        if end < len(text):
            # Back up to a sentence boundary so we don't cut mid-sentence.
            while end > start and text[end] not in {'.', '!', '?', '\n'}:
                end -= 1
            if end == start:
                # No boundary in this window — fall back to a hard cut.
                end = start + chunk_size
            else:
                # Include the boundary character in this chunk instead of
                # letting it leak to the start of the next one.
                end += 1
        chunk = text[start:end].strip()
        if chunk:
            chunks.append(chunk)
        start = end
    return chunks
36
 
37
def process_document(uploaded_file):
    """Extract text from an uploaded PDF/DOCX/TXT file, chunk it, and index
    the chunks in the module-level ChromaDB ``collection``.

    Returns the number of chunks stored (0 if nothing extractable).
    Unsupported MIME types yield empty text and therefore return 0.
    """
    text = ""
    if uploaded_file.type == "application/pdf":
        reader = PdfReader(uploaded_file)
        # extract_text() may return None for pages with no extractable text
        # (e.g. scanned images) — guard so join() doesn't raise TypeError.
        text = "\n".join([(page.extract_text() or "") for page in reader.pages])
    elif uploaded_file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
        doc = Document(uploaded_file)
        text = "\n".join([para.text for para in doc.paragraphs])
    elif uploaded_file.type == "text/plain":
        text = str(uploaded_file.read(), "utf-8")

    chunks = chunk_text(text)
    if not chunks:
        # Nothing to index — avoid calling collection.add with empty lists.
        return 0
    # IDs are derived from the file name so chunks can be traced to sources.
    ids = [f"{uploaded_file.name}-{i}" for i in range(len(chunks))]
    collection.add(
        documents=chunks,
        ids=ids,
        metadatas=[{"source": uploaded_file.name} for _ in chunks]
    )
    return len(chunks)
56
 
57
def retrieve_chunks(query: str, k: int = 3) -> tuple[list[str], list[str]]:
    """Return the top-*k* most similar document chunks and their metadata
    for *query*, from the module-level ChromaDB ``collection``.

    Builtin generic annotations (PEP 585) are used so the signature does
    not depend on ``typing.Tuple``/``typing.List`` being imported.
    """
    results = collection.query(
        query_texts=[query],
        n_results=k
    )
    # Chroma returns per-query lists; [0] unwraps our single query.
    return results['documents'][0], results['metadatas'][0]
63
 
64
def generate_response(query: str, context: str) -> str:
    """Answer *query* with the hosted Gemma model, grounded in *context*."""
    # Assemble the RAG prompt: retrieved context first, then the question.
    rag_prompt = (
        f"Context: {context}\n\n"
        f"Question: {query}\n"
        "Answer:"
    )
    completion = hf_client.text_generation(
        rag_prompt,
        max_new_tokens=512,
        temperature=0.7
    )
    return completion
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
 
72
# Streamlit UI
st.title("📄 Document Q&A Assistant")

with st.sidebar:
    st.header("Upload Documents")
    uploaded_files = st.file_uploader(
        "Choose files",
        type=["pdf", "docx", "txt"],
        accept_multiple_files=True
    )

    if uploaded_files:
        # Streamlit re-runs this script on every interaction; track which
        # files were already indexed so we don't call collection.add with
        # duplicate ids (which raises) or re-embed the same document.
        if "processed_files" not in st.session_state:
            st.session_state.processed_files = set()
        with st.spinner("Processing documents..."):
            for file in uploaded_files:
                if file.name in st.session_state.processed_files:
                    continue
                chunks = process_document(file)
                st.session_state.processed_files.add(file.name)
                st.success(f"Processed {file.name} into {chunks} chunks")

if "messages" not in st.session_state:
    st.session_state.messages = []

# Replay the chat history on every rerun.
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

if prompt := st.chat_input("Ask about your documents"):
    st.session_state.messages.append({"role": "user", "content": prompt})

    with st.chat_message("user"):
        st.markdown(prompt)

    with st.chat_message("assistant"):
        with st.spinner("Searching documents..."):
            chunks, metadata = retrieve_chunks(prompt)
            context = "\n\n".join(chunks)

        with st.spinner("Generating response..."):
            response = generate_response(prompt, context)
            sources = list(set([m['source'] for m in metadata]))

        # Append source attribution so users can trace the answer.
        if sources:
            response += f"\n\nSources: {', '.join(sources)}"

        st.markdown(response)

    st.session_state.messages.append({"role": "assistant", "content": response})