Spaces:

anamjafar6
/

study

Sleeping

App Files Files Community

anamjafar6 committed on Sep 27, 2025

Commit

cb18ce7

verified ·

1 Parent(s): 5b5b3f4

Update app.py

Browse files

Files changed (1) hide show

app.py +197 -411

app.py CHANGED Viewed

@@ -1,218 +1,185 @@
-# PAGEMENTOR - ENHANCED UI/UX RAG STREAMLIT APP
-# IMPORTS & CONFIGURATION
-import streamlit as st  # Main web app framework
-import os  # For environment variables
-import pypdf  # For PDF text extraction
-import numpy as np  # For numerical operations
-import chromadb  # Vector database for storing embeddings
-from sentence_transformers import SentenceTransformer  # For creating text embeddings
-# Groq client (LLM) - will be used if available
 try:
     from groq import Groq
-except Exception:
     Groq = None
-from typing import List, Dict, Any, Optional  # Type hints for better code clarity
-import re  # For text processing
-from uuid import uuid4
-import time
-# CONFIGURABLE CONSTANTS
-SIMILARITY_THRESHOLD = 0.2  # Slightly lower so relevant chunks are not missed
-TOP_K_CHUNKS = 3  # Number of most relevant chunks to retrieve
-CHUNK_SIZE = 300  # Target number of words per text chunk
-EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"  # Free embedding model
-# PDF EXTRACTION FUNCTION
-def extract_text_from_pdf(pdf_file) -> Dict[str, Any]:
-    """Extract text from uploaded PDF file with page numbers."""
     try:
-        pdf_reader = pypdf.PdfReader(pdf_file)  # Create PDF reader object
-        pages_text = []  # List to store text from each page
-        for page_num, page in enumerate(pdf_reader.pages):  # Loop through each page
-            page_text = page.extract_text() or ""  # Extract text (may return None)
-            if page_text and page_text.strip():  # Only add non-empty pages
-                pages_text.append({
-                    'page_number': page_num + 1,  # Page numbers start from 1
-                    'text': page_text.strip()  # Remove extra whitespace
-                })
-        return {
-            'success': True,
-            'pages': pages_text,
-            'total_pages': len(pages_text)
-        }
-    except Exception as e:  # Handle any errors during PDF processing
-        return {
-            'success': False,
-            'error': str(e)
-        }
-# CHUNKING FUNCTION
-def create_chunks(pages_text: List[Dict]) -> List[Dict]:
-    """Split text into smaller chunks while preserving page information."""
     chunks = []
-    chunk_id = 0
-    for page_data in pages_text:
-        page_num = page_data['page_number']
-        text = page_data['text']
         words = text.split()
-        # Create chunks of approximately CHUNK_SIZE words
-        for i in range(0, len(words), CHUNK_SIZE):
-            chunk_words = words[i:i + CHUNK_SIZE]
-            chunk_text = ' '.join(chunk_words)
-            if len(chunk_words) > 20:  # Only keep substantial chunks (more than 20 words)
                 chunks.append({
-                    'id': chunk_id,
-                    'text': chunk_text,
-                    'page_number': page_num,
-                    'word_count': len(chunk_words)
                 })
-                chunk_id += 1
     return chunks
-# EMBEDDING LOADING FUNCTION
-@st.cache_resource
-def load_embedding_model():
-    """Load the sentence transformer model for creating embeddings."""
-    try:
-        model = SentenceTransformer(EMBEDDING_MODEL)
-        return model
-    except Exception as e:
-        st.error(f"Failed to load embedding model: {e}")
         return None
-# VECTOR DATABASE CREATION & QUERY FUNCTIONS
-def create_vector_database(chunks: List[Dict], embedding_model) -> Optional[Any]:
-    """Create ChromaDB vector database with embeddings.
-    FIXES:
-    - Use a unique collection name per uploaded file to avoid "already exists" errors.
-    - Store collection reference and name in session_state so later queries use the right collection.
-    """
     try:
-        client = chromadb.Client()
-        # create a unique collection name per upload to avoid conflicts
-        collection_name = f"pdf_chunks_{uuid4().hex[:8]}"
         collection = client.create_collection(collection_name)
-        texts = [chunk['text'] for chunk in chunks]
-        embeddings = embedding_model.encode(texts).tolist()
-        # Add chunks to database with embeddings and metadata
         collection.add(
             embeddings=embeddings,
             documents=texts,
-            metadatas=[{
-                'page_number': chunk['page_number'],
-                'chunk_id': chunk['id'],
-                'word_count': chunk['word_count']
-            } for chunk in chunks],
-            ids=[str(chunk['id']) for chunk in chunks]
         )
-        # store collection name in session state so queries can reference it
-        st.session_state.collection_name = collection_name
-        return collection
     except Exception as e:
-        st.error(f"Failed to create vector database: {e}")
         return None
-def query_vector_database(collection, query: str, embedding_model, k: int = TOP_K_CHUNKS) -> List[Dict]:
-    """Query the vector database for relevant chunks."""
     try:
         query_embedding = embedding_model.encode([query]).tolist()
         results = collection.query(
             query_embeddings=query_embedding,
-            n_results=k
         )
-        relevant_chunks = []
-        # Chroma returns lists in results; careful with indexing
-        docs = results.get('documents', [])
-        dists = results.get('distances', [])
-        metas = results.get('metadatas', [])
-        if not docs:
-            return []
-        for i in range(len(docs[0])):
-            distance = dists[0][i] if dists else 0
-            # Convert distance to similarity (works if distances in [0,1])
-            similarity = max(0, 1 - distance) if isinstance(distance, (int, float)) else 0
-            if similarity >= SIMILARITY_THRESHOLD:
-                relevant_chunks.append({
-                    'text': docs[0][i],
-                    'page_number': metas[0][i].get('page_number') if metas else None,
-                    'similarity': similarity,
-                    'chunk_id': metas[0][i].get('chunk_id') if metas else None
-                })
-        return relevant_chunks
     except Exception as e:
-        st.error(f"Failed to query database: {e}")
         return []
-# LLM WRAPPER FOR GROQ
-def setup_groq():
-    """Configure Groq client using GROQ_API_KEY from secrets or env."""
-    api_key = None
-    # Hugging Face / Streamlit secrets: try st.secrets first (HF sets as env, but we'll check both)
-    try:
-        api_key = st.secrets.get('GROQ_API_KEY')  # type: ignore
-    except Exception:
-        api_key = None
-    if not api_key:
-        api_key = os.getenv('GROQ_API_KEY')
-    if not api_key:
-        st.error("❌ GROQ_API_KEY not found. Please add it to Hugging Face secrets or environment variables.")
-        return None
-    if Groq is None:
-        st.error("❌ groq package not installed or failed to import. Add 'groq' to requirements.txt")
-        return None
-    try:
-        client = Groq(api_key=api_key)
-        return client
-    except Exception as e:
-        st.error(f"Failed to initialize Groq client: {e}")
-        return None
 def generate_answer_with_groq(client, query: str, relevant_chunks: List[Dict]) -> str:
-    """Generate answer using Groq (chat/completions). Keep prompt strict to only use context.
-    NOTE: Groq client libraries and method names can change. This implementation uses a generic
-    chat completions call pattern; when deploying, if Groq client has different API you may need
-    to adjust the call accordingly. We surface clear error messages to help debugging.
-    """
     try:
-        # Build strict context with page citations
         context_parts = [f"[Page {c['page_number']}]: {c['text']}" for c in relevant_chunks]
-        context = ""
-.join(context_parts)
         prompt = f"""Based ONLY on the following context from a PDF document, answer the user's question.
@@ -230,266 +197,85 @@ Instructions:
 Answer:"""
-        # Example chat-style call — adjust if Groq client exposes a different interface
-        chat_resp = client.chat.completions.create(
-            model="llama3-8b-8192",
-            messages=[
-                {"role": "system", "content": "You are a strict assistant that only uses provided context."},
-                {"role": "user", "content": prompt}
-            ],
-            temperature=0.1,
-            max_tokens=500
-        )
-        # Parse response depending on returned structure
-        if hasattr(chat_resp, 'choices'):
-            # SDK-style response
             return chat_resp.choices[0].message.content
         elif isinstance(chat_resp, dict):
-            # dict-style response
-            choices = chat_resp.get('choices') or []
             if choices:
-                # try common paths
-                return choices[0].get('message', {}).get('content') or choices[0].get('text') or str(choices[0])
         return str(chat_resp)
     except Exception as e:
         return f"Error generating answer: {e}"
-# ANSWER GENERATION FUNCTION
-def generate_answer(query: str, relevant_chunks: List[Dict]) -> str:
-    """Main function to generate answers using Groq; fallback to safe messages."""
-    if not relevant_chunks:
-        return "❌ Insufficient evidence"
-    client = setup_groq()
-    if not client:
-        return "❌ No LLM configured. Please add GROQ_API_KEY to your secrets."
-    return generate_answer_with_groq(client, query, relevant_chunks)
-# STREAMLIT UI
 def main():
-    """Main Streamlit application."""
-    # Page configuration with wide layout for centered design
-    st.set_page_config(
-        page_title="PageMentor",
-        page_icon="📚",
-        layout="wide"
-    )
-    # Custom CSS (kept exactly as your original UI)
-    st.markdown("""
-        <style>
-        /* Center the main container with max width */
-        .main > div {
-            max-width: 900px;
-            margin: 0 auto;
-            padding: 2rem 1rem;
-        }
-        .stApp { background-color: #f8f9fa; }
-        .header-container { text-align: center; padding: 2rem 0; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); border-radius: 15px; margin-bottom: 2rem; box-shadow: 0 4px 6px rgba(0,0,0,0.1); }
-        .header-title { color: white; font-size: 2.5rem; font-weight: 700; margin-bottom: 0.5rem; }
-        .header-subtitle { color: rgba(255,255,255,0.9); font-size: 1.1rem; }
-        .answer-box { background-color: white; border-radius: 15px; padding: 1.5rem; margin: 1rem 0; box-shadow: 0 2px 8px rgba(0,0,0,0.08); border-left: 4px solid #667eea; }
-        .source-card { background-color: #f0f2f6; border-radius: 10px; padding: 1rem; margin: 0.5rem 0; border-left: 3px solid #764ba2; }
-        .stButton > button { background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; border: none; border-radius: 8px; padding: 0.5rem 2rem; font-weight: 600; }
-        .stTextInput > div > div > input { border-radius: 8px; border: 2px solid #e0e0e0; padding: 0.75rem; }
-        .stTextInput > div > div > input:focus { border-color: #667eea; box-shadow: 0 0 0 2px rgba(102,126,234,0.1); }
-        .footer { text-align: center; padding: 2rem 0; margin-top: 3rem; border-top: 1px solid #e0e0e0; color: #666; }
-        </style>
-    """, unsafe_allow_html=True)
-    st.markdown("""
-        <div class="header-container">
-            <div class="header-title">📚 PageMentor</div>
-            <div class="header-subtitle">Book-based AI Tutor - Learn from any PDF document</div>
-        </div>
-    """, unsafe_allow_html=True)
-    st.markdown("---")
-    # Initialize session state for storing data
-    if 'vector_db' not in st.session_state:
-        st.session_state.vector_db = None
-    if 'embedding_model' not in st.session_state:
-        st.session_state.embedding_model = None
-    if 'processed_file' not in st.session_state:
-        st.session_state.processed_file = None
-    if 'collection_name' not in st.session_state:
-        st.session_state.collection_name = None
-    # Load embedding model
-    if st.session_state.embedding_model is None:
-        with st.spinner("🔄 Loading AI models..."):
-            st.session_state.embedding_model = load_embedding_model()
-    col1, col2 = st.columns([2, 1])
-    with col1:
-        with st.container():
-            st.markdown("### 📄 Upload Your Document")
-            st.markdown("*Select a PDF file to start learning*")
-            uploaded_file = st.file_uploader(
-                "Choose a PDF file",
-                type="pdf",
-                help="Upload any PDF document - textbooks, research papers, articles, etc.",
-                label_visibility="collapsed"
-            )
-            # When a new file is uploaded we clear previous DB to avoid accidental cross-document queries
-            if uploaded_file is not None:
-                st.info(f"📎 **File:** {uploaded_file.name} ({uploaded_file.size / 1024:.1f} KB)")
-                if st.button("🚀 Process Document", use_container_width=True):
-                    # Reset previous DB and state before processing new file
-                    if st.session_state.get('vector_db') is not None:
-                        try:
-                            # best-effort: attempt to delete old collection if name stored
-                            old_name = st.session_state.get('collection_name')
-                            if old_name:
-                                client = chromadb.Client()
-                                try:
-                                    client.delete_collection(old_name)
-                                except Exception:
-                                    # if SDK doesn't support delete or fails, ignore and continue
-                                    pass
-                        except Exception:
-                            pass
-                        st.session_state.vector_db = None
-                        st.session_state.collection_name = None
-                        st.session_state.processed_file = None
-                    with st.spinner("📖 Reading and analyzing your document..."):
-                        pdf_result = extract_text_from_pdf(uploaded_file)
-                        if pdf_result['success']:
-                            st.success(f"✅ Successfully processed **{pdf_result['total_pages']} pages**")
-                            with st.spinner("🔍 Creating searchable chunks..."):
-                                chunks = create_chunks(pdf_result['pages'])
-                                st.info(f"📝 Created **{len(chunks)}** searchable text segments")
-                            # Create vector database using a unique collection name
-                            if st.session_state.embedding_model:
-                                with st.spinner("🧠 Building knowledge base..."):
-                                    collection = create_vector_database(chunks, st.session_state.embedding_model)
-                                    if collection:
-                                        st.session_state.vector_db = collection
-                                        st.success("✅ **Ready to answer your questions!**")
-                                        st.session_state.processed_file = uploaded_file.name
-                                        st.balloons()
-                                    else:
-                                        st.error("❌ Failed to create knowledge base")
-                            else:
-                                st.error("❌ AI model not available")
-                        else:
-                            st.error(f"❌ Failed to process PDF: {pdf_result['error']}")
-    # Question answering section
-    if st.session_state.vector_db is not None:
-        st.markdown("---")
-        st.markdown("### 💬 Ask Your Questions")
-        if st.session_state.processed_file:
-            st.markdown(f"*Currently learning from: **{st.session_state.processed_file}***")
-        with st.form(key="question_form"):
-            question = st.text_input(
-                "What would you like to know?",
-                placeholder="e.g., What is the main topic? Summarize chapter 3. Explain the key concepts.",
-                help="Ask any question about the content of your document",
-                label_visibility="collapsed"
-            )
-            submit_button = st.form_submit_button(
-                "🔍 Get Answer",
-                use_container_width=True
-            )
-        if submit_button and question.strip():
-            with st.spinner("🤔 Thinking..."):
-                relevant_chunks = query_vector_database(
-                    st.session_state.vector_db,
-                    question,
-                    st.session_state.embedding_model
-                )
-                if relevant_chunks:
-                    answer = generate_answer(question, relevant_chunks)
-                    st.markdown("#### 🎯 Answer")
-                    st.markdown(f'<div class="answer-box">{answer}</div>', unsafe_allow_html=True)
-                    st.markdown("#### 📚 Top Sources")
-                    st.markdown("*Most relevant passages from your document:*")
-                    for i, chunk in enumerate(relevant_chunks, 1):
-                        with st.expander(
-                            f"**Source {i}** | 📄 Page {chunk['page_number']} | "
-                            f"���� Relevance: {chunk['similarity']*100:.0f}%"
-                        ):
-                            st.markdown(f'<div class="source-card">{chunk["text"][:500]}...</div>', unsafe_allow_html=True)
-                else:
-                    st.warning("❌ No relevant information found for your question. Try rephrasing or asking about topics covered in the document.")
-    else:
-        st.markdown("""
-        <div style='text-align: center; padding: 3rem; background-color: white; border-radius: 15px; margin: 2rem 0;'>
-            <h3>👋 Welcome to PageMentor!</h3>
-            <p style='color: #666; font-size: 1.1rem;'>Upload a PDF document above to start your learning journey.</p>
-            <p style='color: #999;'>Support for textbooks, research papers, articles, and more!</p>
-        </div>
-        """, unsafe_allow_html=True)
-    # Sidebar with About sections
-    with st.sidebar:
-        st.markdown("### 📱 About This App")
-        st.markdown("""
-        PageMentor is an AI-powered learning assistant that helps you understand any PDF document through intelligent Q&A.
-        **Features:**
-        - 🔍 Smart document analysis
-        - 💡 Instant answers with citations
-        - 📚 Source verification
-        - 🎯 High accuracy responses
-        """)
-        st.markdown("---")
-        st.markdown("### ⚙️ Current Settings")
-        st.markdown(f"""
-        - **Similarity Threshold:** {SIMILARITY_THRESHOLD}
-        - **Retrieved Chunks:** {TOP_K_CHUNKS}
-        - **Chunk Size:** {CHUNK_SIZE} words
-        """)
-        st.markdown("---")
-        st.markdown("### 👨‍💻 About Developer")
-        st.markdown("""
-        **© 2025 Anam Jafar**
-        Connect with me:
-        - 💼 [LinkedIn](https://www.linkedin.com/in/anam-jafar6/)
-        - 🚀 AI/ML Engineer & Developer
-        """)
-    st.markdown("""
-    <div class="footer">
-        <p>Built with ❤️ using Streamlit | Powered by AI | © 2025 PageMentor</p>
-        <p style='font-size: 0.9rem; color: #999;'>Transform any document into your personal tutor</p>
-    </div>
-    """, unsafe_allow_html=True)
-# RUN THE APPLICATION
 if __name__ == "__main__":
     main()

+import os
+import streamlit as st
+import numpy as np
+from pypdf import PdfReader
+from typing import List, Dict
+from sentence_transformers import SentenceTransformer
+import chromadb
+# Try importing Groq client
 try:
     from groq import Groq
+except ImportError:
     Groq = None
+# -----------------------------
+# Utility Functions
+# -----------------------------
def load_api_key() -> "str | None":
    """Return the configured Groq API key, or ``None`` when there is none.

    Lookup order:

    1. the ``GROQ_API_KEY`` environment variable;
    2. the ``GROQ_API_KEY`` entry in Streamlit secrets.

    The Hugging Face Hub token is deliberately *not* used as a fallback:
    it is a credential for a different service, so handing it to the Groq
    client would both fail authentication and disclose the token to a
    third party.

    Surrounding whitespace (e.g. a trailing newline pasted into a secrets
    form) is stripped; a blank value counts as "not configured".
    """
    api_key = (os.environ.get("GROQ_API_KEY") or "").strip()
    if api_key:
        return api_key

    try:
        secret = st.secrets.get("GROQ_API_KEY")
    except Exception:
        # Reading st.secrets raises when no secrets file exists, and the
        # exception type differs between Streamlit versions. "No secrets"
        # simply means "no key" here.
        secret = None

    secret = (secret or "").strip() if isinstance(secret, str) or secret is None else ""
    return secret or None
def setup_groq() -> Groq:
    """Build a Groq client from the configured API key.

    Returns the client on success. On any failure (no API key, the
    ``groq`` package could not be imported, or the constructor raised)
    an error is shown through ``st.error`` and ``None`` is returned.
    """
    key = load_api_key()
    if not key:
        st.error("❌ Missing GROQ_API_KEY in environment or Hugging Face secrets.")
        return None

    # ``Groq`` is set to None at import time when the package is missing.
    if Groq is None:
        st.error("❌ Groq library not installed. Please add `groq` to requirements.txt.")
        return None

    try:
        return Groq(api_key=key)
    except Exception as exc:
        st.error(f"Failed to initialize Groq client: {exc}")
    return None
@st.cache_resource
def load_embedding_model(model_name: str = "all-MiniLM-L6-v2") -> SentenceTransformer:
    """Instantiate the sentence-embedding model named ``model_name``.

    The function is wrapped in ``st.cache_resource`` so the model object is
    reused rather than rebuilt each time it is requested.
    """
    model = SentenceTransformer(model_name)
    return model
def pdf_to_chunks(uploaded_file, chunk_size: int = 500, overlap: int = 50) -> List[Dict]:
    """Split a PDF into overlapping word-window chunks, page by page.

    Args:
        uploaded_file: File-like object (or path) accepted by ``PdfReader``.
        chunk_size: Maximum number of whitespace-separated words per chunk.
        overlap: Number of words shared by two consecutive chunks of the
            same page. Must satisfy ``0 <= overlap < chunk_size``.

    Returns:
        A list of ``{"page_number": int, "text": str}`` dicts in reading
        order; page numbers start at 1. Pages without extractable text
        contribute nothing. If the PDF cannot be opened an error is shown
        via ``st.error`` and an empty list is returned.

    Raises:
        ValueError: If ``chunk_size``/``overlap`` would make the window
            stand still or move backwards.
    """
    # Validate up front: a zero step makes range() fail with an unhelpful
    # message, and a negative step would silently yield no chunks at all.
    if chunk_size <= 0 or not 0 <= overlap < chunk_size:
        raise ValueError(
            f"Need chunk_size > 0 and 0 <= overlap < chunk_size "
            f"(got chunk_size={chunk_size}, overlap={overlap})."
        )

    try:
        reader = PdfReader(uploaded_file)
    except Exception as e:
        st.error(f"Error reading PDF: {e}")
        return []

    step = chunk_size - overlap
    chunks = []
    for page_num, page in enumerate(reader.pages, start=1):
        try:
            text = page.extract_text() or ""
        except Exception:
            # One unreadable page should not discard the whole document.
            text = ""

        words = text.split()  # empty for blank pages, so the loop is skipped
        for start in range(0, len(words), step):
            chunks.append({
                "page_number": page_num,
                "text": " ".join(words[start:start + chunk_size]),
            })
            # This window already reached the end of the page; any further
            # window would only repeat words contained in this one.
            if start + chunk_size >= len(words):
                break
    return chunks
def create_vector_database(chunks: List[Dict], embedding_model: SentenceTransformer) -> str:
    """Embed ``chunks`` into a fresh ChromaDB collection and return its name.

    Args:
        chunks: Dicts with a ``"text"`` entry and, normally, a
            ``"page_number"`` entry (the shape produced by
            ``pdf_to_chunks``).
        embedding_model: Object whose ``encode(list_of_str)`` returns one
            embedding per input string.

    Returns:
        The name of the new collection, which is also stored in
        ``st.session_state.collection_name``. ``None`` is returned, after
        reporting the problem with ``st.error``, when ``chunks`` is empty or
        any step fails.
    """
    # Local import keeps this block self-contained; uuid is standard library.
    from uuid import uuid4

    if not chunks:
        st.error("No text chunks extracted from PDF.")
        return None

    client = chromadb.Client()
    # A UUID avoids the name collisions a small random integer can produce
    # (creating a collection whose name already exists fails).
    collection_name = f"pdf_chunks_{uuid4().hex}"
    try:
        collection = client.create_collection(collection_name)
    except Exception as e:
        st.error(f"Error creating collection: {e}")
        return None

    texts = [c["text"] for c in chunks]
    ids = [str(i) for i in range(len(chunks))]
    # Keep metadata small: the text is already stored as the document, so
    # only the page reference is recorded here.
    metadatas = [{"page_number": c.get("page_number", "N/A")} for c in chunks]

    try:
        # Encode in batches so a large document is not embedded in one call.
        embeddings = []
        batch_size = 64
        for i in range(0, len(texts), batch_size):
            emb = embedding_model.encode(texts[i:i + batch_size])
            embeddings.extend(emb.tolist() if hasattr(emb, 'tolist') else list(map(list, emb)))

        collection.add(
            embeddings=embeddings,
            documents=texts,
            ids=ids,
            metadatas=metadatas,
        )
    except Exception as e:
        # Best effort: do not leave a half-filled collection behind.
        try:
            client.delete_collection(collection_name)
        except Exception:
            pass
        st.error(f"Error adding embeddings: {e}")
        return None

    # Store only the collection name (not the object) in session_state.
    previous_name = st.session_state.get("collection_name")
    st.session_state.collection_name = collection_name

    # Best effort: release the collection of the previously processed PDF,
    # which nothing refers to any more.
    if previous_name and previous_name != collection_name:
        try:
            client.delete_collection(previous_name)
        except Exception:
            pass

    return collection_name
def query_vector_database(query: str, embedding_model: SentenceTransformer,
                          top_k: int = 5) -> List[Dict]:
    """Return the stored chunks closest to ``query``.

    The collection is looked up by the name kept in
    ``st.session_state.collection_name``.

    Args:
        query: The user's question.
        embedding_model: Object whose ``encode([query])`` returns an array
            with a ``tolist()`` method.
        top_k: Maximum number of chunks to request.

    Returns:
        A list of ``{"text", "page_number", "similarity"}`` dicts in the
        order the database returned them. ``similarity`` is
        ``max(0, 1 - distance)``, so a smaller distance always gives a
        higher score; it is ``1.0`` when no distance was returned. Every
        failure is reported with ``st.error`` and yields an empty list.
    """
    collection_name = st.session_state.get("collection_name")
    if not collection_name:
        st.error("No active collection found. Upload and process a PDF first.")
        return []

    try:
        client = chromadb.Client()
        collection = client.get_collection(collection_name)
    except Exception as e:
        st.error(f"Error accessing collection: {e}")
        return []

    try:
        query_embedding = embedding_model.encode([query]).tolist()
    except Exception as e:
        st.error(f"Error encoding query: {e}")
        return []

    try:
        results = collection.query(
            query_embeddings=query_embedding,
            n_results=top_k
        )
    except Exception as e:
        st.error(f"Error querying database: {e}")
        return []

    # Each field holds one list per query; a field may be missing or None,
    # so fall back to a single empty result list before taking index 0.
    documents = (results.get("documents") or [[]])[0]
    metadatas = (results.get("metadatas") or [[]])[0]
    dists = (results.get("distances") or [[]])[0]

    relevant_chunks = []
    for i, doc in enumerate(documents):
        meta = (metadatas[i] if i < len(metadatas) else None) or {}
        distance = dists[i] if i < len(dists) else None

        if distance is None:
            similarity = 1.0
        else:
            # Clamp instead of passing large distances through unchanged:
            # otherwise a worse match would display a higher score.
            similarity = max(0.0, 1.0 - float(distance))

        relevant_chunks.append({
            "text": doc,
            "page_number": meta.get("page_number", "N/A"),
            "similarity": similarity
        })
    return relevant_chunks
 def generate_answer_with_groq(client, query: str, relevant_chunks: List[Dict]) -> str:
+    """Generate answer from Groq LLM using retrieved context."""
     try:
         context_parts = [f"[Page {c['page_number']}]: {c['text']}" for c in relevant_chunks]
+        context = "\n\n".join(context_parts) if context_parts else ""
         prompt = f"""Based ONLY on the following context from a PDF document, answer the user's question.
 Answer:"""
+        if hasattr(client, "chat") and hasattr(client.chat, "completions"):
+            chat_resp = client.chat.completions.create(
+                model="llama3-8b-8192",
+                messages=[
+                    {"role": "system", "content": "You are a strict assistant that only uses provided context."},
+                    {"role": "user", "content": prompt}
+                ],
+                temperature=0.1,
+                max_tokens=500
+            )
+        else:
+            chat_resp = client.create(prompt=prompt, max_tokens=500)
+        if hasattr(chat_resp, "choices"):
             return chat_resp.choices[0].message.content
         elif isinstance(chat_resp, dict):
+            choices = chat_resp.get("choices") or []
             if choices:
+                return choices[0].get("message", {}).get("content") \
+                       or choices[0].get("text") \
+                       or str(choices[0])
         return str(chat_resp)
     except Exception as e:
         return f"Error generating answer: {e}"
+# -----------------------------
+# Streamlit UI
+# -----------------------------
 def main():
+    st.set_page_config(page_title="PDF Chatbot with Groq", layout="wide")
+    st.title("📚 PDF Chatbot with Groq")
+    st.sidebar.header("Upload PDF")
+    uploaded_file = st.sidebar.file_uploader("Choose a PDF file", type="pdf")
+    if uploaded_file:
+        if "processed_file" not in st.session_state or \
+           st.session_state.processed_file != uploaded_file.name:
+            with st.spinner("Processing PDF..."):
+                embedding_model = load_embedding_model()
+                chunks = pdf_to_chunks(uploaded_file)
+                if not chunks:
+                    st.error("No text extracted from PDF.")
+                    return
+                collection_name = create_vector_database(chunks, embedding_model)
+                if collection_name:
+                    st.session_state.processed_file = uploaded_file.name
+                    st.success("PDF processed and vector database created!")
+    st.sidebar.header("Ask a Question")
+    query = st.sidebar.text_input("Enter your question:")
+    if query:
+        if "collection_name" not in st.session_state:
+            st.warning("Please upload and process a PDF first.")
+        else:
+            embedding_model = load_embedding_model()
+            groq_client = setup_groq()
+            if groq_client:
+                with st.spinner("Generating answer..."):
+                    relevant_chunks = query_vector_database(query, embedding_model)
+                    if not relevant_chunks:
+                        st.error("No relevant chunks found.")
+                        return
+                    answer = generate_answer_with_groq(groq_client, query, relevant_chunks)
+                st.subheader("Answer:")
+                st.write(answer)
+                st.subheader("Relevant Chunks:")
+                for chunk in relevant_chunks:
+                    st.markdown(
+                        f"**Page {chunk['page_number']} (Score: {chunk['similarity']:.2f})**\n\n"
+                        f"{chunk['text'][:500]}..."
+                    )
 if __name__ == "__main__":
     main()