stevafernandes committed on
Commit
a8538b1
·
verified ·
1 Parent(s): 60f48e3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -114
app.py CHANGED
@@ -1,56 +1,16 @@
1
  import streamlit as st
2
- from PyPDF2 import PdfReader
3
- from io import BytesIO
4
  import os
5
- import tempfile
6
 
7
- # Updated imports for current LangChain
8
- from langchain_text_splitters import RecursiveCharacterTextSplitter
9
  from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
10
  from langchain_community.vectorstores import FAISS
11
  from langchain_core.prompts import PromptTemplate
12
  from langchain_core.output_parsers import StrOutputParser
13
- from langchain_core.runnables import RunnablePassthrough
14
 
15
  # --- Configuration ---
16
- # Get API key from Hugging Face Secrets
17
  GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "")
18
 
19
- # Path to your pre-uploaded PDF file in the Hugging Face Space
20
- # Place your PDF in the same directory as app.py or specify a subdirectory
21
- PDF_FILE_PATH = "Papal_Encyclicals.pdf" # Change this to your PDF filename
22
-
23
- # Use temporary directory for FAISS index
24
- TEMP_DIR = tempfile.gettempdir()
25
- FAISS_INDEX_PATH = os.path.join(TEMP_DIR, "faiss_index")
26
-
27
-
28
def get_pdf_text(pdf_path):
    """Extract and concatenate the text of every page in the PDF at *pdf_path*.

    Returns the joined page text, or "" if the file cannot be read (the
    error is surfaced to the user via st.error rather than raised).
    """
    try:
        reader = PdfReader(pdf_path)
        # extract_text() may return None for image-only pages; skip those.
        page_texts = [page.extract_text() for page in reader.pages]
        return "".join(t for t in page_texts if t)
    except Exception as exc:
        st.error(f"Error reading PDF: {str(exc)}")
        return ""
41
-
42
-
43
def get_text_chunks(text):
    """Break raw document text into large overlapping chunks for embedding."""
    return RecursiveCharacterTextSplitter(
        chunk_size=10000,
        chunk_overlap=1000,
    ).split_text(text)
47
-
48
-
49
def get_vector_store(text_chunks, api_key):
    """Embed *text_chunks* with Google embeddings and persist a FAISS index.

    The index is written to FAISS_INDEX_PATH on disk; nothing is returned.
    """
    embedder = GoogleGenerativeAIEmbeddings(
        model="models/embedding-001",
        google_api_key=api_key,
    )
    FAISS.from_texts(text_chunks, embedding=embedder).save_local(FAISS_INDEX_PATH)
54
 
55
 
56
  def get_conversational_chain(api_key):
@@ -72,7 +32,7 @@ def get_conversational_chain(api_key):
72
 
73
  Answer (based only on the context above):
74
  """
75
- model = ChatGoogleGenerativeAI(model="gemini-3-flash-preview", temperature=0, google_api_key=api_key)
76
  prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
77
 
78
  chain = prompt | model | StrOutputParser()
@@ -84,11 +44,9 @@ def format_docs(docs):
84
  return "\n\n".join(doc.page_content for doc in docs)
85
 
86
 
87
- def user_input(user_question, api_key):
88
  """Process user question and return answer from the PDF context."""
89
- embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=api_key)
90
- new_db = FAISS.load_local(FAISS_INDEX_PATH, embeddings, allow_dangerous_deserialization=True)
91
- docs = new_db.similarity_search(user_question)
92
 
93
  chain = get_conversational_chain(api_key)
94
  context = format_docs(docs)
@@ -97,21 +55,18 @@ def user_input(user_question, api_key):
97
 
98
 
99
  @st.cache_resource
100
- def initialize_vector_store(_api_key):
101
- """Initialize the vector store from the pre-uploaded PDF (cached)."""
102
- if not os.path.exists(PDF_FILE_PATH):
103
- return False, f"PDF file not found at: {PDF_FILE_PATH}"
104
-
105
- try:
106
- raw_text = get_pdf_text(PDF_FILE_PATH)
107
- if not raw_text.strip():
108
- return False, "No text could be extracted from the PDF."
109
-
110
- text_chunks = get_text_chunks(raw_text)
111
- get_vector_store(text_chunks, _api_key)
112
- return True, "PDF processed successfully!"
113
- except Exception as e:
114
- return False, f"Error processing PDF: {str(e)}"
115
 
116
 
117
  def main():
@@ -122,29 +77,25 @@ def main():
122
  initial_sidebar_state="collapsed"
123
  )
124
 
125
- # Custom CSS for clean, professional appearance
126
  st.markdown(
127
  """
128
  <style>
129
- /* Hide Streamlit header, footer, and menu */
130
  #MainMenu {visibility: hidden;}
131
  header {visibility: hidden;}
132
  footer {visibility: hidden;}
133
  .stDeployButton {display: none;}
134
 
135
- /* Remove top padding caused by hidden header */
136
  .block-container {
137
  padding-top: 2rem;
138
  padding-bottom: 2rem;
139
  max-width: 800px;
140
  }
141
 
142
- /* Clean white background */
143
  .stApp {
144
  background-color: #ffffff;
145
  }
146
 
147
- /* Typography */
148
  .main-title {
149
  font-size: 2.5rem;
150
  font-weight: 600;
@@ -161,21 +112,11 @@ def main():
161
  margin-bottom: 2rem;
162
  }
163
 
164
- /* Success message styling */
165
- .stSuccess {
166
- background-color: #f0f9f4;
167
- border: 1px solid #86efac;
168
- border-radius: 8px;
169
- padding: 0.75rem 1rem;
170
- }
171
-
172
- /* Input field styling */
173
  .stTextInput > div > div > input {
174
  border: 1px solid #e0e0e0;
175
  border-radius: 8px;
176
  padding: 0.75rem 1rem;
177
  font-size: 1rem;
178
- transition: border-color 0.2s ease;
179
  }
180
 
181
  .stTextInput > div > div > input:focus {
@@ -183,16 +124,6 @@ def main():
183
  box-shadow: 0 0 0 2px rgba(74, 144, 217, 0.1);
184
  }
185
 
186
- /* Section headers */
187
- .section-header {
188
- font-size: 1.1rem;
189
- font-weight: 500;
190
- color: #333333;
191
- margin-top: 1.5rem;
192
- margin-bottom: 1rem;
193
- }
194
-
195
- /* Answer box styling */
196
  .answer-container {
197
  background-color: #fafafa;
198
  border: 1px solid #e8e8e8;
@@ -216,14 +147,6 @@ def main():
216
  line-height: 1.7;
217
  }
218
 
219
- /* Divider */
220
- hr {
221
- border: none;
222
- border-top: 1px solid #eaeaea;
223
- margin: 1.5rem 0;
224
- }
225
-
226
- /* Status indicator */
227
  .status-badge {
228
  display: inline-flex;
229
  align-items: center;
@@ -244,7 +167,6 @@ def main():
244
  border-radius: 50%;
245
  }
246
 
247
- /* Hide label for cleaner look */
248
  .stTextInput label {
249
  font-size: 0.95rem;
250
  color: #444444;
@@ -261,33 +183,32 @@ def main():
261
  st.markdown('<p class="subtitle">Ask questions about papal encyclicals and get answers based on the source document</p>', unsafe_allow_html=True)
262
 
263
  # Check for API key
264
- api_key = GOOGLE_API_KEY
265
-
266
- if not api_key:
267
  st.error("Google API Key not found in environment variables.")
268
  st.info("Please add GOOGLE_API_KEY to your Hugging Face Space secrets.")
269
  st.stop()
270
 
271
- # Check if PDF exists
272
- if not os.path.exists(PDF_FILE_PATH):
273
- st.error(f"PDF file not found: {PDF_FILE_PATH}")
274
- st.info("Please upload your PDF file to the Hugging Face Space repository.")
 
275
  st.stop()
276
 
277
- # Initialize vector store (cached, runs only once)
278
- with st.spinner("Loading document..."):
279
- success, message = initialize_vector_store(api_key)
280
-
281
- if not success:
282
- st.error(message)
283
- st.stop()
284
 
285
- # Display status badge
286
  st.markdown(
287
- f'''
288
  <div class="status-badge">
289
  <span class="status-dot"></span>
290
- Document loaded: {PDF_FILE_PATH}
291
  </div>
292
  ''',
293
  unsafe_allow_html=True
@@ -304,7 +225,7 @@ def main():
304
  if user_question:
305
  with st.spinner("Searching for answer..."):
306
  try:
307
- answer = user_input(user_question, api_key)
308
  st.markdown(
309
  f'''
310
  <div class="answer-container">
 
1
  import streamlit as st
 
 
2
  import os
 
3
 
 
 
4
  from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
5
  from langchain_community.vectorstores import FAISS
6
  from langchain_core.prompts import PromptTemplate
7
  from langchain_core.output_parsers import StrOutputParser
 
8
 
9
  # --- Configuration ---
 
10
  GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "")
11
 
12
+ # Path to pre-built FAISS index in the repo
13
+ FAISS_INDEX_PATH = "faiss_index"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
 
16
  def get_conversational_chain(api_key):
 
32
 
33
  Answer (based only on the context above):
34
  """
35
+ model = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0, google_api_key=api_key)
36
  prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
37
 
38
  chain = prompt | model | StrOutputParser()
 
44
  return "\n\n".join(doc.page_content for doc in docs)
45
 
46
 
47
+ def user_input(user_question, vector_store, api_key):
48
  """Process user question and return answer from the PDF context."""
49
+ docs = vector_store.similarity_search(user_question)
 
 
50
 
51
  chain = get_conversational_chain(api_key)
52
  context = format_docs(docs)
 
55
 
56
 
57
@st.cache_resource
def load_vector_store(_api_key):
    """Load the pre-built FAISS index from disk (cached across reruns).

    NOTE: the leading underscore on `_api_key` tells Streamlit's cache not
    to hash this argument when building the cache key.
    """
    embedder = GoogleGenerativeAIEmbeddings(
        model="models/embedding-001",
        google_api_key=_api_key,
    )
    # The index is trusted repo content, hence the explicit opt-in to
    # pickle deserialization required by newer LangChain versions.
    return FAISS.load_local(
        FAISS_INDEX_PATH,
        embedder,
        allow_dangerous_deserialization=True,
    )
 
 
 
70
 
71
 
72
  def main():
 
77
  initial_sidebar_state="collapsed"
78
  )
79
 
80
+ # Custom CSS
81
  st.markdown(
82
  """
83
  <style>
 
84
  #MainMenu {visibility: hidden;}
85
  header {visibility: hidden;}
86
  footer {visibility: hidden;}
87
  .stDeployButton {display: none;}
88
 
 
89
  .block-container {
90
  padding-top: 2rem;
91
  padding-bottom: 2rem;
92
  max-width: 800px;
93
  }
94
 
 
95
  .stApp {
96
  background-color: #ffffff;
97
  }
98
 
 
99
  .main-title {
100
  font-size: 2.5rem;
101
  font-weight: 600;
 
112
  margin-bottom: 2rem;
113
  }
114
 
 
 
 
 
 
 
 
 
 
115
  .stTextInput > div > div > input {
116
  border: 1px solid #e0e0e0;
117
  border-radius: 8px;
118
  padding: 0.75rem 1rem;
119
  font-size: 1rem;
 
120
  }
121
 
122
  .stTextInput > div > div > input:focus {
 
124
  box-shadow: 0 0 0 2px rgba(74, 144, 217, 0.1);
125
  }
126
 
 
 
 
 
 
 
 
 
 
 
127
  .answer-container {
128
  background-color: #fafafa;
129
  border: 1px solid #e8e8e8;
 
147
  line-height: 1.7;
148
  }
149
 
 
 
 
 
 
 
 
 
150
  .status-badge {
151
  display: inline-flex;
152
  align-items: center;
 
167
  border-radius: 50%;
168
  }
169
 
 
170
  .stTextInput label {
171
  font-size: 0.95rem;
172
  color: #444444;
 
183
  st.markdown('<p class="subtitle">Ask questions about papal encyclicals and get answers based on the source document</p>', unsafe_allow_html=True)
184
 
185
  # Check for API key
186
+ if not GOOGLE_API_KEY:
 
 
187
  st.error("Google API Key not found in environment variables.")
188
  st.info("Please add GOOGLE_API_KEY to your Hugging Face Space secrets.")
189
  st.stop()
190
 
191
+ # Check if FAISS index exists
192
+ index_file = os.path.join(FAISS_INDEX_PATH, "index.faiss")
193
+ if not os.path.exists(index_file):
194
+ st.error(f"FAISS index not found at: {FAISS_INDEX_PATH}/")
195
+ st.info("Please upload index.faiss and index.pkl to the faiss_index folder.")
196
  st.stop()
197
 
198
+ # Load vector store (cached)
199
+ with st.spinner("Loading index..."):
200
+ try:
201
+ vector_store = load_vector_store(GOOGLE_API_KEY)
202
+ except Exception as e:
203
+ st.error(f"Error loading index: {str(e)}")
204
+ st.stop()
205
 
206
+ # Status badge
207
  st.markdown(
208
+ '''
209
  <div class="status-badge">
210
  <span class="status-dot"></span>
211
+ Document ready
212
  </div>
213
  ''',
214
  unsafe_allow_html=True
 
225
  if user_question:
226
  with st.spinner("Searching for answer..."):
227
  try:
228
+ answer = user_input(user_question, vector_store, GOOGLE_API_KEY)
229
  st.markdown(
230
  f'''
231
  <div class="answer-container">