Update app.py
app.py CHANGED
@@ -1,10 +1,4 @@
-"""
-Streamlit application for PDF-based Retrieval-Augmented Generation (RAG) using Ollama + LangChain.
-
-This application allows users to upload a PDF, process it,
-and then ask questions about the content using a selected language model.
-"""
-
+!pip install langchain-community # Install the missing module
 import streamlit as st
 import logging
 import os
@@ -46,15 +40,7 @@ logger = logging.getLogger(__name__)
 def extract_model_names(
     models_info: Dict[str, List[Dict[str, Any]]],
 ) -> Tuple[str, ...]:
-    """
-    Extract model names from the provided models information.
-
-    Args:
-        models_info (Dict[str, List[Dict[str, Any]]]): Dictionary containing information about available models.
-
-    Returns:
-        Tuple[str, ...]: A tuple of model names.
-    """
+    """Extract model names from the provided models information."""
     logger.info("Extracting model names from models_info")
     model_names = tuple(model["name"] for model in models_info["models"])
     logger.info(f"Extracted model names: {model_names}")
@@ -62,15 +48,7 @@ def extract_model_names(
 
 
 def create_vector_db(file_upload) -> Chroma:
-    """
-    Create a vector database from an uploaded PDF file.
-
-    Args:
-        file_upload (st.UploadedFile): Streamlit file upload object containing the PDF.
-
-    Returns:
-        Chroma: A vector store containing the processed document chunks.
-    """
+    """Create a vector database from an uploaded PDF file."""
     logger.info(f"Creating vector DB from file upload: {file_upload.name}")
     temp_dir = tempfile.mkdtemp()
 
@@ -97,19 +75,8 @@ def create_vector_db(file_upload) -> Chroma:
 
 
 def process_question(question: str, vector_db: Chroma, selected_model: str) -> str:
-    """
-    Process a user question using the vector database and selected language model.
-
-    Args:
-        question (str): The user's question.
-        vector_db (Chroma): The vector database containing document embeddings.
-        selected_model (str): The name of the selected language model.
-
-    Returns:
-        str: The generated response to the user's question.
-    """
-    logger.info(f"""Processing question: {
-        question} using model: {selected_model}""")
+    """Process a user question using the vector database and selected language model."""
+    logger.info(f"Processing question: {question} using model: {selected_model}")
     llm = ChatOllama(model=selected_model, temperature=0)
     QUERY_PROMPT = PromptTemplate(
         input_variables=["question"],
@@ -149,17 +116,8 @@ def process_question(question: str, vector_db: Chroma, selected_model: str) -> str:
 
 @st.cache_data
 def extract_all_pages_as_images(file_upload) -> List[Any]:
-    """
-    Extract all pages from a PDF file as images.
-
-    Args:
-        file_upload (st.UploadedFile): Streamlit file upload object containing the PDF.
-
-    Returns:
-        List[Any]: A list of image objects representing each page of the PDF.
-    """
-    logger.info(f"""Extracting all pages as images from file: {
-        file_upload.name}""")
+    """Extract all pages from a PDF file as images."""
+    logger.info(f"Extracting all pages as images from file: {file_upload.name}")
     pdf_pages = []
     with pdfplumber.open(file_upload) as pdf:
         pdf_pages = [page.to_image().original for page in pdf.pages]
@@ -168,12 +126,7 @@ def extract_all_pages_as_images(file_upload) -> List[Any]:
 
 
 def delete_vector_db(vector_db: Optional[Chroma]) -> None:
-    """
-    Delete the vector database and clear related session state.
-
-    Args:
-        vector_db (Optional[Chroma]): The vector database to be deleted.
-    """
+    """Delete the vector database and clear related session state."""
     logger.info("Deleting vector DB")
     if vector_db is not None:
         vector_db.delete_collection()
@@ -189,12 +142,7 @@ def delete_vector_db(vector_db: Optional[Chroma]) -> None:
 
 
 def main() -> None:
-    """
-    Main function to run the Streamlit application.
-
-    This function sets up the user interface, handles file uploads,
-    processes user queries, and displays results.
-    """
+    """Main function to run the Streamlit application."""
     st.subheader("🧠 Ollama PDF RAG playground", divider="gray", anchor=False)
 
     models_info = ollama.list()
@@ -246,33 +194,4 @@ def main() -> None:
             with message_container.chat_message(message["role"], avatar=avatar):
                 st.markdown(message["content"])
 
-
-            try:
-                st.session_state["messages"].append({"role": "user", "content": prompt})
-                message_container.chat_message("user", avatar="😎").markdown(prompt)
-
-                with message_container.chat_message("assistant", avatar="🤖"):
-                    with st.spinner(":green[processing...]"):
-                        if st.session_state["vector_db"] is not None:
-                            response = process_question(
-                                prompt, st.session_state["vector_db"], selected_model
-                            )
-                            st.markdown(response)
-                        else:
-                            st.warning("Please upload a PDF file first.")
-
-                if st.session_state["vector_db"] is not None:
-                    st.session_state["messages"].append(
-                        {"role": "assistant", "content": response}
-                    )
-
-            except Exception as e:
-                st.error(e, icon="⛔️")
-                logger.error(f"Error processing prompt: {e}")
-        else:
-            if st.session_state["vector_db"] is None:
-                st.warning("Upload a PDF file to begin chat...")
-
-
-if __name__ == "__main__":
-    main()
+
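A note on the added first line: `!pip install langchain-community` is IPython/Jupyter shell syntax, not Python, so the Streamlit runtime will stop with a SyntaxError when it executes app.py. On a Streamlit Space, a missing package is normally declared in a requirements.txt next to app.py instead; a minimal sketch under that assumption (the `ChatOllama` import path is illustrative, not taken from this diff):

# requirements.txt — Spaces installs each listed package when the Space builds
langchain-community

# app.py — with the package installed, the !pip line is unnecessary; import normally
from langchain_community.chat_models import ChatOllama  # illustrative import from langchain-community

This keeps app.py valid Python and lets the Space resolve the dependency at build time rather than at runtime.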