Spaces:

dev2607
/

AI-Powered_PDF_Reader_QandA_Assistant

Sleeping

App Files Files Community

dev2607 committed on Mar 30, 2025

Commit

1818ffd

verified ·

1 Parent(s): bb518df

Update app.py

Browse files

Files changed (1) hide show

app.py +26 -16

app.py CHANGED Viewed

@@ -31,7 +31,7 @@ if "file_names" not in st.session_state:
 class PDFQAAssistant:
     def __init__(self,
                  hf_token: str = None,
-                 model_name: str = "mistralai/Mistral-7B-Instruct-v0.2",
                  embedding_model_name: str = "sentence-transformers/all-MiniLM-L6-v2"):
         """
         Initialize the PDF Q&A Assistant with Hugging Face models.
@@ -52,7 +52,7 @@ class PDFQAAssistant:
         self.llm = HuggingFaceEndpoint(
             repo_id=model_name,
             huggingfacehub_api_token=hf_token,
-            max_length=1024,
             temperature=0.5
         )
@@ -64,8 +64,8 @@ class PDFQAAssistant:
         # Initialize text splitter for chunking documents
         self.text_splitter = RecursiveCharacterTextSplitter(
-            chunk_size=1000,
-            chunk_overlap=200,
             length_function=len
         )
@@ -274,20 +274,25 @@ def main():
         if use_manual_token:
             hf_token = st.text_input("Enter Hugging Face API Token:", type="password")
-        # Model selection
         st.subheader("Model Settings")
         model_name = st.selectbox(
             "Select LLM model:",
-            ["mistralai/Mistral-7B-Instruct-v0.2",
-             "google/flan-t5-large",
-             "tiiuae/falcon-7b-instruct"],
             index=0
         )
         embedding_model = st.selectbox(
             "Select Embedding model:",
-            ["sentence-transformers/all-MiniLM-L6-v2",
-             "sentence-transformers/all-mpnet-base-v2"],
             index=0
         )
@@ -314,29 +319,33 @@ def main():
                         # Process each uploaded file
                         for pdf_file in uploaded_files:
                             file_name = pdf_file.name
-                            st.session_state.file_names.append(file_name)
                             assistant.process_pdf(pdf_file, file_name)
                         # Store the assistant in session state
                         st.session_state.assistant = assistant
                     except Exception as e:
                         st.error(f"Error initializing assistant: {e}")
         # Document management
-        if st.session_state.document_processed:
             st.subheader("Document Management")
             if st.button("Clear Chat History"):
-                st.session_state.assistant.clear_memory()
                 st.session_state.chat_history = []
                 st.success("Chat history cleared!")
             if st.button("Generate Document Summary"):
-                get_document_summary(st.session_state.assistant,
-                                     st.session_state.file_names[0])
     # Main area for chat interface
-    if not st.session_state.document_processed:
         st.info("👈 Please upload and process a PDF document to get started.")
         # Display demo information
@@ -405,6 +414,7 @@ def main():
                         })
                     except Exception as e:
                         st.error(f"Error getting response: {e}")
 if __name__ == "__main__":
     main()

 class PDFQAAssistant:
     def __init__(self,
                  hf_token: str = None,
+                 model_name: str = "google/flan-t5-base",  # Changed to a more accessible model
                  embedding_model_name: str = "sentence-transformers/all-MiniLM-L6-v2"):
         """
         Initialize the PDF Q&A Assistant with Hugging Face models.
         self.llm = HuggingFaceEndpoint(
             repo_id=model_name,
             huggingfacehub_api_token=hf_token,
+            max_length=512,  # Reduced for smaller models
             temperature=0.5
         )
         # Initialize text splitter for chunking documents
         self.text_splitter = RecursiveCharacterTextSplitter(
+            chunk_size=800,  # Smaller chunks for better processing
+            chunk_overlap=150,
             length_function=len
         )
         if use_manual_token:
             hf_token = st.text_input("Enter Hugging Face API Token:", type="password")
+        # Model selection with open-source models
         st.subheader("Model Settings")
         model_name = st.selectbox(
             "Select LLM model:",
+            [
+                "google/flan-t5-base",  # Smaller, more accessible model
+                "google/flan-t5-small",  # Even smaller model
+                "facebook/bart-large-cnn",  # Good for summarization
+                "distilbert-base-uncased"  # Lightweight model
+            ],
             index=0
         )
         embedding_model = st.selectbox(
             "Select Embedding model:",
+            [
+                "sentence-transformers/all-MiniLM-L6-v2",
+                "sentence-transformers/paraphrase-MiniLM-L3-v2"  # Smaller embedding model
+            ],
             index=0
         )
                         # Process each uploaded file
                         for pdf_file in uploaded_files:
                             file_name = pdf_file.name
+                            if file_name not in st.session_state.file_names:
+                                st.session_state.file_names.append(file_name)
                             assistant.process_pdf(pdf_file, file_name)
                         # Store the assistant in session state
                         st.session_state.assistant = assistant
                     except Exception as e:
                         st.error(f"Error initializing assistant: {e}")
+                        st.error("Try selecting a different model or check your token permissions.")
         # Document management
+        if st.session_state.get("document_processed", False):
             st.subheader("Document Management")
             if st.button("Clear Chat History"):
+                if "assistant" in st.session_state:
+                    st.session_state.assistant.clear_memory()
                 st.session_state.chat_history = []
                 st.success("Chat history cleared!")
             if st.button("Generate Document Summary"):
+                if "assistant" in st.session_state and len(st.session_state.file_names) > 0:
+                    get_document_summary(st.session_state.assistant,
+                                        st.session_state.file_names[0])
     # Main area for chat interface
+    if not st.session_state.get("document_processed", False):
         st.info("👈 Please upload and process a PDF document to get started.")
         # Display demo information
                         })
                     except Exception as e:
                         st.error(f"Error getting response: {e}")
+                        st.error("Please try a different question or model.")
 if __name__ == "__main__":
     main()