Spaces:

bacancydataprophets
/

Smart-PDF-Search

Sleeping

App Files Community

Avanisha committed on Jan 28, 2025

Commit

94ca2d7

verified ·

1 Parent(s): dbcc12d

Update app.py

Browse files

Files changed (1) hide show

app.py +15 -31

app.py CHANGED Viewed

@@ -460,7 +460,7 @@ def display_source_documents_with_images(source_documents, query):
                     highlighted_snippet = highlight_query_words(snippet, query)
                     st.markdown(f'<div class="source-content">{highlighted_snippet}</div>', unsafe_allow_html=True)
-                    # st.markdown(f"[View other results in this book](?page=pdf_details&filename={pdf_name}&page_number={page_number})", unsafe_allow_html=True)
                     logger.debug(f"Successfully displayed content for {pdf_name}, page {page_number + 1}")
@@ -519,41 +519,33 @@ def get_pdf_details(filename, page_number):
     """Get details of a specific PDF page."""
     logger.info(f"Processing PDF details for file: {filename}, page: {page_number}")
     try:
-        # Check if running in Hugging Face space or locally
-        if os.path.exists('/tmp'):
-            data_path = '/tmp/data'  # Hugging Face temporary storage
-        else:
-            data_path = 'data'  # Local storage
         file_path = os.path.join(data_path, filename)
-        # Ensure file exists
-        if not os.path.exists(file_path):
-            logger.error(f"File does not exist at {file_path}")
-            st.error(f"File not found at {file_path}")
-            return
         # Open the PDF
         logger.debug(f"Opening PDF file: {file_path}")
         doc = fitz.open(file_path)
         # Extract full PDF text
         full_text = ""
         for page in doc:
             full_text += page.get_text()
         # Get PDF metadata
         pdf_metadata = doc.metadata or {}
         # Extract page text and render page image
         page = doc.load_page(page_number)
         page_text = page.get_text()
         # Render page as image
         pix = page.get_pixmap()
         img_bytes = pix.tobytes("png")
         page_image_base64 = base64.b64encode(img_bytes).decode('utf-8')
         # Detect language
         try:
             lang_code = detect(page_text)
@@ -561,7 +553,7 @@ def get_pdf_details(filename, page_number):
         except Exception as e:
             logger.warning(f"Language detection failed: {str(e)}")
             language = 'Unknown'
         # Prepare response
         return {
             "file_path": file_path,
@@ -590,20 +582,12 @@ def get_romanized_text(filename):
     """Get romanized text from a PDF."""
     logger.info(f"Processing romanized text for file: {filename}")
     try:
-        # Check if running in Hugging Face space or locally
-        if os.path.exists('/tmp'):
-            data_path = '/tmp/data'  # Use Hugging Face's temp directory
-        else:
-            data_path = 'data'  # Use local directory
         file_path = os.path.join(data_path, filename)
-        # Ensure file exists
-        if not os.path.exists(file_path):
-            logger.error(f"File does not exist at {file_path}")
-            st.error(f"File not found at {file_path}")
-            return
         # Open the PDF
         logger.debug(f"Opening PDF file for romanization: {file_path}")
         doc = fitz.open(file_path)

                     highlighted_snippet = highlight_query_words(snippet, query)
                     st.markdown(f'<div class="source-content">{highlighted_snippet}</div>', unsafe_allow_html=True)
+                    st.markdown(f"[View other results in this book](?page=pdf_details&filename={pdf_name}&page_number={page_number})", unsafe_allow_html=True)
                     logger.debug(f"Successfully displayed content for {pdf_name}, page {page_number + 1}")
     """Get details of a specific PDF page."""
     logger.info(f"Processing PDF details for file: {filename}, page: {page_number}")
     try:
+        with open(CONFIG_FILE, 'r') as f:
+            config = json.load(f)
+        data_path = config.get('data_path', '/tmp/data')
         file_path = os.path.join(data_path, filename)
         # Open the PDF
         logger.debug(f"Opening PDF file: {file_path}")
         doc = fitz.open(file_path)
         # Extract full PDF text
         full_text = ""
         for page in doc:
             full_text += page.get_text()
         # Get PDF metadata
         pdf_metadata = doc.metadata or {}
         # Extract page text and render page image
         page = doc.load_page(page_number)
         page_text = page.get_text()
         # Render page as image
         pix = page.get_pixmap()
         img_bytes = pix.tobytes("png")
         page_image_base64 = base64.b64encode(img_bytes).decode('utf-8')
         # Detect language
         try:
             lang_code = detect(page_text)
         except Exception as e:
             logger.warning(f"Language detection failed: {str(e)}")
             language = 'Unknown'
         # Prepare response
         return {
             "file_path": file_path,
     """Get romanized text from a PDF."""
     logger.info(f"Processing romanized text for file: {filename}")
     try:
+        with open(CONFIG_FILE, 'r') as f:
+            config = json.load(f)
+        data_path = config.get('data_path', '/tmp/data')
         file_path = os.path.join(data_path, filename)
         # Open the PDF
         logger.debug(f"Opening PDF file for romanization: {file_path}")
         doc = fitz.open(file_path)