Spaces:

bacancydataprophets
/

Smart-PDF-Search

Sleeping

App Files Community

Avanisha committed on Jan 28, 2025

Commit

496b314

verified ·

1 Parent(s): 002703a

Update app.py

Browse files

Files changed (1) hide show

app.py +39 -13

app.py CHANGED Viewed

@@ -31,11 +31,16 @@ nltk.download('punkt')
 nltk.download('punkt_tab')
 nltk.download('stopwords')
-# Create directories if they don't exist
 def create_dirs_if_needed():
     """Create the necessary directories if they don't exist."""
-    os.makedirs('/tmp/data', exist_ok=True)
-    os.makedirs('/tmp/db', exist_ok=True)
 # Call the function at the start of your app
 create_dirs_if_needed()
@@ -509,35 +514,46 @@ def is_query_relevant(question, source_documents, threshold=0.1):
     except Exception as e:
         logger.error(f"Error checking query relevance: {str(e)}", exc_info=True)
         return False
 def get_pdf_details(filename, page_number):
     """Get details of a specific PDF page."""
     logger.info(f"Processing PDF details for file: {filename}, page: {page_number}")
     try:
-        # Update the paths to point to /tmp for Hugging Face Space
-        data_path = '/tmp/data'
         file_path = os.path.join(data_path, filename)
         # Open the PDF
         logger.debug(f"Opening PDF file: {file_path}")
         doc = fitz.open(file_path)
         # Extract full PDF text
         full_text = ""
         for page in doc:
             full_text += page.get_text()
         # Get PDF metadata
         pdf_metadata = doc.metadata or {}
         # Extract page text and render page image
         page = doc.load_page(page_number)
         page_text = page.get_text()
         # Render page as image
         pix = page.get_pixmap()
         img_bytes = pix.tobytes("png")
         page_image_base64 = base64.b64encode(img_bytes).decode('utf-8')
         # Detect language
         try:
             lang_code = detect(page_text)
@@ -545,7 +561,7 @@ def get_pdf_details(filename, page_number):
         except Exception as e:
             logger.warning(f"Language detection failed: {str(e)}")
             language = 'Unknown'
         # Prepare response
         return {
             "file_path": file_path,
@@ -574,10 +590,20 @@ def get_romanized_text(filename):
     """Get romanized text from a PDF."""
     logger.info(f"Processing romanized text for file: {filename}")
     try:
-        # Update the paths to point to /tmp for Hugging Face Space
-        data_path = '/tmp/data'
         file_path = os.path.join(data_path, filename)
         # Open the PDF
         logger.debug(f"Opening PDF file for romanization: {file_path}")
         doc = fitz.open(file_path)

 nltk.download('punkt_tab')
 nltk.download('stopwords')
 def create_dirs_if_needed():
     """Create the necessary directories if they don't exist."""
+    if os.path.exists('/tmp'):
+        # Intended for the Hugging Face Space. NOTE(review): this only tests that '/tmp' exists, so any host with a '/tmp' directory takes this branch too.
+        os.makedirs('/tmp/data', exist_ok=True)
+        os.makedirs('/tmp/db', exist_ok=True)
+    else:
+        # No '/tmp' on this host (treated as the local environment): create 'data' and 'db' relative to the current working directory.
+        os.makedirs('data', exist_ok=True)
+        os.makedirs('db', exist_ok=True)
 # Call the function at the start of your app
 create_dirs_if_needed()
     except Exception as e:
         logger.error(f"Error checking query relevance: {str(e)}", exc_info=True)
         return False
 def get_pdf_details(filename, page_number):
     """Get details of a specific PDF page."""
     logger.info(f"Processing PDF details for file: {filename}, page: {page_number}")
     try:
+        # Check if running in Hugging Face space or locally
+        if os.path.exists('/tmp'):
+            data_path = '/tmp/data'  # Hugging Face temporary storage
+        else:
+            data_path = 'data'  # Local storage
         file_path = os.path.join(data_path, filename)
+        # Ensure file exists
+        if not os.path.exists(file_path):
+            logger.error(f"File does not exist at {file_path}")
+            st.error(f"File not found at {file_path}")
+            return
         # Open the PDF
         logger.debug(f"Opening PDF file: {file_path}")
         doc = fitz.open(file_path)
         # Extract full PDF text
         full_text = ""
         for page in doc:
             full_text += page.get_text()
         # Get PDF metadata
         pdf_metadata = doc.metadata or {}
         # Extract page text and render page image
         page = doc.load_page(page_number)
         page_text = page.get_text()
         # Render page as image
         pix = page.get_pixmap()
         img_bytes = pix.tobytes("png")
         page_image_base64 = base64.b64encode(img_bytes).decode('utf-8')
         # Detect language
         try:
             lang_code = detect(page_text)
         except Exception as e:
             logger.warning(f"Language detection failed: {str(e)}")
             language = 'Unknown'
         # Prepare response
         return {
             "file_path": file_path,
     """Get romanized text from a PDF."""
     logger.info(f"Processing romanized text for file: {filename}")
     try:
+        # Check if running in Hugging Face space or locally
+        if os.path.exists('/tmp'):
+            data_path = '/tmp/data'  # Use Hugging Face's temp directory
+        else:
+            data_path = 'data'  # Use local directory
         file_path = os.path.join(data_path, filename)
+        # Ensure file exists
+        if not os.path.exists(file_path):
+            logger.error(f"File does not exist at {file_path}")
+            st.error(f"File not found at {file_path}")
+            return
         # Open the PDF
         logger.debug(f"Opening PDF file for romanization: {file_path}")
         doc = fitz.open(file_path)