Spaces:

Ephraimmm
/

studybuddy

Sleeping

App Files Community

Ephraimmm committed on Jun 23

Commit

15754f2

verified ·

1 Parent(s): be5938e

Update app.py

Browse files

Files changed (1) hide show

app.py +5 -33

app.py CHANGED Viewed

@@ -53,6 +53,7 @@ class GPTDriveIntegration:
         """Download and extract text content from file"""
     try:
         if 'text' in mime_type or 'document' in mime_type:
             if 'document' in mime_type:
                 request = self.drive_service.files().export_media(
                     fileId=file_id, mimeType='text/plain'
@@ -66,7 +67,7 @@ class GPTDriveIntegration:
             while done is False:
                 status, done = downloader.next_chunk()
-        return file_content.getvalue().decode('utf-8')
         elif 'spreadsheet' in mime_type:
             # For Google Sheets, export as CSV
@@ -90,10 +91,9 @@ class GPTDriveIntegration:
             while done is False:
                 status, done = downloader.next_chunk()
-            # Extract text from PDF using PyPDF2 or pdfplumber
-            file_content.seek(0)  # Reset buffer position
-            # Option 1: Using PyPDF2
             try:
                 import PyPDF2
                 pdf_reader = PyPDF2.PdfReader(file_content)
@@ -102,35 +102,7 @@ class GPTDriveIntegration:
                     text += page.extract_text() + "\n"
                 return text
             except ImportError:
-                pass
-            # Option 2: Using pdfplumber (better for complex PDFs)
-            try:
-                import pdfplumber
-                text = ""
-                with pdfplumber.open(file_content) as pdf:
-                    for page in pdf.pages:
-                        page_text = page.extract_text()
-                        if page_text:
-                            text += page_text + "\n"
-                return text
-            except ImportError:
-                pass
-            # Option 3: Using pymupdf (fitz) - fastest option
-            try:
-                import fitz  # pymupdf
-                pdf_document = fitz.open(stream=file_content.read(), filetype="pdf")
-                text = ""
-                for page_num in range(pdf_document.page_count):
-                    page = pdf_document[page_num]
-                    text += page.get_text() + "\n"
-                pdf_document.close()
-                return text
-            except ImportError:
-                pass
-            return "PDF text extraction requires PyPDF2, pdfplumber, or pymupdf library"
         else:
             return "File type not supported for text extraction"

         """Download and extract text content from file"""
     try:
         if 'text' in mime_type or 'document' in mime_type:
+            # For Google Docs, export as plain text
             if 'document' in mime_type:
                 request = self.drive_service.files().export_media(
                     fileId=file_id, mimeType='text/plain'
             while done is False:
                 status, done = downloader.next_chunk()
+            return file_content.getvalue().decode('utf-8')
         elif 'spreadsheet' in mime_type:
             # For Google Sheets, export as CSV
             while done is False:
                 status, done = downloader.next_chunk()
+            # Extract text from PDF
+            file_content.seek(0)
             try:
                 import PyPDF2
                 pdf_reader = PyPDF2.PdfReader(file_content)
                     text += page.extract_text() + "\n"
                 return text
             except ImportError:
+                return "PDF text extraction requires PyPDF2 library"
         else:
             return "File type not supported for text extraction"