Spaces:

Redfire-1234
/

google-doc-chatbot

Sleeping

App Files Community

Redfire-1234 committed on Jan 16

Commit

b04d4ab

verified ·

1 Parent(s): 7cc17cd

Update app/services/google_docs.py

Browse files

Files changed (1) hide show

app/services/google_docs.py +42 -33

app/services/google_docs.py CHANGED Viewed

@@ -1,7 +1,7 @@
 from google.oauth2 import service_account
 from googleapiclient.discovery import build
 from googleapiclient.errors import HttpError
-from typing import Optional
 class GoogleDocsReader:
     def __init__(self, credentials_dict: dict):
@@ -12,51 +12,60 @@ class GoogleDocsReader:
             credentials_dict,
             scopes=['https://www.googleapis.com/auth/documents.readonly']
         )
-        self.docs_service = build('docs', 'v1', credentials=self.credentials)
     def read_document(self, document_id: str) -> str:
-        """Read and extract text from Google Doc"""
         try:
             document = self.service.documents().get(documentId=document_id).execute()
-            return self._extract_text(document)
         except HttpError as e:
-            # Handle specific Google API errors
             if e.resp.status == 404:
-                raise Exception(f"Document not found. Please check the document ID: {document_id}")
             elif e.resp.status == 403:
                 raise Exception(
                     f"Permission denied. Please ensure:\n"
                     f"1. The document is shared with your service account\n"
-                    f"2. The service account has at least 'Viewer' access\n"
-                    f"3. The document is not private/restricted"
                 )
             else:
                 raise Exception(f"Error reading document: {str(e)}")
         except Exception as e:
             raise Exception(f"Error reading document: {str(e)}")
-    def _extract_text(self, document: dict) -> str:
-        """Extract plain text from document structure"""
-        text_parts = []
-        content = document.get('body', {}).get('content', [])
-        for element in content:
-            if 'paragraph' in element:
-                paragraph = element['paragraph']
-                for text_element in paragraph.get('elements', []):
-                    if 'textRun' in text_element:
-                        text_parts.append(text_element['textRun']['content'])
-            elif 'table' in element:
-                table = element['table']
-                for row in table.get('tableRows', []):
-                    for cell in row.get('tableCells', []):
-                        for cell_content in cell.get('content', []):
-                            if 'paragraph' in cell_content:
-                                paragraph = cell_content['paragraph']
-                                for text_element in paragraph.get('elements', []):
-                                    if 'textRun' in text_element:
-                                        text_parts.append(text_element['textRun']['content'])
-        return ''.join(text_parts).strip()

 from google.oauth2 import service_account
 from googleapiclient.discovery import build
 from googleapiclient.errors import HttpError
+from typing import Dict
 class GoogleDocsReader:
     def __init__(self, credentials_dict: dict):
             credentials_dict,
             scopes=['https://www.googleapis.com/auth/documents.readonly']
         )
+        self.service = build('docs', 'v1', credentials=self.credentials)
     def read_document(self, document_id: str) -> str:
+        """
+        Read the content of a Google Doc and return as plain text
+        """
         try:
+            # Get the document
             document = self.service.documents().get(documentId=document_id).execute()
+            # Extract text content
+            content = document.get('body', {}).get('content', [])
+            text_parts = []
+            for element in content:
+                if 'paragraph' in element:
+                    paragraph = element['paragraph']
+                    for text_element in paragraph.get('elements', []):
+                        if 'textRun' in text_element:
+                            text_parts.append(text_element['textRun'].get('content', ''))
+                elif 'table' in element:
+                    # Handle tables
+                    table = element['table']
+                    for row in table.get('tableRows', []):
+                        for cell in row.get('tableCells', []):
+                            for cell_element in cell.get('content', []):
+                                if 'paragraph' in cell_element:
+                                    paragraph = cell_element['paragraph']
+                                    for text_element in paragraph.get('elements', []):
+                                        if 'textRun' in text_element:
+                                            text_parts.append(text_element['textRun'].get('content', ''))
+            return ''.join(text_parts)
         except HttpError as e:
             if e.resp.status == 404:
+                raise Exception(f"Document not found: {document_id}")
             elif e.resp.status == 403:
                 raise Exception(
                     f"Permission denied. Please ensure:\n"
                     f"1. The document is shared with your service account\n"
+                    f"2. Service account has at least 'Viewer' access"
                 )
             else:
                 raise Exception(f"Error reading document: {str(e)}")
         except Exception as e:
             raise Exception(f"Error reading document: {str(e)}")
+    def get_document_title(self, document_id: str) -> str:
+        """
+        Get the title of a Google Doc
+        """
+        try:
+            document = self.service.documents().get(documentId=document_id).execute()
+            return document.get('title', 'Untitled')
+        except Exception as e:
+            raise Exception(f"Error getting document title: {str(e)}")