from google.oauth2 import service_account
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from typing import Dict


class GoogleDocsReader:
    """Read-only helper that fetches Google Docs through the Docs API (v1).

    The reader authenticates with a service account and exposes two
    operations: extracting a document's text and reading its title.
    """

    def __init__(self, credentials_dict: dict):
        """Build service-account credentials and the Docs API client.

        Args:
            credentials_dict: Service-account info mapping passed straight to
                ``Credentials.from_service_account_info``. The caller is
                expected to have decoded it already (e.g. from base64).
        """
        # Only the read-only documents scope is requested.
        self.credentials = service_account.Credentials.from_service_account_info(
            credentials_dict,
            scopes=['https://www.googleapis.com/auth/documents.readonly'],
        )
        self.service = build('docs', 'v1', credentials=self.credentials)

    @staticmethod
    def _extract_text(content) -> list:
        """Collect the text-run strings found in a list of structural elements.

        Paragraphs contribute the ``content`` of each ``textRun``. Tables are
        walked row by row and cell by cell, and each cell's own ``content``
        list is processed recursively so that tables nested inside a cell are
        not lost. Any other element kind is ignored.

        Args:
            content: Iterable of structural-element dicts (a document body's
                ``content`` list or a table cell's ``content`` list).

        Returns:
            The text fragments in document order.
        """
        parts = []
        for element in content:
            if 'paragraph' in element:
                for text_element in element['paragraph'].get('elements', []):
                    if 'textRun' in text_element:
                        parts.append(text_element['textRun'].get('content', ''))
            elif 'table' in element:
                for row in element['table'].get('tableRows', []):
                    for cell in row.get('tableCells', []):
                        # A cell holds structural elements of its own, which
                        # may include further tables.
                        parts.extend(
                            GoogleDocsReader._extract_text(cell.get('content', []))
                        )
        return parts

    def read_document(self, document_id: str) -> str:
        """Read the content of a Google Doc and return it as plain text.

        Args:
            document_id: ID of the document to fetch.

        Returns:
            The concatenated text of all paragraphs and table cells.

        Raises:
            Exception: If the document is not found (HTTP 404), access is
                denied (HTTP 403), or any other error occurs while fetching
                or parsing. The underlying error is attached as ``__cause__``.
        """
        try:
            document = self.service.documents().get(documentId=document_id).execute()
            content = document.get('body', {}).get('content', [])
            return ''.join(self._extract_text(content))
        except HttpError as e:
            status = e.resp.status
            if status == 404:
                raise Exception(f"Document not found: {document_id}") from e
            if status == 403:
                raise Exception(
                    "Permission denied. Please ensure:\n"
                    "1. The document is shared with your service account\n"
                    "2. Service account has at least 'Viewer' access"
                ) from e
            raise Exception(f"Error reading document: {str(e)}") from e
        except Exception as e:
            raise Exception(f"Error reading document: {str(e)}") from e

    def get_document_title(self, document_id: str) -> str:
        """Get the title of a Google Doc.

        Args:
            document_id: ID of the document to look up.

        Returns:
            The document's title, or ``'Untitled'`` when the response has no
            ``title`` field.

        Raises:
            Exception: If the request fails for any reason. The underlying
                error is attached as ``__cause__``.
        """
        try:
            # Request a partial response: only the title is needed, so there
            # is no reason to download the whole document body.
            document = self.service.documents().get(
                documentId=document_id,
                fields='title',
            ).execute()
            return document.get('title', 'Untitled')
        except Exception as e:
            raise Exception(f"Error getting document title: {str(e)}") from e