from typing import Dict, List

from google.oauth2 import service_account
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError


class GoogleDriveService:
    """Read-only helper around the Google Drive v3 and Google Docs v1 clients.

    Both clients are built from one set of service-account credentials that
    request the ``drive.readonly`` and ``documents.readonly`` scopes.
    """

    def __init__(self, credentials_dict: dict):
        """
        Initialize with credentials dictionary (works with both file and env variable)

        Args:
            credentials_dict: Google service account credentials as dictionary
        """
        self.credentials = service_account.Credentials.from_service_account_info(
            credentials_dict,
            scopes=[
                'https://www.googleapis.com/auth/drive.readonly',
                'https://www.googleapis.com/auth/documents.readonly',
            ],
        )
        self.drive_service = build('drive', 'v3', credentials=self.credentials)
        self.docs_service = build('docs', 'v1', credentials=self.credentials)

    def list_documents_in_folder(self, folder_id: str) -> List[Dict[str, str]]:
        """List all Google Docs in a folder, most recently modified first.

        Args:
            folder_id: ID of the Drive folder whose direct children are listed.

        Returns:
            One dict per non-trashed Google Doc with the keys ``id``, ``name``
            and ``modified`` (``'Unknown'`` when the API omits ``modifiedTime``).

        Raises:
            Exception: With a user-facing message when the folder is missing
                (HTTP 404), access is denied (HTTP 403), or any other error
                occurs. The underlying error is attached as ``__cause__``.
        """
        try:
            # The folder ID sits inside a single-quoted literal in the query
            # string, so backslashes and quotes must be escaped to keep the
            # query well-formed.
            safe_folder_id = folder_id.replace("\\", "\\\\").replace("'", "\\'")
            query = (
                f"'{safe_folder_id}' in parents "
                "and mimeType='application/vnd.google-apps.document' "
                "and trashed=false"
            )

            files = []
            page_token = None
            # files.list returns results one page at a time; keep requesting
            # until no nextPageToken comes back so large folders are not
            # silently truncated.
            while True:
                response = self.drive_service.files().list(
                    q=query,
                    fields="nextPageToken, files(id, name, modifiedTime)",
                    orderBy="modifiedTime desc",
                    pageToken=page_token,
                ).execute()
                files.extend(response.get('files', []))
                page_token = response.get('nextPageToken')
                if not page_token:
                    break

            return [
                {
                    'id': item['id'],
                    'name': item['name'],
                    'modified': item.get('modifiedTime', 'Unknown'),
                }
                for item in files
            ]

        except HttpError as e:
            if e.resp.status == 404:
                raise Exception(
                    "Folder not found. Please check:\n"
                    "1. The folder ID is correct\n"
                    "2. The folder exists in Google Drive"
                ) from e
            elif e.resp.status == 403:
                raise Exception(
                    "Permission denied. Please ensure:\n"
                    "1. The folder is shared with your service account\n"
                    "2. Service account email has at least 'Viewer' access\n"
                    "3. Check your GOOGLE_DRIVE_FOLDER_ID in environment variables"
                ) from e
            else:
                raise Exception(f"Error accessing Google Drive: {str(e)}") from e
        except Exception as e:
            raise Exception(f"Error listing documents in folder: {str(e)}") from e

    def get_document_content(self, document_id: str) -> str:
        """Get the plain-text content of a specific document.

        Args:
            document_id: ID of the Google Doc to fetch through the Docs API.

        Returns:
            The document text as produced by ``_extract_text``.

        Raises:
            Exception: With a user-facing message when the document is missing
                (HTTP 404), access is denied (HTTP 403), or any other error
                occurs. The underlying error is attached as ``__cause__``.
        """
        try:
            document = self.docs_service.documents().get(documentId=document_id).execute()
            return self._extract_text(document)

        except HttpError as e:
            if e.resp.status == 404:
                raise Exception(
                    f"Document not found. Please check the document ID: {document_id}"
                ) from e
            elif e.resp.status == 403:
                raise Exception(
                    "Permission denied. Please ensure:\n"
                    "1. The document is shared with your service account\n"
                    "2. The service account has at least 'Viewer' access\n"
                    "3. The document is not private/restricted"
                ) from e
            else:
                raise Exception(f"Error reading document: {str(e)}") from e
        except Exception as e:
            raise Exception(f"Error reading document: {str(e)}") from e

    def _extract_text(self, document: dict) -> str:
        """Extract plain text from a document structure.

        Args:
            document: Document resource dict; text is read from
                ``document['body']['content']``.

        Returns:
            The concatenated text of all paragraph text runs, including those
            inside tables at any nesting depth, with leading and trailing
            whitespace stripped. Empty string when there is no body content.
        """
        text_parts: List[str] = []
        self._collect_text(document.get('body', {}).get('content', []), text_parts)
        return ''.join(text_parts).strip()

    def _collect_text(self, elements: List[dict], text_parts: List[str]) -> None:
        """Append the text of ``elements`` to ``text_parts`` in document order.

        Table cells hold their own list of content elements, so they are
        walked recursively; this also picks up tables nested inside cells.
        """
        for element in elements:
            if 'paragraph' in element:
                for text_element in element['paragraph'].get('elements', []):
                    text_run = text_element.get('textRun')
                    if text_run:
                        # Tolerate a text run without 'content' rather than
                        # failing the whole extraction with a KeyError.
                        text_parts.append(text_run.get('content', ''))
            elif 'table' in element:
                for row in element['table'].get('tableRows', []):
                    for cell in row.get('tableCells', []):
                        self._collect_text(cell.get('content', []), text_parts)

    def get_document_metadata(self, document_id: str) -> Dict[str, str]:
        """Get Drive metadata for a document.

        Args:
            document_id: ID of the file to look up through the Drive API.

        Returns:
            Dict with the keys ``id``, ``name``, ``modified``, ``created`` and
            ``url``. Missing timestamps default to ``'Unknown'`` and a missing
            link defaults to an empty string.

        Raises:
            Exception: With a user-facing message when the document is missing
                (HTTP 404), access is denied (HTTP 403), or any other error
                occurs. The underlying error is attached as ``__cause__``.
        """
        try:
            item = self.drive_service.files().get(
                fileId=document_id,
                fields="id, name, modifiedTime, createdTime, webViewLink",
            ).execute()

            return {
                'id': item['id'],
                'name': item['name'],
                'modified': item.get('modifiedTime', 'Unknown'),
                'created': item.get('createdTime', 'Unknown'),
                'url': item.get('webViewLink', ''),
            }
        except HttpError as e:
            if e.resp.status == 404:
                raise Exception(f"Document not found: {document_id}") from e
            elif e.resp.status == 403:
                raise Exception(f"Permission denied for document: {document_id}") from e
            else:
                raise Exception(f"Error getting document metadata: {str(e)}") from e
        except Exception as e:
            raise Exception(f"Error getting document metadata: {str(e)}") from e