Spaces:
Sleeping
Sleeping
| from google.oauth2 import service_account | |
| from googleapiclient.discovery import build | |
| from googleapiclient.errors import HttpError | |
| from typing import List, Dict | |
class GoogleDriveService:
    """Read-only helper around the Google Drive v3 and Google Docs v1 APIs.

    Provides listing of Google Docs inside a Drive folder, plain-text
    extraction of a document body, and basic file metadata lookup.

    Every public method re-raises failures as a plain ``Exception`` carrying
    a user-facing message; the underlying error is chained as ``__cause__``.
    """

    # Read-only scopes: the service never modifies Drive or Docs content.
    SCOPES = [
        'https://www.googleapis.com/auth/drive.readonly',
        'https://www.googleapis.com/auth/documents.readonly',
    ]

    def __init__(self, credentials_dict: dict):
        """Build the Drive and Docs API clients from service-account info.

        Args:
            credentials_dict: Google service account credentials as a
                dictionary (e.g. parsed from a key file or an environment
                variable).
        """
        self.credentials = service_account.Credentials.from_service_account_info(
            credentials_dict,
            scopes=list(self.SCOPES),
        )
        self.drive_service = build('drive', 'v3', credentials=self.credentials)
        self.docs_service = build('docs', 'v1', credentials=self.credentials)

    @staticmethod
    def _escape_query_value(value: str) -> str:
        """Escape backslashes and single quotes for a Drive ``q`` string literal."""
        return str(value).replace("\\", "\\\\").replace("'", "\\'")

    def list_documents_in_folder(self, folder_id: str) -> List[Dict[str, str]]:
        """List all Google Docs directly inside a folder, newest first.

        Follows ``nextPageToken`` until the listing is exhausted, so folders
        holding more documents than fit in one result page are returned in
        full.

        Args:
            folder_id: ID of the Drive folder to list.

        Returns:
            One dict per document with keys ``id``, ``name`` and ``modified``
            (``'Unknown'`` when the API omits ``modifiedTime``).

        Raises:
            Exception: With a troubleshooting message on 404/403, or a generic
                message for any other failure.
        """
        try:
            # The folder ID is embedded in a quoted literal; escape it so a
            # stray quote cannot break or alter the query.
            query = (
                f"'{self._escape_query_value(folder_id)}' in parents"
                " and mimeType='application/vnd.google-apps.document'"
                " and trashed=false"
            )
            documents: List[Dict[str, str]] = []
            page_token = None
            while True:
                response = self.drive_service.files().list(
                    q=query,
                    # nextPageToken must be requested explicitly because a
                    # fields mask is in use.
                    fields="nextPageToken, files(id, name, modifiedTime)",
                    orderBy="modifiedTime desc",
                    pageToken=page_token,
                ).execute()
                documents.extend(
                    {
                        'id': item['id'],
                        'name': item['name'],
                        'modified': item.get('modifiedTime', 'Unknown'),
                    }
                    for item in response.get('files', [])
                )
                page_token = response.get('nextPageToken')
                if not page_token:
                    return documents
        except HttpError as e:
            if e.resp.status == 404:
                raise Exception(
                    "Folder not found. Please check:\n"
                    "1. The folder ID is correct\n"
                    "2. The folder exists in Google Drive"
                ) from e
            elif e.resp.status == 403:
                raise Exception(
                    "Permission denied. Please ensure:\n"
                    "1. The folder is shared with your service account\n"
                    "2. Service account email has at least 'Viewer' access\n"
                    "3. Check your GOOGLE_DRIVE_FOLDER_ID in environment variables"
                ) from e
            else:
                raise Exception(f"Error accessing Google Drive: {str(e)}") from e
        except Exception as e:
            raise Exception(f"Error listing documents in folder: {str(e)}") from e

    def get_document_content(self, document_id: str) -> str:
        """Fetch a Google Doc and return its body as plain text.

        Args:
            document_id: ID of the Google Docs document.

        Returns:
            The concatenated text of the document body, stripped of leading
            and trailing whitespace.

        Raises:
            Exception: With a troubleshooting message on 404/403, or a generic
                message for any other failure.
        """
        try:
            document = self.docs_service.documents().get(documentId=document_id).execute()
            return self._extract_text(document)
        except HttpError as e:
            if e.resp.status == 404:
                raise Exception(
                    f"Document not found. Please check the document ID: {document_id}"
                ) from e
            elif e.resp.status == 403:
                raise Exception(
                    "Permission denied. Please ensure:\n"
                    "1. The document is shared with your service account\n"
                    "2. The service account has at least 'Viewer' access\n"
                    "3. The document is not private/restricted"
                ) from e
            else:
                raise Exception(f"Error reading document: {str(e)}") from e
        except Exception as e:
            raise Exception(f"Error reading document: {str(e)}") from e

    def _extract_text(self, document: dict) -> str:
        """Extract plain text from a Docs API document structure.

        Args:
            document: Document resource as returned by ``documents().get``.

        Returns:
            Text of all paragraphs and table cells in document order, with
            surrounding whitespace stripped. Empty string when there is no
            body content.
        """
        parts: List[str] = []
        self._collect_text(document.get('body', {}).get('content', []), parts)
        return ''.join(parts).strip()

    def _collect_text(self, elements: list, parts: List[str]) -> None:
        """Append the text runs of ``elements`` to ``parts``, recursing into tables."""
        for element in elements:
            if 'paragraph' in element:
                for run in element['paragraph'].get('elements', []):
                    if 'textRun' in run:
                        parts.append(run['textRun'].get('content', ''))
            elif 'table' in element:
                for row in element['table'].get('tableRows', []):
                    for cell in row.get('tableCells', []):
                        # Cell content is itself a list of structural
                        # elements, so nested tables are handled here too.
                        self._collect_text(cell.get('content', []), parts)

    def get_document_metadata(self, document_id: str) -> Dict[str, str]:
        """Get basic Drive metadata for a document.

        Args:
            document_id: ID of the file in Google Drive.

        Returns:
            Dict with keys ``id``, ``name``, ``modified``, ``created`` and
            ``url``; missing timestamps become ``'Unknown'`` and a missing
            link becomes ``''``.

        Raises:
            Exception: With a specific message on 404/403, or a generic
                message for any other failure.
        """
        try:
            file = self.drive_service.files().get(
                fileId=document_id,
                fields="id, name, modifiedTime, createdTime, webViewLink",
            ).execute()
            return {
                'id': file['id'],
                'name': file['name'],
                'modified': file.get('modifiedTime', 'Unknown'),
                'created': file.get('createdTime', 'Unknown'),
                'url': file.get('webViewLink', ''),
            }
        except HttpError as e:
            if e.resp.status == 404:
                raise Exception(f"Document not found: {document_id}") from e
            elif e.resp.status == 403:
                raise Exception(f"Permission denied for document: {document_id}") from e
            else:
                raise Exception(f"Error getting document metadata: {str(e)}") from e
        except Exception as e:
            raise Exception(f"Error getting document metadata: {str(e)}") from e