Spaces:
Sleeping
Sleeping
from typing import Any, Dict, Iterable, Iterator

from google.oauth2 import service_account
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
class GoogleDocsReader:
    """Read-only helper that pulls plain text and titles out of Google Docs.

    The reader authenticates with a service account and talks to the
    Google Docs API (v1) through the discovery-based client.
    """

    def __init__(self, credentials_dict: dict):
        """Build the Docs API client from service-account credentials.

        Args:
            credentials_dict: Service-account info as a dictionary. It is
                passed unchanged to ``from_service_account_info``; callers
                are expected to have decoded it already (e.g. from base64).
        """
        self.credentials = service_account.Credentials.from_service_account_info(
            credentials_dict,
            scopes=['https://www.googleapis.com/auth/documents.readonly'],
        )
        self.service = build('docs', 'v1', credentials=self.credentials)

    @classmethod
    def _iter_text(cls, elements: Iterable[Dict[str, Any]]) -> Iterator[str]:
        """Yield the text of every text run found in *elements*, in order.

        Paragraphs contribute their ``textRun`` contents. Tables are walked
        row by row and cell by cell, and each cell's content is processed
        recursively so that tables nested inside cells are included too.
        Elements that are neither paragraphs nor tables are skipped.
        """
        for element in elements:
            if 'paragraph' in element:
                for text_element in element['paragraph'].get('elements', []):
                    if 'textRun' in text_element:
                        yield text_element['textRun'].get('content', '')
            elif 'table' in element:
                for row in element['table'].get('tableRows', []):
                    for cell in row.get('tableCells', []):
                        # A cell holds the same kind of elements as the
                        # document body, so reuse the same walk.
                        yield from cls._iter_text(cell.get('content', []))

    def read_document(self, document_id: str) -> str:
        """Read the content of a Google Doc and return it as plain text.

        Args:
            document_id: ID of the document to fetch.

        Returns:
            The concatenated text of all paragraphs and table cells, in
            document order. An empty string if the document has no body
            content.

        Raises:
            Exception: If the document is not found (HTTP 404), access is
                denied (HTTP 403), or any other error occurs while fetching
                or parsing. The original error is attached as ``__cause__``.
        """
        try:
            document = self.service.documents().get(documentId=document_id).execute()
            content = document.get('body', {}).get('content', [])
            # The generator is lazy, so it must be consumed inside the
            # ``try`` for parsing errors to be wrapped like fetch errors.
            return ''.join(self._iter_text(content))
        except HttpError as e:
            if e.resp.status == 404:
                raise Exception(f"Document not found: {document_id}") from e
            elif e.resp.status == 403:
                raise Exception(
                    f"Permission denied. Please ensure:\n"
                    f"1. The document is shared with your service account\n"
                    f"2. Service account has at least 'Viewer' access"
                ) from e
            else:
                raise Exception(f"Error reading document: {str(e)}") from e
        except Exception as e:
            raise Exception(f"Error reading document: {str(e)}") from e

    def get_document_title(self, document_id: str) -> str:
        """Get the title of a Google Doc.

        Args:
            document_id: ID of the document to fetch.

        Returns:
            The document title, or ``'Untitled'`` when the response carries
            no ``title`` field.

        Raises:
            Exception: If fetching the document fails for any reason. The
                original error is attached as ``__cause__``.
        """
        try:
            document = self.service.documents().get(documentId=document_id).execute()
            return document.get('title', 'Untitled')
        except Exception as e:
            raise Exception(f"Error getting document title: {str(e)}") from e