Spaces:
Sleeping
Sleeping
File size: 3,147 Bytes
49adc11 b04d4ab 49adc11 a018299 49adc11 b04d4ab 49adc11 b04d4ab 49adc11 b04d4ab 49adc11 b04d4ab 49adc11 b04d4ab 49adc11 b04d4ab 49adc11 b04d4ab |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 |
from google.oauth2 import service_account
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from typing import Dict
class GoogleDocsReader:
    """Read-only helper around the Google Docs API (v1).

    Builds a ``docs`` service client from service-account credentials and
    exposes helpers for fetching a document's plain text and its title.
    """

    def __init__(self, credentials_dict: dict):
        """Create the Docs API client from a service-account info dictionary.

        Args:
            credentials_dict: Service-account key data as a dictionary. The
                caller is expected to have decoded it already (the original
                note mentions base64) before passing it in.
        """
        self.credentials = service_account.Credentials.from_service_account_info(
            credentials_dict,
            scopes=['https://www.googleapis.com/auth/documents.readonly'],
        )
        self.service = build('docs', 'v1', credentials=self.credentials)

    @staticmethod
    def _iter_text(elements):
        """Yield the text of every text run in ``elements``, in document order.

        Paragraphs contribute their text runs directly. Tables are walked
        row by row and cell by cell, and each cell's content is processed
        recursively so that tables nested inside cells are included too.
        Any other kind of element is ignored.
        """
        for element in elements:
            if 'paragraph' in element:
                for part in element['paragraph'].get('elements', []):
                    if 'textRun' in part:
                        yield part['textRun'].get('content', '')
            elif 'table' in element:
                for row in element['table'].get('tableRows', []):
                    for cell in row.get('tableCells', []):
                        # A cell holds the same kind of element list as the
                        # body, so reuse this walker for its content.
                        yield from GoogleDocsReader._iter_text(
                            cell.get('content', [])
                        )

    def read_document(self, document_id: str) -> str:
        """Read a Google Doc and return its content as plain text.

        Args:
            document_id: ID of the document to fetch.

        Returns:
            The concatenated text of all text runs found in body paragraphs
            and tables (including nested tables). Empty string when the
            document has no body content.

        Raises:
            Exception: With a "not found" message on HTTP 404, a
                permission hint on HTTP 403, and a generic
                "Error reading document" message for any other failure.
                The original error is attached as ``__cause__``.
        """
        try:
            document = self.service.documents().get(documentId=document_id).execute()
            body_elements = document.get('body', {}).get('content', [])
            # Joining inside the try block keeps extraction errors wrapped
            # the same way as API errors.
            return ''.join(self._iter_text(body_elements))
        except HttpError as e:
            if e.resp.status == 404:
                raise Exception(f"Document not found: {document_id}") from e
            elif e.resp.status == 403:
                raise Exception(
                    f"Permission denied. Please ensure:\n"
                    f"1. The document is shared with your service account\n"
                    f"2. Service account has at least 'Viewer' access"
                ) from e
            else:
                raise Exception(f"Error reading document: {str(e)}") from e
        except Exception as e:
            raise Exception(f"Error reading document: {str(e)}") from e

    def get_document_title(self, document_id: str) -> str:
        """Return the title of a Google Doc.

        Args:
            document_id: ID of the document to fetch.

        Returns:
            The document's ``title`` field, or ``'Untitled'`` when the
            response has no title.

        Raises:
            Exception: With an "Error getting document title" message when
                the request fails; the original error is attached as
                ``__cause__``.
        """
        try:
            document = self.service.documents().get(documentId=document_id).execute()
            return document.get('title', 'Untitled')
        except Exception as e:
            raise Exception(f"Error getting document title: {str(e)}") from e