# google-doc-chatbot/app/services/google_drive.py
# Redfire-1234's picture
# Update app/services/google_drive.py
# 5c5f6f8 verified
from google.oauth2 import service_account
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from typing import List, Dict
class GoogleDriveService:
    """Read-only helper around the Google Drive v3 and Google Docs v1 APIs.

    The service authenticates with a service account and exposes helpers to
    list the Google Docs in a folder, read a document's plain text and fetch
    basic file metadata. All failures are surfaced as ``Exception`` with a
    human-readable message; the underlying error is attached as the cause.
    """

    # MIME type used in Drive queries to select native Google Docs only.
    _GOOGLE_DOC_MIME_TYPE = 'application/vnd.google-apps.document'

    def __init__(self, credentials_dict: dict):
        """
        Initialize with credentials dictionary (works with both file and env variable)

        Args:
            credentials_dict: Google service account credentials as dictionary
        """
        self.credentials = service_account.Credentials.from_service_account_info(
            credentials_dict,
            scopes=[
                'https://www.googleapis.com/auth/drive.readonly',
                'https://www.googleapis.com/auth/documents.readonly',
            ],
        )
        self.drive_service = build('drive', 'v3', credentials=self.credentials)
        self.docs_service = build('docs', 'v1', credentials=self.credentials)

    @staticmethod
    def _escape_query_value(value: str) -> str:
        """Escape a value for use inside a single-quoted Drive query literal.

        Backslashes are doubled first, then single quotes are prefixed with a
        backslash, so the value cannot terminate the surrounding literal.
        """
        return value.replace('\\', '\\\\').replace("'", "\\'")

    def list_documents_in_folder(self, folder_id: str) -> List[Dict[str, str]]:
        """List all Google Docs in a folder, most recently modified first.

        Args:
            folder_id: ID of the Drive folder to search.

        Returns:
            One dict per document with the keys ``id``, ``name`` and
            ``modified`` (``'Unknown'`` when the API omits ``modifiedTime``).

        Raises:
            Exception: If the folder is missing (404), access is denied (403)
                or any other error occurs while listing.
        """
        try:
            query = (
                f"'{self._escape_query_value(folder_id)}' in parents"
                f" and mimeType='{self._GOOGLE_DOC_MIME_TYPE}'"
                f" and trashed=false"
            )
            files = []
            # nextPageToken must be requested explicitly, otherwise
            # list_next() cannot tell that further pages exist.
            request = self.drive_service.files().list(
                q=query,
                fields="nextPageToken, files(id, name, modifiedTime)",
                orderBy="modifiedTime desc",
            )
            # Follow pagination so large folders are not silently truncated
            # to the first page of results.
            while request is not None:
                response = request.execute()
                files.extend(response.get('files', []))
                request = self.drive_service.files().list_next(request, response)

            return [
                {
                    'id': file['id'],
                    'name': file['name'],
                    'modified': file.get('modifiedTime', 'Unknown'),
                }
                for file in files
            ]
        except HttpError as e:
            if e.resp.status == 404:
                raise Exception(
                    "Folder not found. Please check:\n"
                    "1. The folder ID is correct\n"
                    "2. The folder exists in Google Drive"
                ) from e
            elif e.resp.status == 403:
                raise Exception(
                    "Permission denied. Please ensure:\n"
                    "1. The folder is shared with your service account\n"
                    "2. Service account email has at least 'Viewer' access\n"
                    "3. Check your GOOGLE_DRIVE_FOLDER_ID in environment variables"
                ) from e
            else:
                raise Exception(f"Error accessing Google Drive: {str(e)}") from e
        except Exception as e:
            raise Exception(f"Error listing documents in folder: {str(e)}") from e

    def get_document_content(self, document_id: str) -> str:
        """Get the plain-text content of a specific document.

        Args:
            document_id: ID of the Google Doc to read.

        Returns:
            The document text as produced by ``_extract_text``.

        Raises:
            Exception: If the document is missing (404), access is denied
                (403) or any other error occurs while reading.
        """
        try:
            document = self.docs_service.documents().get(documentId=document_id).execute()
            return self._extract_text(document)
        except HttpError as e:
            if e.resp.status == 404:
                raise Exception(
                    f"Document not found. Please check the document ID: {document_id}"
                ) from e
            elif e.resp.status == 403:
                raise Exception(
                    "Permission denied. Please ensure:\n"
                    "1. The document is shared with your service account\n"
                    "2. The service account has at least 'Viewer' access\n"
                    "3. The document is not private/restricted"
                ) from e
            else:
                raise Exception(f"Error reading document: {str(e)}") from e
        except Exception as e:
            raise Exception(f"Error reading document: {str(e)}") from e

    def _extract_text(self, document: dict) -> str:
        """Extract plain text from document structure.

        Walks ``document['body']['content']`` in order, collecting the text
        runs of paragraphs and of table cells (including tables nested inside
        cells). Other structural elements are ignored.

        Args:
            document: Document resource as returned by the Docs API.

        Returns:
            The concatenated text with leading/trailing whitespace stripped;
            an empty string when the document has no body content.
        """
        text_parts: List[str] = []
        self._collect_text(document.get('body', {}).get('content', []), text_parts)
        return ''.join(text_parts).strip()

    def _collect_text(self, elements: list, text_parts: List[str]) -> None:
        """Append the text of ``elements`` (paragraphs and tables) to ``text_parts``."""
        for element in elements:
            if 'paragraph' in element:
                for text_element in element['paragraph'].get('elements', []):
                    if 'textRun' in text_element:
                        # Tolerate a text run without 'content' instead of
                        # failing the whole extraction with KeyError.
                        text_parts.append(text_element['textRun'].get('content', ''))
            elif 'table' in element:
                for row in element['table'].get('tableRows', []):
                    for cell in row.get('tableCells', []):
                        # Cell content has the same shape as body content,
                        # so recurse to pick up nested tables as well.
                        self._collect_text(cell.get('content', []), text_parts)

    def get_document_metadata(self, document_id: str) -> Dict[str, str]:
        """Get metadata for a document.

        Args:
            document_id: ID of the Drive file to inspect.

        Returns:
            Dict with the keys ``id``, ``name``, ``modified``, ``created``
            and ``url``. Missing timestamps default to ``'Unknown'`` and a
            missing link to an empty string.

        Raises:
            Exception: If the document is missing (404), access is denied
                (403) or any other error occurs.
        """
        try:
            file = self.drive_service.files().get(
                fileId=document_id,
                fields="id, name, modifiedTime, createdTime, webViewLink",
            ).execute()
            return {
                'id': file['id'],
                'name': file['name'],
                'modified': file.get('modifiedTime', 'Unknown'),
                'created': file.get('createdTime', 'Unknown'),
                'url': file.get('webViewLink', ''),
            }
        except HttpError as e:
            if e.resp.status == 404:
                raise Exception(f"Document not found: {document_id}") from e
            elif e.resp.status == 403:
                raise Exception(f"Permission denied for document: {document_id}") from e
            else:
                raise Exception(f"Error getting document metadata: {str(e)}") from e
        except Exception as e:
            raise Exception(f"Error getting document metadata: {str(e)}") from e