Spaces:

Redfire-1234
/

google-doc-chatbot

Sleeping

File size: 5,892 Bytes

from google.oauth2 import service_account
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from typing import List, Dict

class GoogleDriveService:
    """Read-only access to Google Docs stored in Google Drive.

    Holds two API clients built from the same service-account credentials:
    Drive v3 (folder listing and file metadata) and Docs v1 (document
    bodies). Public methods report failures by raising a plain ``Exception``
    carrying a user-facing message; the underlying error is chained as
    ``__cause__``.
    """

    def __init__(self, credentials_dict: dict):
        """
        Initialize with credentials dictionary (works with both file and env variable)

        Args:
            credentials_dict: Google service account credentials as dictionary
        """
        # Only read-only scopes are requested: this service never writes.
        self.credentials = service_account.Credentials.from_service_account_info(
            credentials_dict,
            scopes=[
                'https://www.googleapis.com/auth/drive.readonly',
                'https://www.googleapis.com/auth/documents.readonly',
            ],
        )
        self.drive_service = build('drive', 'v3', credentials=self.credentials)
        self.docs_service = build('docs', 'v1', credentials=self.credentials)

    def list_documents_in_folder(self, folder_id: str) -> List[Dict[str, str]]:
        """List all Google Docs directly inside a folder, newest first.

        Args:
            folder_id: ID of the Drive folder to list.

        Returns:
            One dict per document with keys ``id``, ``name`` and ``modified``
            (``'Unknown'`` when Drive returns no modification time).

        Raises:
            Exception: On any failure, with a message tailored to HTTP 404
                (folder not found) and 403 (permission denied).
        """
        try:
            # Backslashes and single quotes must be escaped inside a Drive
            # query string literal; otherwise such an ID breaks the query.
            escaped_id = str(folder_id).replace('\\', '\\\\').replace("'", "\\'")
            query = (
                f"'{escaped_id}' in parents "
                "and mimeType='application/vnd.google-apps.document' "
                "and trashed=false"
            )

            # Drive returns results one page at a time; keep requesting pages
            # until no continuation token comes back so large folders are
            # not silently truncated.
            files = []
            page_token = None
            while True:
                results = self.drive_service.files().list(
                    q=query,
                    fields="nextPageToken, files(id, name, modifiedTime)",
                    orderBy="modifiedTime desc",
                    pageToken=page_token,
                ).execute()
                files.extend(results.get('files', []))
                page_token = results.get('nextPageToken')
                if not page_token:
                    break

            return [
                {
                    'id': file['id'],
                    'name': file['name'],
                    'modified': file.get('modifiedTime', 'Unknown'),
                }
                for file in files
            ]

        except HttpError as e:
            if e.resp.status == 404:
                raise Exception(
                    "Folder not found. Please check:\n"
                    "1. The folder ID is correct\n"
                    "2. The folder exists in Google Drive"
                ) from e
            elif e.resp.status == 403:
                raise Exception(
                    "Permission denied. Please ensure:\n"
                    "1. The folder is shared with your service account\n"
                    "2. Service account email has at least 'Viewer' access\n"
                    "3. Check your GOOGLE_DRIVE_FOLDER_ID in environment variables"
                ) from e
            else:
                raise Exception(f"Error accessing Google Drive: {str(e)}") from e
        except Exception as e:
            raise Exception(f"Error listing documents in folder: {str(e)}") from e

    def get_document_content(self, document_id: str) -> str:
        """Fetch a document through the Docs API and return its plain text.

        Args:
            document_id: ID of the Google Doc to read.

        Returns:
            The document text as produced by ``_extract_text``.

        Raises:
            Exception: On any failure, with a message tailored to HTTP 404
                (document not found) and 403 (permission denied).
        """
        try:
            document = self.docs_service.documents().get(documentId=document_id).execute()
            return self._extract_text(document)
        except HttpError as e:
            if e.resp.status == 404:
                raise Exception(
                    f"Document not found. Please check the document ID: {document_id}"
                ) from e
            elif e.resp.status == 403:
                raise Exception(
                    "Permission denied. Please ensure:\n"
                    "1. The document is shared with your service account\n"
                    "2. The service account has at least 'Viewer' access\n"
                    "3. The document is not private/restricted"
                ) from e
            else:
                raise Exception(f"Error reading document: {str(e)}") from e
        except Exception as e:
            raise Exception(f"Error reading document: {str(e)}") from e

    def _extract_text(self, document: dict) -> str:
        """Extract plain text from document structure.

        Args:
            document: Document resource dict; text is read from
                ``document['body']['content']``. A missing body or content
                yields an empty string.

        Returns:
            The text of all paragraphs and table cells (including tables
            nested inside cells) concatenated in document order, with
            leading and trailing whitespace stripped.
        """
        text_parts = []
        self._collect_text(document.get('body', {}).get('content', []), text_parts)
        return ''.join(text_parts).strip()

    def _collect_text(self, elements: list, text_parts: list) -> None:
        """Append the text of each structural element to ``text_parts``, in order.

        Paragraphs contribute the content of their text runs. Tables are
        walked row by row and cell by cell; a cell's content is itself a list
        of structural elements, so it is handled recursively, which also
        covers tables nested inside cells. Other element kinds are skipped.
        """
        for element in elements:
            if 'paragraph' in element:
                for text_element in element['paragraph'].get('elements', []):
                    if 'textRun' in text_element:
                        # A run without 'content' contributes nothing
                        # instead of raising KeyError.
                        text_parts.append(text_element['textRun'].get('content', ''))
            elif 'table' in element:
                for row in element['table'].get('tableRows', []):
                    for cell in row.get('tableCells', []):
                        self._collect_text(cell.get('content', []), text_parts)

    def get_document_metadata(self, document_id: str) -> Dict[str, str]:
        """Get Drive metadata for a document.

        Args:
            document_id: ID of the file to look up.

        Returns:
            Dict with keys ``id``, ``name``, ``modified``, ``created`` and
            ``url``. Missing timestamps become ``'Unknown'`` and a missing
            link becomes an empty string.

        Raises:
            Exception: On any failure, with a message tailored to HTTP 404
                (document not found) and 403 (permission denied).
        """
        try:
            file = self.drive_service.files().get(
                fileId=document_id,
                fields="id, name, modifiedTime, createdTime, webViewLink",
            ).execute()

            return {
                'id': file['id'],
                'name': file['name'],
                'modified': file.get('modifiedTime', 'Unknown'),
                'created': file.get('createdTime', 'Unknown'),
                'url': file.get('webViewLink', ''),
            }

        except HttpError as e:
            if e.resp.status == 404:
                raise Exception(f"Document not found: {document_id}") from e
            elif e.resp.status == 403:
                raise Exception(f"Permission denied for document: {document_id}") from e
            else:
                raise Exception(f"Error getting document metadata: {str(e)}") from e
        except Exception as e:
            raise Exception(f"Error getting document metadata: {str(e)}") from e