Redfire-1234 committed on
Commit
5c5f6f8
·
verified ·
1 Parent(s): 9a99896

Update app/services/google_drive.py

Browse files
Files changed (1) hide show
  1. app/services/google_drive.py +52 -97
app/services/google_drive.py CHANGED
@@ -2,12 +2,14 @@ from google.oauth2 import service_account
2
  from googleapiclient.discovery import build
3
  from googleapiclient.errors import HttpError
4
  from typing import List, Dict
5
- from app.services.google_docs import GoogleDocsReader
6
 
7
  class GoogleDriveService:
8
  def __init__(self, credentials_dict: dict):
9
  """
10
- Initialize with credentials dictionary (decoded from base64)
 
 
 
11
  """
12
  self.credentials = service_account.Credentials.from_service_account_info(
13
  credentials_dict,
@@ -17,7 +19,7 @@ class GoogleDriveService:
17
  ]
18
  )
19
  self.drive_service = build('drive', 'v3', credentials=self.credentials)
20
- self.docs_reader = GoogleDocsReader(credentials_dict)
21
 
22
  def list_documents_in_folder(self, folder_id: str) -> List[Dict[str, str]]:
23
  """List all Google Docs in a folder"""
@@ -40,7 +42,7 @@ class GoogleDriveService:
40
  }
41
  for file in files
42
  ]
43
-
44
  except HttpError as e:
45
  if e.resp.status == 404:
46
  raise Exception(
@@ -53,7 +55,7 @@ class GoogleDriveService:
53
  f"Permission denied. Please ensure:\n"
54
  f"1. The folder is shared with your service account\n"
55
  f"2. Service account email has at least 'Viewer' access\n"
56
- f"3. Check your GOOGLE_DRIVE_FOLDER_ID in .env"
57
  )
58
  else:
59
  raise Exception(f"Error accessing Google Drive: {str(e)}")
@@ -62,7 +64,49 @@ class GoogleDriveService:
62
 
63
  def get_document_content(self, document_id: str) -> str:
64
  """Get content of a specific document"""
65
- return self.docs_reader.read_document(document_id)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
 
67
  def get_document_metadata(self, document_id: str) -> Dict[str, str]:
68
  """Get metadata for a document"""
@@ -79,7 +123,7 @@ class GoogleDriveService:
79
  'created': file.get('createdTime', 'Unknown'),
80
  'url': file.get('webViewLink', '')
81
  }
82
-
83
  except HttpError as e:
84
  if e.resp.status == 404:
85
  raise Exception(f"Document not found: {document_id}")
@@ -88,93 +132,4 @@ class GoogleDriveService:
88
  else:
89
  raise Exception(f"Error getting document metadata: {str(e)}")
90
  except Exception as e:
91
- raise Exception(f"Error getting document metadata: {str(e)}")
92
-
93
- # from google.oauth2 import service_account
94
- # from googleapiclient.discovery import build
95
- # from googleapiclient.errors import HttpError
96
- # from typing import List, Dict
97
- # from app.services.google_docs import GoogleDocsReader
98
-
99
- # class GoogleDriveService:
100
- # def __init__(self, credentials_path: str):
101
- # self.credentials = service_account.Credentials.from_service_account_file(
102
- # credentials_path,
103
- # scopes=[
104
- # 'https://www.googleapis.com/auth/drive.readonly',
105
- # 'https://www.googleapis.com/auth/documents.readonly'
106
- # ]
107
- # )
108
- # self.drive_service = build('drive', 'v3', credentials=self.credentials)
109
- # self.docs_reader = GoogleDocsReader(credentials_path)
110
-
111
- # def list_documents_in_folder(self, folder_id: str) -> List[Dict[str, str]]:
112
- # """List all Google Docs in a folder"""
113
- # try:
114
- # query = f"'{folder_id}' in parents and mimeType='application/vnd.google-apps.document' and trashed=false"
115
-
116
- # results = self.drive_service.files().list(
117
- # q=query,
118
- # fields="files(id, name, modifiedTime)",
119
- # orderBy="modifiedTime desc"
120
- # ).execute()
121
-
122
- # files = results.get('files', [])
123
-
124
- # return [
125
- # {
126
- # 'id': file['id'],
127
- # 'name': file['name'],
128
- # 'modified': file.get('modifiedTime', 'Unknown')
129
- # }
130
- # for file in files
131
- # ]
132
-
133
- # except HttpError as e:
134
- # if e.resp.status == 404:
135
- # raise Exception(
136
- # f"Folder not found. Please check:\n"
137
- # f"1. The folder ID is correct\n"
138
- # f"2. The folder exists in Google Drive"
139
- # )
140
- # elif e.resp.status == 403:
141
- # raise Exception(
142
- # f"Permission denied. Please ensure:\n"
143
- # f"1. The folder is shared with your service account\n"
144
- # f"2. Service account email has at least 'Viewer' access\n"
145
- # f"3. Check your GOOGLE_DRIVE_FOLDER_ID in .env"
146
- # )
147
- # else:
148
- # raise Exception(f"Error accessing Google Drive: {str(e)}")
149
- # except Exception as e:
150
- # raise Exception(f"Error listing documents in folder: {str(e)}")
151
-
152
- # def get_document_content(self, document_id: str) -> str:
153
- # """Get content of a specific document"""
154
- # return self.docs_reader.read_document(document_id)
155
-
156
- # def get_document_metadata(self, document_id: str) -> Dict[str, str]:
157
- # """Get metadata for a document"""
158
- # try:
159
- # file = self.drive_service.files().get(
160
- # fileId=document_id,
161
- # fields="id, name, modifiedTime, createdTime, webViewLink"
162
- # ).execute()
163
-
164
- # return {
165
- # 'id': file['id'],
166
- # 'name': file['name'],
167
- # 'modified': file.get('modifiedTime', 'Unknown'),
168
- # 'created': file.get('createdTime', 'Unknown'),
169
- # 'url': file.get('webViewLink', '')
170
- # }
171
-
172
- # except HttpError as e:
173
- # if e.resp.status == 404:
174
- # raise Exception(f"Document not found: {document_id}")
175
- # elif e.resp.status == 403:
176
- # raise Exception(f"Permission denied for document: {document_id}")
177
- # else:
178
- # raise Exception(f"Error getting document metadata: {str(e)}")
179
- # except Exception as e:
180
- # raise Exception(f"Error getting document metadata: {str(e)}")
 
2
  from googleapiclient.discovery import build
3
  from googleapiclient.errors import HttpError
4
  from typing import List, Dict
 
5
 
6
  class GoogleDriveService:
7
  def __init__(self, credentials_dict: dict):
8
  """
9
+ Initialize with credentials dictionary (works with both file and env variable)
10
+
11
+ Args:
12
+ credentials_dict: Google service account credentials as dictionary
13
  """
14
  self.credentials = service_account.Credentials.from_service_account_info(
15
  credentials_dict,
 
19
  ]
20
  )
21
  self.drive_service = build('drive', 'v3', credentials=self.credentials)
22
+ self.docs_service = build('docs', 'v1', credentials=self.credentials)
23
 
24
  def list_documents_in_folder(self, folder_id: str) -> List[Dict[str, str]]:
25
  """List all Google Docs in a folder"""
 
42
  }
43
  for file in files
44
  ]
45
+
46
  except HttpError as e:
47
  if e.resp.status == 404:
48
  raise Exception(
 
55
  f"Permission denied. Please ensure:\n"
56
  f"1. The folder is shared with your service account\n"
57
  f"2. Service account email has at least 'Viewer' access\n"
58
+ f"3. Check your GOOGLE_DRIVE_FOLDER_ID in environment variables"
59
  )
60
  else:
61
  raise Exception(f"Error accessing Google Drive: {str(e)}")
 
64
 
65
  def get_document_content(self, document_id: str) -> str:
66
  """Get content of a specific document"""
67
+ try:
68
+ document = self.docs_service.documents().get(documentId=document_id).execute()
69
+ return self._extract_text(document)
70
+ except HttpError as e:
71
+ if e.resp.status == 404:
72
+ raise Exception(f"Document not found. Please check the document ID: {document_id}")
73
+ elif e.resp.status == 403:
74
+ raise Exception(
75
+ f"Permission denied. Please ensure:\n"
76
+ f"1. The document is shared with your service account\n"
77
+ f"2. The service account has at least 'Viewer' access\n"
78
+ f"3. The document is not private/restricted"
79
+ )
80
+ else:
81
+ raise Exception(f"Error reading document: {str(e)}")
82
+ except Exception as e:
83
+ raise Exception(f"Error reading document: {str(e)}")
84
+
85
+ def _extract_text(self, document: dict) -> str:
86
+ """Extract plain text from document structure"""
87
+ text_parts = []
88
+
89
+ content = document.get('body', {}).get('content', [])
90
+
91
+ for element in content:
92
+ if 'paragraph' in element:
93
+ paragraph = element['paragraph']
94
+ for text_element in paragraph.get('elements', []):
95
+ if 'textRun' in text_element:
96
+ text_parts.append(text_element['textRun']['content'])
97
+
98
+ elif 'table' in element:
99
+ table = element['table']
100
+ for row in table.get('tableRows', []):
101
+ for cell in row.get('tableCells', []):
102
+ for cell_content in cell.get('content', []):
103
+ if 'paragraph' in cell_content:
104
+ paragraph = cell_content['paragraph']
105
+ for text_element in paragraph.get('elements', []):
106
+ if 'textRun' in text_element:
107
+ text_parts.append(text_element['textRun']['content'])
108
+
109
+ return ''.join(text_parts).strip()
110
 
111
  def get_document_metadata(self, document_id: str) -> Dict[str, str]:
112
  """Get metadata for a document"""
 
123
  'created': file.get('createdTime', 'Unknown'),
124
  'url': file.get('webViewLink', '')
125
  }
126
+
127
  except HttpError as e:
128
  if e.resp.status == 404:
129
  raise Exception(f"Document not found: {document_id}")
 
132
  else:
133
  raise Exception(f"Error getting document metadata: {str(e)}")
134
  except Exception as e:
135
+ raise Exception(f"Error getting document metadata: {str(e)}")