Ephraimmm committed on
Commit
3572cae
·
verified ·
1 Parent(s): 8b2aa1d

Upload 10 files

Browse files
RAG/.gradio/certificate.pem ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ -----BEGIN CERTIFICATE-----
2
+ MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
3
+ TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
4
+ cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
5
+ WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
6
+ ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
7
+ MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
8
+ h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
9
+ 0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
10
+ A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
11
+ T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
12
+ B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
13
+ B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
14
+ KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
15
+ OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
16
+ jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
17
+ qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
18
+ rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
19
+ HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
20
+ hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
21
+ ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
22
+ 3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
23
+ NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
24
+ ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
25
+ TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
26
+ jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
27
+ oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
28
+ 4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
29
+ mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
30
+ emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
31
+ -----END CERTIFICATE-----
RAG/.ipynb_checkpoints/New try-checkpoint.ipynb ADDED
@@ -0,0 +1,692 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 34,
6
+ "id": "e549bafd-78b1-4a83-80b4-2cb597efff79",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "import os\n",
11
+ "import json\n",
12
+ "from google.oauth2 import service_account\n",
13
+ "from googleapiclient.discovery import build\n",
14
+ "from googleapiclient.http import MediaIoBaseDownload\n",
15
+ "import openai\n",
16
+ "from dotenv import load_dotenv\n",
17
+ "import io"
18
+ ]
19
+ },
20
+ {
21
+ "cell_type": "code",
22
+ "execution_count": 35,
23
+ "id": "04361235-7896-4439-9d04-1400e043528b",
24
+ "metadata": {},
25
+ "outputs": [
26
+ {
27
+ "data": {
28
+ "text/plain": [
29
+ "True"
30
+ ]
31
+ },
32
+ "execution_count": 35,
33
+ "metadata": {},
34
+ "output_type": "execute_result"
35
+ }
36
+ ],
37
+ "source": [
38
+ "load_dotenv()"
39
+ ]
40
+ },
41
+ {
42
+ "cell_type": "code",
43
+ "execution_count": 36,
44
+ "id": "7622a0e4-64a6-4848-b588-bd65d56c55e0",
45
+ "metadata": {},
46
+ "outputs": [],
47
+ "source": [
48
+ "from openai import OpenAI\n",
49
+ "openai.api_key = os.getenv('OPENAI_API_KEY')\n",
50
+ "openai = OpenAI(api_key = openai.api_key)"
51
+ ]
52
+ },
53
+ {
54
+ "cell_type": "code",
55
+ "execution_count": 37,
56
+ "id": "b3bcbada-6a72-4cc8-a166-d2e596cd1fc4",
57
+ "metadata": {},
58
+ "outputs": [],
59
+ "source": [
60
+ "service_account_file_path = os.getenv(\"GOOGLE_SERVICE_ACCOUNT_FILE\")"
61
+ ]
62
+ },
63
+ {
64
+ "cell_type": "code",
65
+ "execution_count": 38,
66
+ "id": "81c59a6d-0831-4bff-b3fc-fbe3d4cc1e31",
67
+ "metadata": {},
68
+ "outputs": [
69
+ {
70
+ "name": "stdout",
71
+ "output_type": "stream",
72
+ "text": [
73
+ "openai activated\n",
74
+ "service_account_file_path not activated\n"
75
+ ]
76
+ }
77
+ ],
78
+ "source": [
79
+ "#troubleshoot\n",
80
+ "if openai is None:\n",
81
+ " print(\"openai not activated\")\n",
82
+ "else: \n",
83
+ " print (\"openai activated\")\n",
84
+ "\n",
85
+ "if service_account_file_path is None:\n",
86
+ " print(\"service_account_file_path not activated\")\n",
87
+ "else: \n",
88
+ " print (\"service_account_file_path activated\")"
89
+ ]
90
+ },
91
+ {
92
+ "cell_type": "code",
93
+ "execution_count": null,
94
+ "id": "a70f32aa-9e43-4175-8cca-d6af723aef91",
95
+ "metadata": {},
96
+ "outputs": [],
97
+ "source": []
98
+ },
99
+ {
100
+ "cell_type": "code",
101
+ "execution_count": 12,
102
+ "id": "0efe325d-badd-4533-affe-47d572ef128e",
103
+ "metadata": {},
104
+ "outputs": [],
105
+ "source": [
106
+ "class GPTDriveIntegration:\n",
107
+ " def __init__(self):\n",
108
+ " # Initialize Google Drive API\n",
109
+ " self.credentials = service_account.Credentials.from_service_account_file(\n",
110
+ " os.getenv('GOOGLE_SERVICE_ACCOUNT_FILE'),\n",
111
+ " scopes=['https://www.googleapis.com/auth/drive.readonly']\n",
112
+ " )\n",
113
+ " self.drive_service = build('drive', 'v3', credentials=self.credentials)\n",
114
+ " \n",
115
+ " # Initialize OpenAI\n",
116
+ " openai.api_key = os.getenv('OPENAI_API_KEY')\n",
117
+ " \n",
118
+ " def search_files(self, query, file_types=None):\n",
119
+ " \"\"\"Search for files in Google Drive\"\"\"\n",
120
+ " search_query = f\"name contains '{query}'\"\n",
121
+ " \n",
122
+ " if file_types:\n",
123
+ " type_queries = []\n",
124
+ " for file_type in file_types:\n",
125
+ " if file_type.lower() == 'pdf':\n",
126
+ " type_queries.append(\"mimeType='application/pdf'\")\n",
127
+ " elif file_type.lower() in ['doc', 'docx']:\n",
128
+ " type_queries.append(\"mimeType contains 'document'\")\n",
129
+ " elif file_type.lower() in ['xls', 'xlsx']:\n",
130
+ " type_queries.append(\"mimeType contains 'spreadsheet'\")\n",
131
+ " \n",
132
+ " if type_queries:\n",
133
+ " search_query += f\" and ({' or '.join(type_queries)})\"\n",
134
+ " \n",
135
+ " results = self.drive_service.files().list(\n",
136
+ " q=search_query,\n",
137
+ " fields=\"files(id, name, mimeType, size)\"\n",
138
+ " ).execute()\n",
139
+ " \n",
140
+ " return results.get('files', [])\n",
141
+ " \n",
142
+ " def get_file_content(self, file_id, mime_type):\n",
143
+ " \"\"\"Download and extract text content from file\"\"\"\n",
144
+ " try:\n",
145
+ " if 'text' in mime_type or 'document' in mime_type:\n",
146
+ " # For Google Docs, export as plain text\n",
147
+ " if 'document' in mime_type:\n",
148
+ " request = self.drive_service.files().export_media(\n",
149
+ " fileId=file_id, mimeType='text/plain'\n",
150
+ " )\n",
151
+ " else:\n",
152
+ " request = self.drive_service.files().get_media(fileId=file_id)\n",
153
+ " \n",
154
+ " file_content = io.BytesIO()\n",
155
+ " downloader = MediaIoBaseDownload(file_content, request)\n",
156
+ " done = False\n",
157
+ " while done is False:\n",
158
+ " status, done = downloader.next_chunk()\n",
159
+ " \n",
160
+ " return file_content.getvalue().decode('utf-8')\n",
161
+ " \n",
162
+ " elif 'spreadsheet' in mime_type:\n",
163
+ " # For Google Sheets, export as CSV\n",
164
+ " request = self.drive_service.files().export_media(\n",
165
+ " fileId=file_id, mimeType='text/csv'\n",
166
+ " )\n",
167
+ " file_content = io.BytesIO()\n",
168
+ " downloader = MediaIoBaseDownload(file_content, request)\n",
169
+ " done = False\n",
170
+ " while done is False:\n",
171
+ " status, done = downloader.next_chunk()\n",
172
+ " \n",
173
+ " return file_content.getvalue().decode('utf-8')\n",
174
+ " \n",
175
+ " else:\n",
176
+ " return \"File type not supported for text extraction\"\n",
177
+ " \n",
178
+ " except Exception as e:\n",
179
+ " return f\"Error reading file: {str(e)}\"\n",
180
+ " \n",
181
+ " def query_gpt_with_context(self, user_query, file_contents):\n",
182
+ " \"\"\"Send query to GPT with file context\"\"\"\n",
183
+ " context = \"\\n\\n\".join([\n",
184
+ " f\"File: {content['name']}\\nContent: {content['text'][:2000]}...\"\n",
185
+ " for content in file_contents\n",
186
+ " ])\n",
187
+ " \n",
188
+ " messages = [\n",
189
+ " {\n",
190
+ " \"role\": \"system\", \n",
191
+ " \"content\": \"\"\"\n",
192
+ " You are an AI assistant that can analyze documents from Google Drive. \n",
193
+ " Use the provided file contents to answer user questions.\"\"\"\n",
194
+ " },\n",
195
+ " {\n",
196
+ " \"role\": \"user\", \n",
197
+ " \"content\": f\"Context from Google Drive files:\\n{context}\\n\\nUser Question: {user_query}\"\n",
198
+ " }\n",
199
+ " ]\n",
200
+ " \n",
201
+ " response = openai.ChatCompletion.create(\n",
202
+ " model=\"gpt-4o-mini\",\n",
203
+ " messages=messages,\n",
204
+ " max_tokens=1000\n",
205
+ " )\n",
206
+ " \n",
207
+ " return response.choices[0].message.content\n",
208
+ " \n",
209
+ " def process_query(self, user_query, search_terms=None):\n",
210
+ " \"\"\"Main function to process user queries\"\"\"\n",
211
+ " # Extract search terms from query if not provided\n",
212
+ " if not search_terms:\n",
213
+ " search_terms = user_query.split()[:3] # Simple extraction\n",
214
+ " \n",
215
+ " # Search for relevant files\n",
216
+ " files = []\n",
217
+ " for term in search_terms:\n",
218
+ " files.extend(self.search_files(term))\n",
219
+ " \n",
220
+ " # Remove duplicates\n",
221
+ " unique_files = {f['id']: f for f in files}.values()\n",
222
+ " \n",
223
+ " # Get content from top 3 most relevant files\n",
224
+ " file_contents = []\n",
225
+ " for file in list(unique_files)[:3]:\n",
226
+ " content = self.get_file_content(file['id'], file['mimeType'])\n",
227
+ " file_contents.append({\n",
228
+ " 'name': file['name'],\n",
229
+ " 'text': content\n",
230
+ " })\n",
231
+ " \n",
232
+ " # Query GPT with context\n",
233
+ " if file_contents:\n",
234
+ " response = self.query_gpt_with_context(user_query, file_contents)\n",
235
+ " return {\n",
236
+ " 'answer': response,\n",
237
+ " 'sources': [f['name'] for f in file_contents]\n",
238
+ " }\n",
239
+ " else:\n",
240
+ " return {\n",
241
+ " 'answer': \"No relevant files found in your Google Drive.\",\n",
242
+ " 'sources': []\n",
243
+ " }"
244
+ ]
245
+ },
246
+ {
247
+ "cell_type": "code",
248
+ "execution_count": null,
249
+ "id": "3c2c1ccf-9ade-482d-a170-978e97bc1c08",
250
+ "metadata": {},
251
+ "outputs": [],
252
+ "source": [
253
+ "if __name__ == \"__main__\":\n",
254
+ " integration = GPTDriveIntegration()\n",
255
+ " \n",
256
+ " # Test query\n",
257
+ " result = integration.process_query(\n",
258
+ " \"What are the main points from my meeting notes?\",\n",
259
+ " search_terms=[\"meeting\", \"notes\"]\n",
260
+ " )\n",
261
+ " \n",
262
+ " print(\"Answer:\", result['answer'])\n",
263
+ " print(\"Sources:\", result['sources'])"
264
+ ]
265
+ },
266
+ {
267
+ "cell_type": "code",
268
+ "execution_count": 10,
269
+ "id": "120e7c93-b38a-4c89-8e76-5b0170d22548",
270
+ "metadata": {},
271
+ "outputs": [],
272
+ "source": [
273
+ "GOOGLE_SERVICE_ACCOUNT_FILE= r\"C:\\Users\\Uche Buzz\\myprojects\\RAG\\rag-system-463320-f292991d0516.json\""
274
+ ]
275
+ },
276
+ {
277
+ "cell_type": "code",
278
+ "execution_count": 17,
279
+ "id": "634b9787-d493-46e5-8114-0851c6172ed6",
280
+ "metadata": {},
281
+ "outputs": [
282
+ {
283
+ "name": "stdout",
284
+ "output_type": "stream",
285
+ "text": [
286
+ "❌ GOOGLE_SERVICE_ACCOUNT_FILE environment variable not set\n",
287
+ "💡 Add this to your .env file:\n",
288
+ "GOOGLE_SERVICE_ACCOUNT_FILE=path/to/your/service-account-key.json\n"
289
+ ]
290
+ }
291
+ ],
292
+ "source": [
293
+ "import json\n",
294
+ "from google.auth.exceptions import RefreshError\n",
295
+ "from googleapiclient.errors import HttpError\n",
296
+ "\n",
297
+ "class GPTDriveTroubleshooter:\n",
298
+ " def __init__(self, service_account_file_path):\n",
299
+ " self.service_account_file = service_account_file_path\n",
300
+ " self.credentials = None\n",
301
+ " self.drive_service = None\n",
302
+ " \n",
303
+ " def run_full_diagnostic(self):\n",
304
+ " \"\"\"Run complete diagnostic check\"\"\"\n",
305
+ " print(\"🔍 Starting Google Drive API Diagnostic...\")\n",
306
+ " print(\"=\" * 50)\n",
307
+ " \n",
308
+ " # Step 1: Check service account file\n",
309
+ " if not self.check_service_account_file():\n",
310
+ " return False\n",
311
+ " \n",
312
+ " # Step 2: Check credentials\n",
313
+ " if not self.check_credentials():\n",
314
+ " return False\n",
315
+ " \n",
316
+ " # Step 3: Test API connection\n",
317
+ " if not self.test_api_connection():\n",
318
+ " return False\n",
319
+ " \n",
320
+ " # Step 4: Check permissions\n",
321
+ " if not self.check_basic_permissions():\n",
322
+ " return False\n",
323
+ " \n",
324
+ " # Step 5: Test folder access\n",
325
+ " self.test_folder_access()\n",
326
+ " \n",
327
+ " print(\"\\n✅ All basic checks passed!\")\n",
328
+ " return True\n",
329
+ " \n",
330
+ " def check_service_account_file(self):\n",
331
+ " \"\"\"Check if service account file exists and is valid\"\"\"\n",
332
+ " print(\"\\n1️⃣ Checking service account file...\")\n",
333
+ " \n",
334
+ " if not os.path.exists(self.service_account_file):\n",
335
+ " print(f\"❌ Service account file not found: {self.service_account_file}\")\n",
336
+ " print(\"💡 Make sure you've downloaded the JSON key file from Google Cloud Console\")\n",
337
+ " return False\n",
338
+ " \n",
339
+ " try:\n",
340
+ " with open(self.service_account_file, 'r') as f:\n",
341
+ " service_account_info = json.load(f)\n",
342
+ " \n",
343
+ " required_fields = ['type', 'project_id', 'private_key_id', 'private_key', 'client_email']\n",
344
+ " missing_fields = [field for field in required_fields if field not in service_account_info]\n",
345
+ " \n",
346
+ " if missing_fields:\n",
347
+ " print(f\"❌ Service account file missing required fields: {missing_fields}\")\n",
348
+ " return False\n",
349
+ " \n",
350
+ " print(f\"✅ Service account file is valid\")\n",
351
+ " print(f\" 📧 Service account email: {service_account_info['client_email']}\")\n",
352
+ " print(f\" 🏗️ Project ID: {service_account_info['project_id']}\")\n",
353
+ " \n",
354
+ " return True\n",
355
+ " \n",
356
+ " except json.JSONDecodeError:\n",
357
+ " print(\"❌ Service account file is not valid JSON\")\n",
358
+ " return False\n",
359
+ " except Exception as e:\n",
360
+ " print(f\"❌ Error reading service account file: {e}\")\n",
361
+ " return False\n",
362
+ " \n",
363
+ " def check_credentials(self):\n",
364
+ " \"\"\"Check if credentials can be created\"\"\"\n",
365
+ " print(\"\\n2️⃣ Checking credentials...\")\n",
366
+ " \n",
367
+ " try:\n",
368
+ " self.credentials = service_account.Credentials.from_service_account_file(\n",
369
+ " self.service_account_file,\n",
370
+ " scopes=['https://www.googleapis.com/auth/drive.readonly']\n",
371
+ " )\n",
372
+ " print(\"✅ Credentials created successfully\")\n",
373
+ " return True\n",
374
+ " \n",
375
+ " except Exception as e:\n",
376
+ " print(f\"❌ Failed to create credentials: {e}\")\n",
377
+ " print(\"💡 Check if your service account key file is corrupted\")\n",
378
+ " return False\n",
379
+ " \n",
380
+ " def test_api_connection(self):\n",
381
+ " \"\"\"Test basic API connection\"\"\"\n",
382
+ " print(\"\\n3️⃣ Testing API connection...\")\n",
383
+ " \n",
384
+ " try:\n",
385
+ " self.drive_service = build('drive', 'v3', credentials=self.credentials)\n",
386
+ " \n",
387
+ " # Try a simple API call\n",
388
+ " about = self.drive_service.about().get(fields=\"user, storageQuota\").execute()\n",
389
+ " print(\"✅ Successfully connected to Google Drive API\")\n",
390
+ " print(f\" 👤 Connected as: {about.get('user', {}).get('emailAddress', 'Unknown')}\")\n",
391
+ " \n",
392
+ " return True\n",
393
+ " \n",
394
+ " except HttpError as e:\n",
395
+ " print(f\"❌ HTTP Error connecting to API: {e}\")\n",
396
+ " if e.resp.status == 403:\n",
397
+ " print(\"💡 This is likely a permissions issue - check if Drive API is enabled\")\n",
398
+ " return False\n",
399
+ " except Exception as e:\n",
400
+ " print(f\"❌ Failed to connect to API: {e}\")\n",
401
+ " return False\n",
402
+ " \n",
403
+ " def check_basic_permissions(self):\n",
404
+ " \"\"\"Check basic file listing permissions\"\"\"\n",
405
+ " print(\"\\n4️⃣ Checking basic permissions...\")\n",
406
+ " \n",
407
+ " try:\n",
408
+ " # Try to list files (this should work with readonly access)\n",
409
+ " results = self.drive_service.files().list(\n",
410
+ " pageSize=1,\n",
411
+ " fields=\"files(id, name)\"\n",
412
+ " ).execute()\n",
413
+ " \n",
414
+ " files = results.get('files', [])\n",
415
+ " print(f\"✅ Can access Drive API - found {len(files)} files in test query\")\n",
416
+ " \n",
417
+ " if len(files) == 0:\n",
418
+ " print(\"⚠️ No files found - this might mean:\")\n",
419
+ " print(\" • Service account has no shared files\")\n",
420
+ " print(\" • No files are shared with the service account\")\n",
421
+ " \n",
422
+ " return True\n",
423
+ " \n",
424
+ " except HttpError as e:\n",
425
+ " print(f\"❌ Permission error: {e}\")\n",
426
+ " if e.resp.status == 403:\n",
427
+ " print(\"💡 Common causes:\")\n",
428
+ " print(\" • Google Drive API not enabled in Google Cloud Console\")\n",
429
+ " print(\" • Service account doesn't have proper permissions\")\n",
430
+ " return False\n",
431
+ " except Exception as e:\n",
432
+ " print(f\"❌ Error checking permissions: {e}\")\n",
433
+ " return False\n",
434
+ " \n",
435
+ " def test_folder_access(self, folder_name=\"Blue berry\"):\n",
436
+ " \"\"\"Test access to specific folder\"\"\"\n",
437
+ " print(f\"\\n5️⃣ Testing access to '{folder_name}' folder...\")\n",
438
+ " \n",
439
+ " try:\n",
440
+ " # Search for the folder\n",
441
+ " query = f\"name='{folder_name}' and mimeType='application/vnd.google-apps.folder' and trashed=false\"\n",
442
+ " results = self.drive_service.files().list(\n",
443
+ " q=query,\n",
444
+ " fields=\"files(id, name, owners, permissions)\"\n",
445
+ " ).execute()\n",
446
+ " \n",
447
+ " folders = results.get('files', [])\n",
448
+ " \n",
449
+ " if not folders:\n",
450
+ " print(f\"❌ Folder '{folder_name}' not found or not accessible\")\n",
451
+ " print(\"💡 Possible solutions:\")\n",
452
+ " print(\" • Make sure the folder exists in Google Drive\")\n",
453
+ " print(\" • Share the folder with your service account email\")\n",
454
+ " print(\" • Check folder name spelling (case sensitive)\")\n",
455
+ " return False\n",
456
+ " \n",
457
+ " folder = folders[0]\n",
458
+ " print(f\"✅ Found folder '{folder_name}'\")\n",
459
+ " print(f\" 📁 Folder ID: {folder['id']}\")\n",
460
+ " \n",
461
+ " # Test listing files in the folder\n",
462
+ " files_in_folder = self.drive_service.files().list(\n",
463
+ " q=f\"'{folder['id']}' in parents and trashed=false\",\n",
464
+ " fields=\"files(id, name, mimeType)\"\n",
465
+ " ).execute()\n",
466
+ " \n",
467
+ " files = files_in_folder.get('files', [])\n",
468
+ " print(f\" 📄 Contains {len(files)} files\")\n",
469
+ " \n",
470
+ " if files:\n",
471
+ " print(\" 📝 Sample files:\")\n",
472
+ " for file in files[:3]: # Show first 3 files\n",
473
+ " print(f\" • {file['name']} ({file['mimeType']})\")\n",
474
+ " \n",
475
+ " return True\n",
476
+ " \n",
477
+ " except HttpError as e:\n",
478
+ " print(f\"❌ HTTP Error accessing folder: {e}\")\n",
479
+ " return False\n",
480
+ " except Exception as e:\n",
481
+ " print(f\"❌ Error accessing folder: {e}\")\n",
482
+ " return False\n",
483
+ " \n",
484
+ " def check_file_permissions(self, file_id):\n",
485
+ " \"\"\"Check permissions for a specific file\"\"\"\n",
486
+ " print(f\"\\n🔍 Checking permissions for file ID: {file_id}\")\n",
487
+ " \n",
488
+ " try:\n",
489
+ " file_info = self.drive_service.files().get(\n",
490
+ " fileId=file_id,\n",
491
+ " fields=\"id, name, mimeType, owners, permissions, capabilities\"\n",
492
+ " ).execute()\n",
493
+ " \n",
494
+ " print(f\"✅ File: {file_info['name']}\")\n",
495
+ " print(f\" 🔗 Type: {file_info['mimeType']}\")\n",
496
+ " print(f\" 👤 Owner: {file_info.get('owners', [{}])[0].get('emailAddress', 'Unknown')}\")\n",
497
+ " \n",
498
+ " capabilities = file_info.get('capabilities', {})\n",
499
+ " print(f\" 📖 Can read: {capabilities.get('canDownload', False)}\")\n",
500
+ " print(f\" 📤 Can export: {capabilities.get('canExport', False)}\")\n",
501
+ " \n",
502
+ " except HttpError as e:\n",
503
+ " print(f\"❌ Cannot access file: {e}\")\n",
504
+ " except Exception as e:\n",
505
+ " print(f\"❌ Error: {e}\")\n",
506
+ " \n",
507
+ " def get_sharing_instructions(self):\n",
508
+ " \"\"\"Provide step-by-step sharing instructions\"\"\"\n",
509
+ " print(\"\\n📋 HOW TO SHARE FOLDER WITH SERVICE ACCOUNT:\")\n",
510
+ " print(\"=\" * 50)\n",
511
+ " \n",
512
+ " if self.credentials:\n",
513
+ " service_email = self.credentials.service_account_email\n",
514
+ " print(f\"1. Copy this service account email: {service_email}\")\n",
515
+ " else:\n",
516
+ " print(\"1. Find your service account email in the JSON key file (client_email field)\")\n",
517
+ " \n",
518
+ " print(\"2. Open Google Drive in your browser\")\n",
519
+ " print(\"3. Right-click on your 'Blue berry' folder\")\n",
520
+ " print(\"4. Select 'Share'\")\n",
521
+ " print(\"5. Paste the service account email\")\n",
522
+ " print(\"6. Set permission to 'Viewer' or 'Editor'\")\n",
523
+ " print(\"7. Click 'Send' (you can uncheck 'Notify people')\")\n",
524
+ " print(\"8. Wait a few minutes for permissions to propagate\")\n",
525
+ " \n",
526
+ " def run_connection_test(self):\n",
527
+ " \"\"\"Quick connection test\"\"\"\n",
528
+ " print(\"🚀 Quick Connection Test\")\n",
529
+ " print(\"-\" * 30)\n",
530
+ " \n",
531
+ " try:\n",
532
+ " self.credentials = service_account.Credentials.from_service_account_file(\n",
533
+ " self.service_account_file,\n",
534
+ " scopes=['https://www.googleapis.com/auth/drive.readonly']\n",
535
+ " )\n",
536
+ " \n",
537
+ " service = build('drive', 'v3', credentials=self.credentials)\n",
538
+ " \n",
539
+ " # Test basic query\n",
540
+ " results = service.files().list(pageSize=5).execute()\n",
541
+ " files = results.get('files', [])\n",
542
+ " \n",
543
+ " print(f\"✅ Connected! Found {len(files)} accessible files\")\n",
544
+ " print(f\"📧 Service account: {self.credentials.service_account_email}\")\n",
545
+ " \n",
546
+ " return True\n",
547
+ " \n",
548
+ " except Exception as e:\n",
549
+ " print(f\"❌ Connection failed: {e}\")\n",
550
+ " return False\n",
551
+ "\n",
552
+ "# Usage Examples\n",
553
+ "def troubleshoot_drive_access():\n",
554
+ " \"\"\"Main troubleshooting function\"\"\"\n",
555
+ " service_account_file = os.getenv('GOOGLE_SERVICE_ACCOUNT_FILE')\n",
556
+ " \n",
557
+ " if not service_account_file:\n",
558
+ " print(\"❌ GOOGLE_SERVICE_ACCOUNT_FILE environment variable not set\")\n",
559
+ " print(\"💡 Add this to your .env file:\")\n",
560
+ " print(\"GOOGLE_SERVICE_ACCOUNT_FILE=path/to/your/service-account-key.json\")\n",
561
+ " return\n",
562
+ " \n",
563
+ " troubleshooter = GPTDriveTroubleshooter(service_account_file)\n",
564
+ " \n",
565
+ " # Run full diagnostic\n",
566
+ " success = troubleshooter.run_full_diagnostic()\n",
567
+ " \n",
568
+ " if not success:\n",
569
+ " print(\"\\n\" + \"=\"*50)\n",
570
+ " troubleshooter.get_sharing_instructions()\n",
571
+ " \n",
572
+ " return success\n",
573
+ "\n",
574
+ "# Quick test function\n",
575
+ "def quick_test():\n",
576
+ " \"\"\"Quick test to verify everything works\"\"\"\n",
577
+ " service_account_file = os.getenv('GOOGLE_SERVICE_ACCOUNT_FILE')\n",
578
+ " troubleshooter = GPTDriveTroubleshooter(service_account_file)\n",
579
+ " return troubleshooter.run_connection_test()\n",
580
+ "\n",
581
+ "if __name__ == \"__main__\":\n",
582
+ " # Uncomment the test you want to run:\n",
583
+ " \n",
584
+ " # Full diagnostic (recommended for first-time setup)\n",
585
+ " troubleshoot_drive_access()\n",
586
+ " \n",
587
+ " # Quick test (for regular checks)\n",
588
+ " # quick_test()"
589
+ ]
590
+ },
591
+ {
592
+ "cell_type": "code",
593
+ "execution_count": 13,
594
+ "id": "6b336789-d402-455d-a631-26e987d79ed6",
595
+ "metadata": {},
596
+ "outputs": [
597
+ {
598
+ "ename": "NameError",
599
+ "evalue": "name 'GPTDriveTroubleshooter' is not defined",
600
+ "output_type": "error",
601
+ "traceback": [
602
+ "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
603
+ "\u001b[31mNameError\u001b[39m Traceback (most recent call last)",
604
+ "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[13]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m troubleshooter = \u001b[43mGPTDriveTroubleshooter\u001b[49m(GOOGLE_SERVICE_ACCOUNT_FILE)\n\u001b[32m 2\u001b[39m troubleshooter.test_folder_access(\u001b[33m\"\u001b[39m\u001b[33mBlue berry\u001b[39m\u001b[33m\"\u001b[39m)\n",
605
+ "\u001b[31mNameError\u001b[39m: name 'GPTDriveTroubleshooter' is not defined"
606
+ ]
607
+ }
608
+ ],
609
+ "source": [
610
+ "troubleshooter = GPTDriveTroubleshooter(GOOGLE_SERVICE_ACCOUNT_FILE)\n",
611
+ "troubleshooter.test_folder_access(\"Blue berry\")"
612
+ ]
613
+ },
614
+ {
615
+ "cell_type": "code",
616
+ "execution_count": 6,
617
+ "id": "e2786c67-215e-46b0-b3ca-2c3176bd8971",
618
+ "metadata": {},
619
+ "outputs": [
620
+ {
621
+ "ename": "TypeError",
622
+ "evalue": "expected str, bytes or os.PathLike object, not NoneType",
623
+ "output_type": "error",
624
+ "traceback": [
625
+ "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
626
+ "\u001b[31mTypeError\u001b[39m Traceback (most recent call last)",
627
+ "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[6]\u001b[39m\u001b[32m, line 23\u001b[39m\n\u001b[32m 20\u001b[39m \u001b[38;5;28mprint\u001b[39m(\u001b[33m\"\u001b[39m\u001b[33mMake sure you\u001b[39m\u001b[33m'\u001b[39m\u001b[33mve shared the folder with your service account\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m 22\u001b[39m \u001b[38;5;66;03m# Run this test first\u001b[39;00m\n\u001b[32m---> \u001b[39m\u001b[32m23\u001b[39m \u001b[43mtest_folder_access\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
628
+ "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[6]\u001b[39m\u001b[32m, line 3\u001b[39m, in \u001b[36mtest_folder_access\u001b[39m\u001b[34m()\u001b[39m\n\u001b[32m 2\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mtest_folder_access\u001b[39m():\n\u001b[32m----> \u001b[39m\u001b[32m3\u001b[39m integration = \u001b[43mGPTDriveIntegration\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 4\u001b[39m folder_id = integration.find_folder_by_name(\u001b[33m\"\u001b[39m\u001b[33mBlue berry\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m 5\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m folder_id:\n",
629
+ "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[3]\u001b[39m\u001b[32m, line 4\u001b[39m, in \u001b[36mGPTDriveIntegration.__init__\u001b[39m\u001b[34m(self)\u001b[39m\n\u001b[32m 2\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34m__init__\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[32m 3\u001b[39m \u001b[38;5;66;03m# Initialize Google Drive API\u001b[39;00m\n\u001b[32m----> \u001b[39m\u001b[32m4\u001b[39m \u001b[38;5;28mself\u001b[39m.credentials = \u001b[43mservice_account\u001b[49m\u001b[43m.\u001b[49m\u001b[43mCredentials\u001b[49m\u001b[43m.\u001b[49m\u001b[43mfrom_service_account_file\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 5\u001b[39m \u001b[43m \u001b[49m\u001b[43mos\u001b[49m\u001b[43m.\u001b[49m\u001b[43mgetenv\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mGOOGLE_SERVICE_ACCOUNT_FILE\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 6\u001b[39m \u001b[43m \u001b[49m\u001b[43mscopes\u001b[49m\u001b[43m=\u001b[49m\u001b[43m[\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mhttps://www.googleapis.com/auth/drive.readonly\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m]\u001b[49m\n\u001b[32m 7\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 8\u001b[39m \u001b[38;5;28mself\u001b[39m.drive_service = build(\u001b[33m'\u001b[39m\u001b[33mdrive\u001b[39m\u001b[33m'\u001b[39m, \u001b[33m'\u001b[39m\u001b[33mv3\u001b[39m\u001b[33m'\u001b[39m, credentials=\u001b[38;5;28mself\u001b[39m.credentials)\n\u001b[32m 10\u001b[39m \u001b[38;5;66;03m# Initialize OpenAI\u001b[39;00m\n",
630
+ "\u001b[36mFile \u001b[39m\u001b[32m~\\anaconda3\\envs\\RAG\\Lib\\site-packages\\google\\oauth2\\service_account.py:260\u001b[39m, in \u001b[36mCredentials.from_service_account_file\u001b[39m\u001b[34m(cls, filename, **kwargs)\u001b[39m\n\u001b[32m 248\u001b[39m \u001b[38;5;129m@classmethod\u001b[39m\n\u001b[32m 249\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mfrom_service_account_file\u001b[39m(\u001b[38;5;28mcls\u001b[39m, filename, **kwargs):\n\u001b[32m 250\u001b[39m \u001b[38;5;250m \u001b[39m\u001b[33;03m\"\"\"Creates a Credentials instance from a service account json file.\u001b[39;00m\n\u001b[32m 251\u001b[39m \n\u001b[32m 252\u001b[39m \u001b[33;03m Args:\u001b[39;00m\n\u001b[32m (...)\u001b[39m\u001b[32m 258\u001b[39m \u001b[33;03m credentials.\u001b[39;00m\n\u001b[32m 259\u001b[39m \u001b[33;03m \"\"\"\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m260\u001b[39m info, signer = \u001b[43m_service_account_info\u001b[49m\u001b[43m.\u001b[49m\u001b[43mfrom_filename\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 261\u001b[39m \u001b[43m \u001b[49m\u001b[43mfilename\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrequire\u001b[49m\u001b[43m=\u001b[49m\u001b[43m[\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mclient_email\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mtoken_uri\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\n\u001b[32m 262\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 263\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mcls\u001b[39m._from_signer_and_info(signer, info, **kwargs)\n",
631
+ "\u001b[36mFile \u001b[39m\u001b[32m~\\anaconda3\\envs\\RAG\\Lib\\site-packages\\google\\auth\\_service_account_info.py:78\u001b[39m, in \u001b[36mfrom_filename\u001b[39m\u001b[34m(filename, require, use_rsa_signer)\u001b[39m\n\u001b[32m 64\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mfrom_filename\u001b[39m(filename, require=\u001b[38;5;28;01mNone\u001b[39;00m, use_rsa_signer=\u001b[38;5;28;01mTrue\u001b[39;00m):\n\u001b[32m 65\u001b[39m \u001b[38;5;250m \u001b[39m\u001b[33;03m\"\"\"Reads a Google service account JSON file and returns its parsed info.\u001b[39;00m\n\u001b[32m 66\u001b[39m \n\u001b[32m 67\u001b[39m \u001b[33;03m Args:\u001b[39;00m\n\u001b[32m (...)\u001b[39m\u001b[32m 76\u001b[39m \u001b[33;03m info and a signer instance.\u001b[39;00m\n\u001b[32m 77\u001b[39m \u001b[33;03m \"\"\"\u001b[39;00m\n\u001b[32m---> \u001b[39m\u001b[32m78\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[43mio\u001b[49m\u001b[43m.\u001b[49m\u001b[43mopen\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilename\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mr\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mencoding\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mutf-8\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mas\u001b[39;00m json_file:\n\u001b[32m 79\u001b[39m data = json.load(json_file)\n\u001b[32m 80\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m data, from_dict(data, require=require, use_rsa_signer=use_rsa_signer)\n",
632
+ "\u001b[31mTypeError\u001b[39m: expected str, bytes or os.PathLike object, not NoneType"
633
+ ]
634
+ }
635
+ ],
636
+ "source": [
637
+ "# Add this test function to verify access\n",
638
+ "def test_folder_access():\n",
639
+ " integration = GPTDriveIntegration()\n",
640
+ " folder_id = integration.find_folder_by_name(\"Blue berry\")\n",
641
+ " if folder_id:\n",
642
+ " print(\"✅ Successfully found 'Blue berry' folder!\")\n",
643
+ " \n",
644
+ " # List files in the folder\n",
645
+ " results = integration.drive_service.files().list(\n",
646
+ " q=f\"'{folder_id}' in parents and trashed=false\",\n",
647
+ " fields=\"files(id, name, mimeType)\"\n",
648
+ " ).execute()\n",
649
+ " \n",
650
+ " files = results.get('files', [])\n",
651
+ " print(f\"Found {len(files)} files in the folder:\")\n",
652
+ " for file in files[:5]: # Show first 5 files\n",
653
+ " print(f\" - {file['name']} ({file['mimeType']})\")\n",
654
+ " else:\n",
655
+ " print(\"❌ Could not access 'Blue berry' folder\")\n",
656
+ " print(\"Make sure you've shared the folder with your service account\")\n",
657
+ "\n",
658
+ "# Run this test first\n",
659
+ "test_folder_access()"
660
+ ]
661
+ },
662
+ {
663
+ "cell_type": "code",
664
+ "execution_count": null,
665
+ "id": "c3dede9f-5e01-436d-a7b7-905e1646baf9",
666
+ "metadata": {},
667
+ "outputs": [],
668
+ "source": []
669
+ }
670
+ ],
671
+ "metadata": {
672
+ "kernelspec": {
673
+ "display_name": "Python 3 (ipykernel)",
674
+ "language": "python",
675
+ "name": "python3"
676
+ },
677
+ "language_info": {
678
+ "codemirror_mode": {
679
+ "name": "ipython",
680
+ "version": 3
681
+ },
682
+ "file_extension": ".py",
683
+ "mimetype": "text/x-python",
684
+ "name": "python",
685
+ "nbconvert_exporter": "python",
686
+ "pygments_lexer": "ipython3",
687
+ "version": "3.13.5"
688
+ }
689
+ },
690
+ "nbformat": 4,
691
+ "nbformat_minor": 5
692
+ }
RAG/.ipynb_checkpoints/RAG-1-checkpoint.ipynb ADDED
@@ -0,0 +1,1404 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 12,
6
+ "id": "e549bafd-78b1-4a83-80b4-2cb597efff79",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "import os\n",
11
+ "import json\n",
12
+ "from google.oauth2 import service_account\n",
13
+ "from googleapiclient.discovery import build\n",
14
+ "from googleapiclient.http import MediaIoBaseDownload\n",
15
+ "import openai\n",
16
+ "from dotenv import load_dotenv, dotenv_values\n",
17
+ "import io"
18
+ ]
19
+ },
20
+ {
21
+ "cell_type": "code",
22
+ "execution_count": 13,
23
+ "id": "04361235-7896-4439-9d04-1400e043528b",
24
+ "metadata": {},
25
+ "outputs": [
26
+ {
27
+ "data": {
28
+ "text/plain": [
29
+ "True"
30
+ ]
31
+ },
32
+ "execution_count": 13,
33
+ "metadata": {},
34
+ "output_type": "execute_result"
35
+ }
36
+ ],
37
+ "source": [
38
+ "load_dotenv()"
39
+ ]
40
+ },
41
+ {
42
+ "cell_type": "code",
43
+ "execution_count": 14,
44
+ "id": "9ae411c5-c84b-4bfd-b089-69b5c5ba70ae",
45
+ "metadata": {},
46
+ "outputs": [
47
+ {
48
+ "name": "stdout",
49
+ "output_type": "stream",
50
+ "text": [
51
+ "OPENAI_API_KEY\n",
52
+ "ANTHROPIC_API_KEY\n",
53
+ "GOOGLE_SERVICE_ACCOUNT_FILE\n"
54
+ ]
55
+ }
56
+ ],
57
+ "source": [
58
+ "config = dotenv_values(\".env\")\n",
59
+ "for key in config.keys():\n",
60
+ " print(key)"
61
+ ]
62
+ },
63
+ {
64
+ "cell_type": "code",
65
+ "execution_count": 15,
66
+ "id": "7622a0e4-64a6-4848-b588-bd65d56c55e0",
67
+ "metadata": {},
68
+ "outputs": [],
69
+ "source": [
70
+ "from openai import OpenAI\n",
71
+ "openai.api_key = os.getenv('OPENAI_API_KEY')\n",
72
+ "openai = OpenAI(api_key = openai.api_key)"
73
+ ]
74
+ },
75
+ {
76
+ "cell_type": "code",
77
+ "execution_count": 16,
78
+ "id": "2ead7c59-1f11-478a-bb69-2928ddc38901",
79
+ "metadata": {},
80
+ "outputs": [
81
+ {
82
+ "name": "stdout",
83
+ "output_type": "stream",
84
+ "text": [
85
+ "Hello! How can I assist you today?\n"
86
+ ]
87
+ }
88
+ ],
89
+ "source": [
90
+ "response = openai.chat.completions.create(\n",
91
+ " model = \"gpt-4o-mini\",\n",
92
+ " messages = [\n",
93
+ " {\"role\":\"system\", \"content\":\"you are a helpful assistant\"},\n",
94
+ " {\"role\":\"user\", \"content\":\"hi\"}\n",
95
+ " ])\n",
96
+ "\n",
97
+ "reply = response.choices[0].message.content\n",
98
+ "print(reply)"
99
+ ]
100
+ },
101
+ {
102
+ "cell_type": "code",
103
+ "execution_count": 17,
104
+ "id": "b3bcbada-6a72-4cc8-a166-d2e596cd1fc4",
105
+ "metadata": {},
106
+ "outputs": [],
107
+ "source": [
108
+ "service_account_file_path = os.getenv(\"GOOGLE_SERVICE_ACCOUNT_FILE\")"
109
+ ]
110
+ },
111
+ {
112
+ "cell_type": "code",
113
+ "execution_count": 18,
114
+ "id": "81c59a6d-0831-4bff-b3fc-fbe3d4cc1e31",
115
+ "metadata": {},
116
+ "outputs": [
117
+ {
118
+ "name": "stdout",
119
+ "output_type": "stream",
120
+ "text": [
121
+ "openai activated\n",
122
+ "service_account_file_path activated\n"
123
+ ]
124
+ }
125
+ ],
126
+ "source": [
127
+ "#troubleshoot\n",
128
+ "if openai is None:\n",
129
+ " print(\"openai not activated\")\n",
130
+ "else: \n",
131
+ " print (\"openai activated\")\n",
132
+ "\n",
133
+ "if service_account_file_path is None:\n",
134
+ " print(\"service_account_file_path not activated\")\n",
135
+ "else: \n",
136
+ " print (\"service_account_file_path activated\")"
137
+ ]
138
+ },
139
+ {
140
+ "cell_type": "code",
141
+ "execution_count": 8,
142
+ "id": "a70f32aa-9e43-4175-8cca-d6af723aef91",
143
+ "metadata": {},
144
+ "outputs": [
145
+ {
146
+ "name": "stdout",
147
+ "output_type": "stream",
148
+ "text": [
149
+ "your credentials is C:/Users/Uche Buzz/myprojects/RAG/rag-system-463320-f292991d0516.json\n"
150
+ ]
151
+ }
152
+ ],
153
+ "source": [
154
+ "print (f\"your credentials is {service_account_file_path}\")"
155
+ ]
156
+ },
157
+ {
158
+ "cell_type": "code",
159
+ "execution_count": 27,
160
+ "id": "0efe325d-badd-4533-affe-47d572ef128e",
161
+ "metadata": {},
162
+ "outputs": [],
163
+ "source": [
164
+ "class GPTDriveIntegration:\n",
165
+ " def __init__(self):\n",
166
+ " # Initialize Google Drive API\n",
167
+ " self.credentials = service_account.Credentials.from_service_account_file(\n",
168
+ " os.getenv('GOOGLE_SERVICE_ACCOUNT_FILE'),\n",
169
+ " scopes=['https://www.googleapis.com/auth/drive.readonly']\n",
170
+ " )\n",
171
+ " self.drive_service = build('drive', 'v3', credentials=self.credentials)\n",
172
+ " \n",
173
+ " # Initialize OpenAI\n",
174
+ " openai.api_key = os.getenv('OPENAI_API_KEY')\n",
175
+ " \n",
176
+ " def search_files(self, query, file_types=None):\n",
177
+ " \"\"\"Search for files in Google Drive\"\"\"\n",
178
+ " search_query = f\"name contains '{query}'\"\n",
179
+ " \n",
180
+ " if file_types:\n",
181
+ " type_queries = []\n",
182
+ " for file_type in file_types:\n",
183
+ " if file_type.lower() == 'pdf':\n",
184
+ " type_queries.append(\"mimeType='application/pdf'\")\n",
185
+ " elif file_type.lower() in ['doc', 'docx']:\n",
186
+ " type_queries.append(\"mimeType contains 'document'\")\n",
187
+ " elif file_type.lower() in ['xls', 'xlsx']:\n",
188
+ " type_queries.append(\"mimeType contains 'spreadsheet'\")\n",
189
+ " \n",
190
+ " if type_queries:\n",
191
+ " search_query += f\" and ({' or '.join(type_queries)})\"\n",
192
+ " \n",
193
+ " results = self.drive_service.files().list(\n",
194
+ " q=search_query,\n",
195
+ " fields=\"files(id, name, mimeType, size)\"\n",
196
+ " ).execute()\n",
197
+ " \n",
198
+ " return results.get('files', [])\n",
199
+ " \n",
200
+ " def get_file_content(self, file_id, mime_type):\n",
201
+ " \"\"\"Download and extract text content from file\"\"\"\n",
202
+ " try:\n",
203
+ " if 'text' in mime_type or 'document' in mime_type:\n",
204
+ " # For Google Docs, export as plain text\n",
205
+ " if 'document' in mime_type:\n",
206
+ " request = self.drive_service.files().export_media(\n",
207
+ " fileId=file_id, mimeType='text/plain'\n",
208
+ " )\n",
209
+ " else:\n",
210
+ " request = self.drive_service.files().get_media(fileId=file_id)\n",
211
+ " \n",
212
+ " file_content = io.BytesIO()\n",
213
+ " downloader = MediaIoBaseDownload(file_content, request)\n",
214
+ " done = False\n",
215
+ " while done is False:\n",
216
+ " status, done = downloader.next_chunk()\n",
217
+ " \n",
218
+ " return file_content.getvalue().decode('utf-8')\n",
219
+ " \n",
220
+ " elif 'spreadsheet' in mime_type:\n",
221
+ " # For Google Sheets, export as CSV\n",
222
+ " request = self.drive_service.files().export_media(\n",
223
+ " fileId=file_id, mimeType='text/csv'\n",
224
+ " )\n",
225
+ " file_content = io.BytesIO()\n",
226
+ " downloader = MediaIoBaseDownload(file_content, request)\n",
227
+ " done = False\n",
228
+ " while done is False:\n",
229
+ " status, done = downloader.next_chunk()\n",
230
+ " \n",
231
+ " return file_content.getvalue().decode('utf-8')\n",
232
+ " \n",
233
+ " else:\n",
234
+ " return \"File type not supported for text extraction\"\n",
235
+ " \n",
236
+ " except Exception as e:\n",
237
+ " return f\"Error reading file: {str(e)}\"\n",
238
+ " \n",
239
+ " def query_gpt_with_context(self, user_query, file_contents):\n",
240
+ " \"\"\"Send query to GPT with file context\"\"\"\n",
241
+ " context = \"\\n\\n\".join([\n",
242
+ " f\"File: {content['name']}\\nContent: {content['text'][:2000]}...\"\n",
243
+ " for content in file_contents\n",
244
+ " ])\n",
245
+ " \n",
246
+ " messages = [\n",
247
+ " {\n",
248
+ " \"role\": \"system\", \n",
249
+ " \"content\": \"\"\"\n",
250
+ " You are an AI assistant that can analyze documents from Google Drive. \n",
251
+ " Use the provided file contents to answer user questions.\"\"\"\n",
252
+ " },\n",
253
+ " {\n",
254
+ " \"role\": \"user\", \n",
255
+ " \"content\": f\"Context from Google Drive files:\\n{context}\\n\\nUser Question: {user_query}\"\n",
256
+ " }\n",
257
+ " ]\n",
258
+ " \n",
259
+ " response = openai.chat.completions.create(\n",
260
+ " model=\"gpt-4o-mini\",\n",
261
+ " messages=messages,\n",
262
+ " max_tokens=1000\n",
263
+ " )\n",
264
+ " \n",
265
+ " return response.choices[0].message.content\n",
266
+ " \n",
267
+ " def process_query(self, user_query, search_terms=None):\n",
268
+ " \"\"\"Main function to process user queries\"\"\"\n",
269
+ " # Extract search terms from query if not provided\n",
270
+ " if not search_terms:\n",
271
+ " search_terms = user_query.split()[:3] # Simple extraction\n",
272
+ " \n",
273
+ " # Search for relevant files\n",
274
+ " files = []\n",
275
+ " for term in search_terms:\n",
276
+ " files.extend(self.search_files(term))\n",
277
+ " \n",
278
+ " # Remove duplicates\n",
279
+ " unique_files = {f['id']: f for f in files}.values()\n",
280
+ " \n",
281
+ " # Get content from top 3 most relevant files\n",
282
+ " file_contents = []\n",
283
+ " for file in list(unique_files)[:3]:\n",
284
+ " content = self.get_file_content(file['id'], file['mimeType'])\n",
285
+ " file_contents.append({\n",
286
+ " 'name': file['name'],\n",
287
+ " 'text': content\n",
288
+ " })\n",
289
+ " \n",
290
+ " # Query GPT with context\n",
291
+ " if file_contents:\n",
292
+ " response = self.query_gpt_with_context(user_query, file_contents)\n",
293
+ " return {\n",
294
+ " 'answer': response,\n",
295
+ " 'sources': [f['name'] for f in file_contents]\n",
296
+ " }\n",
297
+ " else:\n",
298
+ " return {\n",
299
+ " 'answer': \"No relevant files found in your Google Drive.\",\n",
300
+ " 'sources': []\n",
301
+ " }"
302
+ ]
303
+ },
304
+ {
305
+ "cell_type": "code",
306
+ "execution_count": 29,
307
+ "id": "3c2c1ccf-9ade-482d-a170-978e97bc1c08",
308
+ "metadata": {},
309
+ "outputs": [
310
+ {
311
+ "name": "stdout",
312
+ "output_type": "stream",
313
+ "text": [
314
+ "Answer: Blue Berry specializes in smart agriculture and precision farming, focusing on creating AI-powered drones and IoT devices. These technologies help farmers monitor crop health, manage irrigation, and improve yield by utilizing real-time data.\n",
315
+ "Sources: ['About the Company']\n"
316
+ ]
317
+ }
318
+ ],
319
+ "source": [
320
+ "if __name__ == \"__main__\":\n",
321
+ " integration = GPTDriveIntegration()\n",
322
+ " \n",
323
+ " # Test query\n",
324
+ " result = integration.process_query(\n",
325
+ " \"What is the company about?\"\n",
326
+ " )\n",
327
+ " \n",
328
+ " print(\"Answer:\", result['answer'])\n",
329
+ " print(\"Sources:\", result['sources'])"
330
+ ]
331
+ },
332
+ {
333
+ "cell_type": "code",
334
+ "execution_count": null,
335
+ "id": "120e7c93-b38a-4c89-8e76-5b0170d22548",
336
+ "metadata": {},
337
+ "outputs": [],
338
+ "source": []
339
+ },
340
+ {
341
+ "cell_type": "code",
342
+ "execution_count": 11,
343
+ "id": "634b9787-d493-46e5-8114-0851c6172ed6",
344
+ "metadata": {},
345
+ "outputs": [
346
+ {
347
+ "name": "stdout",
348
+ "output_type": "stream",
349
+ "text": [
350
+ "🔍 Starting Google Drive API Diagnostic...\n",
351
+ "==================================================\n",
352
+ "\n",
353
+ "1️⃣ Checking service account file...\n",
354
+ "✅ Service account file is valid\n",
355
+ " 📧 Service account email: rag-base@rag-system-463320.iam.gserviceaccount.com\n",
356
+ " 🏗️ Project ID: rag-system-463320\n",
357
+ "\n",
358
+ "2️⃣ Checking credentials...\n",
359
+ "✅ Credentials created successfully\n",
360
+ "\n",
361
+ "3️⃣ Testing API connection...\n",
362
+ "✅ Successfully connected to Google Drive API\n",
363
+ " 👤 Connected as: rag-base@rag-system-463320.iam.gserviceaccount.com\n",
364
+ "\n",
365
+ "4️⃣ Checking basic permissions...\n",
366
+ "✅ Can access Drive API - found 0 files in test query\n",
367
+ "⚠️ No files found - this might mean:\n",
368
+ " • Service account has no shared files\n",
369
+ " • No files are shared with the service account\n",
370
+ "\n",
371
+ "5️⃣ Testing access to 'Blue berry' folder...\n",
372
+ "❌ Folder 'Blue berry' not found or not accessible\n",
373
+ "💡 Possible solutions:\n",
374
+ " • Make sure the folder exists in Google Drive\n",
375
+ " • Share the folder with your service account email\n",
376
+ " • Check folder name spelling (case sensitive)\n",
377
+ "\n",
378
+ "✅ All basic checks passed!\n"
379
+ ]
380
+ }
381
+ ],
382
+ "source": [
383
+ "import json\n",
384
+ "from google.auth.exceptions import RefreshError\n",
385
+ "from googleapiclient.errors import HttpError\n",
386
+ "\n",
387
+ "class GPTDriveTroubleshooter:\n",
388
+ " def __init__(self, service_account_file_path):\n",
389
+ " self.service_account_file = service_account_file_path\n",
390
+ " self.credentials = None\n",
391
+ " self.drive_service = None\n",
392
+ " \n",
393
+ " def run_full_diagnostic(self):\n",
394
+ " \"\"\"Run complete diagnostic check\"\"\"\n",
395
+ " print(\"🔍 Starting Google Drive API Diagnostic...\")\n",
396
+ " print(\"=\" * 50)\n",
397
+ " \n",
398
+ " # Step 1: Check service account file\n",
399
+ " if not self.check_service_account_file():\n",
400
+ " return False\n",
401
+ " \n",
402
+ " # Step 2: Check credentials\n",
403
+ " if not self.check_credentials():\n",
404
+ " return False\n",
405
+ " \n",
406
+ " # Step 3: Test API connection\n",
407
+ " if not self.test_api_connection():\n",
408
+ " return False\n",
409
+ " \n",
410
+ " # Step 4: Check permissions\n",
411
+ " if not self.check_basic_permissions():\n",
412
+ " return False\n",
413
+ " \n",
414
+ " # Step 5: Test folder access\n",
415
+ " self.test_folder_access()\n",
416
+ " \n",
417
+ " print(\"\\n✅ All basic checks passed!\")\n",
418
+ " return True\n",
419
+ " \n",
420
+ " def check_service_account_file(self):\n",
421
+ " \"\"\"Check if service account file exists and is valid\"\"\"\n",
422
+ " print(\"\\n1️⃣ Checking service account file...\")\n",
423
+ " \n",
424
+ " if not os.path.exists(self.service_account_file):\n",
425
+ " print(f\"❌ Service account file not found: {self.service_account_file}\")\n",
426
+ " print(\"💡 Make sure you've downloaded the JSON key file from Google Cloud Console\")\n",
427
+ " return False\n",
428
+ " \n",
429
+ " try:\n",
430
+ " with open(self.service_account_file, 'r') as f:\n",
431
+ " service_account_info = json.load(f)\n",
432
+ " \n",
433
+ " required_fields = ['type', 'project_id', 'private_key_id', 'private_key', 'client_email']\n",
434
+ " missing_fields = [field for field in required_fields if field not in service_account_info]\n",
435
+ " \n",
436
+ " if missing_fields:\n",
437
+ " print(f\"❌ Service account file missing required fields: {missing_fields}\")\n",
438
+ " return False\n",
439
+ " \n",
440
+ " print(f\"✅ Service account file is valid\")\n",
441
+ " print(f\" 📧 Service account email: {service_account_info['client_email']}\")\n",
442
+ " print(f\" 🏗️ Project ID: {service_account_info['project_id']}\")\n",
443
+ " \n",
444
+ " return True\n",
445
+ " \n",
446
+ " except json.JSONDecodeError:\n",
447
+ " print(\"❌ Service account file is not valid JSON\")\n",
448
+ " return False\n",
449
+ " except Exception as e:\n",
450
+ " print(f\"❌ Error reading service account file: {e}\")\n",
451
+ " return False\n",
452
+ " \n",
453
+ " def check_credentials(self):\n",
454
+ " \"\"\"Check if credentials can be created\"\"\"\n",
455
+ " print(\"\\n2️⃣ Checking credentials...\")\n",
456
+ " \n",
457
+ " try:\n",
458
+ " self.credentials = service_account.Credentials.from_service_account_file(\n",
459
+ " self.service_account_file,\n",
460
+ " scopes=['https://www.googleapis.com/auth/drive.readonly']\n",
461
+ " )\n",
462
+ " print(\"✅ Credentials created successfully\")\n",
463
+ " return True\n",
464
+ " \n",
465
+ " except Exception as e:\n",
466
+ " print(f\"❌ Failed to create credentials: {e}\")\n",
467
+ " print(\"πŸ’‘ Check if your service account key file is corrupted\")\n",
468
+ " return False\n",
469
+ " \n",
470
+ " def test_api_connection(self):\n",
471
+ " \"\"\"Test basic API connection\"\"\"\n",
472
+ " print(\"\\n3️⃣ Testing API connection...\")\n",
473
+ " \n",
474
+ " try:\n",
475
+ " self.drive_service = build('drive', 'v3', credentials=self.credentials)\n",
476
+ " \n",
477
+ " # Try a simple API call\n",
478
+ " about = self.drive_service.about().get(fields=\"user, storageQuota\").execute()\n",
479
+ " print(\"✅ Successfully connected to Google Drive API\")\n",
480
+ " print(f\" 👤 Connected as: {about.get('user', {}).get('emailAddress', 'Unknown')}\")\n",
481
+ " \n",
482
+ " return True\n",
483
+ " \n",
484
+ " except HttpError as e:\n",
485
+ " print(f\"❌ HTTP Error connecting to API: {e}\")\n",
486
+ " if e.resp.status == 403:\n",
487
+ " print(\"πŸ’‘ This is likely a permissions issue - check if Drive API is enabled\")\n",
488
+ " return False\n",
489
+ " except Exception as e:\n",
490
+ " print(f\"❌ Failed to connect to API: {e}\")\n",
491
+ " return False\n",
492
+ " \n",
493
+ " def check_basic_permissions(self):\n",
494
+ " \"\"\"Check basic file listing permissions\"\"\"\n",
495
+ " print(\"\\n4️⃣ Checking basic permissions...\")\n",
496
+ " \n",
497
+ " try:\n",
498
+ " # Try to list files (this should work with readonly access)\n",
499
+ " results = self.drive_service.files().list(\n",
500
+ " pageSize=1,\n",
501
+ " fields=\"files(id, name)\"\n",
502
+ " ).execute()\n",
503
+ " \n",
504
+ " files = results.get('files', [])\n",
505
+ " print(f\"βœ… Can access Drive API - found {len(files)} files in test query\")\n",
506
+ " \n",
507
+ " if len(files) == 0:\n",
508
+ " print(\"⚠️ No files found - this might mean:\")\n",
509
+ " print(\" β€’ Service account has no shared files\")\n",
510
+ " print(\" β€’ No files are shared with the service account\")\n",
511
+ " \n",
512
+ " return True\n",
513
+ " \n",
514
+ " except HttpError as e:\n",
515
+ " print(f\"❌ Permission error: {e}\")\n",
516
+ " if e.resp.status == 403:\n",
517
+ " print(\"πŸ’‘ Common causes:\")\n",
518
+ " print(\" β€’ Google Drive API not enabled in Google Cloud Console\")\n",
519
+ " print(\" β€’ Service account doesn't have proper permissions\")\n",
520
+ " return False\n",
521
+ " except Exception as e:\n",
522
+ " print(f\"❌ Error checking permissions: {e}\")\n",
523
+ " return False\n",
524
+ " \n",
525
+ " def test_folder_access(self, folder_name=\"Blue berry\"):\n",
526
+ " \"\"\"Test access to specific folder\"\"\"\n",
527
+ " print(f\"\\n5️⃣ Testing access to '{folder_name}' folder...\")\n",
528
+ " \n",
529
+ " try:\n",
530
+ " # Search for the folder\n",
531
+ " query = f\"name='{folder_name}' and mimeType='application/vnd.google-apps.folder' and trashed=false\"\n",
532
+ " results = self.drive_service.files().list(\n",
533
+ " q=query,\n",
534
+ " fields=\"files(id, name, owners, permissions)\"\n",
535
+ " ).execute()\n",
536
+ " \n",
537
+ " folders = results.get('files', [])\n",
538
+ " \n",
539
+ " if not folders:\n",
540
+ " print(f\"❌ Folder '{folder_name}' not found or not accessible\")\n",
541
+ " print(\"πŸ’‘ Possible solutions:\")\n",
542
+ " print(\" β€’ Make sure the folder exists in Google Drive\")\n",
543
+ " print(\" β€’ Share the folder with your service account email\")\n",
544
+ " print(\" β€’ Check folder name spelling (case sensitive)\")\n",
545
+ " return False\n",
546
+ " \n",
547
+ " folder = folders[0]\n",
548
+ " print(f\"βœ… Found folder '{folder_name}'\")\n",
549
+ " print(f\" πŸ“ Folder ID: {folder['id']}\")\n",
550
+ " \n",
551
+ " # Test listing files in the folder\n",
552
+ " files_in_folder = self.drive_service.files().list(\n",
553
+ " q=f\"'{folder['id']}' in parents and trashed=false\",\n",
554
+ " fields=\"files(id, name, mimeType)\"\n",
555
+ " ).execute()\n",
556
+ " \n",
557
+ " files = files_in_folder.get('files', [])\n",
558
+ " print(f\" πŸ“„ Contains {len(files)} files\")\n",
559
+ " \n",
560
+ " if files:\n",
561
+ " print(\" πŸ“ Sample files:\")\n",
562
+ " for file in files[:3]: # Show first 3 files\n",
563
+ " print(f\" β€’ {file['name']} ({file['mimeType']})\")\n",
564
+ " \n",
565
+ " return True\n",
566
+ " \n",
567
+ " except HttpError as e:\n",
568
+ " print(f\"❌ HTTP Error accessing folder: {e}\")\n",
569
+ " return False\n",
570
+ " except Exception as e:\n",
571
+ " print(f\"❌ Error accessing folder: {e}\")\n",
572
+ " return False\n",
573
+ " \n",
574
+ " def check_file_permissions(self, file_id):\n",
575
+ " \"\"\"Check permissions for a specific file\"\"\"\n",
576
+ " print(f\"\\nπŸ” Checking permissions for file ID: {file_id}\")\n",
577
+ " \n",
578
+ " try:\n",
579
+ " file_info = self.drive_service.files().get(\n",
580
+ " fileId=file_id,\n",
581
+ " fields=\"id, name, mimeType, owners, permissions, capabilities\"\n",
582
+ " ).execute()\n",
583
+ " \n",
584
+ " print(f\"βœ… File: {file_info['name']}\")\n",
585
+ " print(f\" πŸ”— Type: {file_info['mimeType']}\")\n",
586
+ " print(f\" πŸ‘€ Owner: {file_info.get('owners', [{}])[0].get('emailAddress', 'Unknown')}\")\n",
587
+ " \n",
588
+ " capabilities = file_info.get('capabilities', {})\n",
589
+ " print(f\" πŸ“– Can read: {capabilities.get('canDownload', False)}\")\n",
590
+ " print(f\" πŸ“€ Can export: {capabilities.get('canExport', False)}\")\n",
591
+ " \n",
592
+ " except HttpError as e:\n",
593
+ " print(f\"❌ Cannot access file: {e}\")\n",
594
+ " except Exception as e:\n",
595
+ " print(f\"❌ Error: {e}\")\n",
596
+ " \n",
597
+ " def get_sharing_instructions(self):\n",
598
+ " \"\"\"Provide step-by-step sharing instructions\"\"\"\n",
599
+ " print(\"\\nπŸ“‹ HOW TO SHARE FOLDER WITH SERVICE ACCOUNT:\")\n",
600
+ " print(\"=\" * 50)\n",
601
+ " \n",
602
+ " if self.credentials:\n",
603
+ " service_email = self.credentials.service_account_email\n",
604
+ " print(f\"1. Copy this service account email: {service_email}\")\n",
605
+ " else:\n",
606
+ " print(\"1. Find your service account email in the JSON key file (client_email field)\")\n",
607
+ " \n",
608
+ " print(\"2. Open Google Drive in your browser\")\n",
609
+ " print(\"3. Right-click on your 'Blue berry' folder\")\n",
610
+ " print(\"4. Select 'Share'\")\n",
611
+ " print(\"5. Paste the service account email\")\n",
612
+ " print(\"6. Set permission to 'Viewer' or 'Editor'\")\n",
613
+ " print(\"7. Click 'Send' (you can uncheck 'Notify people')\")\n",
614
+ " print(\"8. Wait a few minutes for permissions to propagate\")\n",
615
+ " \n",
616
+ " def run_connection_test(self):\n",
617
+ " \"\"\"Quick connection test\"\"\"\n",
618
+ " print(\"πŸš€ Quick Connection Test\")\n",
619
+ " print(\"-\" * 30)\n",
620
+ " \n",
621
+ " try:\n",
622
+ " self.credentials = service_account.Credentials.from_service_account_file(\n",
623
+ " self.service_account_file,\n",
624
+ " scopes=['https://www.googleapis.com/auth/drive.readonly']\n",
625
+ " )\n",
626
+ " \n",
627
+ " service = build('drive', 'v3', credentials=self.credentials)\n",
628
+ " \n",
629
+ " # Test basic query\n",
630
+ " results = service.files().list(pageSize=5).execute()\n",
631
+ " files = results.get('files', [])\n",
632
+ " \n",
633
+ " print(f\"βœ… Connected! Found {len(files)} accessible files\")\n",
634
+ " print(f\"πŸ“§ Service account: {self.credentials.service_account_email}\")\n",
635
+ " \n",
636
+ " return True\n",
637
+ " \n",
638
+ " except Exception as e:\n",
639
+ " print(f\"❌ Connection failed: {e}\")\n",
640
+ " return False\n",
641
+ "\n",
642
+ "# Usage Examples\n",
643
+ "def troubleshoot_drive_access():\n",
644
+ " \"\"\"Main troubleshooting function\"\"\"\n",
645
+ " service_account_file = os.getenv('GOOGLE_SERVICE_ACCOUNT_FILE')\n",
646
+ " \n",
647
+ " if not service_account_file:\n",
648
+ " print(\"❌ GOOGLE_SERVICE_ACCOUNT_FILE environment variable not set\")\n",
649
+ " print(\"πŸ’‘ Add this to your .env file:\")\n",
650
+ " print(\"GOOGLE_SERVICE_ACCOUNT_FILE=path/to/your/service-account-key.json\")\n",
651
+ " return\n",
652
+ " \n",
653
+ " troubleshooter = GPTDriveTroubleshooter(service_account_file)\n",
654
+ " \n",
655
+ " # Run full diagnostic\n",
656
+ " success = troubleshooter.run_full_diagnostic()\n",
657
+ " \n",
658
+ " if not success:\n",
659
+ " print(\"\\n\" + \"=\"*50)\n",
660
+ " troubleshooter.get_sharing_instructions()\n",
661
+ " \n",
662
+ " return success\n",
663
+ "\n",
664
+ "# Quick test function\n",
665
+ "def quick_test():\n",
666
+ " \"\"\"Quick test to verify everything works\"\"\"\n",
667
+ " service_account_file = os.getenv('GOOGLE_SERVICE_ACCOUNT_FILE')\n",
668
+ " troubleshooter = GPTDriveTroubleshooter(service_account_file)\n",
669
+ " return troubleshooter.run_connection_test()\n",
670
+ "\n",
671
+ "if __name__ == \"__main__\":\n",
672
+ " # Uncomment the test you want to run:\n",
673
+ " \n",
674
+ " # Full diagnostic (recommended for first-time setup)\n",
675
+ " troubleshoot_drive_access()\n",
676
+ " \n",
677
+ " # Quick test (for regular checks)\n",
678
+ " # quick_test()"
679
+ ]
680
+ },
681
+ {
682
+ "cell_type": "code",
683
+ "execution_count": null,
684
+ "id": "6b336789-d402-455d-a631-26e987d79ed6",
685
+ "metadata": {},
686
+ "outputs": [],
687
+ "source": [
688
+ "troubleshooter = GPTDriveTroubleshooter(GOOGLE_SERVICE_ACCOUNT_FILE)\n",
689
+ "troubleshooter.test_folder_access(\"Blue berry\")"
690
+ ]
691
+ },
692
+ {
693
+ "cell_type": "code",
694
+ "execution_count": null,
695
+ "id": "e2786c67-215e-46b0-b3ca-2c3176bd8971",
696
+ "metadata": {},
697
+ "outputs": [],
698
+ "source": [
699
+ "# Add this test function to verify access\n",
700
+ "def test_folder_access():\n",
701
+ " integration = GPTDriveIntegration()\n",
702
+ " folder_id = integration.find_folder_by_name(\"Blue berry\")\n",
703
+ " if folder_id:\n",
704
+ " print(\"✅ Successfully found 'Blue berry' folder!\")\n",
705
+ " \n",
706
+ " # List files in the folder\n",
707
+ " results = integration.drive_service.files().list(\n",
708
+ " q=f\"'{folder_id}' in parents and trashed=false\",\n",
709
+ " fields=\"files(id, name, mimeType)\"\n",
710
+ " ).execute()\n",
711
+ " \n",
712
+ " files = results.get('files', [])\n",
713
+ " print(f\"Found {len(files)} files in the folder:\")\n",
714
+ " for file in files[:5]: # Show first 5 files\n",
715
+ " print(f\" - {file['name']} ({file['mimeType']})\")\n",
716
+ " else:\n",
717
+ " print(\"❌ Could not access 'Blue berry' folder\")\n",
718
+ " print(\"Make sure you've shared the folder with your service account\")\n",
719
+ "\n",
720
+ "# Run this test first\n",
721
+ "test_folder_access()"
722
+ ]
723
+ },
724
+ {
725
+ "cell_type": "code",
726
+ "execution_count": null,
727
+ "id": "c3dede9f-5e01-436d-a7b7-905e1646baf9",
728
+ "metadata": {},
729
+ "outputs": [],
730
+ "source": []
731
+ },
732
+ {
733
+ "cell_type": "code",
734
+ "execution_count": null,
735
+ "id": "b933c8b6-add6-40fc-827f-e5e07447ac00",
736
+ "metadata": {},
737
+ "outputs": [],
738
+ "source": [
739
+ "#Troubleshooting"
740
+ ]
741
+ },
742
+ {
743
+ "cell_type": "code",
744
+ "execution_count": 21,
745
+ "id": "b63a1f4b-1315-44fe-ba84-93fb1c2655cc",
746
+ "metadata": {},
747
+ "outputs": [
748
+ {
749
+ "name": "stdout",
750
+ "output_type": "stream",
751
+ "text": [
752
+ "🔍 Searching for 'Blue Berry' folder...\n",
753
+ "Total folders found: 0\n",
754
+ "No folders found or error occurred.\n"
755
+ ]
756
+ }
757
+ ],
758
+ "source": [
759
+ "import os\n",
760
+ "from google.oauth2 import service_account\n",
761
+ "from googleapiclient.discovery import build\n",
762
+ "\n",
763
class DrivefolderLister:
    """List, path-resolve and search the folders visible to a Drive service account.

    The Drive client is created in ``__init__`` from the key file named by the
    ``GOOGLE_SERVICE_ACCOUNT_FILE`` environment variable, using the read-only
    Drive scope. All reporting is done with ``print``.
    """

    def __init__(self):
        """Build the Drive v3 client.

        Raises:
            ValueError: if ``GOOGLE_SERVICE_ACCOUNT_FILE`` is unset or empty.
        """
        key_file = os.getenv('GOOGLE_SERVICE_ACCOUNT_FILE')
        if not key_file:
            # Fail with an explicit message instead of handing None to the loader.
            raise ValueError(
                "GOOGLE_SERVICE_ACCOUNT_FILE environment variable is not set"
            )

        # Initialize Google Drive API
        self.credentials = service_account.Credentials.from_service_account_file(
            key_file,
            scopes=['https://www.googleapis.com/auth/drive.readonly'],
        )
        self.drive_service = build('drive', 'v3', credentials=self.credentials)

    def get_all_folders(self):
        """Get ALL non-trashed folders, following ``nextPageToken`` until exhausted.

        Returns:
            A list of dicts with the requested ``id``, ``name`` and ``parents``
            fields. On any error the error is printed and ``[]`` is returned,
            so callers cannot tell "no folders" from "request failed".
        """
        try:
            # Query to get all folders (not trashed)
            query = "mimeType='application/vnd.google-apps.folder' and trashed=false"

            all_folders = []
            page_token = None

            while True:
                results = self.drive_service.files().list(
                    q=query,
                    fields="nextPageToken, files(id, name, parents)",
                    pageSize=1000,  # Maximum allowed
                    pageToken=page_token,
                ).execute()

                all_folders.extend(results.get('files', []))

                page_token = results.get('nextPageToken')
                if not page_token:
                    break

            print(f"Total folders found: {len(all_folders)}")
            return all_folders

        except Exception as e:
            print(f"Error retrieving folders: {e}")
            return []

    def build_folder_tree(self, folders):
        """Resolve a slash-separated path for every folder in a flat listing.

        Each folder is walked upwards through the first entry of its
        ``parents`` list. The walk stops at:

        * the id ``'root'``, rendered as ``"My Drive"``;
        * a parent id that is not in ``folders``, rendered as ``"UNKNOWN"``;
        * a folder with no parents (its own name starts the path);
        * an id already seen on this walk, rendered as ``"CIRCULAR_REFERENCE"``.

        The walk is iterative, so deep hierarchies cannot exhaust the
        interpreter's recursion limit.

        Args:
            folders: dicts with ``id`` and ``name`` and optionally ``parents``.

        Returns:
            A list, in input order, of ``{'name', 'id', 'path'}`` dicts.
        """
        # Create a dictionary for quick lookup
        folder_dict = {folder['id']: folder for folder in folders}

        # Add root folder
        folder_dict['root'] = {'id': 'root', 'name': 'My Drive', 'parents': []}

        def build_path(folder_id):
            segments = []  # collected leaf-first, reversed at the end
            seen = set()
            current = folder_id

            while True:
                if current in seen:  # Prevent infinite loops
                    segments.append("CIRCULAR_REFERENCE")
                    break
                seen.add(current)

                if current not in folder_dict:
                    segments.append("UNKNOWN")
                    break

                if current == 'root':
                    segments.append("My Drive")
                    break

                folder = folder_dict[current]
                segments.append(folder['name'])

                parents = folder.get('parents', [])
                if not parents:
                    break
                current = parents[0]

            return "/".join(reversed(segments))

        # Build paths for all folders
        return [
            {
                'name': folder['name'],
                'id': folder['id'],
                'path': build_path(folder['id']),
            }
            for folder in folders
        ]

    def search_folder_by_name(self, folder_name, folders_with_paths):
        """Search for folders whose name contains ``folder_name`` (case-insensitive).

        Returns:
            The matching entries of ``folders_with_paths``, in their original order.
        """
        search_name = folder_name.lower()
        return [
            folder for folder in folders_with_paths
            if search_name in folder['name'].lower()
        ]

    def display_all_folders(self, folders_with_paths, search_term=None):
        """Print folders in a readable format.

        With a truthy ``search_term`` only the matching folders are printed
        (name, path and ID); otherwise every folder path is printed, sorted
        by path.
        """
        if search_term:
            print(f"\n=== Searching for folders containing '{search_term}' ===")
            matches = self.search_folder_by_name(search_term, folders_with_paths)
            if matches:
                print(f"Found {len(matches)} matching folders:")
                for folder in matches:
                    print(f"📁 {folder['name']}")
                    print(f" Path: {folder['path']}")
                    print(f" ID: {folder['id']}")
                    print()
            else:
                print(f"No folders found containing '{search_term}'")
        else:
            print(f"\n=== All {len(folders_with_paths)} folders in your Google Drive ===")

            # Sort by path for better readability
            sorted_folders = sorted(folders_with_paths, key=lambda x: x['path'])

            for folder in sorted_folders:
                print(f"📁 {folder['path']}")

    def find_blue_berry_folder(self):
        """Fetch all folders and report those whose name contains "blue berry".

        When nothing matches, the full folder listing is printed instead.
        """
        print("🔍 Searching for 'Blue Berry' folder...")

        folders = self.get_all_folders()
        if not folders:
            print("No folders found or error occurred.")
            return

        folders_with_paths = self.build_folder_tree(folders)

        # Search for Blue Berry specifically
        blue_berry_matches = self.search_folder_by_name("blue berry", folders_with_paths)

        if blue_berry_matches:
            print(f"\n✅ Found {len(blue_berry_matches)} 'Blue Berry' folder(s):")
            for folder in blue_berry_matches:
                print(f"📁 {folder['name']}")
                print(f" Full Path: {folder['path']}")
                print(f" Folder ID: {folder['id']}")
                print()
        else:
            print("\n❌ No 'Blue Berry' folder found.")
            print("Let me show you all folders to help you locate it:")

            # Show all folders if Blue Berry not found
            self.display_all_folders(folders_with_paths)

    def interactive_folder_search(self):
        """Run a console menu loop for searching and listing folders.

        The folder listing is fetched once before the loop starts; the loop
        ends when the user enters ``4``.
        """
        print("📂 Google Drive Folder Explorer")
        print("=" * 40)

        folders = self.get_all_folders()
        if not folders:
            print("No folders found or error occurred.")
            return

        folders_with_paths = self.build_folder_tree(folders)

        while True:
            print("\nOptions:")
            print("1. Search for a specific folder")
            print("2. Show all folders")
            print("3. Find 'Blue Berry' folder")
            print("4. Exit")

            choice = input("\nEnter your choice (1-4): ").strip()

            if choice == '1':
                search_term = input("Enter folder name to search: ").strip()
                if search_term:
                    self.display_all_folders(folders_with_paths, search_term)

            elif choice == '2':
                self.display_all_folders(folders_with_paths)

            elif choice == '3':
                blue_berry_matches = self.search_folder_by_name("blue berry", folders_with_paths)
                if blue_berry_matches:
                    print("\n✅ Found 'Blue Berry' folder(s):")
                    for folder in blue_berry_matches:
                        print(f"📁 {folder['name']} -> {folder['path']}")
                else:
                    print("\n❌ 'Blue Berry' folder not found in your Drive.")

            elif choice == '4':
                print("Goodbye!")
                break

            else:
                print("Invalid choice. Please try again.")
957
+ "\n",
958
def main():
    """Connect to Drive and run the quick 'Blue Berry' folder search.

    Any exception raised while connecting or searching is reported on stdout
    together with a hint about the service-account environment variable.
    For the menu-driven explorer, call ``interactive_folder_search()`` on the
    lister instead of ``find_blue_berry_folder()``.
    """
    try:
        DrivefolderLister().find_blue_berry_folder()
    except Exception as e:
        print(f"Error initializing Drive connection: {e}")
        print("Make sure your GOOGLE_SERVICE_ACCOUNT_FILE environment variable is set correctly.")


if __name__ == "__main__":
    main()
975
+ ]
976
+ },
977
+ {
978
+ "cell_type": "code",
979
+ "execution_count": null,
980
+ "id": "29a61039-9043-44d5-94b1-4847350b2200",
981
+ "metadata": {},
982
+ "outputs": [],
983
+ "source": []
984
+ },
985
+ {
986
+ "cell_type": "code",
987
+ "execution_count": 23,
988
+ "id": "7350d687-f76e-46f4-a30b-0518e5b8236e",
989
+ "metadata": {},
990
+ "outputs": [
991
+ {
992
+ "name": "stdout",
993
+ "output_type": "stream",
994
+ "text": [
995
+ "πŸš€ Google Drive API Diagnostic Tool\n",
996
+ "==================================================\n",
997
+ "πŸ”§ Testing Environment Variables...\n",
998
+ "----------------------------------------\n",
999
+ "βœ… GOOGLE_SERVICE_ACCOUNT_FILE: C:/Users/Uche Buzz/myprojects/RAG/rag-system-463320-f292991d0516.json\n",
1000
+ "βœ… Service account file exists\n",
1001
+ "βœ… Service account file is valid JSON\n",
1002
+ "βœ… All required fields present in service account file\n",
1003
+ "πŸ“§ Service account email: rag-base@rag-system-463320.iam.gserviceaccount.com\n",
1004
+ "βœ… OPENAI_API_KEY: ************************************************************************************************************************************************************K1OkRRkA\n",
1005
+ "\n",
1006
+ "πŸ”‘ Testing Credentials Loading...\n",
1007
+ "----------------------------------------\n",
1008
+ "\n",
1009
+ "Testing scope set 1: ['https://www.googleapis.com/auth/drive.readonly']\n",
1010
+ "βœ… Credentials loaded successfully with scopes: ['https://www.googleapis.com/auth/drive.readonly']\n",
1011
+ "\n",
1012
+ "πŸ”§ Testing Drive Service Creation...\n",
1013
+ "----------------------------------------\n",
1014
+ "βœ… Google Drive service created successfully\n",
1015
+ "\n",
1016
+ "πŸ“‘ Testing Basic API Call...\n",
1017
+ "----------------------------------------\n",
1018
+ "βœ… API call successful!\n",
1019
+ "πŸ“§ Connected as: rag-base@rag-system-463320.iam.gserviceaccount.com\n",
1020
+ "πŸ‘€ Display name: rag-base@rag-system-463320.iam.gserviceaccount.com\n",
1021
+ "\n",
1022
+ "🏠 Testing Root Folder Access...\n",
1023
+ "----------------------------------------\n",
1024
+ "βœ… Root folder accessible\n",
1025
+ "πŸ“ Root folder name: My Drive\n",
1026
+ "πŸ“„ Items in root folder: 0\n",
1027
+ "\n",
1028
+ "πŸ“ Testing File Listing...\n",
1029
+ "----------------------------------------\n",
1030
+ "Test 1: Listing all files (including documents)...\n",
1031
+ "βœ… Found 3 files total\n",
1032
+ "πŸ“„ First few files:\n",
1033
+ " - Blue Berry (application/vnd.google-apps.folder)\n",
1034
+ " - Blue Berry Safety Protocols and Training Guidelines (application/vnd.google-apps.document)\n",
1035
+ " - About the Company (application/vnd.google-apps.document)\n",
1036
+ "\n",
1037
+ "Test 2: Listing folders only...\n",
1038
+ "βœ… Found 1 folders\n",
1039
+ "πŸ“ Folders found:\n",
1040
+ " - Blue Berry (ID: 1AYaS0yt_srFlgdE4mSNlqA6FLm10rdt4)\n",
1041
+ "\n",
1042
+ "βœ… All diagnostic tests passed!\n",
1043
+ "πŸŽ‰ Your Google Drive API connection is working correctly!\n"
1044
+ ]
1045
+ }
1046
+ ],
1047
+ "source": [
1048
+ "import os\n",
1049
+ "from google.oauth2 import service_account\n",
1050
+ "from googleapiclient.discovery import build\n",
1051
+ "from googleapiclient.errors import HttpError\n",
1052
+ "import json\n",
1053
+ "\n",
1054
class DriveConnectionDiagnostic:
    """Step-by-step console diagnostic for a Google Drive service-account setup.

    Each ``test_*`` method prints its findings and returns ``True`` on success
    and ``False`` on failure; ``run_full_diagnostic`` chains them and stops at
    the first failure.
    """

    def __init__(self):
        self.credentials = None     # set by test_credentials_loading()
        self.drive_service = None   # set by test_drive_service_creation()
        self.setup_success = False  # not updated anywhere in this class

    @staticmethod
    def _mask_secret(secret, visible=4):
        """Return a fixed-width mask followed by at most ``visible`` trailing characters.

        Secrets too short to mask meaningfully are hidden entirely, and the
        mask width never depends on the secret's length.
        """
        mask = "*" * 8
        if len(secret) <= visible * 2:
            return mask
        return mask + secret[-visible:]

    def test_environment_variables(self):
        """Test if environment variables are set correctly.

        Checks that ``GOOGLE_SERVICE_ACCOUNT_FILE`` is set, that the file
        exists, parses as JSON and contains the expected key fields.
        ``OPENAI_API_KEY`` is reported (masked) but its absence is only a
        warning.
        """
        print("🔧 Testing Environment Variables...")
        print("-" * 40)

        service_account_file = os.getenv('GOOGLE_SERVICE_ACCOUNT_FILE')
        openai_key = os.getenv('OPENAI_API_KEY')

        if not service_account_file:
            print("❌ GOOGLE_SERVICE_ACCOUNT_FILE environment variable not set")
            return False
        print(f"✅ GOOGLE_SERVICE_ACCOUNT_FILE: {service_account_file}")

        # Check if file exists
        if not os.path.exists(service_account_file):
            print(f"❌ Service account file does not exist at: {service_account_file}")
            return False
        print("✅ Service account file exists")

        # Check if file is readable
        try:
            with open(service_account_file, 'r', encoding='utf-8') as f:
                service_data = json.load(f)
            print("✅ Service account file is valid JSON")

            # Check required fields
            required_fields = ['type', 'project_id', 'private_key_id', 'private_key', 'client_email']
            missing_fields = [field for field in required_fields if field not in service_data]

            if missing_fields:
                print(f"❌ Missing required fields in service account file: {missing_fields}")
                return False

            print("✅ All required fields present in service account file")
            print(f"📧 Service account email: {service_data.get('client_email')}")

        except json.JSONDecodeError:
            print("❌ Service account file is not valid JSON")
            return False
        except Exception as e:
            print(f"❌ Error reading service account file: {e}")
            return False

        if openai_key:
            # Never echo the key itself: short keys are hidden completely.
            print(f"✅ OPENAI_API_KEY: {self._mask_secret(openai_key)}")
        else:
            print("⚠️ OPENAI_API_KEY not set (not required for folder listing)")

        return True

    def test_credentials_loading(self):
        """Test if credentials can be loaded.

        Tries each scope set in turn and keeps the first credentials object
        that loads without raising, storing it on ``self.credentials``.
        """
        print("\n🔑 Testing Credentials Loading...")
        print("-" * 40)

        try:
            service_account_file = os.getenv('GOOGLE_SERVICE_ACCOUNT_FILE')

            # Try different scope combinations
            scopes_to_test = [
                ['https://www.googleapis.com/auth/drive.readonly'],
                ['https://www.googleapis.com/auth/drive'],
                ['https://www.googleapis.com/auth/drive.metadata.readonly'],
                ['https://www.googleapis.com/auth/drive.file'],
            ]

            for i, scopes in enumerate(scopes_to_test, start=1):
                try:
                    print(f"\nTesting scope set {i}: {scopes}")
                    self.credentials = service_account.Credentials.from_service_account_file(
                        service_account_file,
                        scopes=scopes,
                    )
                    print(f"✅ Credentials loaded successfully with scopes: {scopes}")
                    return True
                except Exception as e:
                    print(f"❌ Failed to load credentials with scopes {scopes}: {e}")

            return False

        except Exception as e:
            print(f"❌ Error loading credentials: {e}")
            return False

    def test_drive_service_creation(self):
        """Test if the Drive v3 service can be created from ``self.credentials``."""
        print("\n🔧 Testing Drive Service Creation...")
        print("-" * 40)

        try:
            if not self.credentials:
                print("❌ No credentials available")
                return False

            self.drive_service = build('drive', 'v3', credentials=self.credentials)
            print("✅ Google Drive service created successfully")
            return True

        except Exception as e:
            print(f"❌ Error creating Drive service: {e}")
            return False

    def test_basic_api_call(self):
        """Test basic API functionality by requesting the ``about.user`` record."""
        print("\n📡 Testing Basic API Call...")
        print("-" * 40)

        try:
            if not self.drive_service:
                print("❌ No Drive service available")
                return False

            # Test getting user info
            about = self.drive_service.about().get(fields="user").execute()
            user_info = about.get('user', {})
            print("✅ API call successful!")
            print(f"📧 Connected as: {user_info.get('emailAddress', 'Unknown')}")
            print(f"👤 Display name: {user_info.get('displayName', 'Unknown')}")

            return True

        except HttpError as e:
            print(f"❌ HTTP Error: {e}")
            if e.resp.status == 403:
                print(" This might be a permissions issue. Check:")
                print(" 1. Is the service account enabled?")
                print(" 2. Does it have the right permissions?")
                print(" 3. Is the Google Drive API enabled in your project?")
            return False
        except Exception as e:
            print(f"❌ Error making API call: {e}")
            return False

    def test_file_listing(self):
        """Test different file listing approaches.

        Lists up to 10 files of any type, then up to 10 folders.

        Returns:
            ``True`` only when both requests succeed and at least one folder
            is visible.
        """
        print("\n📁 Testing File Listing...")
        print("-" * 40)

        if not self.drive_service:
            print("❌ No Drive service available")
            return False

        # Test 1: List any files (not just folders)
        try:
            print("Test 1: Listing all files (including documents)...")
            results = self.drive_service.files().list(
                q="trashed=false",
                fields="files(id, name, mimeType)",
                pageSize=10,
            ).execute()

            files = results.get('files', [])
            print(f"✅ Found {len(files)} files total")

            if files:
                print("📄 First few files:")
                for file in files[:5]:
                    print(f" - {file['name']} ({file['mimeType']})")

        except Exception as e:
            print(f"❌ Error listing files: {e}")
            return False

        # Test 2: List only folders
        try:
            print("\nTest 2: Listing folders only...")
            results = self.drive_service.files().list(
                q="mimeType='application/vnd.google-apps.folder' and trashed=false",
                fields="files(id, name, parents)",
                pageSize=10,
            ).execute()

            folders = results.get('files', [])
            print(f"✅ Found {len(folders)} folders")

            if folders:
                print("📁 Folders found:")
                for folder in folders:
                    print(f" - {folder['name']} (ID: {folder['id']})")
            else:
                print("⚠️ No folders found - this might indicate:")
                print(" 1. Your Google Drive is empty")
                print(" 2. The service account doesn't have access to your personal Drive")
                print(" 3. You need to share folders with the service account")

            return bool(folders)

        except Exception as e:
            print(f"❌ Error listing folders: {e}")
            return False

    def test_root_access(self):
        """Test access to the root folder and list up to 10 of its children."""
        print("\n🏠 Testing Root Folder Access...")
        print("-" * 40)

        try:
            if not self.drive_service:
                print("❌ No Drive service available")
                return False

            # Try to get root folder info
            root_info = self.drive_service.files().get(
                fileId='root',
                fields="id, name, mimeType",
            ).execute()

            print("✅ Root folder accessible")
            print(f"📁 Root folder name: {root_info.get('name', 'My Drive')}")

            # Try to list contents of root
            results = self.drive_service.files().list(
                q="'root' in parents and trashed=false",
                fields="files(id, name, mimeType)",
                pageSize=10,
            ).execute()

            root_contents = results.get('files', [])
            print(f"📄 Items in root folder: {len(root_contents)}")

            if root_contents:
                print("🔍 Root folder contents:")
                for item in root_contents[:5]:
                    item_type = "📁" if "folder" in item['mimeType'] else "📄"
                    print(f" {item_type} {item['name']}")

            return True

        except Exception as e:
            print(f"❌ Error accessing root folder: {e}")
            return False

    def run_full_diagnostic(self):
        """Run the complete diagnostic, stopping at the first failing step.

        Returns:
            ``True`` when every step passes, ``False`` otherwise.
        """
        print("🚀 Google Drive API Diagnostic Tool")
        print("=" * 50)

        # Steps 1-5, each paired with the message printed when it fails.
        steps = [
            (self.test_environment_variables,
             "\n❌ Environment setup failed. Please check your service account file."),
            (self.test_credentials_loading, "\n❌ Credentials loading failed."),
            (self.test_drive_service_creation, "\n❌ Drive service creation failed."),
            (self.test_basic_api_call, "\n❌ Basic API call failed."),
            (self.test_root_access, "\n❌ Root folder access failed."),
        ]
        for step, failure_message in steps:
            if not step():
                print(failure_message)
                return False

        # Step 6: File listing
        if not self.test_file_listing():
            print("\n❌ File listing failed or no folders found.")
            print("\n🔧 SOLUTION SUGGESTIONS:")
            print("1. If you're using a service account, you need to SHARE folders with it")
            print("2. Share your 'Blue Berry' folder with the service account email")
            print("3. Or consider using OAuth2 instead of service account for personal Drive access")
            return False

        print("\n✅ All diagnostic tests passed!")
        print("🎉 Your Google Drive API connection is working correctly!")
        return True

    def show_service_account_sharing_instructions(self):
        """Show instructions for sharing folders with the service account.

        When credentials were loaded, the service-account email is re-read
        from the key file and printed as the address to share with.
        """
        print("\n📋 SERVICE ACCOUNT SHARING INSTRUCTIONS:")
        print("=" * 50)
        print("If you're using a service account, you need to share folders with it:")
        print()
        print("1. Open Google Drive in your browser")
        print("2. Find the folder you want to access (e.g., 'Blue Berry')")
        print("3. Right-click the folder → 'Share'")
        print("4. Add the service account email address")
        print("5. Give it 'Viewer' or 'Editor' permissions")
        print("6. Click 'Send'")
        print()

        if self.credentials:
            try:
                service_account_file = os.getenv('GOOGLE_SERVICE_ACCOUNT_FILE')
                with open(service_account_file, 'r', encoding='utf-8') as f:
                    service_data = json.load(f)
                email = service_data.get('client_email')
                print(f"📧 Your service account email: {email}")
                print(" ^ Share your folders with this email address")
            except (OSError, ValueError, TypeError, AttributeError):
                # Unreadable/missing file, invalid JSON, unset env var, or a
                # JSON document that is not an object.
                print("❌ Could not read service account email")
1362
+ "\n",
1363
def main():
    """Run the full diagnostic and, if any step fails, print the sharing instructions."""
    diagnostic = DriveConnectionDiagnostic()
    if diagnostic.run_full_diagnostic():
        return
    diagnostic.show_service_account_sharing_instructions()


if __name__ == "__main__":
    main()
1372
+ ]
1373
+ },
1374
+ {
1375
+ "cell_type": "code",
1376
+ "execution_count": null,
1377
+ "id": "e3374b17-d2fa-4c80-a70b-76b8bf94c702",
1378
+ "metadata": {},
1379
+ "outputs": [],
1380
+ "source": []
1381
+ }
1382
+ ],
1383
+ "metadata": {
1384
+ "kernelspec": {
1385
+ "display_name": "Python 3 (ipykernel)",
1386
+ "language": "python",
1387
+ "name": "python3"
1388
+ },
1389
+ "language_info": {
1390
+ "codemirror_mode": {
1391
+ "name": "ipython",
1392
+ "version": 3
1393
+ },
1394
+ "file_extension": ".py",
1395
+ "mimetype": "text/x-python",
1396
+ "name": "python",
1397
+ "nbconvert_exporter": "python",
1398
+ "pygments_lexer": "ipython3",
1399
+ "version": "3.13.5"
1400
+ }
1401
+ },
1402
+ "nbformat": 4,
1403
+ "nbformat_minor": 5
1404
+ }
RAG/.ipynb_checkpoints/app-checkpoint.py ADDED
File without changes
RAG/.ipynb_checkpoints/rag-system-463320-f292991d0516-checkpoint.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "type": "service_account",
3
+ "project_id": "rag-system-463320",
4
+ "private_key_id": "f292991d051639790555548b3142cf9447594bee",
5
+ "private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQCaIHHexQcwmlEx\n/68EK7q8RGtLGVVubssDFzw9eiCjBh71PNhpaXBj0RiwKtnmCrfuYZ0RHm1OQN1l\niiXE7eiRuDkgmhfuLx4k8+L7tBfEcpVA9HHEZXKpp1YHUnuDsS4UZWZDlc/Q/O6q\njt5xwcKpnDMYsOTVXSuj6NslzVmYQYgW3xAastKUzAr3N/zXnH4rvnCS9N2QocC7\nZ8Z+dQYLd4QG2Pty7Oe/FVCdPWM9HfT9MmdBDeJfxran3Wni+TXBqH9NWJsMVnAl\nLkix6iFuBGaGBKQwEmX5GwN59ILb26U8hcOgwrbVVqZS4CKEH4sodcDlsf6iPXbr\nJVGeBLUZAgMBAAECggEAAOK89MfS9rKX4TCCO+ifwbNSSWkjeEPTSEcZEj7s66Sw\nFWlAC5DO08+ShVvBrngE91irepBRCCdeDI9GLJ967bdx6XxwUVJcjO3sTkc08N6P\neco/mVdFdVuR30SRBQdwkktSp/RwDMnma6e3TWtFktNkBVWxH2XgR5o+IJkYjjXQ\nWfZhrqzAZ6d7519KbP1SKP210IAsst6FpS2lQf4JYu/ohn1G9Pf1PVDkZQ0PnAjB\noDMumBjrJJj4yupVu0nDAErQRp7eePfE5AXHtEHPKCmWQGVUq9e0T0xd/vILeSPl\nyO6UfS5vM5KvBiHouENkF7x27h+z+Uv+PZGP+wgDgQKBgQDVUc/Mq6Ef49vJtB+g\nPIX7o1wNupxlC1i2Yxfbsqtd09ZGTLfBoK2IJkj2K1kEUJoBEdw+iNhumbafH7Wp\n/k05xChIva2dsxtBqyxae4hak2SmZEIfXCpWxocXa8E9pVst68XZfcAAlWPqfjkT\nJkeYsbraOL+LTzBn+PFU1j7PYQKBgQC49sl9s+G4wnX5dTBKSerKhluR3luB5N2+\ndG/ySvbno+yuQcKWveARRxBMSMwrH3lr2UPe0KIfs2ez6ZMMskyy4X4qVS/RFkz9\nJdRYwZuJxYmHRYD9tGus70alfTxQOjcHYu4nP+BZ3/SYAS8IwQyJW0ZTJfb0GUHd\nSFzaIAfYuQKBgErs+GrwIaDc3LcFEFKsz3aqU0vzLrmC+b7eIWQmZnCHVmzMx4I4\nGJuIrvngEd0lHmKfzlcco8B9Nxq+/YTe8GsIzgl2rgOFBF7Va7fASg1eVeznrB3t\ntmVSR+LfEzm+2b7QYGba91R9JGIZwZxLRryIOJQreIPK5bqvQrzEn70hAoGBALIx\nf5jMze+T2NiFoApSABMxMdLsJ7iSZ1bvElKZChnMAV84F2Hmd5HtprP3hksdPXs7\ni3mEAk0MKCJaKsMWm3HScBwUicaj93QDF/qSO2iVmJNr0IObaAjpVGRmrM6V5yIE\nm2/QyDGN4zT2HvwOECfceXntn2c3Vg77ggjaxvRxAoGAGGXQBpyAnazkaC1vpstf\nFQwqINKGqClU9Q7ciAlDTJSbTT3q3HU5WiWZmJnxwHbd7GJN4dizypEf9g0/Zejb\nptZwejr3WB+6GwYWkabRxosVhsKBtaT/8NMmPtwZ1jEukZW4UbIAOziJ8OM/mhfm\ns3CDpUHWETb1X9bB3Jwljoc=\n-----END PRIVATE KEY-----\n",
6
+ "client_email": "rag-base@rag-system-463320.iam.gserviceaccount.com",
7
+ "client_id": "110051039758068935524",
8
+ "auth_uri": "https://accounts.google.com/o/oauth2/auth",
9
+ "token_uri": "https://oauth2.googleapis.com/token",
10
+ "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
11
+ "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/rag-base%40rag-system-463320.iam.gserviceaccount.com",
12
+ "universe_domain": "googleapis.com"
13
+ }
RAG/.ipynb_checkpoints/requirements-checkpoint.txt ADDED
File without changes
RAG/RAG-1.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
RAG/app.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import GPTDriveIntegration
2
+ import gradio as gr
3
+
4
# `import GPTDriveIntegration` binds the module object, and calling a module
# raises "TypeError: 'module' object is not callable".
# NOTE(review): assumes the module defines a class with the same name as the
# module — confirm against GPTDriveIntegration.py.
gpt_drive = GPTDriveIntegration.GPTDriveIntegration()
5
+
6
def process_user_query(query, search_terms_input):
    """Process a user query and return ``(answer, sources_text)`` for the UI.

    Args:
        query: the question typed by the user; ``None``, empty or
            whitespace-only input short-circuits with a prompt message.
        search_terms_input: raw text of the "Search Terms" box. It is
            currently not forwarded: ``process_query`` is always called with
            ``search_terms=None``.

    Returns:
        A 2-tuple of the answer text and a Markdown bullet list of the
        sources used (empty string when there are none).
    """
    if not query or not query.strip():
        return "Please enter a question.", ""

    # NOTE(review): parsing of the comma-separated search terms was disabled
    # by the author; the box is still wired into the UI. Confirm whether it
    # should be re-enabled.
    search_terms = None

    # Process the query
    result = gpt_drive.process_query(query, search_terms)

    # Format the response
    answer = result['answer']
    sources = result['sources']

    sources_text = ""
    if sources:
        sources_text = "**Sources used:**\n" + "\n".join(f"• {source}" for source in sources)

    return answer, sources_text
28
+
29
def check_setup():
    """Report whether the Google Drive and OpenAI clients are configured.

    Reads the ``*_initialized`` flags and ``*_error`` messages from the
    module-level ``gpt_drive`` object; a missing flag is treated as "not
    initialized" and a missing error message as "Not configured".

    Returns:
        One status line per API, joined with newlines.
    """
    status_messages = []

    checks = (
        ("Google Drive API", "drive_initialized", "drive_error"),
        ("OpenAI API", "openai_initialized", "openai_error"),
    )
    for label, flag_attr, error_attr in checks:
        if getattr(gpt_drive, flag_attr, False):
            status_messages.append(f"✅ {label}: Connected")
        else:
            reason = getattr(gpt_drive, error_attr, 'Not configured')
            status_messages.append(f"❌ {label}: {reason}")

    return "\n".join(status_messages)
46
+
47
# Create Gradio interface.
# Layout: a wide left column (question form + results) and a narrow right
# column (API status + usage notes), followed by clickable example queries.
with gr.Blocks(title="Augusta's Anatomy Reading Assistant", theme=gr.themes.Soft()) as app:
    gr.Markdown("# 🤖 Augusta's Anatomy bot")
    gr.Markdown("Ask questions about your anatomy books using AI!")

    with gr.Row():
        with gr.Column(scale=2):
            # Main query interface
            with gr.Group():
                gr.Markdown("### Ask a Question")
                query_input = gr.Textbox(
                    label="Your Question",
                    placeholder="Ask me any question about your anatomy books?",
                    lines=3,
                )

                search_terms_input = gr.Textbox(
                    label="Search Terms (optional)",
                    placeholder="Enter comma-separated terms to search for specific files",
                    lines=1,
                )

                submit_btn = gr.Button("Search & Ask", variant="primary", size="lg")

            # Results section
            with gr.Group():
                gr.Markdown("### Answer")
                answer_output = gr.Textbox(
                    label="AI Response",
                    lines=10,
                    interactive=False,
                )

                sources_output = gr.Textbox(
                    label="Sources",
                    lines=3,
                    interactive=False,
                )

        with gr.Column(scale=1):
            # Status and setup info
            with gr.Group():
                gr.Markdown("### System Status")
                status_btn = gr.Button("Check Status", size="sm")
                status_output = gr.Textbox(
                    label="API Status",
                    lines=4,
                    interactive=False,
                )

            with gr.Group():
                gr.Markdown("### Setup Instructions")
                gr.Markdown("""
                **Important Notes:**
                1.Only documents shared with it, it can answer

                **File Types Supported:**
                - Google Docs
                - Google Sheets
                - PDF files
                - Text files

                **Tips:**
                - Use specific search terms for better results
                - The system searches the top 3 most relevant files
                - Ask clear, specific questions for better answers
                """)

    # Event handlers
    submit_btn.click(
        fn=process_user_query,
        inputs=[query_input, search_terms_input],
        outputs=[answer_output, sources_output],
    )

    status_btn.click(
        fn=check_setup,
        outputs=status_output,
    )

    # Example queries
    with gr.Row():
        gr.Examples(
            examples=[
                ["What is morbid Anatomy?", "morbid, Anatomy"],
                ["The transmission of nerves from one neuron to another is as a result of what?", "neuron, nerves, Dr Clement"],
            ],
            inputs=[query_input, search_terms_input],
        )

# Launch the app
if __name__ == "__main__":
    # share=True requests a public share link; debug=True keeps the process
    # attached so errors are printed to the console.
    app.launch(share=True, debug=True)
RAG/rag-system-463320-f292991d0516.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "type": "service_account",
3
+ "project_id": "rag-system-463320",
4
+ "private_key_id": "f292991d051639790555548b3142cf9447594bee",
5
+ "private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQCaIHHexQcwmlEx\n/68EK7q8RGtLGVVubssDFzw9eiCjBh71PNhpaXBj0RiwKtnmCrfuYZ0RHm1OQN1l\niiXE7eiRuDkgmhfuLx4k8+L7tBfEcpVA9HHEZXKpp1YHUnuDsS4UZWZDlc/Q/O6q\njt5xwcKpnDMYsOTVXSuj6NslzVmYQYgW3xAastKUzAr3N/zXnH4rvnCS9N2QocC7\nZ8Z+dQYLd4QG2Pty7Oe/FVCdPWM9HfT9MmdBDeJfxran3Wni+TXBqH9NWJsMVnAl\nLkix6iFuBGaGBKQwEmX5GwN59ILb26U8hcOgwrbVVqZS4CKEH4sodcDlsf6iPXbr\nJVGeBLUZAgMBAAECggEAAOK89MfS9rKX4TCCO+ifwbNSSWkjeEPTSEcZEj7s66Sw\nFWlAC5DO08+ShVvBrngE91irepBRCCdeDI9GLJ967bdx6XxwUVJcjO3sTkc08N6P\neco/mVdFdVuR30SRBQdwkktSp/RwDMnma6e3TWtFktNkBVWxH2XgR5o+IJkYjjXQ\nWfZhrqzAZ6d7519KbP1SKP210IAsst6FpS2lQf4JYu/ohn1G9Pf1PVDkZQ0PnAjB\noDMumBjrJJj4yupVu0nDAErQRp7eePfE5AXHtEHPKCmWQGVUq9e0T0xd/vILeSPl\nyO6UfS5vM5KvBiHouENkF7x27h+z+Uv+PZGP+wgDgQKBgQDVUc/Mq6Ef49vJtB+g\nPIX7o1wNupxlC1i2Yxfbsqtd09ZGTLfBoK2IJkj2K1kEUJoBEdw+iNhumbafH7Wp\n/k05xChIva2dsxtBqyxae4hak2SmZEIfXCpWxocXa8E9pVst68XZfcAAlWPqfjkT\nJkeYsbraOL+LTzBn+PFU1j7PYQKBgQC49sl9s+G4wnX5dTBKSerKhluR3luB5N2+\ndG/ySvbno+yuQcKWveARRxBMSMwrH3lr2UPe0KIfs2ez6ZMMskyy4X4qVS/RFkz9\nJdRYwZuJxYmHRYD9tGus70alfTxQOjcHYu4nP+BZ3/SYAS8IwQyJW0ZTJfb0GUHd\nSFzaIAfYuQKBgErs+GrwIaDc3LcFEFKsz3aqU0vzLrmC+b7eIWQmZnCHVmzMx4I4\nGJuIrvngEd0lHmKfzlcco8B9Nxq+/YTe8GsIzgl2rgOFBF7Va7fASg1eVeznrB3t\ntmVSR+LfEzm+2b7QYGba91R9JGIZwZxLRryIOJQreIPK5bqvQrzEn70hAoGBALIx\nf5jMze+T2NiFoApSABMxMdLsJ7iSZ1bvElKZChnMAV84F2Hmd5HtprP3hksdPXs7\ni3mEAk0MKCJaKsMWm3HScBwUicaj93QDF/qSO2iVmJNr0IObaAjpVGRmrM6V5yIE\nm2/QyDGN4zT2HvwOECfceXntn2c3Vg77ggjaxvRxAoGAGGXQBpyAnazkaC1vpstf\nFQwqINKGqClU9Q7ciAlDTJSbTT3q3HU5WiWZmJnxwHbd7GJN4dizypEf9g0/Zejb\nptZwejr3WB+6GwYWkabRxosVhsKBtaT/8NMmPtwZ1jEukZW4UbIAOziJ8OM/mhfm\ns3CDpUHWETb1X9bB3Jwljoc=\n-----END PRIVATE KEY-----\n",
6
+ "client_email": "rag-base@rag-system-463320.iam.gserviceaccount.com",
7
+ "client_id": "110051039758068935524",
8
+ "auth_uri": "https://accounts.google.com/o/oauth2/auth",
9
+ "token_uri": "https://oauth2.googleapis.com/token",
10
+ "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
11
+ "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/rag-base%40rag-system-463320.iam.gserviceaccount.com",
12
+ "universe_domain": "googleapis.com"
13
+ }
RAG/requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ google-api-python-client
2
+ google-auth
3
+ openai
4
+ python-dotenv
5
+ chromadb
6
+ sentence-transformers
7
+ numpy
8
+ gradio
9
+ flask