jesshewyz committed on
Commit
312e0ca
·
verified ·
1 Parent(s): 4cd0620

Upload 4 files

Browse files
Files changed (3) hide show
  1. gdrive_service_account.json +13 -0
  2. google_drive.py +343 -0
  3. prompts.py +28 -0
gdrive_service_account.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "type": "service_account",
3
+ "project_id": "quotation-chatbot-447807",
4
+ "private_key_id": "45d3aaa9da75bce5859c2e2681067fcfeed5f4a6",
5
+ "private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQC3A4p3zV5xMtXr\nS/ejSNvhXGth/KY5VRl6kY3y46Os/CIkmdJIQTsR9NhS/rMKXj7SYGfiAFHBZTgt\ncDqL4AmQOW6ZgxZa5/JDLpGzzxDTeJuPUxT3tX5pX8gY1fBM11sjDAbBt2yb0a/V\nFK3eqKTdTnCOLRkxNqJlJZSCPmXa5JZyQTaKJ53Xlm2/ydiIM6QVzA+htUc9Yxia\nLVi4AbYH1MIeOuns2bF32tnrxnpynb13i1exvabsypTnlFTMwEgjHJWelFRS1J4D\nIXKHpcPj98LEpE2JjUvAhDwgquviUoiOro2FCog7NwkaPYV7cEEwbOiygxPrzA7p\nSZ3zyfCFAgMBAAECggEAB3I+74ntFkj9uDGilUOxIpW6jsWTU9DZFQoYDvJhJfgq\nQXf6vNLenufp51KSoVDdWHzwrRrbOLWwF/UxhKBsczBLBjb1sWQvkLkz12+M3TXS\nin3UZBG/33S0tQH7YXz/qcji+33Fjv90moRZYK5W6uQ2oypFlKGG8Pua88OrLEE/\n+Y47LOmRAV9Ybj+n0jkUEzOzDNsjOYkBoR0xu7TSWjgVVnlTiHElVRCRqU6ErDjx\nfB/OuvOHkcvi0orBCR8UH8FAPdrUTTRfXuu5eNtN8wZ6h3hB0SlTpBQ3kYkF8qlm\n7tPWFR6sA8LhlKVH6FaN9ul/MnCZBzsK4A36Ffi+oQKBgQD0UbKf/SBs5sQS22g7\nyxc4HU+6Du1H87+QpXle602mPZQMKHVWUyMu60DBDHP3SSYSJMxBmgIhYX5osvY3\nrxw8q9l6E+ahgfRHqKlY+OATf9LWQn5BHdgWwGxKwPOMs4cGieZRECgqWEEwbNB8\nlzqoO9SEHRQkKiDnapq1FsuzUQKBgQC/w4Hko1OGGplAGO+xxpPG0DuaHcenSBSc\nP+SFf3YVi+cuApKlDDsdrLm1nTR25eqFQBfbmabUkuLkBBRKJ5EDFNHqtmcF5+aq\nnlYTdBqf/ZGY8KwLnM0VYBz4EfFQI91bHTzkYBMTrirCyRbEqvIHOLcatpM62YTv\nCLTApEUU9QKBgQC53lVoRagfcUgFmImpw428kQAQaj62GIfKQTkeryB5sispPLZX\noBQKI8s1koW+wRWHhBsoZUPtpUAH4u5R2pQ+y/Y9W1p/0F/JDFUZbOz7qKj7e8aV\nP2EfnfBExpoClqnInaxsS0BjRpUBhCbwQhTL3AiHIaArCynRJFbiIZ+CoQKBgDu9\nq7zQCRhRNwcZ/aqTo7wfaUIVHvsFYK5g1U+lquMBh870rDzQ96XCBsUGqa6U6CAF\nHKmkT64P6LGCd9O2Sd384XzOwH+9hEYJN+WtmRHYzEAqUl73xYhwPfYmmNgPpElb\nbXn0WF0cOGQgg4BXhTksuRaQZIPD+j8BplYRlxf1AoGBAMcGIk/uthjhFWWV/xfG\n6eIeTaCkkA6aGv+CvqDEQBv+TrUXjNF53tV25d32OO3msxOEIP/smhI0qIzuAl8E\neoyzlB+4AnKrNJ1Zx2FoBcMpxYC8AQcbSBs33pQk/11Ji08W3HncM0EHKjqtCxlc\nOtdIMrXmLkhEuqhWOXc8cuS+\n-----END PRIVATE KEY-----\n",
6
+ "client_email": "mh-chatbot-gdrive@quotation-chatbot-447807.iam.gserviceaccount.com",
7
+ "client_id": "103081650049465485670",
8
+ "auth_uri": "https://accounts.google.com/o/oauth2/auth",
9
+ "token_uri": "https://oauth2.googleapis.com/token",
10
+ "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
11
+ "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/mh-chatbot-gdrive%40quotation-chatbot-447807.iam.gserviceaccount.com",
12
+ "universe_domain": "googleapis.com"
13
+ }
google_drive.py ADDED
@@ -0,0 +1,343 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import io
3
+ import markdown
4
+ import pandas as pd
5
+ from datetime import datetime
6
+ from googleapiclient.discovery import build
7
+ from googleapiclient.http import MediaIoBaseUpload
8
+ from google.oauth2 import service_account
9
+ from googleapiclient.errors import HttpError
10
+ from docx import Document
11
+ import re
12
+
13
# Path to the Service Account key file. The key is a credential, so it should
# live outside version control: set GDRIVE_SERVICE_ACCOUNT_FILE to point at it.
# The historical relative path remains the default for existing setups.
SERVICE_ACCOUNT_FILE = (
    os.environ.get('GDRIVE_SERVICE_ACCOUNT_FILE') or 'gdrive_service_account.json'
)

# Define the scopes. For full Drive access, use 'https://www.googleapis.com/auth/drive'
# For access limited to files created by the app, use 'https://www.googleapis.com/auth/drive.file'
SCOPES = ['https://www.googleapis.com/auth/drive']

# ID of the Google Drive folder shared with the Service Account.
# GDRIVE_FOLDER_ID overrides the built-in default without a code change.
DRIVE_FOLDER_ID = (
    os.environ.get('GDRIVE_FOLDER_ID') or '1ksgImxpdYor73BkYnp60oAbRiE1nHAyz'
)
22
+
23
def authenticate_drive_service():
    """Build and return a Drive v3 client authorised via the Service Account.

    Credentials are loaded from ``SERVICE_ACCOUNT_FILE`` with the scopes in
    ``SCOPES``; progress is reported on stdout.
    """
    print("Authenticating Drive service...")
    creds = service_account.Credentials.from_service_account_file(
        SERVICE_ACCOUNT_FILE,
        scopes=SCOPES,
    )

    # Optional: impersonate a user (requires domain-wide delegation), e.g.
    # creds = creds.with_subject('user@yourdomain.com')

    drive = build('drive', 'v3', credentials=creds)
    print("Drive service authenticated.")
    return drive
35
+
36
+
37
def add_bold_text(paragraph, text):
    """
    Add text to a paragraph, handling bold formatting (text wrapped in **).

    Removes the ** markers and applies bold formatting to the enclosed text.

    Args:
        paragraph: Object exposing ``add_run(text)`` whose return value has a
            writable ``bold`` attribute.
        text: A single line of Markdown-ish text.

    Empty fragments produce no run, and an unmatched ``**`` is kept as
    literal text instead of turning into an empty bold run.
    """
    # With one capturing group, re.split alternates plain text (even indexes)
    # and the captured **...** spans (odd indexes), so the index alone tells
    # us whether a fragment is a real bold span.
    for index, fragment in enumerate(re.split(r'(\*\*.+?\*\*)', text)):
        if not fragment:
            continue
        if index % 2:
            paragraph.add_run(fragment[2:-2]).bold = True
        else:
            paragraph.add_run(fragment)
48
+
49
def process_table(doc, table_rows):
    """
    Process a Markdown table and add it to the Word document.

    Args:
        doc: Document object exposing ``add_table(rows=..., cols=...)``.
        table_rows: Raw Markdown table lines (``| a | b |``), header first.

    The first line becomes the header row. The line directly below it is
    skipped only when it really is a delimiter row (``|---|:--:|``), so a
    table written without one keeps its first data row. The table is made
    as wide as its widest row, so an over-long data row cannot index past
    the available cells. ``<br>`` / ``<br/>`` inside a data cell becomes a
    line break between the pieces (no trailing break).
    """
    if not table_rows:
        return

    def split_cells(row):
        # Remove exactly one outer pipe per side so empty edge cells survive.
        row = row.strip()
        if row.startswith('|'):
            row = row[1:]
        if row.endswith('|'):
            row = row[:-1]
        return [cell.strip() for cell in row.split('|')]

    parsed_rows = [split_cells(row) for row in table_rows]
    header, body = parsed_rows[0], parsed_rows[1:]

    # Drop the delimiter row only if it is actually present.
    if body and all(re.fullmatch(r':?-+:?', cell) for cell in body[0]):
        body = body[1:]

    num_cols = max(len(row) for row in [header, *body])

    # Create the table
    table = doc.add_table(rows=1, cols=num_cols)
    table.style = 'Table Grid'

    # Add header row
    header_cells = table.rows[0].cells
    for col, cell_content in enumerate(header):
        add_bold_text(header_cells[col].paragraphs[0], cell_content)

    # Add data rows
    line_break = re.compile(r'<br\s*/?>', re.IGNORECASE)
    for row in body:
        row_cells = table.add_row().cells
        for col, cell_content in enumerate(row):
            paragraph = row_cells[col].paragraphs[0]
            for position, piece in enumerate(line_break.split(cell_content)):
                if position:
                    # Break goes between pieces, never after the last one.
                    paragraph.add_run().add_break()
                add_bold_text(paragraph, piece.strip())
84
+
85
def convert_md_to_docx(md_content):
    """
    Convert Markdown content to a DOCX document using python-docx.

    Supported constructs, handled line by line: pipe tables, ``#`` headings
    (levels 1-6), unordered (``* `` / ``- ``) and ordered (``1. ``) lists,
    horizontal rules (``---`` / ``***``) and plain paragraphs, with ``**bold**``
    spans inside any of them.

    Args:
        md_content: Markdown source as a string.

    Returns:
        The DOCX file as bytes, or None if the conversion failed (the error
        is printed rather than raised).
    """
    try:
        doc = Document()
        table_rows = []

        for line in md_content.split('\n'):
            stripped_line = line.strip()

            # Handle tables: collect consecutive pipe-delimited lines and
            # emit them as one table once a non-table line is reached.
            if re.match(r'^\|.*\|$', stripped_line):
                table_rows.append(stripped_line)
                continue
            if table_rows:
                process_table(doc, table_rows)
                table_rows = []

            # Handle headings. The level is the length of the *leading* run
            # of '#', so a '#' inside the title (e.g. "# C# notes") does not
            # inflate it. Seven or more hashes is not a heading and falls
            # through to a regular paragraph instead of being dropped.
            heading_match = re.match(r'^(#{1,6})(?!#)\s*(.*)$', stripped_line)
            if heading_match:
                heading_level = len(heading_match.group(1))
                # Strip an optional closing sequence ("## Title ##").
                heading_text = re.sub(r'\s+#+$', '', heading_match.group(2)).strip()
                heading = doc.add_paragraph()
                heading.style = f'Heading {heading_level}'
                add_bold_text(heading, heading_text)
                continue

            # Handle unordered lists
            if stripped_line.startswith(('* ', '- ')):
                paragraph = doc.add_paragraph(style='List Bullet')
                add_bold_text(paragraph, stripped_line[2:].strip())
                continue

            # Handle ordered lists
            if re.match(r'^\d+\.\s', stripped_line):
                paragraph = doc.add_paragraph(style='List Number')
                add_bold_text(paragraph, re.sub(r'^\d+\.\s', '', stripped_line))
                continue

            # Handle horizontal rules
            if stripped_line in ('---', '***'):
                doc.add_paragraph().add_run().add_break()
                continue

            # Handle regular paragraphs
            if stripped_line:
                paragraph = doc.add_paragraph()
                add_bold_text(paragraph, stripped_line)

        # A table that runs to the very last line has no following line to
        # trigger the flush above, so emit it here.
        if table_rows:
            process_table(doc, table_rows)

        # Save the document to an in-memory file
        output = io.BytesIO()
        doc.save(output)
        return output.getvalue()

    except Exception as e:
        # Deliberate best-effort boundary: callers treat None as "failed".
        print(f"Conversion error: {e}")
        return None
154
+
155
def determine_mime_type(filename):
    """Map a filename's extension to (upload MIME type, Drive target MIME type).

    The second element is the Google-native type Drive should convert the
    upload into, or None when the file is uploaded as-is. The extension is
    matched case-insensitively; unknown extensions fall back to a binary
    upload without conversion.
    """
    print(f"Determining MIME type for {filename}...")

    google_doc = 'application/vnd.google-apps.document'
    conversions = {
        # Markdown is converted to DOCX first, then imported as a Google Doc
        '.md': (
            'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
            google_doc,
        ),
        # Plain text becomes a Google Doc
        '.txt': ('text/plain', google_doc),
        # CSV becomes a Google Sheet
        '.csv': ('text/csv', 'application/vnd.google-apps.spreadsheet'),
    }

    _, suffix = os.path.splitext(filename)
    mime_type, drive_mime = conversions.get(
        suffix.lower(),
        ('application/octet-stream', None),
    )

    print(f"MIME type determined: {mime_type}, Drive MIME: {drive_mime}")
    return mime_type, drive_mime
177
+
178
def upload_content(service, folder_id, filename, content):
    """
    Upload content directly to Google Drive as a Google Doc or Sheet.

    Args:
        service: Authorized Drive API service instance.
        folder_id: ID of the Drive folder to upload into.
        filename: Name of the file. Its extension selects the conversion
            (see determine_mime_type); the extension is removed from the
            name stored in Drive.
        content: Markdown/text as a string, raw bytes, or a pandas DataFrame
            (serialised to CSV without the index).

    Returns:
        The ID of the created Drive file, or None when nothing was uploaded
        (missing content, failed Markdown conversion, or a Drive API error,
        each of which is reported on stdout).
    """
    print(f"Initiating upload process for {filename} to Google Drive...")
    mime_type, drive_mime = determine_mime_type(filename)
    print(f"Determined MIME type: {mime_type}, Drive MIME: {drive_mime}")

    if content is None:
        # io.BytesIO(None) would silently upload an empty file.
        print(f"No content provided for {filename}; skipping upload.")
        return None

    base_name, extension = os.path.splitext(filename)

    # Compare case-insensitively, matching determine_mime_type; otherwise
    # "X.MD" would be labelled DOCX while still holding raw Markdown.
    if extension.lower() == '.md':
        content = convert_md_to_docx(content)
        if content is None:
            print("Failed to convert Markdown to DOCX.")
            return None

    # Prepare file metadata
    file_metadata = {
        'name': base_name,  # Remove extension for Google Docs/Sheets
        'parents': [folder_id],
    }
    if drive_mime:
        file_metadata['mimeType'] = drive_mime
    print(f"Prepared file metadata: {file_metadata}")

    # Prepare media: io.BytesIO needs bytes, so text payloads are encoded.
    if isinstance(content, pd.DataFrame):
        payload = content.to_csv(index=False).encode('utf-8')
        print(f"Converted DataFrame to CSV string for {filename}")
    elif isinstance(content, str):
        payload = content.encode('utf-8')
    else:
        # Already binary (e.g. the DOCX bytes produced above)
        payload = content

    media = MediaIoBaseUpload(
        io.BytesIO(payload),
        mimetype=mime_type,
        resumable=True
    )
    print(f"Prepared media for upload: {media}")

    try:
        file = service.files().create(
            body=file_metadata,
            media_body=media,
            fields='id'
        ).execute()
    except HttpError as error:
        print(f"An error occurred while uploading {filename}: {error}")
        return None

    file_id = file.get('id')
    print(f"Successfully uploaded {filename} to Google Drive with ID: {file_id}")
    return file_id
235
+
236
def v2_upload_to_gdrive(prd_content, components_content, tasks_content, task_units_content, quantity_df, analysis_content, cost_summary):
    """
    Upload the v2 quotation artefacts into a new timestamped Drive subfolder.

    A ``quotation_<yymmdd_HHMMSS>`` folder is created under DRIVE_FOLDER_ID and
    receives:
      * ``quotation_document.md`` - cost summary, analysis, components, tasks
        and task base units combined under one heading each;
      * ``prd.md`` - ``prd_content``;
      * ``quantified_tasks.csv`` - ``quantity_df``, only when it is not None.

    Returns:
        A human-readable status string: a success message containing the
        parent folder URL, "Drive folder ID is not set.", or a
        "Failed to upload files. Error: ..." message.
    """
    print("Starting v2_upload_to_gdrive process...")

    # Validate configuration before spending an authentication round-trip.
    parent_folder_id = DRIVE_FOLDER_ID
    if not parent_folder_id:
        return "Drive folder ID is not set."

    # Authentication and folder creation are inside the try block so that a
    # missing key file or a Drive API error is reported through the same
    # status string as any later failure instead of escaping as an exception.
    try:
        service = authenticate_drive_service()

        # Create new subfolder
        folder_metadata = {
            'name': f'quotation_{datetime.now().strftime("%y%m%d_%H%M%S")}',
            'mimeType': 'application/vnd.google-apps.folder',
            'parents': [parent_folder_id]
        }
        subfolder = service.files().create(body=folder_metadata, fields='id').execute()
        subfolder_id = subfolder.get('id')
        print(f"Created subfolder with ID: {subfolder_id}")

        sections = (
            ("Cost Summary", cost_summary),
            ("Analysis", analysis_content),
            ("Components", components_content),
            ("Tasks", tasks_content),
            ("Task Base Units", task_units_content),
        )
        # Leading/trailing newlines are kept on purpose: the trailing one
        # guarantees a final non-table line after the last section.
        combined_content = (
            "\n"
            + "\n\n".join(f"# {title}\n{body}" for title, body in sections)
            + "\n"
        )

        # Upload files to subfolder
        # NOTE(review): upload_content reports Drive errors on stdout rather
        # than raising, so the success message below does not prove that
        # every individual upload succeeded.
        upload_content(service, subfolder_id, "quotation_document.md", combined_content)
        upload_content(service, subfolder_id, "prd.md", prd_content)
        if quantity_df is not None:
            upload_content(service, subfolder_id, "quantified_tasks.csv", quantity_df)

        parent_folder_url = f"https://drive.google.com/drive/folders/{parent_folder_id}"
        return f"All files uploaded to subfolder.Parent folder URL:\n\n {parent_folder_url}"

    except Exception as e:
        print(f"An error occurred: {e}")
        return f"Failed to upload files. Error: {e}"
291
+
292
+
293
+
294
def v1_upload_to_gdrive(prd_box,tasks_table,quantity_table,merged_table,units_output,analysis_box):
    """
    Upload the v1 quotation artefacts into a new timestamped Drive subfolder.

    A ``quotation_<yymmdd_HHMMSS>`` folder is created under DRIVE_FOLDER_ID and
    receives:
      * ``quotation_document.md`` - ``units_output`` under "Cost Summary" and
        ``analysis_box`` under "Analysis";
      * ``prd.md`` - ``prd_box``;
      * ``tasks.csv``, ``quantity.csv``, ``merged.csv`` - the respective
        table argument, each only when it is not None.

    Returns:
        A human-readable status string: a success message containing the
        parent folder URL, "Drive folder ID is not set.", or a
        "Failed to upload files. Error: ..." message.
    """
    print("Starting v1_upload_to_gdrive process...")

    # Validate configuration before spending an authentication round-trip.
    parent_folder_id = DRIVE_FOLDER_ID
    if not parent_folder_id:
        return "Drive folder ID is not set."

    # Authentication and folder creation are inside the try block so that a
    # missing key file or a Drive API error is reported through the same
    # status string as any later failure instead of escaping as an exception.
    try:
        service = authenticate_drive_service()

        # Create new subfolder
        folder_metadata = {
            'name': f'quotation_{datetime.now().strftime("%y%m%d_%H%M%S")}',
            'mimeType': 'application/vnd.google-apps.folder',
            'parents': [parent_folder_id]
        }
        subfolder = service.files().create(body=folder_metadata, fields='id').execute()
        subfolder_id = subfolder.get('id')
        print(f"Created subfolder with ID: {subfolder_id}")

        # Leading/trailing newlines are kept on purpose: the trailing one
        # guarantees a final non-table line after the last section.
        combined_content = (
            "\n"
            f"# Cost Summary\n{units_output}\n"
            "\n"
            f"# Analysis\n{analysis_box}\n"
        )

        # Upload files to subfolder
        # NOTE(review): upload_content reports Drive errors on stdout rather
        # than raising, so the success message below does not prove that
        # every individual upload succeeded.
        upload_content(service, subfolder_id, "quotation_document.md", combined_content)
        upload_content(service, subfolder_id, "prd.md", prd_box)
        optional_tables = (
            ("tasks.csv", tasks_table),
            ("quantity.csv", quantity_table),
            ("merged.csv", merged_table),
        )
        for csv_name, table in optional_tables:
            if table is not None:
                upload_content(service, subfolder_id, csv_name, table)

        parent_folder_url = f"https://drive.google.com/drive/folders/{parent_folder_id}"
        return f"All files uploaded to subfolder.Parent folder URL:\n\n {parent_folder_url}"

    except Exception as e:
        print(f"An error occurred: {e}")
        return f"Failed to upload files. Error: {e}"
prompts.py CHANGED
@@ -664,6 +664,34 @@ The final output should deliver a **clear, actionable, and project-specific list
664
 
665
  """
666
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
667
  define_technical_task = """You are tasked with creating a Granular Technical Task List for a chatbot project. Using the Project Requirement Document and the Component List, your objective is to decompose each component into highly detailed, technical-level subtasks. These subtasks should capture every required step to complete the project with precision, ensuring no technical detail is overlooked.
668
 
669
  ### **Instructions**:
 
664
 
665
  """
666
 
667
# Prompt template: asks the model for a project-specific Component List
# limited to the Development phase, and states the tech stack to assume.
# The text is sent to the model verbatim, so wording changes alter behaviour.
define_dev_components = """
You are tasked with creating a detailed **Component List** for a project based on the provided requirements. Your goal is to list all **components** within each **phase** of the project. The components should be highly specific to the project context, leaving no ambiguity about what each phase entails. Follow these instructions:

### **Specific Requirements**:

1. **Focus on Development Phase**
- **Development**

2. **Components**:
- Include **project-specific components** that align with the goal of developing the given project
- Use the provided project requirement document to extract relevant, **granular components** that reflect the tasks and deliverables unique to this project.
- Components should directly reference functionalities or deliverables related to the project

### **Objective**:
The final output should deliver a **clear, actionable, and project-specific list of components** in the development phase. The goal is to provide a foundation for developing granular subcomponents and tasks, ensuring alignment with the unique requirements of this project.

**Our Tech Stack**:
- **Backend**: FastAPI (for API services)
- **Programming Language**: Python
- **Chatbot Builder**: Chatbot Builder Platform (e.g., Dialogflow, Rasa)
- **Cloud Services**: AWS
- **Database**: PostgreSQL
- **Caching**: Redis (in-memory data store)
- **Containerization**: Docker
- **Database Migrations**: Alembic
"""
693
+
694
+
695
  define_technical_task = """You are tasked with creating a Granular Technical Task List for a chatbot project. Using the Project Requirement Document and the Component List, your objective is to decompose each component into highly detailed, technical-level subtasks. These subtasks should capture every required step to complete the project with precision, ensuring no technical detail is overlooked.
696
 
697
  ### **Instructions**: