Spaces:

jesshewyz
/

engage_quotation_chatbot

Sleeping

App Files Files Community

jesshewyz committed on Jan 14, 2025

Commit

312e0ca

verified ·

1 Parent(s): 4cd0620

Upload 4 files

Browse files

Files changed (3) hide show

gdrive_service_account.json +13 -0
google_drive.py +343 -0
prompts.py +28 -0

gdrive_service_account.json ADDED Viewed

	@@ -0,0 +1,13 @@

+{
+  "type": "service_account",
+  "project_id": "quotation-chatbot-447807",
+  "private_key_id": "45d3aaa9da75bce5859c2e2681067fcfeed5f4a6",
+  "private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQC3A4p3zV5xMtXr\nS/ejSNvhXGth/KY5VRl6kY3y46Os/CIkmdJIQTsR9NhS/rMKXj7SYGfiAFHBZTgt\ncDqL4AmQOW6ZgxZa5/JDLpGzzxDTeJuPUxT3tX5pX8gY1fBM11sjDAbBt2yb0a/V\nFK3eqKTdTnCOLRkxNqJlJZSCPmXa5JZyQTaKJ53Xlm2/ydiIM6QVzA+htUc9Yxia\nLVi4AbYH1MIeOuns2bF32tnrxnpynb13i1exvabsypTnlFTMwEgjHJWelFRS1J4D\nIXKHpcPj98LEpE2JjUvAhDwgquviUoiOro2FCog7NwkaPYV7cEEwbOiygxPrzA7p\nSZ3zyfCFAgMBAAECggEAB3I+74ntFkj9uDGilUOxIpW6jsWTU9DZFQoYDvJhJfgq\nQXf6vNLenufp51KSoVDdWHzwrRrbOLWwF/UxhKBsczBLBjb1sWQvkLkz12+M3TXS\nin3UZBG/33S0tQH7YXz/qcji+33Fjv90moRZYK5W6uQ2oypFlKGG8Pua88OrLEE/\n+Y47LOmRAV9Ybj+n0jkUEzOzDNsjOYkBoR0xu7TSWjgVVnlTiHElVRCRqU6ErDjx\nfB/OuvOHkcvi0orBCR8UH8FAPdrUTTRfXuu5eNtN8wZ6h3hB0SlTpBQ3kYkF8qlm\n7tPWFR6sA8LhlKVH6FaN9ul/MnCZBzsK4A36Ffi+oQKBgQD0UbKf/SBs5sQS22g7\nyxc4HU+6Du1H87+QpXle602mPZQMKHVWUyMu60DBDHP3SSYSJMxBmgIhYX5osvY3\nrxw8q9l6E+ahgfRHqKlY+OATf9LWQn5BHdgWwGxKwPOMs4cGieZRECgqWEEwbNB8\nlzqoO9SEHRQkKiDnapq1FsuzUQKBgQC/w4Hko1OGGplAGO+xxpPG0DuaHcenSBSc\nP+SFf3YVi+cuApKlDDsdrLm1nTR25eqFQBfbmabUkuLkBBRKJ5EDFNHqtmcF5+aq\nnlYTdBqf/ZGY8KwLnM0VYBz4EfFQI91bHTzkYBMTrirCyRbEqvIHOLcatpM62YTv\nCLTApEUU9QKBgQC53lVoRagfcUgFmImpw428kQAQaj62GIfKQTkeryB5sispPLZX\noBQKI8s1koW+wRWHhBsoZUPtpUAH4u5R2pQ+y/Y9W1p/0F/JDFUZbOz7qKj7e8aV\nP2EfnfBExpoClqnInaxsS0BjRpUBhCbwQhTL3AiHIaArCynRJFbiIZ+CoQKBgDu9\nq7zQCRhRNwcZ/aqTo7wfaUIVHvsFYK5g1U+lquMBh870rDzQ96XCBsUGqa6U6CAF\nHKmkT64P6LGCd9O2Sd384XzOwH+9hEYJN+WtmRHYzEAqUl73xYhwPfYmmNgPpElb\nbXn0WF0cOGQgg4BXhTksuRaQZIPD+j8BplYRlxf1AoGBAMcGIk/uthjhFWWV/xfG\n6eIeTaCkkA6aGv+CvqDEQBv+TrUXjNF53tV25d32OO3msxOEIP/smhI0qIzuAl8E\neoyzlB+4AnKrNJ1Zx2FoBcMpxYC8AQcbSBs33pQk/11Ji08W3HncM0EHKjqtCxlc\nOtdIMrXmLkhEuqhWOXc8cuS+\n-----END PRIVATE KEY-----\n",
+  "client_email": "mh-chatbot-gdrive@quotation-chatbot-447807.iam.gserviceaccount.com",
+  "client_id": "103081650049465485670",
+  "auth_uri": "https://accounts.google.com/o/oauth2/auth",
+  "token_uri": "https://oauth2.googleapis.com/token",
+  "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
+  "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/mh-chatbot-gdrive%40quotation-chatbot-447807.iam.gserviceaccount.com",
+  "universe_domain": "googleapis.com"
+}

google_drive.py ADDED Viewed

	@@ -0,0 +1,343 @@

+import os
+import io
+import markdown
+import pandas as pd
+from datetime import datetime
+from googleapiclient.discovery import build
+from googleapiclient.http import MediaIoBaseUpload
+from google.oauth2 import service_account
+from googleapiclient.errors import HttpError
+from docx import Document
+import re
# Path to the Service Account key file. Set GDRIVE_SERVICE_ACCOUNT_FILE to load
# the key from a location outside the repository; the default keeps the
# original relative path.
# NOTE(review): a key file committed next to the code is readable by anyone
# who can read the repository -- revoke/rotate such a key and provide the
# replacement through a secret or a mounted file.
SERVICE_ACCOUNT_FILE = os.environ.get(
    'GDRIVE_SERVICE_ACCOUNT_FILE', 'gdrive_service_account.json')
# OAuth scopes requested for the Drive client.
# 'https://www.googleapis.com/auth/drive' grants full Drive access;
# 'https://www.googleapis.com/auth/drive.file' limits access to files created by the app.
SCOPES = ['https://www.googleapis.com/auth/drive']
# ID of the Google Drive folder shared with the Service Account.
# Overridable through GDRIVE_FOLDER_ID so deployments can target another folder.
DRIVE_FOLDER_ID = os.environ.get(
    'GDRIVE_FOLDER_ID', '1ksgImxpdYor73BkYnp60oAbRiE1nHAyz')
def authenticate_drive_service():
    """Build a Google Drive v3 client authorised with the Service Account key.

    The key is read from ``SERVICE_ACCOUNT_FILE`` and the credentials are
    created for the scopes listed in ``SCOPES``.

    Returns:
        The Drive v3 service object produced by ``googleapiclient``'s ``build``.
    """
    print("Authenticating Drive service...")
    creds = service_account.Credentials.from_service_account_file(
        SERVICE_ACCOUNT_FILE,
        scopes=SCOPES,
    )
    # To act on behalf of a user (needs domain-wide delegation), derive
    # delegated credentials first: creds.with_subject('user@yourdomain.com')
    drive = build('drive', 'v3', credentials=creds)
    print("Drive service authenticated.")
    return drive
# A complete bold span: two asterisks, at least one character, two asterisks.
_BOLD_SPAN_RE = re.compile(r'(\*\*.+?\*\*)')


def add_bold_text(paragraph, text):
    """
    Add text to a paragraph, handling bold formatting (text wrapped in **).

    Complete ``**...**`` spans are added as bold runs with the markers removed;
    everything else is added as plain runs, verbatim.

    Args:
        paragraph: Object exposing ``add_run(text)`` that returns a run with a
            writable ``bold`` attribute (a python-docx paragraph).
        text: Source text, possibly containing ``**bold**`` spans.
    """
    for part in _BOLD_SPAN_RE.split(text):
        if not part:
            # re.split yields '' around leading/trailing/adjacent matches;
            # there is nothing to render for those.
            continue
        # Require a full span match: a bare '**', '***' or '****' starts and
        # ends with '**' but holds no bold text and must stay literal.
        if _BOLD_SPAN_RE.fullmatch(part):
            paragraph.add_run(part[2:-2]).bold = True
        else:
            paragraph.add_run(part)
def process_table(doc, table_rows):
    """
    Process a Markdown table and add it to the Word document.

    The first line is the header. A delimiter line (``|---|:--:|``) directly
    below it is skipped; every remaining line becomes a data row. Cell text is
    rendered through ``add_bold_text``, and ``<br>`` inside a data cell becomes
    a line break.

    Args:
        doc: python-docx document the table is appended to.
        table_rows: Raw table lines such as ``"| a | b |"``. Nothing is added
            when the list is empty.
    """
    if not table_rows:
        return
    # Remove leading and trailing pipes, split into columns, trim each cell.
    parsed_rows = [
        [cell.strip() for cell in row.strip().strip('|').split('|')]
        for row in table_rows
    ]
    header, body_rows = parsed_rows[0], parsed_rows[1:]
    # Skip the second line only when it really is the header delimiter, so a
    # table written without one does not lose its first data row.
    if body_rows and all(
        '-' in cell and set(cell) <= set('-: ') for cell in body_rows[0]
    ):
        body_rows = body_rows[1:]

    num_cols = len(header)
    table = doc.add_table(rows=1, cols=num_cols)
    table.style = 'Table Grid'

    for cell, cell_content in zip(table.rows[0].cells, header):
        add_bold_text(cell.paragraphs[0], cell_content)

    for row in body_rows:
        if len(row) > num_cols:
            # More cells than header columns (typically a literal '|' inside
            # a cell): fold the overflow into the last column instead of
            # indexing past the end of the row.
            row = row[:num_cols - 1] + [' | '.join(row[num_cols - 1:])]
        row_cells = table.add_row().cells
        # zip() leaves the trailing cells empty when the row is short.
        for cell, cell_content in zip(row_cells, row):
            paragraph = cell.paragraphs[0]
            for index, part in enumerate(cell_content.split('<br>')):
                if index:
                    # Break between parts only; no trailing blank line.
                    paragraph.add_run().add_break()
                add_bold_text(paragraph, part.strip())
def convert_md_to_docx(md_content):
    """
    Convert Markdown content to a DOCX document using python-docx.

    The conversion is line based and recognises pipe tables, ``#`` headings
    (levels 1-6), unordered lists (``* `` / ``- ``), ordered lists (``1. ``),
    horizontal rules (``---`` / ``***``) and plain paragraphs. Bold spans are
    handled by ``add_bold_text``.

    Args:
        md_content: Markdown source as a string.

    Returns:
        The DOCX file as bytes, or None if the conversion raised (the error is
        printed).
    """
    try:
        doc = Document()
        table_rows = []  # consecutive table lines collected so far
        for line in md_content.split('\n'):
            stripped_line = line.strip()
            # Handle tables: keep collecting while lines look like | ... |
            if re.match(r'^\|.*\|$', stripped_line):
                table_rows.append(stripped_line)
                continue
            if table_rows:
                # First non-table line after a table: emit the table.
                process_table(doc, table_rows)
                table_rows = []
            # Handle headings
            if stripped_line.startswith('#'):
                heading_text = stripped_line.lstrip('#')
                # Level is the number of *leading* hashes; a '#' inside the
                # text ("# C# notes") must not raise the level.
                heading_level = len(stripped_line) - len(heading_text)
                if heading_level <= 6:
                    heading = doc.add_paragraph()
                    heading.style = f'Heading {heading_level}'
                    add_bold_text(heading, heading_text.strip())
                    continue
                # More than six hashes is not a heading: fall through and
                # keep the line as ordinary text instead of dropping it.
            # Handle unordered lists
            if stripped_line.startswith(('* ', '- ')):
                paragraph = doc.add_paragraph(style='List Bullet')
                add_bold_text(paragraph, stripped_line[2:].strip())
                continue
            # Handle ordered lists
            if re.match(r'^\d+\.\s', stripped_line):
                paragraph = doc.add_paragraph(style='List Number')
                add_bold_text(paragraph, re.sub(r'^\d+\.\s', '', stripped_line))
                continue
            # Handle horizontal rules (rendered as an empty line)
            if stripped_line in ('---', '***'):
                doc.add_paragraph().add_run().add_break()
                continue
            # Handle regular paragraphs; blank lines produce nothing
            if stripped_line:
                add_bold_text(doc.add_paragraph(), stripped_line)
        # A table on the last lines of the input has no following line to
        # trigger the flush above, so emit it here.
        if table_rows:
            process_table(doc, table_rows)
        # Save the document to an in-memory file
        output = io.BytesIO()
        doc.save(output)
        return output.getvalue()
    except Exception as e:
        # Best effort by design: callers treat None as "conversion failed".
        print(f"Conversion error: {e}")
        return None
def determine_mime_type(filename):
    """Pick the upload MIME type and the Google Drive target type for a file.

    The decision is made from the lower-cased file extension only.

    Returns:
        A ``(mime_type, drive_mime)`` tuple. ``drive_mime`` is the Google
        Docs/Sheets type to convert into, or None for a plain binary upload.
    """
    print(f"Determining MIME type for {filename}...")
    google_doc = 'application/vnd.google-apps.document'
    conversions = {
        # Markdown is uploaded as DOCX and converted to a Google Doc
        '.md': (
            'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
            google_doc,
        ),
        # Plain text becomes a Google Doc
        '.txt': ('text/plain', google_doc),
        # CSV becomes a Google Sheet
        '.csv': ('text/csv', 'application/vnd.google-apps.spreadsheet'),
    }
    _, extension = os.path.splitext(filename)
    # Anything else is uploaded as raw bytes without conversion
    mime_type, drive_mime = conversions.get(
        extension.lower(), ('application/octet-stream', None))
    print(f"MIME type determined: {mime_type}, Drive MIME: {drive_mime}")
    return mime_type, drive_mime
def upload_content(service, folder_id, filename, content):
    """
    Upload content directly to Google Drive as a Google Doc or Sheet.

    Markdown (``.md``) is converted to DOCX first; DataFrames are serialised
    to CSV; strings are encoded as UTF-8. Whether Drive converts the upload is
    decided by ``determine_mime_type``.

    Args:
        service: Authorized Drive API service instance.
        folder_id: ID of the Drive folder to upload into.
        filename: Name of the file; its extension selects the conversion.
        content: String or bytes content, or a pandas DataFrame for CSV.

    Returns:
        The ID of the created Drive file, or None when the Markdown conversion
        or the upload failed (the reason is printed).
    """
    print(f"Initiating upload process for {filename} to Google Drive...")
    mime_type, drive_mime = determine_mime_type(filename)
    print(f"Determined MIME type: {mime_type}, Drive MIME: {drive_mime}")
    base_name, extension = os.path.splitext(filename)
    # Compare case-insensitively, matching determine_mime_type, so "X.MD" is
    # converted before it is sent with the DOCX MIME type.
    if extension.lower() == '.md':
        content = convert_md_to_docx(content)
        if content is None:
            print("Failed to convert Markdown to DOCX.")
            return None
    # Prepare file metadata
    file_metadata = {
        # Google Docs/Sheets carry no extension; files uploaded without
        # conversion keep their full name.
        'name': base_name if drive_mime else filename,
        'parents': [folder_id],
    }
    if drive_mime:
        file_metadata['mimeType'] = drive_mime
    print(f"Prepared file metadata: {file_metadata}")
    # Prepare media
    if isinstance(content, pd.DataFrame):
        content = content.to_csv(index=False)
        print(f"Converted DataFrame to CSV string for {filename}")
    # io.BytesIO only accepts bytes-like data, so text must be encoded first.
    payload = content.encode('utf-8') if isinstance(content, str) else content
    media = MediaIoBaseUpload(
        io.BytesIO(payload),
        mimetype=mime_type,
        resumable=True
    )
    print(f"Prepared media for upload: {media}")
    try:
        file = service.files().create(
            body=file_metadata,
            media_body=media,
            fields='id'
        ).execute()
    except HttpError as error:
        print(f"An error occurred while uploading {filename}: {error}")
        return None
    file_id = file.get('id')
    print(f"Successfully uploaded {filename} to Google Drive with ID: {file_id}")
    return file_id
def v2_upload_to_gdrive(prd_content, components_content, tasks_content, task_units_content, quantity_df, analysis_content, cost_summary):
    """
    Upload one quotation run into a new timestamped subfolder of DRIVE_FOLDER_ID.

    Uploads ``quotation_document.md`` (cost summary, analysis, components,
    tasks and task base units combined), ``prd.md`` and, when ``quantity_df``
    is not None, ``quantified_tasks.csv``.

    Args:
        prd_content: Markdown text uploaded as ``prd.md``.
        components_content, tasks_content, task_units_content,
        analysis_content, cost_summary: Text interpolated into the combined
            Markdown document.
        quantity_df: Data for ``quantified_tasks.csv`` (presumably a pandas
            DataFrame -- verify against the caller), or None to skip it.

    Returns:
        A status message: the parent folder URL on completion, or a failure
        description if authentication, folder creation or an upload raised.
    """
    print("Starting v2_upload_to_gdrive process...")
    # Check the configuration before spending a round-trip on authentication.
    parent_folder_id = DRIVE_FOLDER_ID
    if not parent_folder_id:
        return "Drive folder ID is not set."
    try:
        # Authentication and folder creation live inside the error boundary so
        # their failures are reported the same way as upload failures.
        service = authenticate_drive_service()
        # Create new subfolder
        folder_metadata = {
            'name': f'quotation_{datetime.now().strftime("%y%m%d_%H%M%S")}',
            'mimeType': 'application/vnd.google-apps.folder',
            'parents': [parent_folder_id]
        }
        subfolder = service.files().create(body=folder_metadata, fields='id').execute()
        subfolder_id = subfolder.get('id')
        print(f"Created subfolder with ID: {subfolder_id}")
        combined_content = f"""
# Cost Summary
{cost_summary}
# Analysis
{analysis_content}
# Components
{components_content}
# Tasks
{tasks_content}
# Task Base Units
{task_units_content}
        """
        # Upload files to subfolder
        upload_content(service, subfolder_id, "quotation_document.md", combined_content)
        upload_content(service, subfolder_id, "prd.md", prd_content)
        if quantity_df is not None:
            upload_content(service, subfolder_id, "quantified_tasks.csv", quantity_df)
        # The message points at the parent folder, not the new subfolder.
        parent_folder_url = f"https://drive.google.com/drive/folders/{parent_folder_id}"
        return f"All files uploaded to subfolder.Parent folder URL:\n\n {parent_folder_url}"
    except Exception as e:
        print(f"An error occurred: {e}")
        return f"Failed to upload files. Error: {e}"
def v1_upload_to_gdrive(prd_box,tasks_table,quantity_table,merged_table,units_output,analysis_box):
    """
    Upload one quotation run into a new timestamped subfolder of DRIVE_FOLDER_ID.

    Uploads ``quotation_document.md`` (cost summary and analysis combined),
    ``prd.md`` and one CSV per table that is not None: ``tasks.csv``,
    ``quantity.csv`` and ``merged.csv``.

    Args:
        prd_box: Markdown text uploaded as ``prd.md``.
        tasks_table, quantity_table, merged_table: Table data for the CSV
            uploads (presumably pandas DataFrames -- verify against the
            caller), or None to skip the corresponding file.
        units_output: Text placed under the "Cost Summary" heading.
        analysis_box: Text placed under the "Analysis" heading.

    Returns:
        A status message: the parent folder URL on completion, or a failure
        description if authentication, folder creation or an upload raised.
    """
    print("Starting v1_upload_to_gdrive process...")
    # Check the configuration before spending a round-trip on authentication.
    parent_folder_id = DRIVE_FOLDER_ID
    if not parent_folder_id:
        return "Drive folder ID is not set."
    try:
        # Authentication and folder creation live inside the error boundary so
        # their failures are reported the same way as upload failures.
        service = authenticate_drive_service()
        # Create new subfolder
        folder_metadata = {
            'name': f'quotation_{datetime.now().strftime("%y%m%d_%H%M%S")}',
            'mimeType': 'application/vnd.google-apps.folder',
            'parents': [parent_folder_id]
        }
        subfolder = service.files().create(body=folder_metadata, fields='id').execute()
        subfolder_id = subfolder.get('id')
        print(f"Created subfolder with ID: {subfolder_id}")
        combined_content = f"""
# Cost Summary
{units_output}
# Analysis
{analysis_box}
        """
        # Upload files to subfolder
        upload_content(service, subfolder_id, "quotation_document.md", combined_content)
        upload_content(service, subfolder_id, "prd.md", prd_box)
        optional_tables = (
            ("tasks.csv", tasks_table),
            ("quantity.csv", quantity_table),
            ("merged.csv", merged_table),
        )
        for csv_name, table in optional_tables:
            if table is not None:
                upload_content(service, subfolder_id, csv_name, table)
        # The message points at the parent folder, not the new subfolder.
        parent_folder_url = f"https://drive.google.com/drive/folders/{parent_folder_id}"
        return f"All files uploaded to subfolder.Parent folder URL:\n\n {parent_folder_url}"
    except Exception as e:
        print(f"An error occurred: {e}")
        return f"Failed to upload files. Error: {e}"

prompts.py CHANGED Viewed

@@ -664,6 +664,34 @@ The final output should deliver a **clear, actionable, and project-specific list
 """
 define_technical_task = """You are tasked with creating a Granular Technical Task List for a chatbot project. Using the Project Requirement Document and the Component List, your objective is to decompose each component into highly detailed, technical-level subtasks. These subtasks should capture every required step to complete the project with precision, ensuring no technical detail is overlooked.
 ### **Instructions**:

 """
# Prompt template asking the model for a development-phase component list
# tailored to a project's requirements, with the team's tech stack appended.
# The text contains no format placeholders; the requirement document is
# presumably supplied alongside it by the caller -- verify at the call site.
define_dev_components = """
You are tasked with creating a detailed **Component List** for a project based on the provided requirements. Your goal is to list all **components** within each **phase** of the project. The components should be highly specific to the project context, leaving no ambiguity about what each phase entails. Follow these instructions:
### **Specific Requirements**:
1. **Focus on Development Phase**
   - **Development**
2. **Components**:
   - Include **project-specific components** that align with the goal of developing the given project
   - Use the provided project requirement document to extract relevant, **granular components** that reflect the tasks and deliverables unique to this project.
   - Components should directly reference functionalities or deliverables related to the project
### **Objective**:
The final output should deliver a **clear, actionable, and project-specific list of components** in the development phase. The goal is to provide a foundation for developing granular subcomponents and tasks, ensuring alignment with the unique requirements of this project.
**Our Tech Stack**:
- **Backend**: FastAPI (for API services)
- **Programming Language**: Python
- **Chatbot Builder**: Chatbot Builder Platform (e.g., Dialogflow, Rasa)
- **Cloud Services**: AWS
- **Database**: PostgreSQL
- **Caching**: Redis (in-memory data store)
- **Containerization**: Docker
- **Database Migrations**: Alembic
"""
 define_technical_task = """You are tasked with creating a Granular Technical Task List for a chatbot project. Using the Project Requirement Document and the Component List, your objective is to decompose each component into highly detailed, technical-level subtasks. These subtasks should capture every required step to complete the project with precision, ensuring no technical detail is overlooked.
 ### **Instructions**: