Spaces:

dimostchv
/

career_conversations

Sleeping

File size: 14,054 Bytes

b4a8f90

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import json\n",
    "from datetime import datetime\n",
    "from dotenv import load_dotenv\n",
    "from openai import OpenAI\n",
    "from pathlib import Path\n",
    "from notion_client import Client\n",
    "\n",
    "load_dotenv(override=True)\n",
    "openai = OpenAI()\n",
    "notion = Client(auth=os.getenv(\"NOTION_TOKEN\"))\n",
    "\n",
    "# Get all transcript files in the current directory\n",
    "def get_transcript_files():\n",
    "    current_dir = Path(\".\")\n",
    "    return list(current_dir.glob(\"*.txt\"))\n",
    "\n",
    "# Read a transcript file\n",
    "def read_transcript(file_path):\n",
    "    with open(file_path, \"r\", encoding=\"utf-8\") as f:\n",
    "        return f.read()\n",
    "\n",
    "# Get list of transcript files\n",
    "transcript_files = get_transcript_files()\n",
    "print(f\"Found {len(transcript_files)} transcript files:\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "system_prompt = f\"\"\"You are a notes processor. You are given a transcript of a meeting and you need to process the notes into a structured JSON format.\n",
    "\n",
    "Please analyze the transcript and return a JSON object with the following structure:\n",
    "\n",
    "{{\n",
    "    \"meeting_title\": \"Brief descriptive title for the meeting\",\n",
    "    \"participants\": \"Comma-separated list of attendees, first name only, use first name and first letter of surname when duplicate i.e. Dimo S\", \n",
    "    \"category\": \"One of: Coaching, dsm-firmenich, PakTech, BDB Internal, Other - select the most appropriate category based on the discussion context\",\n",
    "    \"summary\": \"Brief 1-2 sentence summary of the main discussion points\",\n",
    "    \"content\": \"Detailed meeting notes organized by topics/categories\",\n",
    "    \"action_items\": [\n",
    "        \"List of specific action items mentioned and assigned to a person. If no person is assigned, leave the person field empty. Ensure the action is actionable and has a deadline.\",\n",
    "        \"Each as a separate string\"\n",
    "    ],\n",
    "    \"meeting_url\": \"Meeting URL if mentioned (or null)\",\n",
    "    \"date\": \"Meeting date in YYYY-MM-DD format (or null for today)\"\n",
    "}}\n",
    "\n",
    "Guidelines:\n",
    "- Choose the most appropriate category based on the discussion content\n",
    "- Make the summary concise but informative\n",
    "- Structure the content with clear headings and bullet points\n",
    "- Extract specific, actionable items for the action_items array\n",
    "- If no action items are mentioned, return an empty array\n",
    "\n",
    "The meeting transcript is: \n",
    "{transcript}\n",
    "\n",
    "Return only the JSON object, no additional text.\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_system_prompt(transcript):\n",
    "    return f\"\"\"You are a notes processor. You are given a transcript of a meeting and you need to process the notes into a structured JSON format.\n",
    "\n",
    "Please analyze the transcript and return a JSON object with the following structure:\n",
    "\n",
    "{{\n",
    "    \"meeting_title\": \"Brief descriptive title for the meeting\",\n",
    "    \"participants\": \"Comma-separated list of attendees, first name only, use first name and first letter of surname when duplicate i.e. Dimo S\", \n",
    "    \"category\": \"One of: Coaching, dsm-firmenich, PakTech, BDB Internal, Other - select the most appropriate category based on the discussion context\",\n",
    "    \"summary\": \"Brief 1-2 sentence summary of the main discussion points\",\n",
    "    \"content\": \"Detailed meeting notes organized by topics/categories\",\n",
    "    \"action_items\": [\n",
    "        \"List of specific action items mentioned and assigned to a person. If no person is assigned, leave the person field empty. Ensure the action is actionable and has a deadline.\",\n",
    "        \"Each as a separate string\"\n",
    "    ],\n",
    "    \"meeting_url\": \"Meeting URL if mentioned (or null)\",\n",
    "    \"date\": \"Meeting date in YYYY-MM-DD format (or null for today)\"\n",
    "}}\n",
    "\n",
    "Guidelines:\n",
    "- Choose the most appropriate category based on the discussion content\n",
    "- Make the summary concise but informative\n",
    "- Structure the content with clear headings and bullet points\n",
    "- Extract specific, actionable items for the action_items array\n",
    "- If no action items are mentioned, return an empty array\n",
    "\n",
    "The meeting transcript is: \n",
    "{transcript}\n",
    "\n",
    "Return only the JSON object, no additional text.\"\"\"\n",
    "\n",
    "def chat(message, history, transcript):\n",
    "    system_prompt = get_system_prompt(transcript)\n",
    "    messages = [{\"role\": \"system\", \"content\": system_prompt}] + history + [{\"role\": \"user\", \"content\": message}]\n",
    "    response = openai.chat.completions.create(model=\"gpt-4o-mini\", messages=messages)\n",
    "    return response.choices[0].message.content"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "from datetime import datetime\n",
    "\n",
    "# Process all transcript files\n",
    "def process_all_transcripts():\n",
    "    for transcript_file in transcript_files:\n",
    "        print(f\"\\nProcessing {transcript_file.name}...\")\n",
    "        \n",
    "        # Read the transcript\n",
    "        transcript = read_transcript(transcript_file)\n",
    "        \n",
    "        try:\n",
    "            # Get AI notes for this transcript\n",
    "            notes = chat(\"What are the notes?\", [], transcript)\n",
    "            \n",
    "            # Process the notes and create Notion page\n",
    "            meeting_note = process_ai_response_to_notion_formatted(notes)\n",
    "            \n",
    "            if meeting_note:\n",
    "                print(f\"✅ Successfully processed {transcript_file.name}\")\n",
    "            else:\n",
    "                print(f\"❌ Failed to process {transcript_file.name}\")\n",
    "                \n",
    "        except Exception as e:\n",
    "            print(f\"❌ Error processing {transcript_file.name}: {e}\")\n",
    "\n",
    "# Run the processing\n",
    "if transcript_files:\n",
    "    process_all_transcripts()\n",
    "else:\n",
    "    print(\"No transcript files found in the current directory.\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "✅ Meeting note 'Weekly Project Status Update' created successfully!\n",
      "Page URL: https://www.notion.so/Weekly-Project-Status-Update-21acfc87351681569bbfe60bad68a863\n"
     ]
    }
   ],
   "source": [
    "def create_meeting_note_with_formatting(name, category, participants, summary, content_dict=None, action_items=None, meeting_url=None, date=None):\n",
    "    \"\"\"Create a meeting note with properly formatted content blocks\"\"\"\n",
    "    \n",
    "    DATABASE_ID = \"214cfc87-3516-801f-9cf5-f6709213c7a0\"\n",
    "    \n",
    "    # Set date to today if not provided\n",
    "    if date is None:\n",
    "        date = datetime.now().isoformat()\n",
    "    \n",
    "    # Build properties based on your database schema\n",
    "    properties = {\n",
    "        \"Name\": {\n",
    "            \"title\": [{\"text\": {\"content\": name}}]\n",
    "        },\n",
    "        \"Category\": {\n",
    "            \"select\": {\"name\": category}\n",
    "        },\n",
    "        \"Participants\": {\n",
    "            \"rich_text\": [{\"text\": {\"content\": participants}}]\n",
    "        },\n",
    "        \"Summary\": {\n",
    "            \"rich_text\": [{\"text\": {\"content\": summary}}]\n",
    "        },\n",
    "        \"Date\": {\n",
    "            \"date\": {\"start\": date}\n",
    "        }\n",
    "    }\n",
    "    \n",
    "    # Add meeting URL if provided\n",
    "    if meeting_url:\n",
    "        properties[\"Meeting URL\"] = {\"url\": meeting_url}\n",
    "    \n",
    "    # Build the page content with proper formatting\n",
    "    children = []\n",
    "    \n",
    "    # Add Meeting Notes header\n",
    "    children.append({\n",
    "        \"object\": \"block\",\n",
    "        \"type\": \"heading_2\",\n",
    "        \"heading_2\": {\n",
    "            \"rich_text\": [{\"type\": \"text\", \"text\": {\"content\": \"Meeting Notes\"}}]\n",
    "        }\n",
    "    })\n",
    "    \n",
    "    # Add formatted content sections\n",
    "    if content_dict and isinstance(content_dict, dict):\n",
    "        for section, items in content_dict.items():\n",
    "            # Add section heading\n",
    "            children.append({\n",
    "                \"object\": \"block\",\n",
    "                \"type\": \"heading_3\",\n",
    "                \"heading_3\": {\n",
    "                    \"rich_text\": [{\"type\": \"text\", \"text\": {\"content\": section}}]\n",
    "                }\n",
    "            })\n",
    "            \n",
    "            # Add bulleted list items\n",
    "            if isinstance(items, list):\n",
    "                for item in items:\n",
    "                    children.append({\n",
    "                        \"object\": \"block\",\n",
    "                        \"type\": \"bulleted_list_item\",\n",
    "                        \"bulleted_list_item\": {\n",
    "                            \"rich_text\": [{\"type\": \"text\", \"text\": {\"content\": item}}]\n",
    "                        }\n",
    "                    })\n",
    "            else:\n",
    "                children.append({\n",
    "                    \"object\": \"block\",\n",
    "                    \"type\": \"bulleted_list_item\",\n",
    "                    \"bulleted_list_item\": {\n",
    "                        \"rich_text\": [{\"type\": \"text\", \"text\": {\"content\": str(items)}}]\n",
    "                    }\n",
    "                })\n",
    "    \n",
    "    # Add action items if provided\n",
    "    if action_items:\n",
    "        children.append({\n",
    "            \"object\": \"block\",\n",
    "            \"type\": \"heading_3\",\n",
    "            \"heading_3\": {\n",
    "                \"rich_text\": [{\"type\": \"text\", \"text\": {\"content\": \"Action Items\"}}]\n",
    "            }\n",
    "        })\n",
    "        \n",
    "        for item in action_items:\n",
    "            children.append({\n",
    "                \"object\": \"block\",\n",
    "                \"type\": \"to_do\",\n",
    "                \"to_do\": {\n",
    "                    \"rich_text\": [{\"type\": \"text\", \"text\": {\"content\": item}}],\n",
    "                    \"checked\": False\n",
    "                }\n",
    "            })\n",
    "    \n",
    "    try:\n",
    "        # Create the page with formatted content\n",
    "        new_page = notion.pages.create(\n",
    "            parent={\"database_id\": DATABASE_ID},\n",
    "            properties=properties,\n",
    "            children=children\n",
    "        )\n",
    "        \n",
    "        print(f\"✅ Meeting note '{name}' created successfully!\")\n",
    "        print(f\"Page URL: {new_page['url']}\")\n",
    "        return new_page\n",
    "        \n",
    "    except Exception as e:\n",
    "        print(f\"❌ Error creating meeting note: {e}\")\n",
    "        return None\n",
    "\n",
    "def process_ai_response_to_notion_formatted(ai_response):\n",
    "    \"\"\"Process AI JSON response and create properly formatted Notion note\"\"\"\n",
    "    \n",
    "    try:\n",
    "        # Parse the JSON response from AI\n",
    "        if isinstance(ai_response, str):\n",
    "            meeting_data = json.loads(ai_response)\n",
    "        else:\n",
    "            meeting_data = ai_response\n",
    "        \n",
    "        # Create the Notion note with formatted content\n",
    "        meeting_note = create_meeting_note_with_formatting(\n",
    "            name=meeting_data[\"meeting_title\"],\n",
    "            category=meeting_data[\"category\"], \n",
    "            participants=meeting_data[\"participants\"],\n",
    "            summary=meeting_data[\"summary\"],\n",
    "            content_dict=meeting_data.get(\"content\"),  # Pass the dict directly\n",
    "            action_items=meeting_data.get(\"action_items\", []),\n",
    "            meeting_url=meeting_data.get(\"meeting_url\"),\n",
    "            date=meeting_data.get(\"date\")\n",
    "        )\n",
    "        \n",
    "        return meeting_note\n",
    "        \n",
    "    except json.JSONDecodeError as e:\n",
    "        print(f\"❌ Error parsing AI response as JSON: {e}\")\n",
    "        return None\n",
    "    except Exception as e:\n",
    "        print(f\"❌ Error processing AI response: {e}\")\n",
    "        return None\n",
    "\n",
    "# Usage with proper formatting\n",
    "ai_response = notes\n",
    "meeting_note = process_ai_response_to_notion_formatted(ai_response)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}