Spaces:

dimostchv
/

career_conversations

Sleeping

App Files Files Community

career_conversations / Task extract /process_meeting_notes.py

dimostchv

Upload folder using huggingface_hub

b4a8f90 verified 10 months ago

raw

history blame contribute delete

16.1 kB

	#!/usr/bin/env python3
	"""
	Meeting Notes Processor
	----------------------
	This script processes meeting transcript files and creates structured notes in Notion.
	It uses OpenAI's API to analyze the transcripts and extract key information.
	"""

	import os
	import json
	from datetime import datetime
	from pathlib import Path
	from typing import Dict, List, Optional, Any, Union, TypedDict

	from dotenv import load_dotenv
	from openai import OpenAI
	from notion_client import Client
	from openai.types.chat import ChatCompletion
	from openai.types.chat.chat_completion_message import ChatCompletionMessage
	from openai.types.chat.chat_completion_message_param import ChatCompletionMessageParam
	from openai.types.chat.chat_completion_system_message_param import ChatCompletionSystemMessageParam
	from openai.types.chat.chat_completion_user_message_param import ChatCompletionUserMessageParam
	from openai.types.chat.chat_completion_assistant_message_param import ChatCompletionAssistantMessageParam

	# Load environment variables and initialize the API clients.
	# override=True lets values from the .env file replace variables that are
	# already present in the process environment.
	load_dotenv(override=True)
	# NOTE(review): both clients are constructed at import time, before main()
	# checks the required environment variables; if either constructor validates
	# its credentials eagerly, those checks are never reached -- confirm.
	openai = OpenAI()
	notion = Client(auth=os.getenv("NOTION_ACCESS_TOKEN"))

	# Constants
	# ID of the Notion database that receives the meeting notes. It can be
	# overridden with the NOTION_DATABASE_ID environment variable (or a .env
	# entry); the previously hard-coded database remains the default.
	DATABASE_ID = os.getenv("NOTION_DATABASE_ID") or "214cfc87-3516-801f-9cf5-f6709213c7a0"

	class HistoryMessage(TypedDict):
	    """One earlier chat turn; only the message text is stored."""

	    # Text of the turn. The speaker is not recorded in this structure.
	    content: str

	def get_transcript_files() -> List[Path]:
	    """Return the ``.txt`` transcripts waiting in the ``transcripts`` directory.

	    The ``transcripts`` and ``processed`` directories are resolved next to this
	    script and are created if they do not exist yet.

	    Returns:
	        Paths of the regular ``*.txt`` files directly inside ``transcripts``,
	        sorted by path so that every run handles them in the same order.
	    """
	    script_dir = Path(__file__).parent
	    transcript_dir = script_dir / "transcripts"
	    processed_dir = script_dir / "processed"

	    # Create necessary directories if they don't exist
	    for directory in (transcript_dir, processed_dir):
	        directory.mkdir(exist_ok=True)

	    # glob() yields entries in arbitrary filesystem order and also matches
	    # directories whose name ends in ".txt"; keep only files and sort them.
	    return sorted(
	        path for path in transcript_dir.glob("*.txt") if path.is_file()
	    )

	def read_transcript(file_path: Path) -> str:
	    """Return the whole transcript at *file_path*, decoded as UTF-8."""
	    return Path(file_path).read_text(encoding="utf-8")

	def move_to_processed(file_path: Path) -> None:
	    """Move *file_path* into the ``processed`` directory beside this script.

	    The destination name is the original stem followed by a
	    ``_YYYYMMDD_HHMMSS`` timestamp and the original suffix.
	    """
	    destination_dir = Path(__file__).parent / "processed"
	    # The directory may be missing when this function is called on its own.
	    destination_dir.mkdir(exist_ok=True)

	    # The timestamp keeps repeated runs on same-named files from colliding.
	    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
	    new_path = destination_dir / f"{file_path.stem}_{stamp}{file_path.suffix}"

	    file_path.rename(new_path)
	    print(f"✅ Moved transcript to {new_path}")

	def get_system_prompt(transcript: str) -> str:
	    """Build the system prompt that asks the model for structured meeting notes.

	    The prompt describes the JSON object to return, lists extraction
	    guidelines, and embeds *transcript* verbatim near the end.
	    """
	    # Doubled braces in the template are literal braces of the JSON example;
	    # {transcript} is the only interpolated field.
	    prompt = f"""You are a detailed notes processor. You are given a transcript of a meeting and you need to process the notes into a comprehensive, structured JSON format.

	Please analyze the transcript thoroughly and return a JSON object with the following structure:

	{{
	"meeting_title": "Descriptive title capturing the main purpose of the meeting",
	"participants": "Comma-separated list of attendees, first name only, use first name and first letter of surname when duplicate i.e. Dimo S",
	"category": "One of: Coaching, dsm-firmenich, PakTech, BDB Internal, Other - select the most appropriate category based on the discussion context",
	"summary": "A comprehensive 2-3 sentence summary covering the main topics discussed, key decisions made, and overall meeting outcome",
	"content": {{
	"Key Discussions": [
	"Detailed points of discussion, including context and background information",
	"Capture all important details, examples, and explanations provided",
	"Include technical details, numbers, and specific references when mentioned"
	],
	"Decisions Made": [
	"List all decisions made during the meeting",
	"Include the context and reasoning behind each decision",
	"Note any conditions or dependencies for the decisions"
	],
	"Challenges & Concerns": [
	"Document any challenges, risks, or concerns raised",
	"Include proposed solutions or mitigation strategies discussed",
	"Note any unresolved issues that need follow-up"
	],
	"Next Steps": [
	"List strategic next steps discussed",
	"Include any dependencies or prerequisites mentioned",
	"Note any timeline considerations"
	]
	}},
	"action_items": [
	{{
	"task": "Specific, actionable task description",
	"assignee": "Person assigned (or 'Unassigned')",
	"deadline": "Deadline if mentioned (or 'Not specified')",
	"dependencies": "Any dependencies or prerequisites mentioned",
	"priority": "High/Medium/Low if indicated (or 'Not specified')"
	}}
	],
	"meeting_url": "Meeting URL if mentioned (or null)",
	"date": "IMPORTANT: Extract the actual meeting date from the transcript. Look for date references like 'scheduled for', 'meeting on', etc. Return in YYYY-MM-DD format. If multiple dates are mentioned, use the actual meeting date, not future dates mentioned for tasks. If no date is found, return null.",
	"follow_up_items": [
	"List of topics that need follow-up in future meetings",
	"Include any parking lot items or tabled discussions"
	]
	}}

	Guidelines:
	- Be thorough and detailed in capturing all discussion points
	- Maintain chronological order within each section when relevant
	- Include specific examples, numbers, and technical details mentioned
	- Capture the context and reasoning behind decisions and action items
	- Note any disagreements or alternative viewpoints expressed
	- Include any resource links or references mentioned
	- Document any blockers, dependencies, and risks discussed
	- Capture any parking lot items or topics deferred for future discussion
	- Pay special attention to extracting the correct meeting date from the transcript

	The meeting transcript is:
	{transcript}

	Return only the JSON object, no additional text."""
	    return prompt

	def chat(message: str, history: List[HistoryMessage], transcript: str) -> str:
	    """Ask the OpenAI chat model about a transcript and return its reply text.

	    Args:
	        message: The new user message to send.
	        history: Earlier turns; entries at even positions are sent as user
	            messages and entries at odd positions as assistant messages.
	        transcript: Meeting transcript embedded in the system prompt.

	    Returns:
	        The text content of the first choice in the response.

	    Raises:
	        ValueError: If the response has no choices, or the first choice has no
	            message or no message content.
	    """
	    system_message: ChatCompletionSystemMessageParam = {
	        "role": "system",
	        "content": get_system_prompt(transcript),
	    }

	    # History entries carry no role, so it is derived from their position.
	    prior_turns: List[Union[ChatCompletionUserMessageParam, ChatCompletionAssistantMessageParam]] = [
	        {"role": "assistant" if index % 2 else "user", "content": turn["content"]}
	        for index, turn in enumerate(history)
	    ]

	    messages: List[ChatCompletionMessageParam] = [
	        system_message,
	        *prior_turns,
	        {"role": "user", "content": message},
	    ]

	    response: ChatCompletion = openai.chat.completions.create(
	        model="gpt-4o-mini",
	        messages=messages,
	    )

	    # Walk down to the reply text one step at a time so that a missing piece
	    # at any level ends in the same error.
	    choices = response.choices
	    reply = choices[0].message if choices else None
	    text = reply.content if reply else None
	    if not text:
	        raise ValueError("No response received from OpenAI API")
	    return text

	# Request limits documented by the Notion API.
	_NOTION_TEXT_LIMIT = 2000  # maximum characters in a single rich-text object
	_NOTION_BLOCK_LIMIT = 100  # maximum child blocks accepted in one request


	def _rich_text(text: Any) -> List[Dict[str, Any]]:
	    """Convert *text* into rich-text objects that each fit the length limit."""
	    content = "" if text is None else str(text)
	    # Always emit at least one object so empty text still yields a valid value.
	    pieces = [
	        content[start:start + _NOTION_TEXT_LIMIT]
	        for start in range(0, len(content), _NOTION_TEXT_LIMIT)
	    ] or [""]
	    return [{"type": "text", "text": {"content": piece}} for piece in pieces]


	def _text_block(block_type: str, text: Any, **extra: Any) -> Dict[str, Any]:
	    """Build one Notion block of *block_type* whose body is *text*."""
	    return {
	        "object": "block",
	        "type": block_type,
	        block_type: {"rich_text": _rich_text(text), **extra},
	    }


	def create_meeting_note_with_formatting(
	    name: str,
	    category: str,
	    participants: str,
	    summary: str,
	    content_dict: Optional[Dict[str, Any]] = None,
	    action_items: Optional[List[str]] = None,
	    meeting_url: Optional[str] = None,
	    date: Optional[str] = None
	) -> Optional[Dict[str, Any]]:
	    """Create a meeting note page in the Notion database.

	    Args:
	        name: Page title.
	        category: Name of the option for the "Category" select property.
	        participants: Text for the "Participants" property.
	        summary: Text for the "Summary" property.
	        content_dict: Maps a section heading to a list of bullet items (or to a
	            single value, which becomes one bullet).
	        action_items: Texts rendered as unchecked to-do blocks.
	        meeting_url: Value for the "Meeting URL" property; omitted when falsy.
	        date: Start value for the "Date" property; omitted when falsy.

	    Returns:
	        The page object returned by Notion, or ``None`` when the page could not
	        be created. Failures are printed rather than raised.
	    """
	    # Build properties based on the database schema
	    properties: Dict[str, Any] = {
	        "Name": {"title": _rich_text(name)},
	        "Category": {"select": {"name": category}},
	        "Participants": {"rich_text": _rich_text(participants)},
	        "Summary": {"rich_text": _rich_text(summary)},
	    }

	    # Only add date if it was found in the transcript
	    if date:
	        properties["Date"] = {"date": {"start": date}}

	    # Add meeting URL if provided
	    if meeting_url:
	        properties["Meeting URL"] = {"url": meeting_url}

	    # Build the page content, starting with the top-level header
	    children: List[Dict[str, Any]] = [_text_block("heading_2", "Meeting Notes")]

	    if content_dict and isinstance(content_dict, dict):
	        for section, items in content_dict.items():
	            children.append(_text_block("heading_3", section))
	            # A non-list value is rendered as a single bullet.
	            entries = items if isinstance(items, list) else [items]
	            children.extend(
	                _text_block("bulleted_list_item", entry) for entry in entries
	            )

	    if action_items:
	        children.append(_text_block("heading_3", "Action Items"))
	        children.extend(
	            _text_block("to_do", item, checked=False) for item in action_items
	        )

	    try:
	        # Only the first batch of blocks fits into the create request.
	        new_page = notion.pages.create(
	            parent={"database_id": DATABASE_ID},
	            properties=properties,
	            children=children[:_NOTION_BLOCK_LIMIT],
	        )
	    except Exception as e:
	        print(f"❌ Error creating meeting note: {e}")
	        return None

	    if not isinstance(new_page, dict):
	        print("❌ Error: Unexpected response type from Notion API")
	        return None

	    # Append the remaining blocks in batches. The page already exists at this
	    # point, so a failure here is reported but does not discard the page.
	    for start in range(_NOTION_BLOCK_LIMIT, len(children), _NOTION_BLOCK_LIMIT):
	        try:
	            notion.blocks.children.append(
	                block_id=new_page["id"],
	                children=children[start:start + _NOTION_BLOCK_LIMIT],
	            )
	        except Exception as e:
	            print(f"⚠️ Meeting note created, but some content could not be appended: {e}")
	            break

	    print(f"✅ Meeting note '{name}' created successfully!")
	    print(f"Page URL: {new_page.get('url', 'URL not found')}")
	    return new_page

	def _strip_code_fence(text: str) -> str:
	    """Remove a Markdown code fence (``` or ```json) wrapped around *text*."""
	    stripped = text.strip()
	    if stripped.startswith("```"):
	        # The opening fence line may carry a language tag; drop the whole line.
	        first_line, newline, rest = stripped.partition("\n")
	        stripped = rest if newline else first_line[3:]
	        stripped = stripped.rstrip()
	        if stripped.endswith("```"):
	            stripped = stripped[:-3]
	    return stripped.strip()


	def _format_action_item(item: Dict[str, Any]) -> str:
	    """Render one action-item object as a single line of text."""
	    task = str(item.get("task") or "Untitled task")
	    labelled = (
	        ("Assigned to", item.get("assignee"), "Unassigned"),
	        ("Due", item.get("deadline"), "Not specified"),
	        ("Priority", item.get("priority"), "Not specified"),
	        ("Dependencies", item.get("dependencies"), "Not specified"),
	    )
	    # Skip fields that are missing, empty, or still hold their placeholder.
	    details = [
	        f"{label}: {value}"
	        for label, value, placeholder in labelled
	        if value and value != placeholder
	    ]
	    return f"{task} - {', '.join(details)}" if details else task


	def _valid_iso_date(value: Any) -> Optional[str]:
	    """Return *value* if it is an ISO 8601 date string, otherwise ``None``."""
	    if not isinstance(value, str):
	        return None
	    candidate = value.strip()
	    try:
	        datetime.fromisoformat(candidate)
	    except ValueError:
	        # Covers placeholders such as "null" or "Not specified".
	        return None
	    return candidate


	def process_ai_response_to_notion_formatted(ai_response: str) -> Optional[Dict[str, Any]]:
	    """Parse the model's JSON reply and create the matching Notion note.

	    Args:
	        ai_response: JSON text produced by the model (optionally wrapped in a
	            Markdown code fence), or an already-parsed dict.

	    Returns:
	        The page object returned by ``create_meeting_note_with_formatting``,
	        or ``None`` when the reply cannot be parsed or processed. Failures are
	        printed rather than raised.
	    """
	    try:
	        # Parse the JSON response from AI
	        if isinstance(ai_response, str):
	            meeting_data = json.loads(_strip_code_fence(ai_response))
	        else:
	            meeting_data = ai_response

	        if not isinstance(meeting_data, dict):
	            raise TypeError("expected a JSON object at the top level")

	        # Extract action items in the correct format
	        formatted_action_items = [
	            _format_action_item(item) if isinstance(item, dict) else str(item)
	            for item in (meeting_data.get("action_items") or [])
	        ]

	        # Copy the sections so the parsed input is not mutated, and tolerate
	        # a missing or null "content" value.
	        raw_content = meeting_data.get("content")
	        content_dict = dict(raw_content) if isinstance(raw_content, dict) else {}
	        if meeting_data.get("follow_up_items"):
	            content_dict["Follow-up Items"] = meeting_data["follow_up_items"]

	        # The prompt asks for a comma-separated string, but accept a list too.
	        participants = meeting_data.get("participants") or ""
	        if isinstance(participants, list):
	            participants = ", ".join(str(person) for person in participants)

	        # Create the Notion note with formatted content
	        return create_meeting_note_with_formatting(
	            name=meeting_data.get("meeting_title") or "Untitled Meeting",
	            category=meeting_data.get("category") or "Other",
	            participants=participants,
	            summary=meeting_data.get("summary") or "",
	            content_dict=content_dict,
	            action_items=formatted_action_items,
	            meeting_url=meeting_data.get("meeting_url"),
	            date=_valid_iso_date(meeting_data.get("date")),
	        )

	    except json.JSONDecodeError as e:
	        print(f"❌ Error parsing AI response as JSON: {e}")
	        return None
	    except Exception as e:
	        print(f"❌ Error processing AI response: {e}")
	        return None

	def process_all_transcripts() -> None:
	    """Turn every pending transcript into a Notion note.

	    Each transcript is read, sent to the model, and written to Notion; it is
	    moved to the processed directory only when a note was created. An error
	    on one file is printed and does not stop the remaining files.
	    """
	    pending = get_transcript_files()

	    if not pending:
	        expected_dir = Path(__file__).parent / "transcripts"
	        print(f"\n❌ No transcript files found in {expected_dir}")
	        print("Please place your .txt transcript files in the 'transcripts' directory")
	        return

	    print(f"\nFound {len(pending)} transcript files to process in the transcripts directory.")

	    for path in pending:
	        filename = path.name
	        print(f"\nProcessing {filename}...")

	        try:
	            # Read the transcript and ask the model for structured notes.
	            notes = chat("What are the notes?", [], read_transcript(path))

	            # Only a successfully created note lets the file leave the queue.
	            if process_ai_response_to_notion_formatted(notes):
	                print(f"✅ Successfully processed {filename}")
	                move_to_processed(path)
	            else:
	                print(f"❌ Failed to process {filename}")
	        except Exception as error:
	            print(f"❌ Error processing {filename}: {error}")

	def main():
	    """Check the required environment variables, then process all transcripts.

	    The first missing variable is reported and the run stops there.
	    """
	    for required in ("OPENAI_API_KEY", "NOTION_ACCESS_TOKEN"):
	        if os.getenv(required):
	            continue
	        print(f"❌ Error: {required} environment variable is not set")
	        return

	    process_all_transcripts()

	# Run the processor only when this file is executed as a script, not when it
	# is imported as a module.
	if __name__ == "__main__":
	    main()