Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| """ | |
| Meeting Notes Processor | |
| ---------------------- | |
| This script processes meeting transcript files and creates structured notes in Notion. | |
| It uses OpenAI's API to analyze the transcripts and extract key information. | |
| """ | |
| import os | |
| import json | |
| from datetime import datetime | |
| from pathlib import Path | |
| from typing import Dict, List, Optional, Any, Union, TypedDict | |
| from dotenv import load_dotenv | |
| from openai import OpenAI | |
| from notion_client import Client | |
| from openai.types.chat import ChatCompletion | |
| from openai.types.chat.chat_completion_message import ChatCompletionMessage | |
| from openai.types.chat.chat_completion_message_param import ChatCompletionMessageParam | |
| from openai.types.chat.chat_completion_system_message_param import ChatCompletionSystemMessageParam | |
| from openai.types.chat.chat_completion_user_message_param import ChatCompletionUserMessageParam | |
| from openai.types.chat.chat_completion_assistant_message_param import ChatCompletionAssistantMessageParam | |
# Load environment variables from a local .env file before any client is
# built. override=True lets values from .env replace variables that are
# already set in the process environment.
load_dotenv(override=True)

# API clients shared by the functions in this module.
# NOTE(review): OpenAI() is constructed without arguments and presumably
# reads OPENAI_API_KEY from the environment (main() checks for it) - confirm.
openai = OpenAI()
notion = Client(auth=os.getenv("NOTION_ACCESS_TOKEN"))

# Constants
# Notion database that receives the meeting notes. NOTION_DATABASE_ID (from
# the environment or .env) selects a different database; when it is unset or
# empty the original hard-coded database is used.
DATABASE_ID = os.getenv("NOTION_DATABASE_ID") or "214cfc87-3516-801f-9cf5-f6709213c7a0"
class HistoryMessage(TypedDict):
    """One earlier turn of a conversation, as passed to ``chat``.

    Only the text is stored. ``chat`` infers the speaker from the entry's
    position in the history list: even indexes are sent as user messages,
    odd indexes as assistant messages.
    """

    # Text of the message.
    content: str
def get_transcript_files() -> List[Path]:
    """Return the ``.txt`` transcripts waiting in the ``transcripts`` directory.

    The ``transcripts`` and ``processed`` directories sit next to this
    script; both are created here if they do not exist yet, so a first run
    leaves the expected folder layout behind.

    Returns:
        The regular ``*.txt`` files directly inside ``transcripts``, sorted
        by path. ``glob`` yields entries in arbitrary order, so sorting keeps
        the processing order reproducible; directories that happen to match
        the pattern are skipped because they cannot be read as transcripts.
    """
    script_dir = Path(__file__).parent
    transcript_dir = script_dir / "transcripts"
    processed_dir = script_dir / "processed"

    # Create necessary directories if they don't exist
    transcript_dir.mkdir(exist_ok=True)
    processed_dir.mkdir(exist_ok=True)

    return sorted(
        candidate
        for candidate in transcript_dir.glob("*.txt")
        if candidate.is_file()
    )
def read_transcript(file_path: Path) -> str:
    """Return the whole content of the transcript at *file_path*.

    The file is opened in text mode and decoded as UTF-8.
    """
    return Path(file_path).read_text(encoding="utf-8")
def move_to_processed(file_path: Path) -> None:
    """Move a handled transcript into the ``processed`` directory.

    The ``processed`` directory lives next to this script and is created on
    demand. The file keeps its stem and suffix but gains a
    ``_YYYYMMDD_HHMMSS`` timestamp in between, so transcripts that share a
    name do not land on the same target path.

    Args:
        file_path: Transcript file to move; it is renamed in place.
    """
    destination_dir = Path(__file__).parent / "processed"
    destination_dir.mkdir(exist_ok=True)

    # Second-resolution timestamp taken at the moment of the move.
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    destination = destination_dir / f"{file_path.stem}_{stamp}{file_path.suffix}"

    file_path.rename(destination)
    print(f"β Moved transcript to {destination}")
def get_system_prompt(transcript: str) -> str:
    """Build the system prompt that asks the model for structured meeting notes.

    The prompt describes the JSON object the model must return (title,
    participants, category, summary, sectioned content, action items,
    meeting URL, date and follow-up items), lists extraction guidelines and
    embeds the transcript verbatim.

    The ``dependencies`` field of an action item names the same
    'Not specified' placeholder as ``deadline`` and ``priority``: the code
    that formats action items omits a field only when it equals that exact
    placeholder, so the model has to be told to use it.

    Args:
        transcript: Raw meeting transcript. It is inserted as-is; braces in
            the transcript are safe because the f-string is evaluated once.

    Returns:
        The complete system prompt text.
    """
    return f"""You are a detailed notes processor. You are given a transcript of a meeting and you need to process the notes into a comprehensive, structured JSON format.
Please analyze the transcript thoroughly and return a JSON object with the following structure:
{{
"meeting_title": "Descriptive title capturing the main purpose of the meeting",
"participants": "Comma-separated list of attendees, first name only, use first name and first letter of surname when duplicate i.e. Dimo S",
"category": "One of: Coaching, dsm-firmenich, PakTech, BDB Internal, Other - select the most appropriate category based on the discussion context",
"summary": "A comprehensive 2-3 sentence summary covering the main topics discussed, key decisions made, and overall meeting outcome",
"content": {{
"Key Discussions": [
"Detailed points of discussion, including context and background information",
"Capture all important details, examples, and explanations provided",
"Include technical details, numbers, and specific references when mentioned"
],
"Decisions Made": [
"List all decisions made during the meeting",
"Include the context and reasoning behind each decision",
"Note any conditions or dependencies for the decisions"
],
"Challenges & Concerns": [
"Document any challenges, risks, or concerns raised",
"Include proposed solutions or mitigation strategies discussed",
"Note any unresolved issues that need follow-up"
],
"Next Steps": [
"List strategic next steps discussed",
"Include any dependencies or prerequisites mentioned",
"Note any timeline considerations"
]
}},
"action_items": [
{{
"task": "Specific, actionable task description",
"assignee": "Person assigned (or 'Unassigned')",
"deadline": "Deadline if mentioned (or 'Not specified')",
"dependencies": "Any dependencies or prerequisites mentioned (or 'Not specified')",
"priority": "High/Medium/Low if indicated (or 'Not specified')"
}}
],
"meeting_url": "Meeting URL if mentioned (or null)",
"date": "IMPORTANT: Extract the actual meeting date from the transcript. Look for date references like 'scheduled for', 'meeting on', etc. Return in YYYY-MM-DD format. If multiple dates are mentioned, use the actual meeting date, not future dates mentioned for tasks. If no date is found, return null.",
"follow_up_items": [
"List of topics that need follow-up in future meetings",
"Include any parking lot items or tabled discussions"
]
}}
Guidelines:
- Be thorough and detailed in capturing all discussion points
- Maintain chronological order within each section when relevant
- Include specific examples, numbers, and technical details mentioned
- Capture the context and reasoning behind decisions and action items
- Note any disagreements or alternative viewpoints expressed
- Include any resource links or references mentioned
- Document any blockers, dependencies, and risks discussed
- Capture any parking lot items or topics deferred for future discussion
- Pay special attention to extracting the correct meeting date from the transcript
The meeting transcript is:
{transcript}
Return only the JSON object, no additional text."""
def chat(message: str, history: List[HistoryMessage], transcript: str) -> str:
    """Ask the OpenAI chat API for notes on *transcript* and return the reply.

    The request consists of the system prompt built from the transcript, the
    previous turns in *history* and finally *message* as the newest user
    turn. The model is asked for JSON mode output because the reply is parsed
    with ``json.loads`` downstream; without it the model may wrap the object
    in prose or a Markdown fence.

    Args:
        message: The new user message.
        history: Earlier turns, oldest first. Entries carry no role, so even
            positions are sent as user turns and odd positions as assistant
            turns.
        transcript: Meeting transcript embedded in the system prompt.

    Returns:
        The text content of the first completion choice.

    Raises:
        ValueError: If the API returns no choice or an empty message.
    """
    system_message: ChatCompletionSystemMessageParam = {
        "role": "system",
        "content": get_system_prompt(transcript),
    }

    history_messages: List[Union[ChatCompletionUserMessageParam, ChatCompletionAssistantMessageParam]] = []
    for index, past in enumerate(history):
        if index % 2 == 0:
            history_messages.append({"role": "user", "content": past["content"]})
        else:
            history_messages.append({"role": "assistant", "content": past["content"]})

    user_message: ChatCompletionUserMessageParam = {
        "role": "user",
        "content": message,
    }

    messages: List[ChatCompletionMessageParam] = [
        system_message,
        *history_messages,
        user_message,
    ]

    response: ChatCompletion = openai.chat.completions.create(
        model="gpt-4o-mini",
        messages=messages,
        # JSON mode: the system prompt already instructs the model to return
        # JSON, which the API requires when this option is set.
        response_format={"type": "json_object"},
    )

    if not response.choices or not response.choices[0].message or not response.choices[0].message.content:
        raise ValueError("No response received from OpenAI API")
    return response.choices[0].message.content
# Notion request limits: a single text object may hold at most 2000
# characters, and one request may carry at most 100 child blocks.
_NOTION_TEXT_LIMIT = 2000
_NOTION_MAX_CHILDREN = 100


def _rich_text(text: Any) -> List[Dict[str, Any]]:
    """Return Notion rich-text objects for *text*, split to fit the size limit."""
    content = "" if text is None else str(text)
    pieces = [
        content[start:start + _NOTION_TEXT_LIMIT]
        for start in range(0, len(content), _NOTION_TEXT_LIMIT)
    ]
    # An empty string still needs one (empty) text object.
    return [{"type": "text", "text": {"content": piece}} for piece in pieces or [""]]


def _text_block(block_type: str, text: Any, **extra: Any) -> Dict[str, Any]:
    """Return a Notion block of *block_type* whose rich text is *text*."""
    return {
        "object": "block",
        "type": block_type,
        block_type: {"rich_text": _rich_text(text), **extra},
    }


def _is_iso_date(value: Any) -> bool:
    """Return True when *value* is a string ``datetime.fromisoformat`` accepts."""
    try:
        datetime.fromisoformat(value)
    except (TypeError, ValueError):
        return False
    return True


def create_meeting_note_with_formatting(
    name: str,
    category: str,
    participants: str,
    summary: str,
    content_dict: Optional[Dict[str, Any]] = None,
    action_items: Optional[List[str]] = None,
    meeting_url: Optional[str] = None,
    date: Optional[str] = None
) -> Optional[Dict[str, Any]]:
    """Create a meeting-note page in the Notion database.

    The page gets the properties ``Name``, ``Category``, ``Participants`` and
    ``Summary`` (plus ``Date`` and ``Meeting URL`` when given) and a body made
    of a "Meeting Notes" heading, one heading with bullets per content
    section, and an "Action Items" heading with unchecked to-dos.

    Args:
        name: Page title.
        category: Name of the select option for the ``Category`` property.
        participants: Text for the ``Participants`` property.
        summary: Text for the ``Summary`` property.
        content_dict: Maps a section title to a list of bullet texts; a
            non-list value becomes a single bullet. Ignored unless a dict.
        action_items: Texts rendered as to-do blocks.
        meeting_url: Value for the ``Meeting URL`` property.
        date: Meeting date as an ISO string (``YYYY-MM-DD``). A value that is
            not a parseable ISO date is dropped with a warning instead of
            making the whole request fail.

    Returns:
        The created page as returned by the Notion client, or ``None`` when
        the request fails or the response is not a dict. Errors are printed,
        not raised.
    """
    # Build properties based on your database schema
    properties: Dict[str, Any] = {
        "Name": {"title": _rich_text(name)},
        "Category": {"select": {"name": category}},
        "Participants": {"rich_text": _rich_text(participants)},
        "Summary": {"rich_text": _rich_text(summary)},
    }

    # Only add the date if one was found and it is well formed. The model can
    # return placeholders such as "null" or free text here.
    # NOTE(review): before Python 3.11 fromisoformat rejects a trailing "Z";
    # the prompt asks for YYYY-MM-DD, which every version accepts.
    if date:
        if _is_iso_date(date):
            properties["Date"] = {"date": {"start": date}}
        else:
            print(f"Warning: ignoring unrecognised meeting date {date!r}")

    if meeting_url:
        properties["Meeting URL"] = {"url": meeting_url}

    # Build the page content with proper formatting
    children: List[Dict[str, Any]] = [_text_block("heading_2", "Meeting Notes")]

    if content_dict and isinstance(content_dict, dict):
        for section, items in content_dict.items():
            children.append(_text_block("heading_3", section))
            entries = items if isinstance(items, list) else [items]
            children.extend(_text_block("bulleted_list_item", entry) for entry in entries)

    if action_items:
        children.append(_text_block("heading_3", "Action Items"))
        children.extend(_text_block("to_do", item, checked=False) for item in action_items)

    try:
        # Create the page with the first batch of blocks; longer notes are
        # appended afterwards in further batches of at most 100 blocks.
        new_page = notion.pages.create(
            parent={"database_id": DATABASE_ID},
            properties=properties,
            children=children[:_NOTION_MAX_CHILDREN]
        )
        if not isinstance(new_page, dict):
            print(f"β Error: Unexpected response type from Notion API")
            return None

        for start in range(_NOTION_MAX_CHILDREN, len(children), _NOTION_MAX_CHILDREN):
            notion.blocks.children.append(
                block_id=new_page["id"],
                children=children[start:start + _NOTION_MAX_CHILDREN]
            )

        print(f"β Meeting note '{name}' created successfully!")
        print(f"Page URL: {new_page.get('url', 'URL not found')}")
        return new_page
    except Exception as e:
        print(f"β Error creating meeting note: {e}")
        return None
# Placeholder values (compared case-insensitively) that mean "nothing to show"
# for an optional action-item field.
_EMPTY_FIELD_VALUES = frozenset({"", "unassigned", "not specified", "none", "null", "n/a"})

# Label shown in the note for each optional action-item key, in display order.
_ACTION_ITEM_FIELDS = (
    ("Assigned to", "assignee"),
    ("Due", "deadline"),
    ("Priority", "priority"),
    ("Dependencies", "dependencies"),
)


def _strip_code_fence(text: str) -> str:
    """Return *text* without a surrounding Markdown code fence, if it has one."""
    stripped = text.strip()
    if len(stripped) < 6 or not (stripped.startswith("```") and stripped.endswith("```")):
        return stripped
    body = stripped[3:-3]
    first_line, newline, rest = body.partition("\n")
    # The opening fence may carry a language tag such as "json"; drop that
    # line unless it already holds the start of the JSON payload.
    if newline and not first_line.strip().startswith(("{", "[")):
        body = rest
    return body.strip()


def _field_text(item: Dict[str, Any], key: str) -> Optional[str]:
    """Return the meaningful text of ``item[key]``, or None if absent/placeholder."""
    value = item.get(key)
    if value is None:
        return None
    text = str(value).strip()
    return None if text.casefold() in _EMPTY_FIELD_VALUES else text


def _format_action_item(item: Dict[str, Any]) -> str:
    """Return the one-line to-do text for an action-item dict."""
    task = _field_text(item, "task") or "Untitled task"
    details = []
    for label, key in _ACTION_ITEM_FIELDS:
        text = _field_text(item, key)
        if text is not None:
            details.append(f"{label}: {text}")
    # Only add the separator when there is something after it, so a bare
    # task does not end in a dangling " -".
    return f"{task} - {', '.join(details)}" if details else task


def process_ai_response_to_notion_formatted(ai_response: str) -> Optional[Dict[str, Any]]:
    """Turn the model's JSON reply into a formatted Notion meeting note.

    Args:
        ai_response: JSON text returned by the model (a surrounding Markdown
            code fence is tolerated), or an already parsed dict.

    Returns:
        The page returned by ``create_meeting_note_with_formatting``, or
        ``None`` when the reply cannot be parsed, is not a JSON object, lacks
        a required field (``meeting_title``, ``category``, ``participants``,
        ``summary``) or the note cannot be created. Errors are printed, not
        raised.
    """
    try:
        # Parse the JSON response from AI
        if isinstance(ai_response, str):
            meeting_data = json.loads(_strip_code_fence(ai_response))
        else:
            meeting_data = ai_response
        if not isinstance(meeting_data, dict):
            raise ValueError("expected a JSON object at the top level")

        # Action items may be dicts (formatted field by field), plain values
        # (used as-is), or a single value instead of a list.
        raw_items = meeting_data.get("action_items") or []
        if not isinstance(raw_items, list):
            raw_items = [raw_items]
        formatted_action_items = [
            _format_action_item(item) if isinstance(item, dict) else str(item)
            for item in raw_items
        ]

        # Work on a copy so the parsed reply is not modified, and tolerate a
        # missing or null "content" value.
        content = meeting_data.get("content")
        content_dict = dict(content) if isinstance(content, dict) else {}
        if meeting_data.get("follow_up_items"):
            content_dict["Follow-up Items"] = meeting_data["follow_up_items"]

        # Create the Notion note with formatted content
        return create_meeting_note_with_formatting(
            name=meeting_data["meeting_title"],
            category=meeting_data["category"],
            participants=meeting_data["participants"],
            summary=meeting_data["summary"],
            content_dict=content_dict,
            action_items=formatted_action_items,
            meeting_url=meeting_data.get("meeting_url"),
            date=meeting_data.get("date")
        )
    except json.JSONDecodeError as e:
        print(f"β Error parsing AI response as JSON: {e}")
        return None
    except Exception as e:
        print(f"β Error processing AI response: {e}")
        return None
def process_all_transcripts() -> None:
    """Run every pending transcript through the notes pipeline.

    For each file returned by ``get_transcript_files`` the transcript is
    read, sent to ``chat`` for analysis and turned into a Notion page. A file
    is moved to the processed directory only after its page was created; a
    failure is reported and the loop continues with the next file.
    """
    pending = get_transcript_files()

    if not pending:
        transcript_dir = Path(__file__).parent / "transcripts"
        print(f"\nβ No transcript files found in {transcript_dir}")
        print("Please place your .txt transcript files in the 'transcripts' directory")
        return

    print(f"\nFound {len(pending)} transcript files to process in the transcripts directory.")

    for path in pending:
        print(f"\nProcessing {path.name}...")
        try:
            text = read_transcript(path)
            # Empty history: each transcript is analysed in a fresh conversation.
            notes = chat("What are the notes?", [], text)
            page = process_ai_response_to_notion_formatted(notes)
            if not page:
                print(f"β Failed to process {path.name}")
                continue
            print(f"β Successfully processed {path.name}")
            move_to_processed(path)
        except Exception as exc:
            print(f"β Error processing {path.name}: {exc}")
def main():
    """Entry point: verify the required credentials, then process transcripts.

    Stops at the first missing environment variable and prints which one it
    is; nothing is processed in that case.
    """
    for variable in ("OPENAI_API_KEY", "NOTION_ACCESS_TOKEN"):
        if not os.getenv(variable):
            print(f"β Error: {variable} environment variable is not set")
            return

    process_all_transcripts()


if __name__ == "__main__":
    main()