GFiaMon committed on
Commit
b812a47
·
1 Parent(s): be4bd63

Updated many system prompt bugs

Browse files
app.py CHANGED
@@ -62,5 +62,5 @@ if __name__ == "__main__":
62
  demo.launch(
63
  server_name="0.0.0.0",
64
  server_port=7860,
65
- share=True
66
  )
 
62
  demo.launch(
63
  server_name="0.0.0.0",
64
  server_port=7860,
65
+ share=False
66
  )
src/agents/conversational.py CHANGED
@@ -27,6 +27,8 @@ from src.tools.general import (
27
  list_recent_meetings,
28
  search_meetings,
29
  upsert_text_to_pinecone,
 
 
30
  )
31
  from src.tools.video import (
32
  cancel_video_workflow,
@@ -60,6 +62,21 @@ class ConversationalMeetingAgent:
60
  # Enhanced system prompt for conversational workflow
61
  SYSTEM_PROMPT = """You are a friendly and helpful Meeting Intelligence Assistant. You help users manage their meeting recordings through natural conversation.
62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  **IMPORTANT: Handling Meeting References**
64
  - If the user refers to a meeting by index (e.g., "meeting 1", "the second meeting"), you MUST first call `list_recent_meetings` to find the actual `meeting_id` (e.g., "meeting_abc123").
65
  - NEVER use "meeting 1" or "meeting 2" as a `meeting_id` in tool calls. Always map it to the real ID first.
@@ -101,48 +118,92 @@ You can help users with two main workflows:
101
  - `list_recent_meetings`: Show available meetings
102
  - `search_meetings`: Search meeting content semantically
103
  - `get_meeting_metadata`: Get meeting details
 
104
 
105
- **Notion Integration (Export & Documentation):**
106
 
107
  **IMPORTANT: You CAN and SHOULD use Notion tools when the user asks!**
108
 
109
- When the user asks to create/upload content to Notion:
110
-
111
- 1. **Use `API-post-page` to create a new page**:
 
 
 
 
 
 
 
 
 
 
 
112
  ```
113
- API-post-page(
114
- parent={"page_id": "2bc5a424-5cbb-80ec-8aa9-c4fd989e67bc"},
115
- properties={"title": [{"text": {"content": "Your Page Title"}}]},
116
- children=["Content goes here as a string"]
117
  )
118
  ```
119
 
120
- 2. **Default Parent Page**: Use `2bc5a424-5cbb-80ec-8aa9-c4fd989e67bc` (the "Meetings Summary Test" page).
121
-
122
- 3. **Alternative**: If the user specifies a different location, use `API-post-search(query="page name")` to find it first.
123
-
124
- **Available Tools:**
125
- - `API-post-page`: Create new pages (USE THIS!)
126
  - `API-post-search`: Search for pages
127
- - `API-append-block-children`: Add content to existing pages
 
 
 
128
  - `API-patch-page`: Update page properties
129
 
130
- **Example:**
131
- User: "Create a test page in Notion"
132
- You: Call `API-post-page(parent={"page_id": "2bc5a424-5cbb-80ec-8aa9-c4fd989e67bc"}, properties={"title": [{"text": {"content": "Test Page"}}]}, children=["This is a test"])`
133
 
134
- **Generic Document/Text Upsert:**
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
 
136
- **IMPORTANT: Saving Content from Notion or Manual Entry**
137
 
138
- When the user wants to save content from Notion, manual notes, or any text that is NOT a video transcription, you MUST use the `upsert_text_to_pinecone` tool.
 
 
 
 
 
 
139
 
140
- 1. **Get the content**: If it's a Notion page, first read it using `API-get-block-children` or `API-retrieve-page`. If it's manual text, use the text provided by the user.
141
- 2. **Upsert**: Call `upsert_text_to_pinecone(text="...", title="...", source="Notion/Manual")`.
 
142
 
143
- **Example:**
144
- User: "Save this Notion page to Pinecone"
145
- You: [After reading page content] Call `upsert_text_to_pinecone(text="Page content...", title="Page Title", source="Notion")`
 
 
 
 
 
 
 
 
 
 
 
 
146
 
147
 
148
  **Conversational Guidelines:**
@@ -165,7 +226,11 @@ You: [After reading page content] Call `upsert_text_to_pinecone(text="Page conte
165
  - Confirm success and offer to help with queries
166
 
167
  4. **Meeting Query Flow**:
168
- - For "what meetings": call `list_recent_meetings`
 
 
 
 
169
  - For specific questions: call `search_meetings`
170
  - For meeting details: call `get_meeting_metadata`
171
  - To create minutes/summaries:
@@ -260,7 +325,11 @@ Remember: You're a helpful assistant focused on making meeting management effort
260
  search_meetings,
261
  get_meeting_metadata,
262
  list_recent_meetings,
263
- upsert_text_to_pinecone
 
 
 
 
264
  ]
265
 
266
  # Load MCP tools (Notion integration)
@@ -310,6 +379,7 @@ Remember: You're a helpful assistant focused on making meeting management effort
310
  if success:
311
  tools = mcp_manager.get_langchain_tools()
312
  print(f"✅ Integrated {len(tools)} MCP tools into agent")
 
313
  return tools
314
  else:
315
  print("⚠️ MCP initialization failed")
 
27
  list_recent_meetings,
28
  search_meetings,
29
  upsert_text_to_pinecone,
30
+ import_notion_to_pinecone,
31
+ create_notion_page,
32
  )
33
  from src.tools.video import (
34
  cancel_video_workflow,
 
62
  # Enhanced system prompt for conversational workflow
63
  SYSTEM_PROMPT = """You are a friendly and helpful Meeting Intelligence Assistant. You help users manage their meeting recordings through natural conversation.
64
 
65
+ **CRITICAL: INTENT ROUTING (READ FIRST)**
66
+ Before calling ANY tool, determine the user's intent:
67
+
68
+ 1. **"Create a Notion page"** / **"Save to Notion"** / **"Export to Notion"** / **"Upload to Notion"**
69
+ - **ACTION**: You MUST use `create_notion_page(title=..., content=...)`.
70
+ - **FORBIDDEN TOOLS**: Do NOT use `upsert_text_to_pinecone` or `import_notion_to_pinecone`.
71
+ - **Example**: "Create a Notion page with these minutes" -> `create_notion_page(...)`
72
+
73
+ 2. **"Save to Database"** / **"Save to Memory"** / **"Upload to Pinecone"** / **"Ingest this"**
74
+ - **ACTION**: Use `upsert_text_to_pinecone` (for manual text) or `import_notion_to_pinecone` (for Notion pages).
75
+ - **FORBIDDEN TOOLS**: Do NOT use Notion creation tools.
76
+
77
+ 3. **"Import from Notion"** / **"Sync from Notion"**
78
+ - **ACTION**: Use `import_notion_to_pinecone`.
79
+
80
  **IMPORTANT: Handling Meeting References**
81
  - If the user refers to a meeting by index (e.g., "meeting 1", "the second meeting"), you MUST first call `list_recent_meetings` to find the actual `meeting_id` (e.g., "meeting_abc123").
82
  - NEVER use "meeting 1" or "meeting 2" as a `meeting_id` in tool calls. Always map it to the real ID first.
 
118
  - `list_recent_meetings`: Show available meetings
119
  - `search_meetings`: Search meeting content semantically
120
  - `get_meeting_metadata`: Get meeting details
121
+ - `get_current_time` (from World Time MCP): Check today's date (use this for questions like "last week", "yesterday", etc.)
122
 
123
+ **Notion Integration & Retrieval:**
124
 
125
  **IMPORTANT: You CAN and SHOULD use Notion tools when the user asks!**
126
 
127
+ **A. RETRIEVING from Notion (Workflow):**
128
+ To retrieve a full page from Notion, you MUST follow these steps (Notion pages are split into metadata and content):
129
+ 1. **Find Page**: Use `API-post-search(query="name")` to get the `page_id`.
130
+ 2. **Get Metadata**: Use `API-retrieve-a-page(page_id=...)` to get the title and properties. *This does NOT return the page content/text.*
131
+ 3. **Get Content (CRITICAL)**: Use `API-get-block-children(block_id=page_id)` to get the actual text blocks.
132
+ - You MUST iterate through the blocks to extract the "plain_text" or "content".
133
+ - If you skip this, you will only have an empty page!
134
+
135
+ **B. CREATING in Notion:**
136
+ 1. **Use `create_notion_page`**:
137
+ - Simply provide the `title` and the `content` (plain text or markdown).
138
+ - This tool handles all paragraph formatting 2000-char limits automatically.
139
+ - Do NOT try to build complex JSON blocks yourself.
140
+
141
  ```
142
+ create_notion_page(
143
+ title="Meeting Minutes - Dec 24",
144
+ content="Here is the summary...\n\n- Point 1\n- Point 2"
 
145
  )
146
  ```
147
 
148
+ **Available Notion Tools:**
 
 
 
 
 
149
  - `API-post-search`: Search for pages
150
+ - `API-retrieve-a-page`: Get page metadata (Title, Date, etc.)
151
+ - `API-get-block-children`: Get page content/blocks (USE THIS FOR CONTENT!)
152
+ - `API-post-page`: Create new pages
153
+ - `API-patch-block-children`: Add content to existing pages (Append)
154
  - `API-patch-page`: Update page properties
155
 
156
+ **C. APPENDING to Notion:**
157
+ When adding content to an existing page, you MUST use `API-patch-block-children`.
158
+ **CRITICAL**: The `children` argument MUST be a list of Block Objects (like `API-post-page`).
159
 
160
+ ```
161
+ API-patch-block-children(
162
+ block_id="page_id_here",
163
+ children=[
164
+ {
165
+ "object": "block",
166
+ "type": "heading_2",
167
+ "heading_2": {"rich_text": [{"type": "text", "text": {"content": "New Section"}}]}
168
+ },
169
+ {
170
+ "object": "block",
171
+ "type": "paragraph",
172
+ "paragraph": {"rich_text": [{"type": "text", "text": {"content": "New content..."}}]}
173
+ }
174
+ ]
175
+ )
176
+ ```
177
 
178
+ **D. SAVING to Pinecone (Generic Document/Text Upsert):**
179
 
180
+ 1. **Importing from Notion (MANDATORY)**:
181
+ - **ALWAYS** call `import_notion_to_pinecone(query='Meeting Title')`.
182
+ - **Context Resolution**: If the user says "upload the first one" or "that meeting", you MUST resolve the reference to the actual **Page Title** from the conversation history (e.g., "Meeting 1"). Do NOT pass "first one" as the query.
183
+ - **No Batch Uploads**: If the user asks to "upload all", "upload the missing ones", or provides a list, you MUST call `import_notion_to_pinecone` SEPARATELY for each meeting title. Do NOT call the tool once with a list or a summary. Provide one confirmation message after all are done.
184
+ - **NEVER** use `upsert_text_to_pinecone` for Notion content, even if you think you have the text in your history.
185
+ - **REASON**: Usage of `upsert_text_to_pinecone` for Notion runs the risk of you summarizing the content. `import_notion_to_pinecone` purely transfers raw data via code, which is safer.
186
+ - This single tool handles search, content fetching, and saving automatically.
187
 
188
+ 2. **Manual Entry (User types text directly)**:
189
+ - Use `upsert_text_to_pinecone` with the FULL text provided by the user.
190
+ - Ensure you pass the raw text without summarizing.
191
 
192
+ **Example 1 (Notion -> Pinecone):**
193
+ User: "Save 'Meeting 3' from Notion to Pinecone"
194
+ You: `import_notion_to_pinecone(query="Meeting 3")`
195
+
196
+ **Example 2 (Notion -> Pinecone):**
197
+ User: "Sync 'Project Kickoff' to database"
198
+ You: `import_notion_to_pinecone(query="Project Kickoff")`
199
+
200
+ **Example 3 (Pinecone/Agent -> Notion):**
201
+ User: "Save this summary to a Notion page"
202
+ You: `create_notion_page(title="Summary", content="The summary...")`
203
+
204
+ **Example 4 (Manual -> Pinecone):**
205
+ User: "Save this note: 'Discussion about budget'"
206
+ You: `upsert_text_to_pinecone(text="Discussion about budget", title="Manual Note")`
207
 
208
 
209
  **Conversational Guidelines:**
 
226
  - Confirm success and offer to help with queries
227
 
228
  4. **Meeting Query Flow**:
229
+ - For "what meetings" (db): call `list_recent_meetings`
230
+ - For "meetings in Notion" or "Notion pages": call `API-post-search(query="Meeting")`. Do NOT use `list_recent_meetings`.
231
+ - For "compare Notion and Database" or "what is missing": Call BOTH `list_recent_meetings` AND `API-post-search(query="Meeting")`, then compare the lists. Report any missing meetings clearly. If meetings are missing, ASK "Would you like to sync [Meeting Name]?" before uploading. Do NOT auto-upload.
232
+ - For "find meeting about X", "do I have...", or "search everywhere": Call BOTH `search_meetings(query='X')` AND `API-post-search(query='X')` and report all findings.
233
+ - For time-based questions (e.g., "last week", "yesterday"): FIRST call the available time tool (e.g., `get_current_time` from World Time MCP), THEN calculate the date, THEN call `search_meetings`.
234
  - For specific questions: call `search_meetings`
235
  - For meeting details: call `get_meeting_metadata`
236
  - To create minutes/summaries:
 
325
  search_meetings,
326
  get_meeting_metadata,
327
  list_recent_meetings,
328
+ upsert_text_to_pinecone,
329
+ list_recent_meetings,
330
+ upsert_text_to_pinecone,
331
+ import_notion_to_pinecone,
332
+ create_notion_page
333
  ]
334
 
335
  # Load MCP tools (Notion integration)
 
379
  if success:
380
  tools = mcp_manager.get_langchain_tools()
381
  print(f"✅ Integrated {len(tools)} MCP tools into agent")
382
+ print(f"📋 Available Tools: {[t.name for t in tools]}")
383
  return tools
384
  else:
385
  print("⚠️ MCP initialization failed")
src/retrievers/pinecone.py CHANGED
@@ -161,12 +161,12 @@ class PineconeManager:
161
  meetings[meeting_id] = {
162
  "meeting_id": meeting_id,
163
  "meeting_date": metadata.get("meeting_date"),
164
- "title": metadata.get("title", "Untitled Meeting"),
165
- "duration": metadata.get("duration", "N/A"),
166
  "source_file": metadata.get("source_file", "N/A"),
167
  }
168
 
169
- return list(meetings.values())
170
 
171
  except Exception as e:
172
  print(f"Error listing meetings: {e}")
 
161
  meetings[meeting_id] = {
162
  "meeting_id": meeting_id,
163
  "meeting_date": metadata.get("meeting_date"),
164
+ "meeting_title": metadata.get("meeting_title", metadata.get("title", "Untitled Meeting")),
165
+ "meeting_duration": metadata.get("duration", metadata.get("meeting_duration", "N/A")),
166
  "source_file": metadata.get("source_file", "N/A"),
167
  }
168
 
169
+ return list(meetings.values())
170
 
171
  except Exception as e:
172
  print(f"Error listing meetings: {e}")
src/tools/general.py CHANGED
@@ -11,10 +11,12 @@ Reference: https://docs.langchain.com/oss/python/langchain/tools#create-tools
11
  from typing import List, Dict, Any, Optional
12
  from datetime import datetime
13
  import uuid
 
14
  from langchain.tools import tool
15
  from langchain_core.documents import Document
16
 
17
  from src.retrievers.pipeline import process_transcript_to_documents
 
18
  from src.config.settings import Config
19
 
20
  # Global reference to PineconeManager (will be set during initialization)
@@ -80,12 +82,18 @@ def search_meetings(query: str, max_results: int = 5, meeting_id: Optional[str]
80
  for i, doc in enumerate(docs, 1):
81
  metadata = doc.metadata
82
  meeting_id = metadata.get("meeting_id", "unknown")
83
- meeting_date = metadata.get("meeting_date", "unknown") # Fixed: was "date"
 
84
  chunk_index = metadata.get("chunk_index", "?")
 
 
85
 
86
  result_parts.append(
87
  f"\n--- Segment {i} ---\n"
88
- f"Meeting: {meeting_id} (Date: {meeting_date})\n"
 
 
 
89
  f"Chunk: {chunk_index}\n"
90
  f"Content:\n{doc.page_content}\n"
91
  )
@@ -177,7 +185,7 @@ def list_recent_meetings(limit: int = 10) -> str:
177
  # Get retriever with high k to fetch many documents
178
  retriever = _pinecone_manager.get_retriever(
179
  namespace=Config.PINECONE_NAMESPACE,
180
- search_kwargs={"k": 100} # Fetch many to find unique meetings
181
  )
182
 
183
  # Use a generic query to get documents
@@ -223,29 +231,285 @@ def list_recent_meetings(limit: int = 10) -> str:
223
  return f"Error listing meetings: {str(e)}"
224
 
225
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
226
  # Export all tools for easy import
227
  __all__ = [
228
  "initialize_tools",
229
  "search_meetings",
230
  "get_meeting_metadata",
231
  "list_recent_meetings",
232
- "upsert_text_to_pinecone"
 
 
 
233
  ]
234
 
235
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
236
  @tool
237
  def upsert_text_to_pinecone(text: str, title: str, source: str = "Manual Entry", date: str = None) -> str:
238
  """
239
  Upsert any text content (e.g., Notion pages, manual notes) to Pinecone.
240
 
241
- Use this tool when the user wants to save a Notion page, meeting notes, or any other text
242
- that is NOT a video transcription.
 
 
 
243
 
244
  Args:
245
- text: The content to save
246
  title: Title of the document/meeting
247
  source: Source of the content (e.g., "Notion", "Manual Entry")
248
- date: Date of the content (YYYY-MM-DD). Defaults to today.
249
 
250
  Returns:
251
  Success message with the generated meeting_id
@@ -254,36 +518,59 @@ def upsert_text_to_pinecone(text: str, title: str, source: str = "Manual Entry",
254
  return "Error: Pinecone service is not initialized."
255
 
256
  try:
257
- # Generate ID and defaults
258
- meeting_id = f"doc_{uuid.uuid4().hex[:8]}"
259
- if not date:
260
- date = datetime.now().strftime("%Y-%m-%d")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
261
 
262
- # Create comprehensive metadata with consistent field names
 
 
 
 
 
 
263
  meeting_metadata = {
264
  "meeting_id": meeting_id,
265
- "meeting_date": date, # ✅ Fixed: was "date"
266
  "date_transcribed": datetime.now().strftime("%Y-%m-%d"),
267
  "source": source,
268
- "meeting_title": title, # ✅ Fixed: was "title"
269
- "summary": f"Imported from {source}", # ✅ Added summary
270
  "source_file": f"{source.lower()}_upload",
271
  "transcription_model": "text_import",
272
- "language": "en"
 
273
  }
274
 
275
- # Process text into documents (using fallback chunking since no speaker data)
276
  docs = process_transcript_to_documents(
277
- transcript_text=text,
278
- speaker_data=None,
279
  meeting_id=meeting_id,
280
  meeting_metadata=meeting_metadata
281
  )
282
 
283
- # Upsert to Pinecone
284
  _pinecone_manager.upsert_documents(docs, namespace=Config.PINECONE_NAMESPACE)
285
 
286
- return f"✅ Successfully saved '{title}' to Pinecone (ID: {meeting_id})"
 
 
287
 
288
  except Exception as e:
289
  return f"❌ Error saving to Pinecone: {str(e)}"
 
11
  from typing import List, Dict, Any, Optional
12
  from datetime import datetime
13
  import uuid
14
+ import requests
15
  from langchain.tools import tool
16
  from langchain_core.documents import Document
17
 
18
  from src.retrievers.pipeline import process_transcript_to_documents
19
+ from src.processing.metadata_extractor import MetadataExtractor
20
  from src.config.settings import Config
21
 
22
  # Global reference to PineconeManager (will be set during initialization)
 
82
  for i, doc in enumerate(docs, 1):
83
  metadata = doc.metadata
84
  meeting_id = metadata.get("meeting_id", "unknown")
85
+ meeting_date = metadata.get("meeting_date", "N/A") # Fixed missing variable
86
+ meeting_title = metadata.get("meeting_title", "Untitled") # Added title
87
  chunk_index = metadata.get("chunk_index", "?")
88
+ summary = metadata.get("summary", "N/A")
89
+ speakers = metadata.get("speaker_mapping", "N/A")
90
 
91
  result_parts.append(
92
  f"\n--- Segment {i} ---\n"
93
+ f"Meeting: {meeting_title} (ID: {meeting_id})\n"
94
+ f"Date: {meeting_date}\n"
95
+ f"Summary: {summary}\n"
96
+ f"Speakers: {speakers}\n"
97
  f"Chunk: {chunk_index}\n"
98
  f"Content:\n{doc.page_content}\n"
99
  )
 
185
  # Get retriever with high k to fetch many documents
186
  retriever = _pinecone_manager.get_retriever(
187
  namespace=Config.PINECONE_NAMESPACE,
188
+ search_kwargs={"k": 500} # Fetch many to find unique meetings
189
  )
190
 
191
  # Use a generic query to get documents
 
231
  return f"Error listing meetings: {str(e)}"
232
 
233
 
234
@tool
def get_current_time() -> str:
    """
    Get the current date and time.

    Use this tool when you need to answer questions about relative time
    (e.g., "what happened yesterday?", "meetings from last week?").

    Returns:
        Current date and time in YYYY-MM-DD HH:MM format
    """
    # NOTE: the docstring above doubles as the tool description handed to the
    # agent by `@tool`, so its wording is intentionally left untouched.
    timestamp_format = "%Y-%m-%d %H:%M"
    # datetime.now() with no tz argument yields a naive timestamp in the
    # host's local time zone.
    current_moment = datetime.now()
    return current_moment.strftime(timestamp_format)
246
+
247
+
248
# Seconds to wait on any single Notion HTTP call before giving up, so a stalled
# connection cannot hang the agent indefinitely.
_NOTION_IMPORT_TIMEOUT = 30

# Blocks nested deeper than this are ignored (safety limit for recursion depth).
_NOTION_MAX_BLOCK_DEPTH = 5


def _notion_page_title(page: Dict[str, Any]) -> str:
    """Return the plain-text title of a Notion page object, or "" if it has none."""
    properties = page.get("properties", {})
    # The title property is located by its property id rather than by its
    # display name. `.get("id")` avoids a KeyError on properties without an id.
    title_prop = next(
        (
            prop
            for prop in properties.values()
            if isinstance(prop, dict) and prop.get("id") == "title"
        ),
        None,
    )
    if not title_prop:
        return ""
    return "".join(part.get("plain_text", "") for part in title_prop.get("title") or [])


def _fetch_notion_text(block_id: str, headers: Dict[str, str], depth: int = 0) -> List[str]:
    """Collect the plain text of every block under `block_id`, depth-first.

    Args:
        block_id: Id of the Notion page or block whose children are fetched.
        headers: HTTP headers (auth, version) to send with each request.
        depth: Current nesting level; recursion stops past the depth limit.

    Returns:
        Non-empty text lines in document order. An empty list is returned when
        the depth limit is exceeded or when any request for this level fails.
    """
    if depth > _NOTION_MAX_BLOCK_DEPTH:
        return []

    collected_text: List[str] = []
    blocks_url = f"https://api.notion.com/v1/blocks/{block_id}/children"
    cursor = None

    while True:
        params = {"page_size": 100}
        if cursor:
            params["start_cursor"] = cursor

        resp = requests.get(
            blocks_url,
            headers=headers,
            params=params,
            timeout=_NOTION_IMPORT_TIMEOUT,
        )
        if resp.status_code != 200:
            print(f"⚠️ Error fetching sub-blocks for {block_id}: {resp.text}")
            return []

        data = resp.json()

        for block in data.get("results", []):
            # 1. Extract text from this block (payload lives under the type name)
            b_type = block.get("type")
            type_payload = block.get(b_type) if b_type else None
            if isinstance(type_payload, dict) and "rich_text" in type_payload:
                plain_text = "".join(
                    t.get("plain_text", "") for t in type_payload["rich_text"]
                )
                if plain_text.strip():
                    collected_text.append(plain_text)

            # 2. Descend into nested blocks
            if block.get("has_children", False):
                collected_text.extend(
                    _fetch_notion_text(block["id"], headers, depth + 1)
                )

        cursor = data.get("next_cursor")
        # Stop when Notion reports no more pages, and also when it claims more
        # but supplies no cursor; otherwise the first page would be re-fetched
        # forever.
        if not data.get("has_more", False) or not cursor:
            break

    return collected_text


@tool
def import_notion_to_pinecone(query: str) -> str:
    """
    Directly import a Notion page to Pinecone by name.

    Fetch a Notion page and save it TO Pinecone.

    Use this tool ONLY when the user wants to *Import* or *Sync* a page FROM Notion INTO the database.
    Do NOT use this tool to write content TO Notion. Use `create_notion_page` or `API-patch-block-children` for that.

    This tool handles the entire process (Search -> Fetch Content -> Upsert) automatically.

    Args:
        query: The name of the Notion page to find (e.g., "Meeting 1").

    Returns:
        Status message indicating success or failure.
    """
    if not Config.NOTION_TOKEN:
        return "❌ Error: NOTION_TOKEN not set in configuration."

    query_clean = (query or "").lower().strip()
    if not query_clean:
        # An empty query would substring-match every page title and import an
        # arbitrary page, so refuse it up front.
        return "❌ Error: Please provide the name of the Notion page to import."

    headers = {
        "Authorization": f"Bearer {Config.NOTION_TOKEN}",
        "Notion-Version": "2022-06-28",
        "Content-Type": "application/json"
    }

    try:
        # 1. Search for the page
        print(f"🔍 Searching Notion for: {query}...")
        search_payload = {
            "query": query,
            "filter": {"value": "page", "property": "object"},
            "sort": {"direction": "descending", "timestamp": "last_edited_time"},
            "page_size": 25
        }
        response = requests.post(
            "https://api.notion.com/v1/search",
            headers=headers,
            json=search_payload,
            timeout=_NOTION_IMPORT_TIMEOUT,
        )

        if response.status_code != 200:
            return f"❌ Notion Search Error: {response.text}"

        results = response.json().get("results", [])
        if not results:
            return f"❌ No Notion page found matching '{query}'."

        # Select best match: an exact (case-insensitive) title match wins,
        # otherwise the first title that contains the query.
        exact_match = None
        substring_match = None

        for candidate in results:
            p_title = _notion_page_title(candidate)
            p_title_clean = p_title.lower().strip()

            # Check 1: Exact Match
            if p_title_clean == query_clean:
                exact_match = candidate
                print(f"✅ Exact match found: '{p_title}'")
                break  # Found the perfect match

            # Check 2: Substring Match (save the first one found)
            if query_clean in p_title_clean and substring_match is None:
                substring_match = candidate
                print(f"🔍 Substring match candidate: '{p_title}'")

            # Print for debugging
            print(f" - Found result: '{p_title}'")

        # Decide which page to use
        if exact_match:
            page = exact_match
        elif substring_match:
            page = substring_match
            print("⚠️ Using substring match.")
        else:
            # List the titles that were found to guide the user
            titles_found = [t for t in map(_notion_page_title, results) if t]
            return f"❌ Could not find a specific match for '{query}'. Found these pages instead: {', '.join(titles_found)}. Please try again with the exact name."

        page_id = page["id"]
        title = _notion_page_title(page) or "Untitled"

        print(f"📄 Found Page: '{title}' ({page_id})")

        # 2. Recursive Fetch of All Content
        all_text_lines = _fetch_notion_text(page_id, headers)

        if not all_text_lines:
            return f"⚠️ Page '{title}' found but appears empty or has no text blocks."

        full_content = "\n\n".join(all_text_lines)

        # 3. Upsert to Pinecone (delegates to the sibling tool so the raw text
        # is stored by code rather than passed through the model)
        return upsert_text_to_pinecone.invoke({"text": full_content, "title": title, "source": "Notion"})

    except Exception as e:
        # Tool boundary: report the failure (including request timeouts) to the
        # agent as a message instead of raising.
        return f"❌ Import failed: {str(e)}"
413
+
414
+
415
  # Export all tools for easy import
416
  __all__ = [
417
  "initialize_tools",
418
  "search_meetings",
419
  "get_meeting_metadata",
420
  "list_recent_meetings",
421
+ "upsert_text_to_pinecone",
422
+ "import_notion_to_pinecone",
423
+ "create_notion_page",
424
+ "get_current_time"
425
  ]
426
 
427
 
428
@tool
def create_notion_page(title: str, content: str) -> str:
    """
    Create a new page in Notion with a Title and Text Content.

    Use this tool for ANY request to "Write to Notion", "Save to Notion", "Create a page", "Draft an email in Notion".
    This tool handles all the formatting automatically.

    Args:
        title: The title of the new page.
        content: The text content of the page.

    Returns:
        Status message with link to the new page.
    """
    if not Config.NOTION_TOKEN:
        return "❌ Error: NOTION_TOKEN not set."

    headers = {
        "Authorization": f"Bearer {Config.NOTION_TOKEN}",
        "Notion-Version": "2022-06-28",
        "Content-Type": "application/json"
    }

    max_chars_per_block = 2000    # Notion limit for one rich-text content string
    max_blocks_per_request = 100  # Notion limit for one `children` array
    request_timeout = 30          # seconds; keeps a stalled call from hanging the agent

    # Split content into paragraph blocks of at most 2000 chars each.
    children_blocks = [
        {
            "object": "block",
            "type": "paragraph",
            "paragraph": {
                "rich_text": [
                    {
                        "type": "text",
                        "text": {"content": content[start:start + max_chars_per_block]},
                    }
                ]
            },
        }
        for start in range(0, len(content), max_chars_per_block)
    ]

    # Default parent page: Meetings Summary Test. A `NOTION_PARENT_PAGE_ID`
    # attribute on Config, if the project defines one, takes precedence.
    parent_page_id = (
        getattr(Config, "NOTION_PARENT_PAGE_ID", None)
        or "2bc5a424-5cbb-80ec-8aa9-c4fd989e67bc"
    )

    payload = {
        "parent": {"page_id": parent_page_id},
        "properties": {
            "title": [
                {
                    "text": {
                        "content": title
                    }
                }
            ]
        },
        # Only the first batch goes in the create call; the rest is appended
        # below so long content does not exceed the per-request block limit.
        "children": children_blocks[:max_blocks_per_request]
    }

    try:
        resp = requests.post(
            "https://api.notion.com/v1/pages",
            headers=headers,
            json=payload,
            timeout=request_timeout,
        )

        if resp.status_code != 200:
            return f"❌ Failed to create Notion page: {resp.status_code} - {resp.text}"

        data = resp.json()
        page_url = data.get('url', 'URL not found')
        page_id = data.get("id")

        # Append any remaining blocks to the new page in batches.
        remaining_blocks = children_blocks[max_blocks_per_request:]
        for start in range(0, len(remaining_blocks), max_blocks_per_request):
            batch = remaining_blocks[start:start + max_blocks_per_request]
            append_resp = requests.patch(
                f"https://api.notion.com/v1/blocks/{page_id}/children",
                headers=headers,
                json={"children": batch},
                timeout=request_timeout,
            )
            if append_resp.status_code != 200:
                return (
                    f"⚠️ Created Notion page '{title}' but could not append all content: "
                    f"{append_resp.status_code} - {append_resp.text}\nLink: {page_url}"
                )

        return f"✅ Successfully created Notion page: '{title}'.\nLink: {page_url}"

    except Exception as e:
        # Tool boundary: report the failure (including request timeouts) to the
        # agent as a message instead of raising.
        return f"❌ Error creating page: {str(e)}"
495
+
496
+
497
  @tool
498
  def upsert_text_to_pinecone(text: str, title: str, source: str = "Manual Entry", date: str = None) -> str:
499
  """
500
  Upsert any text content (e.g., Notion pages, manual notes) to Pinecone.
501
 
502
+ Automatically extracts metadata (summary, date, speakers) from the text.
503
+ Use this tool when retrieving full content from Notion or other sources.
504
+
505
+ CRITICAL: Do NOT use this tool if the user wants to "Save to Notion" or "Create a Page".
506
+ Use the Notion tools (`API-post-page`) for that. Use this ONLY for saving to Pinecone/Database.
507
 
508
  Args:
509
+ text: The FULL content to save (do not summarize!)
510
  title: Title of the document/meeting
511
  source: Source of the content (e.g., "Notion", "Manual Entry")
512
+ date: Optional date override (YYYY-MM-DD). If not provided, AI extracts it from text or uses today.
513
 
514
  Returns:
515
  Success message with the generated meeting_id
 
518
  return "Error: Pinecone service is not initialized."
519
 
520
  try:
521
+
522
+ # 1. Extract intelligent metadata
523
+ print(f"🔍 Extracting metadata for '{title}'...")
524
+ extractor = MetadataExtractor()
525
+ extracted = extractor.extract_metadata(text)
526
+
527
+ # 2. Resolve final metadata values
528
+ final_summary = extracted.get("summary") or f"Imported from {source}"
529
+
530
+ # Date logic: Argument > Extracted > Today
531
+ if date:
532
+ final_date = date
533
+ elif extracted.get("meeting_date"):
534
+ final_date = extracted.get("meeting_date")
535
+ else:
536
+ final_date = datetime.now().strftime("%Y-%m-%d")
537
+
538
+ speaker_mapping = extracted.get("speaker_mapping", {})
539
 
540
+ # 3. Apply speaker mapping to text (improves searchability)
541
+ # Replaces "SPEAKER_00" -> "Name" directly in the text content
542
+ processed_text = extractor.apply_speaker_mapping(text, speaker_mapping)
543
+
544
+ # 4. Generate ID and prepare metadata
545
+ meeting_id = f"doc_{uuid.uuid4().hex[:8]}"
546
+
547
  meeting_metadata = {
548
  "meeting_id": meeting_id,
549
+ "meeting_date": final_date,
550
  "date_transcribed": datetime.now().strftime("%Y-%m-%d"),
551
  "source": source,
552
+ "meeting_title": title,
553
+ "summary": final_summary,
554
  "source_file": f"{source.lower()}_upload",
555
  "transcription_model": "text_import",
556
+ "language": "en",
557
+ "speaker_mapping": speaker_mapping
558
  }
559
 
560
+ # 5. Process text into documents
561
  docs = process_transcript_to_documents(
562
+ transcript_text=processed_text,
563
+ speaker_data=None, # Uses fallback chunking
564
  meeting_id=meeting_id,
565
  meeting_metadata=meeting_metadata
566
  )
567
 
568
+ # 6. Upsert to Pinecone
569
  _pinecone_manager.upsert_documents(docs, namespace=Config.PINECONE_NAMESPACE)
570
 
571
+ return (f"✅ Successfully saved '{title}' to Pinecone (ID: {meeting_id})\n"
572
+ f" - Date: {final_date}\n"
573
+ f" - Extracted Speakers: {', '.join(speaker_mapping.values()) if speaker_mapping else 'None'}")
574
 
575
  except Exception as e:
576
  return f"❌ Error saving to Pinecone: {str(e)}"
src/ui/gradio_app.py CHANGED
@@ -315,9 +315,9 @@ The agent will acknowledge your upload and help you analyze the meeting.
315
 
316
  for i, meeting in enumerate(meetings, 1):
317
  meeting_id = meeting.get('meeting_id', 'N/A')
318
- title = meeting.get('title', 'Untitled')
319
  date = meeting.get('meeting_date', 'N/A')
320
- duration = meeting.get('duration', 'N/A')
321
  source_file = meeting.get('source_file', 'N/A')
322
 
323
  table_md += f"| {i} | `{meeting_id}` | {title} | {date} | {duration} | {source_file} |\n"
@@ -375,7 +375,7 @@ The agent will acknowledge your upload and help you analyze the meeting.
375
 
376
  # Custom Chatbot with responsive height
377
  custom_chatbot = gr.Chatbot(
378
- height=650,
379
  show_label=False
380
  )
381
 
@@ -384,6 +384,16 @@ The agent will acknowledge your upload and help you analyze the meeting.
384
  "Summarize the key decisions from the last meeting",
385
  "What are the action items assigned to me?",
386
  "List all meetings from October",
 
 
 
 
 
 
 
 
 
 
387
  "Find discussions about 'budget' and 'costs'",
388
  "What did John say about the deadline?",
389
  "Draft a follow-up email based on this meeting",
 
315
 
316
  for i, meeting in enumerate(meetings, 1):
317
  meeting_id = meeting.get('meeting_id', 'N/A')
318
+ title = meeting.get('meeting_title', 'Untitled')
319
  date = meeting.get('meeting_date', 'N/A')
320
+ duration = meeting.get('meeting_duration', 'N/A')
321
  source_file = meeting.get('source_file', 'N/A')
322
 
323
  table_md += f"| {i} | `{meeting_id}` | {title} | {date} | {duration} | {source_file} |\n"
 
375
 
376
  # Custom Chatbot with responsive height
377
  custom_chatbot = gr.Chatbot(
378
+ height="70vh",
379
  show_label=False
380
  )
381
 
 
384
  "Summarize the key decisions from the last meeting",
385
  "What are the action items assigned to me?",
386
  "List all meetings from October",
387
+ "Create a summary for sendout",
388
+
389
+ # Self-created example prompts (TODO: review)
390
+ "Show me the meeting minutes from last week",
391
+ "Who were attendants from last week's meeting?",
392
+ "When was the last meeting where budget was discussed?",
393
+ "Who is responsible for what in that meeting?",
394
+ "What tasks have been assigned to whom?",
395
+ "What should person abc do?",
396
+
397
  "Find discussions about 'budget' and 'costs'",
398
  "What did John say about the deadline?",
399
  "Draft a follow-up email based on this meeting",