clash-linux commited on
Commit
919151d
·
verified ·
1 Parent(s): 5f420cb

Upload 9 files

Browse files
Files changed (1) hide show
  1. main.py +915 -852
main.py CHANGED
@@ -1,853 +1,916 @@
1
- import asyncio
2
- import sys # For platform check
3
- import logging
4
- import os
5
- import uuid
6
- import json
7
- import time
8
- import random
9
- import asyncio
10
- import httpx # For getSpaces API call
11
- from contextlib import asynccontextmanager # For lifespan
12
- from playwright.async_api import async_playwright, Error as PlaywrightError
13
- from fastapi import FastAPI, Request, HTTPException, Depends, status
14
- from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
15
- from fastapi.responses import StreamingResponse
16
- from dotenv import load_dotenv
17
- import secrets # Added for secure comparison
18
- from datetime import datetime, timedelta, timezone # Explicit datetime imports
19
- from zoneinfo import ZoneInfo # For timezone handling
20
- from typing import List, Optional # Add List and Optional for typing
21
- from models import (
22
- ChatMessage, ChatCompletionRequest, NotionTranscriptConfigValue,
23
- NotionTranscriptContextValue, NotionTranscriptItem, NotionDebugOverrides,
24
- NotionRequestBody, ChoiceDelta, Choice, ChatCompletionChunk, Model, ModelList
25
- )
26
-
27
- # Load environment variables from .env file
28
- load_dotenv()
29
-
30
# --- Logging Configuration ---
# BUG FIX: basicConfig must run BEFORE the first logging call below.
# Previously the Windows-policy logging.info() fired against the default
# (unconfigured) root logger, which only emits WARNING and above, so the
# INFO record was silently dropped.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# --- Event Loop Policy for Windows ---
# Playwright spawns browser subprocesses; on Windows that requires the
# Proactor event loop (the default Selector loop cannot run subprocesses).
if sys.platform == "win32":
    asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())
    logging.info("Set WindowsProactorEventLoopPolicy for asyncio.")
38
-
39
- # --- Account Management Class ---
40
class NotionAccount:
    """A single Notion account: its session cookie plus the IDs fetched for it."""

    def __init__(self, cookie: str):
        self.cookie = cookie
        # Both IDs stay unknown until a getSpaces probe succeeds.
        self.space_id: Optional[str] = None
        self.user_id: Optional[str] = None
        # Only healthy accounts participate in the round-robin rotation.
        self.is_healthy: bool = False
        # Serializes ID fetches so one account is never probed twice at once.
        self.lock = asyncio.Lock()

    def __str__(self):
        return (
            f"Account(user_id={self.user_id}, "
            f"space_id={self.space_id}, healthy={self.is_healthy})"
        )
51
-
52
# --- Configuration ---
NOTION_API_URL = "https://www.notion.so/api/v3/runInferenceTranscript"
# Several Notion cookies arrive in one env var, '|'-separated: the cookies
# themselves contain ';' and ',', so a rarer delimiter is required.
NOTION_COOKIES_RAW = os.getenv("NOTION_COOKIES")

# --- Global State for Account Polling ---
ACCOUNTS: List[NotionAccount] = []  # populated at startup by lifespan()
CURRENT_ACCOUNT_INDEX = 0  # round-robin cursor into ACCOUNTS

if not NOTION_COOKIES_RAW:
    # Fatal misconfiguration: without cookies no request can ever succeed,
    # but we keep the process alive so the operator can see the log.
    logging.error("CRITICAL: NOTION_COOKIES environment variable not set. Application will not work.")

# --- Proxy Configuration ---
PROXY_URL = os.getenv("PROXY_URL", "")  # empty string means "no proxy"

# --- Authentication ---
EXPECTED_TOKEN = os.getenv("PROXY_AUTH_TOKEN", "default_token")  # Bearer token clients must present
security = HTTPBearer()
73
-
74
def authenticate(credentials: HTTPAuthorizationCredentials = Depends(security)):
    """Validate the client's Bearer token against EXPECTED_TOKEN.

    Raises:
        HTTPException: 401 when the token does not match.
    """
    # secrets.compare_digest runs in constant time, so the check does not
    # leak token length/content through timing side-channels.
    if not secrets.compare_digest(credentials.credentials, EXPECTED_TOKEN):
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Invalid authentication credentials",
            # WWW-Authenticate header removed for Bearer
        )
    # Signal success to the dependency system.
    return True
84
-
85
- # --- Notion Account Management ---
86
-
87
def get_next_account() -> NotionAccount:
    """Pick the next healthy Notion account in round-robin order.

    Synchronous on purpose: the bare index read/increment contains no await
    point, so it is atomic under asyncio's single-threaded execution model.

    Raises:
        HTTPException: 503 when no accounts are configured, or none are healthy.
    """
    global CURRENT_ACCOUNT_INDEX

    # Re-checked on every request in case all accounts failed at startup.
    if not ACCOUNTS:
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail="No Notion accounts are configured in the server."
        )

    if not any(acc.is_healthy for acc in ACCOUNTS):
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail="No healthy Notion accounts available to process the request."
        )

    # Walk at most one full lap of the rotation, always advancing the shared
    # cursor so consecutive requests spread across accounts — and so an
    # account that later recovers gets picked up again.
    for _ in range(len(ACCOUNTS)):
        candidate = ACCOUNTS[CURRENT_ACCOUNT_INDEX]
        CURRENT_ACCOUNT_INDEX = (CURRENT_ACCOUNT_INDEX + 1) % len(ACCOUNTS)
        if candidate.is_healthy:
            logging.info(f"Selected Notion account with User ID: {candidate.user_id} for request.")
            return candidate

    # Unreachable given the health check above; kept as a hard stop.
    raise HTTPException(
        status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
        detail="Critical error in account selection: No healthy accounts found in rotation."
    )
129
-
130
- # --- FastAPI App ---
131
-
132
async def fetch_and_set_notion_ids(account: NotionAccount):
    """Fetch the space ID and user ID for *account* via Notion's getSpaces API.

    Drives a headless Chromium through Playwright so the request carries the
    account's cookies like a real browser session.  On complete success the
    account is marked healthy; any failure leaves ``is_healthy`` False.

    Args:
        account: The NotionAccount whose cookie is used and whose ``user_id``,
            ``space_id`` and ``is_healthy`` fields are updated in place.
    """
    async with account.lock:  # Ensure only one fetch operation per account at a time
        if account.is_healthy:  # Don't re-fetch if already healthy
            logging.info(f"Account for user {account.user_id} is already healthy, skipping fetch.")
            return

        if not account.cookie:
            logging.error("Cannot fetch Notion IDs: Account cookie is not set.")
            account.is_healthy = False
            return

        get_spaces_url = "https://www.notion.so/api/v3/getSpaces"
        # Headers for the in-page JS fetch; the cookie itself is installed on
        # the browser context, never sent as an explicit header.
        js_fetch_headers = {
            'Content-Type': 'application/json',
            'accept': '*/*',
            'accept-language': 'en-US,en;q=0.9',
            'notion-audit-log-platform': 'web',
            'notion-client-version': '23.13.0.3686',  # Match cURL example or use a recent one
            'origin': 'https://www.notion.so',
            'priority': 'u=1, i',
            'referer': 'https://www.notion.so/',  # Simplified
            'sec-ch-ua': '"Chromium";v="136", "Google Chrome";v="136", "Not.A/Brand";v="99"',
            'sec-ch-ua-mobile': '?0',
            'sec-ch-ua-platform': '"Windows"',
            'sec-fetch-dest': 'empty',
            'sec-fetch-mode': 'cors',
            'sec-fetch-site': 'same-origin',
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36'
        }

        browser = None
        context = None
        page = None

        try:
            logging.info("Attempting to fetch Notion user/space IDs for an account...")
            async with async_playwright() as p:
                # Configure browser launch with proxy if PROXY_URL is set
                launch_args = {
                    'headless': True,
                    'args': ['--no-sandbox', '--disable-setuid-sandbox']
                }
                if PROXY_URL:
                    launch_args['proxy'] = {'server': PROXY_URL}
                    logging.info(f"Using proxy for browser launch: {PROXY_URL}")

                try:
                    browser = await p.chromium.launch(**launch_args)
                except PlaywrightError as e:
                    # Surface proxy misconfiguration distinctly from other launch failures.
                    if PROXY_URL and "proxy" in str(e).lower():
                        logging.error(f"Invalid proxy URL or proxy connection failed: {PROXY_URL}. Error: {e}")
                        raise PlaywrightError(f"Proxy configuration error: {e}")
                    else:
                        raise

                context = await browser.new_context(user_agent=js_fetch_headers['user-agent'])

                # Split the raw "name=value; name=value" cookie string into
                # Playwright cookie records scoped to .notion.so.
                cookies_to_add = []
                for pair in account.cookie.split('; '):
                    if '=' in pair:
                        name, value = pair.split('=', 1)
                        cookies_to_add.append({
                            'name': name.strip(), 'value': value.strip(),
                            'domain': '.notion.so', 'path': '/',
                            'secure': True, 'httpOnly': True, 'sameSite': 'Lax'
                        })
                if cookies_to_add:
                    await context.add_cookies(cookies_to_add)
                else:
                    logging.error("No valid cookies parsed from account's cookie for getSpaces.")
                    account.is_healthy = False
                    return

                page = await context.new_page()
                logging.info("DEBUG: getSpaces - Navigating to notion.so to warm up context...")
                try:
                    # Warm-up navigation establishes origin/session state; the
                    # API call can still work if it fails, so only warn.
                    await page.goto("https://www.notion.so/", wait_until="domcontentloaded", timeout=15000)  # 15s timeout
                    logging.info("DEBUG: getSpaces - Warm-up navigation to notion.so complete.")
                except PlaywrightError as nav_err:
                    logging.warning(f"DEBUG: getSpaces - Warm-up navigation to notion.so failed: {nav_err}. Proceeding with fetch anyway.")

                # JavaScript to perform the fetch for getSpaces from inside the page,
                # so the context cookies are attached automatically.
                javascript_code_get_spaces = """
                async (args) => {
                    const { apiUrl, headers, body } = args;
                    try {
                        const response = await fetch(apiUrl, {
                            method: 'POST',
                            headers: headers,
                            body: JSON.stringify(body) // Ensure body is stringified
                        });
                        if (!response.ok) {
                            console.error('getSpaces Fetch error:', response.status, await response.text());
                            return { success: false, error: `HTTP ${response.status}` };
                        }
                        const data = await response.json();
                        return { success: true, data: data };
                    } catch (error) {
                        console.error('getSpaces JS Exception:', error);
                        return { success: false, error: error.toString() };
                    }
                }
                """
                js_args = {"apiUrl": get_spaces_url, "headers": js_fetch_headers, "body": {}}  # Empty JSON body for getSpaces

                logging.info("Executing Playwright page.evaluate for getSpaces...")
                result = await page.evaluate(javascript_code_get_spaces, js_args)

                if not result or not result.get('success'):
                    # BUG FIX: page.evaluate can return None (JS `undefined`);
                    # calling result.get(...) on None raised AttributeError and
                    # masked the real failure behind the generic handler below.
                    error_detail = (result or {}).get('error', 'Unknown error during getSpaces JS execution')
                    logging.error(f"Playwright getSpaces call failed for account: {error_detail}")
                    account.is_healthy = False
                    return

                data = result.get('data')
                if not data:
                    logging.error("No data returned from successful getSpaces call for account.")
                    account.is_healthy = False
                    return

                # getSpaces keys its response by user id; the first key is this account's.
                user_id_key = next(iter(data), None)
                if not user_id_key:
                    logging.error("Could not extract user ID from getSpaces response for account.")
                    account.is_healthy = False
                    return
                account.user_id = user_id_key
                logging.info(f"Fetched Notion User ID: {account.user_id}")

                # Walk the nested record map down to the first space pointer.
                user_root = data.get(user_id_key, {}).get("user_root", {}).get(user_id_key, {})
                space_view_pointers = user_root.get("value", {}).get("value", {}).get("space_view_pointers", [])
                if space_view_pointers and isinstance(space_view_pointers, list) and len(space_view_pointers) > 0:
                    account.space_id = space_view_pointers[0].get("spaceId")
                    if account.space_id:
                        logging.info(f"Fetched Notion Space ID: {account.space_id} for User ID: {account.user_id}")
                        account.is_healthy = True  # Healthy only once BOTH ids resolved
                    else:
                        logging.error(f"Could not extract spaceId for User ID: {account.user_id}")
                        account.is_healthy = False
                else:
                    logging.error(f"Could not find space_view_pointers or spaceId for User ID: {account.user_id}")
                    account.is_healthy = False

        except PlaywrightError as e:
            logging.error(f"Playwright error during fetch_and_set_notion_ids for account: {e}")
            account.is_healthy = False
        except Exception as e:
            logging.error(f"General error during fetch_and_set_notion_ids for account: {e}")
            account.is_healthy = False
        finally:
            # Closing the browser tears down its contexts/pages too; guard
            # against double-close / never-launched states.
            if browser and browser.is_connected():
                try:
                    logging.info("DEBUG: fetch_and_set_notion_ids - Closing browser...")
                    await browser.close()
                    logging.info("DEBUG: fetch_and_set_notion_ids - Browser closed.")
                except PlaywrightError as e:
                    logging.warning(f"DEBUG: fetch_and_set_notion_ids - Ignoring error during browser close: {e}")
                except Exception as e:  # Catch potential unexpected errors during close
                    logging.warning(f"DEBUG: fetch_and_set_notion_ids - Ignoring unexpected error during browser close: {e}")
            else:
                logging.info("DEBUG: fetch_and_set_notion_ids - Browser already closed or not initialized.")

        logging.info(f"fetch_and_set_notion_ids completed for account. Final status: {account}")
303
-
304
-
305
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Startup/shutdown hook: load accounts from the environment, probe each
    one once for its IDs, and report how many came up healthy."""
    logging.info("Application startup: Initializing Notion accounts...")

    if NOTION_COOKIES_RAW:
        # One account per '|'-separated cookie string; blanks are dropped.
        ACCOUNTS.extend(
            NotionAccount(cookie=raw.strip())
            for raw in NOTION_COOKIES_RAW.split('|')
            if raw.strip()
        )
        logging.info(f"Loaded {len(ACCOUNTS)} Notion account(s) from environment variable.")

    if not ACCOUNTS:
        logging.error("CRITICAL: No Notion accounts loaded. The application will not be able to process requests.")
    else:
        logging.info("Fetching IDs for all loaded Notion accounts...")
        # Probe every account concurrently; each marks itself healthy/unhealthy.
        await asyncio.gather(*(fetch_and_set_notion_ids(acc) for acc in ACCOUNTS))

        healthy_count = sum(1 for acc in ACCOUNTS if acc.is_healthy)
        logging.info(f"Initialization complete. {healthy_count} of {len(ACCOUNTS)} accounts are healthy.")

        if healthy_count == 0:
            logging.error("CRITICAL: No healthy Notion accounts available after initialization.")

    yield
    # Nothing to tear down; accounts hold no open resources between requests.
    logging.info("Application shutdown.")

app = FastAPI(lifespan=lifespan)
336
-
337
- # --- Helper Functions ---
338
-
339
def _format_notion_timestamp(dt: datetime) -> str:
    """Render an aware datetime exactly as Notion expects:
    YYYY-MM-DDTHH:MM:SS.fff-HH:MM (3 ms digits, colon inside the offset)."""
    dt_str = dt.strftime("%Y-%m-%dT%H:%M:%S")
    ms = f"{dt.microsecond // 1000:03d}"  # always exactly 3 millisecond digits
    tz_str = dt.strftime("%z")  # +HHMM or -HHMM
    return f"{dt_str}.{ms}{tz_str[:-2]}:{tz_str[-2:]}"  # insert colon into offset


def build_notion_request(request_data: ChatCompletionRequest, account: NotionAccount) -> NotionRequestBody:
    """Transform OpenAI-style chat messages into a Notion transcript request.

    The repeated hand-rolled timestamp formatting has been factored into
    ``_format_notion_timestamp`` (it appeared verbatim in two places).

    Args:
        request_data: OpenAI-style request (messages, notion_model, ...).
        account: Healthy NotionAccount supplying user_id / space_id.

    Returns:
        A NotionRequestBody ready to POST to runInferenceTranscript.
    """
    # --- Timestamp and User ID Logic ---
    user_id = account.user_id
    # Non-assistant messages receive synthetic createdAt values so the
    # transcript reads like a naturally-paced conversation.
    non_assistant_messages = [msg for msg in request_data.messages if msg.role != "assistant"]
    num_non_assistant_messages = len(non_assistant_messages)
    message_timestamps = {}  # message id -> aware datetime

    if num_non_assistant_messages > 0:
        # Notion expects Pacific-local timestamps.
        pacific_tz = ZoneInfo("America/Los_Angeles")
        now_pacific = datetime.now(timezone.utc).astimezone(pacific_tz)

        # The newest message is "now"; earlier ones step back 3-20 random
        # minutes each, walking from second-to-last toward the first.
        last_msg_id = non_assistant_messages[-1].id
        message_timestamps[last_msg_id] = now_pacific

        current_timestamp = now_pacific
        for i in range(num_non_assistant_messages - 2, -1, -1):
            current_timestamp -= timedelta(minutes=random.randint(3, 20))
            message_timestamps[non_assistant_messages[i].id] = current_timestamp

    # --- Build Transcript ---
    pacific_tz = ZoneInfo("America/Los_Angeles")
    now_pacific = datetime.now(timezone.utc).astimezone(pacific_tz)
    current_datetime_iso = _format_notion_timestamp(now_pacific)

    # Randomized display names make each request look like a distinct workspace.
    random_words = ["Project", "Workspace", "Team", "Studio", "Lab", "Hub", "Zone", "Space"]
    user_name = f"User{random.randint(100, 999)}"
    space_name = f"{random.choice(random_words)} {random.randint(1, 99)}"

    transcript = [
        NotionTranscriptItem(
            type="config",
            value=NotionTranscriptConfigValue(model=request_data.notion_model)
        ),
        NotionTranscriptItem(
            type="context",
            value=NotionTranscriptContextValue(
                userId=user_id or "",  # user_id from the selected account
                spaceId=account.space_id,  # space_id from the selected account
                surface="home_module",
                timezone="America/Los_Angeles",
                userName=user_name,
                spaceName=space_name,
                spaceViewId=str(uuid.uuid4()),  # Random UUID for spaceViewId
                currentDatetime=current_datetime_iso
            )
        ),
        NotionTranscriptItem(
            type="agent-integration"
            # No value field needed for agent-integration
        )
    ]

    for message in request_data.messages:
        if message.role == "assistant":
            # Assistant messages get type="markdown-chat" and a traceId.
            transcript.append(NotionTranscriptItem(
                type="markdown-chat",
                value=message.content,
                traceId=str(uuid.uuid4())  # unique traceId per assistant message
            ))
        else:  # Treat all other roles (user, system, etc.) as "user" type
            created_at_dt = message_timestamps.get(message.id)
            created_at_iso = _format_notion_timestamp(created_at_dt) if created_at_dt else None

            content = message.content
            # Multimodal content arrives as a list of parts; keep only the text.
            if isinstance(content, list):
                text_content = ""
                for part in content:
                    if isinstance(part, dict) and part.get("type") == "text":
                        text_part = part.get("text")
                        if isinstance(text_part, str):
                            text_content += text_part  # Concatenate text parts
                content = text_content if text_content else ""
            elif not isinstance(content, str):
                content = ""  # Default to empty string if not list or string

            # Notion expects user values wrapped as [[content_string]].
            notion_value = [[content]] if content else [[""]]

            transcript.append(NotionTranscriptItem(
                type="user",
                value=notion_value,
                userId=user_id,
                createdAt=created_at_iso
                # No traceId for user/system messages
            ))

    return NotionRequestBody(
        spaceId=account.space_id,  # From the selected account
        transcript=transcript,
        createThread=True,  # Always create a new thread
        traceId=str(uuid.uuid4()),  # New traceId for each request
        debugOverrides=NotionDebugOverrides(
            cachedInferences={},
            annotationInferences={},
            emitInferences=False
        ),
        generateTitle=False,
        saveAllThreadOperations=False
    )
465
-
466
- # --- Background Playwright Task ---
467
async def _run_playwright_fetch(
    chunk_queue: asyncio.Queue,
    notion_request_body: NotionRequestBody,
    headers_template: dict,
    notion_api_url: str,
    account: NotionAccount  # Pass the whole account object
):
    """Run the Notion inference fetch inside headless Chromium, streaming
    ND-JSON chunks into *chunk_queue*.

    A ``None`` sentinel is always enqueued when the stream ends (success or
    failure) so the consumer's loop can terminate.  On failure the exception
    is re-raised so the surrounding Task records it for the main generator.
    """
    browser = None
    context = None
    page = None

    # Per-request headers: start from the template and stamp in this
    # account's identifiers.
    current_headers = headers_template.copy()
    current_headers['x-notion-space-id'] = account.space_id  # Use fetched space_id
    if account.user_id:  # Use fetched user_id for active user header
        current_headers['x-notion-active-user-header'] = account.user_id

    # The cookie is installed on the browser context, never sent as a header.

    async def handle_chunk(chunk_str: str):
        # Called from the page's JS for every streamed chunk.
        await chunk_queue.put(chunk_str)

    async def handle_stream_end():
        # Called from the page's JS when the stream finishes or errors.
        await chunk_queue.put(None)

    try:
        logging.info("DEBUG: Background task starting Playwright.")
        async with async_playwright() as p:
            # Configure browser launch with proxy if PROXY_URL is set
            launch_args = {
                'headless': True,
                'args': ['--no-sandbox', '--disable-setuid-sandbox']
            }
            if PROXY_URL:
                launch_args['proxy'] = {'server': PROXY_URL}
                logging.info(f"DEBUG: Background task using proxy: {PROXY_URL}")

            try:
                browser = await p.chromium.launch(**launch_args)
            except PlaywrightError as e:
                if PROXY_URL and "proxy" in str(e).lower():
                    logging.error(f"Invalid proxy URL or proxy connection failed: {PROXY_URL}. Error: {e}")
                    await handle_stream_end()  # Signal end of stream
                    raise PlaywrightError(f"Proxy configuration error: {e}")
                else:
                    raise

            logging.info("DEBUG: Background task browser launched.")
            # Keep the context's UA consistent with the fetch headers.
            user_agent_for_context = current_headers.get('user-agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36')
            context = await browser.new_context(user_agent=user_agent_for_context)
            logging.info("DEBUG: Background task context created.")

            if account.cookie:  # Use passed account cookie
                cookies_to_add = []
                for pair in account.cookie.split('; '):
                    if '=' in pair:
                        name, value = pair.split('=', 1)
                        cookies_to_add.append({
                            'name': name.strip(), 'value': value.strip(),
                            'domain': '.notion.so', 'path': '/',
                            'secure': True, 'httpOnly': True, 'sameSite': 'Lax'
                        })
                if cookies_to_add:
                    await context.add_cookies(cookies_to_add)
                    logging.info("DEBUG: Background task cookies added.")
                else:
                    logging.warning("Warning: No valid cookies found in account cookie for background task.")
            else:
                logging.error("Error: Account cookie is empty for background task.")
                raise ValueError("Server configuration error: Notion cookie not set for background task.")

            page = await context.new_page()
            logging.info("DEBUG: Background task page created.")
            await page.goto("https://www.notion.so/chat", wait_until="domcontentloaded")
            logging.info("DEBUG: Background task navigation complete.")

            # Bridge the page's streaming reader back into Python.
            await page.expose_function("sendChunkToPython", handle_chunk)
            await page.expose_function("signalStreamEnd", handle_stream_end)
            logging.info("DEBUG: Background task functions exposed.")

            request_body_json_str = notion_request_body.json()

            # Headers for the JS fetch (cookie handled by the context).
            js_fetch_headers = current_headers.copy()
            js_fetch_headers.pop('cookie', None)  # Should not be there if template is correct

            javascript_code = """
            async (args) => {
                const { apiUrl, headers, body } = args;
                try {
                    const response = await fetch(apiUrl, {
                        method: 'POST',
                        headers: headers,
                        body: body
                    });
                    if (!response.ok) {
                        const errorText = await response.text();
                        console.error('JS Fetch error:', response.status, errorText);
                        await window.signalStreamEnd();
                        return { success: false, status: response.status, error: errorText };
                    }
                    if (!response.body) {
                        console.error('JS Response body is null');
                        await window.signalStreamEnd();
                        return { success: false, error: 'Response body is null' };
                    }
                    const reader = response.body.getReader();
                    const decoder = new TextDecoder();
                    while (true) {
                        const { done, value } = await reader.read();
                        if (done) break;
                        await window.sendChunkToPython(decoder.decode(value, { stream: true }));
                    }
                    await window.signalStreamEnd();
                    return { success: true };
                } catch (error) {
                    console.error('JS Exception during fetch:', error);
                    await window.signalStreamEnd();
                    return { success: false, error: error.toString() };
                }
            }
            """
            js_args = {"apiUrl": notion_api_url, "headers": js_fetch_headers, "body": request_body_json_str}
            logging.info("DEBUG: Background task executing page.evaluate()...")
            js_result = await page.evaluate(javascript_code, js_args)
            logging.info(f"DEBUG: Background task page.evaluate() result: {js_result}")

            if not js_result or not js_result.get('success'):
                # BUG FIX: js_result may be None (JS `undefined`); calling
                # js_result.get(...) on None raised AttributeError and hid
                # the real failure.
                error_detail = (js_result or {}).get('error', 'Unknown JS execution error')
                logging.error(f"Error in background task JS execution: {error_detail}")
                # Stream end already signaled to the queue by the JS side.
                raise PlaywrightError(f"JS Fetch Error: {error_detail}")

    except Exception as e:
        logging.error(f"Error in _run_playwright_fetch background task: {e}")
        await chunk_queue.put(None)  # Ensure queue is terminated on error
        # BUG FIX: re-raise so the Task actually records the exception for
        # the main generator (the old code swallowed it, leaving
        # playwright_task.exception() always None despite the comment).
        raise
    finally:
        logging.info("DEBUG: Background task _run_playwright_fetch attempting to close browser.")
        if browser and browser.is_connected():
            try:
                await browser.close()
                logging.info("DEBUG: Background task browser closed.")
            except Exception as e:
                logging.warning(f"Ignoring error during background task browser close: {e}")
        else:
            logging.info("DEBUG: Background task browser already closed or not initialized.")
620
-
621
- # --- Main Generator Called by Endpoint ---
622
async def stream_notion_response(notion_request_body: NotionRequestBody, account: NotionAccount):
    """Async generator: run the Playwright fetch in a background task and
    translate Notion's ND-JSON chunks into OpenAI-style SSE events.

    Yields ``data: {...}\\n\\n`` chunks, then a stop chunk and ``[DONE]``.

    Raises:
        HTTPException: 500 when the background browser automation fails.
    """
    chunk_queue = asyncio.Queue()
    playwright_task = None

    # One id/timestamp pair shared by every chunk of this completion.
    chunk_id = f"chatcmpl-{uuid.uuid4()}"
    created_time = int(time.time())

    # Header template for the background task, which stamps in the
    # account-specific x-notion-space-id / active-user values itself.
    headers_template = {
        'accept': 'application/x-ndjson',
        'accept-language': 'en-US,en;q=0.9',
        'content-type': 'application/json',
        'notion-audit-log-platform': 'web',
        'notion-client-version': '23.13.0.3668',
        'origin': 'https://www.notion.so',
        'priority': 'u=1, i',
        'referer': 'https://www.notion.so/chat',
        'sec-ch-ua': '"Chromium";v="136", "Google Chrome";v="136", "Not.A/Brand";v="99"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'sec-fetch-dest': 'empty',
        'sec-fetch-mode': 'cors',
        'sec-fetch-site': 'same-origin',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36',
        # 'cookie' and 'x-notion-space-id' are handled by _run_playwright_fetch.
    }

    try:
        # Account health was already verified by get_next_account().
        logging.info("DEBUG: Main generator starting Playwright background task.")
        playwright_task = asyncio.create_task(
            _run_playwright_fetch(
                chunk_queue,
                notion_request_body,
                headers_template,
                NOTION_API_URL,  # Global constant
                account  # Pass the selected account
            )
        )

        accumulated_line = ""
        logging.info("DEBUG: Main generator starting queue processing loop.")
        while True:
            chunk = await chunk_queue.get()  # Wait for a chunk from the background task
            if chunk is None:  # Sentinel: stream finished (or failed)
                logging.info("DEBUG: Main generator received None sentinel from queue.")
                break

            # Chunks may split ND-JSON lines arbitrarily; reassemble on '\n'.
            accumulated_line += chunk
            while '\n' in accumulated_line:
                line, accumulated_line = accumulated_line.split('\n', 1)
                if not line.strip():
                    continue
                try:
                    data = json.loads(line)
                    if data.get("type") == "markdown-chat" and isinstance(data.get("value"), str):
                        content_chunk = data["value"]
                        if content_chunk:
                            sse_chunk = ChatCompletionChunk(
                                id=chunk_id, created=created_time,
                                choices=[Choice(delta=ChoiceDelta(content=content_chunk))]
                            )
                            logging.info(f"DEBUG: Main generator yielding chunk: {content_chunk[:50]}...")
                            yield f"data: {sse_chunk.json()}\n\n"
                    elif "recordMap" in data:
                        # Trailing document snapshot; not part of the answer text.
                        logging.info("DEBUG: Main generator detected recordMap, ignoring.")
                except json.JSONDecodeError:
                    logging.warning(f"Warning: Main generator could not decode JSON line: {line}")
                except Exception as e:
                    logging.error(f"Error processing line in main generator: {line} - {e}")

        # The stream may not end with '\n'; flush whatever is left.
        if accumulated_line.strip():
            try:
                data = json.loads(accumulated_line)
                if data.get("type") == "markdown-chat" and isinstance(data.get("value"), str):
                    content_chunk = data["value"]
                    if content_chunk:
                        sse_chunk = ChatCompletionChunk(
                            id=chunk_id, created=created_time,
                            choices=[Choice(delta=ChoiceDelta(content=content_chunk))]
                        )
                        logging.info(f"DEBUG: Main generator yielding final accumulated chunk: {content_chunk[:50]}...")
                        yield f"data: {sse_chunk.json()}\n\n"
            except json.JSONDecodeError:
                logging.warning(f"Warning: Main generator could not decode final JSON line: {accumulated_line}")
            except Exception as e:
                logging.error(f"Error processing final line in main generator: {accumulated_line} - {e}")

        # BUG FIX: the None sentinel is enqueued BEFORE the background task
        # finishes its own cleanup, so the old `task.done() and
        # task.exception()` check raced and could silently skip a failure.
        # Awaiting the task guarantees its exception (if any) is observed.
        try:
            await playwright_task
        except Exception as task_exception:
            logging.error(f"Playwright background task failed: {task_exception}")
            raise HTTPException(status_code=500, detail=f"Error during background browser automation: {task_exception}")

        logging.info("DEBUG: Main generator background task completed successfully.")
        final_chunk = ChatCompletionChunk(
            id=chunk_id, created=created_time,
            choices=[Choice(delta=ChoiceDelta(), finish_reason="stop")]
        )
        logging.info("DEBUG: Main generator yielding final stop chunk.")
        yield f"data: {final_chunk.json()}\n\n"
        logging.info("DEBUG: Main generator yielding [DONE] marker.")
        yield "data: [DONE]\n\n"

    except Exception as e:
        logging.error(f"Error in main stream_notion_response generator: {e}")
        if playwright_task and not playwright_task.done():
            logging.info("DEBUG: Main generator cancelling background task due to its own error.")
            playwright_task.cancel()
        raise
    finally:
        logging.info("DEBUG: Main generator finished.")
        # Client disconnects cancel this generator; make sure the browser
        # task does not outlive the request.
        if playwright_task and not playwright_task.done():
            logging.info("DEBUG: Main generator ensuring background task is cancelled on exit.")
            playwright_task.cancel()
            try:
                await playwright_task  # Allow cancellation to propagate
            except asyncio.CancelledError:
                logging.info("DEBUG: Background task successfully cancelled.")
            except Exception as e:
                logging.error(f"DEBUG: Error during background task cancellation/await: {e}")
750
-
751
-
752
- # --- API Endpoint ---
753
-
754
@app.get("/v1/models", response_model=ModelList)
async def list_models(authenticated: bool = Depends(authenticate)):
    """List the Notion-backed models this proxy exposes, in OpenAI /v1/models shape."""
    model_ids = ("openai-gpt-4.1", "anthropic-opus-4", "anthropic-sonnet-4")
    models = []
    for model_id in model_ids:
        # `created` is filled in by the Model default_factory.
        models.append(Model(id=model_id, owned_by="notion"))
    return ModelList(data=models)
769
-
770
@app.post("/v1/chat/completions")
async def chat_completions(request_data: ChatCompletionRequest, request: Request, authenticated: bool = Depends(authenticate)):
    """
    Endpoint to mimic OpenAI's chat completions, proxying to Notion.

    A healthy Notion account is selected per request (round-robin via
    get_next_account). For streaming clients the Notion SSE stream is
    forwarded as-is; for non-streaming clients the stream is drained
    internally and returned as a single chat.completion object.
    """
    account = get_next_account()  # Raises 503 when no healthy account exists.
    notion_request_body = build_notion_request(request_data, account)

    if request_data.stream:
        # Forward the Notion SSE stream directly to the client.
        return StreamingResponse(
            stream_notion_response(notion_request_body, account),
            media_type="text/event-stream"
        )

    # --- Non-Streaming Logic: collect all chunks from the async generator ---
    full_response_content = ""
    final_finish_reason = None
    chunk_id = f"chatcmpl-{uuid.uuid4()}"  # ID for the non-streamed response
    created_time = int(time.time())

    try:
        async for line in stream_notion_response(notion_request_body, account):
            # Only SSE "data:" lines carry payloads; skip the "[DONE]" marker.
            if not line.startswith("data: ") or "[DONE]" in line:
                continue
            data_json = line[len("data: "):].strip()
            if not data_json:
                continue
            try:
                chunk_data = json.loads(data_json)
            except json.JSONDecodeError:
                # Consistency fix: use logging like the rest of the file, not print().
                logging.warning(f"Warning: Could not decode JSON line in non-streaming mode: {line}")
                continue
            if chunk_data.get("choices"):
                choice = chunk_data["choices"][0]
                content = choice.get("delta", {}).get("content")
                if content:
                    full_response_content += content
                finish_reason = choice.get("finish_reason")
                if finish_reason:
                    final_finish_reason = finish_reason

        # Construct the final OpenAI-compatible non-streaming response.
        return {
            "id": chunk_id,
            "object": "chat.completion",
            "created": created_time,
            "model": request_data.model,  # Echo the model requested by the client.
            "choices": [
                {
                    "index": 0,
                    "message": {
                        "role": "assistant",
                        "content": full_response_content,
                    },
                    # Default to "stop" if the stream never set a finish reason.
                    "finish_reason": final_finish_reason or "stop",
                }
            ],
            "usage": {  # Note: Token usage is not available from Notion.
                "prompt_tokens": None,
                "completion_tokens": None,
                "total_tokens": None,
            },
        }
    except HTTPException:
        # Re-raise HTTP errors from the streaming layer unchanged
        # (bare `raise` preserves the original traceback, unlike `raise e`).
        raise
    except Exception as e:
        logging.error(f"Error during non-streaming processing: {e}")
        raise HTTPException(status_code=500, detail="Internal server error processing Notion response")
844
-
845
-
846
- # --- Uvicorn Runner ---
847
- # Allows running with `python main.py` for simple testing,
848
- # but `uvicorn main:app --reload` is recommended for development.
849
# --- Uvicorn Runner ---
# Convenience entry point for `python main.py`; prefer
# `uvicorn main:app --reload` during development.
if __name__ == "__main__":
    import uvicorn

    serve_host, serve_port = "127.0.0.1", 7860
    print(f"Starting server. Access at http://{serve_host}:{serve_port}")
    print("Ensure NOTION_COOKIES is set in your .env file or environment, separated by '|'.")
    uvicorn.run(app, host=serve_host, port=serve_port)
 
1
# Standard library
import asyncio
import json
import logging
import os
import random
import secrets  # Constant-time token comparison for auth
import sys  # For platform check
import time
import uuid
from contextlib import asynccontextmanager  # For lifespan
from datetime import datetime, timedelta, timezone  # Explicit datetime imports
from typing import List, Optional
from zoneinfo import ZoneInfo  # For timezone handling

# Third-party
import httpx  # For getSpaces API call
from dotenv import load_dotenv
from fastapi import FastAPI, Request, HTTPException, Depends, status
from fastapi.responses import StreamingResponse
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
from playwright.async_api import async_playwright, Error as PlaywrightError

# Local
from models import (
    ChatMessage, ChatCompletionRequest, NotionTranscriptConfigValue,
    NotionTranscriptContextValue, NotionTranscriptItem, NotionDebugOverrides,
    NotionRequestBody, ChoiceDelta, Choice, ChatCompletionChunk, Model, ModelList
)

# Load environment variables from .env file
load_dotenv()

# --- Event Loop Policy for Windows ---
# For Playwright compatibility, especially with subprocesses on Windows
if sys.platform == "win32":
    asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())
    logging.info("Set WindowsProactorEventLoopPolicy for asyncio.")

# --- Logging Configuration ---
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
38
+
39
+ # --- Account Management Class ---
40
class NotionAccount:
    """A single Notion login: its session cookie plus the IDs resolved from it."""

    def __init__(self, cookie: str):
        # Raw session cookie string ("name=value; name2=value2; ...").
        self.cookie = cookie
        # Identifiers are filled in later by fetch_and_set_notion_ids().
        self.space_id: Optional[str] = None
        self.user_id: Optional[str] = None
        self.user_email: Optional[str] = None  # Used to identify the account in logs
        # Only accounts whose IDs were fetched successfully serve requests.
        self.is_healthy: bool = False
        # Serializes concurrent ID-fetch attempts for this account.
        self.lock = asyncio.Lock()

    def __str__(self):
        return (
            f"Account(user_email={self.user_email}, "
            f"user_id={self.user_id}, healthy={self.is_healthy})"
        )
52
+
53
# --- Configuration ---
# Notion's internal AI endpoint used to run a chat transcript.
NOTION_API_URL = "https://www.notion.so/api/v3/runInferenceTranscript"
# IMPORTANT: Load multiple Notion cookies securely from environment variables
# Use a unique separator like '|' because cookies can contain ';' and ','
NOTION_COOKIES_RAW = os.getenv("NOTION_COOKIES")

# --- Global State for Account Polling ---
# Populated once at startup (see lifespan); consumed by get_next_account().
ACCOUNTS: List[NotionAccount] = []
CURRENT_ACCOUNT_INDEX = 0  # Index for round-robin

if not NOTION_COOKIES_RAW:
    # This is a critical error, app cannot function without it.
    logging.error("CRITICAL: NOTION_COOKIES environment variable not set. Application will not work.")
    # In a real app, you might exit or raise a more severe startup error.

# --- Proxy Configuration ---
PROXY_URL = os.getenv("PROXY_URL", "")  # Empty string as default (no proxy)

# --- Authentication ---
EXPECTED_TOKEN = os.getenv("PROXY_AUTH_TOKEN", "default_token")  # Default token
security = HTTPBearer()  # FastAPI dependency extracting the Bearer token
74
+
75
def authenticate(credentials: HTTPAuthorizationCredentials = Depends(security)):
    """Validate the client's Bearer token against the configured proxy token.

    Uses a constant-time comparison (secrets.compare_digest) so the check does
    not leak token contents via timing. Returns True on success, raises 401
    otherwise.
    """
    if secrets.compare_digest(credentials.credentials, EXPECTED_TOKEN):
        return True
    raise HTTPException(
        status_code=status.HTTP_401_UNAUTHORIZED,
        detail="Invalid authentication credentials",
        # WWW-Authenticate header removed for Bearer
    )
85
+
86
+ # --- Notion Account Management ---
87
+
88
def get_next_account() -> NotionAccount:
    """
    Pick the next healthy Notion account in round-robin order.

    Plain (non-async) function: the index read/update is safe under the GIL in
    a single-threaded asyncio server, so no lock is taken.

    Raises HTTPException(503) when no accounts are configured or none are
    currently healthy.
    """
    global CURRENT_ACCOUNT_INDEX

    # Guard against an empty pool (e.g. all accounts failed during startup).
    if not ACCOUNTS:
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail="No Notion accounts are configured in the server."
        )

    if not any(acc.is_healthy for acc in ACCOUNTS):
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail="No healthy Notion accounts available to process the request."
        )

    # Walk at most one full cycle starting at the saved position; the health
    # pre-check above guarantees a candidate is found before the cycle ends.
    # Skipping unhealthy entries lets accounts rejoin rotation if they recover.
    for _ in range(len(ACCOUNTS)):
        candidate = ACCOUNTS[CURRENT_ACCOUNT_INDEX]
        CURRENT_ACCOUNT_INDEX = (CURRENT_ACCOUNT_INDEX + 1) % len(ACCOUNTS)
        if candidate.is_healthy:
            logging.info(f"Selected Notion account: {candidate.user_email} (User ID: {candidate.user_id}) for request.")
            return candidate

    # Theoretically unreachable given the pre-check above.
    raise HTTPException(
        status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
        detail="Critical error in account selection: No healthy accounts found in rotation."
    )
130
+
131
+ # --- FastAPI App ---
132
+
133
async def fetch_and_set_notion_ids(account: NotionAccount):
    """Fetch the space ID, user ID and email for a Notion account via Playwright.

    Launches a headless Chromium, injects the account's cookies, and calls
    Notion's private `getSpaces` and `getUserAnalyticsSettings` APIs from page
    JavaScript. Sets `account.user_id`, `account.space_id`,
    `account.user_email`, and marks `account.is_healthy = True` only when all
    three were resolved; any failure leaves the account unhealthy.
    """
    async with account.lock:  # Ensure only one fetch operation per account at a time
        if account.is_healthy:  # Don't re-fetch if already healthy
            logging.info(f"Account for user {account.user_id} is already healthy, skipping fetch.")
            return

        if not account.cookie:
            logging.error("Cannot fetch Notion IDs: Account cookie is not set.")
            logging.error(f"Failing cookie (empty): {account.cookie}")
            account.is_healthy = False
            return

        get_spaces_url = "https://www.notion.so/api/v3/getSpaces"
        # Headers for the JS fetch call (cookie is handled by context)
        js_fetch_headers = {
            'Content-Type': 'application/json',
            'accept': '*/*',
            'accept-language': 'en-US,en;q=0.9',
            'notion-audit-log-platform': 'web',
            'notion-client-version': '23.13.0.3686',  # Match cURL example or use a recent one
            'origin': 'https://www.notion.so',
            'priority': 'u=1, i',
            'referer': 'https://www.notion.so/',  # Simplified
            'sec-ch-ua': '"Chromium";v="136", "Google Chrome";v="136", "Not.A/Brand";v="99"',
            'sec-ch-ua-mobile': '?0',
            'sec-ch-ua-platform': '"Windows"',
            'sec-fetch-dest': 'empty',
            'sec-fetch-mode': 'cors',
            'sec-fetch-site': 'same-origin',
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36'
        }

        browser = None
        context = None
        page = None

        try:
            logging.info(f"Attempting to fetch Notion user/space IDs for an account...")
            async with async_playwright() as p:
                # Configure browser launch with proxy if PROXY_URL is set
                launch_args = {
                    'headless': True,
                    'args': ['--no-sandbox', '--disable-setuid-sandbox']
                }
                if PROXY_URL:
                    launch_args['proxy'] = {'server': PROXY_URL}
                    logging.info(f"Using proxy for browser launch: {PROXY_URL}")

                try:
                    browser = await p.chromium.launch(**launch_args)
                except PlaywrightError as e:
                    # Surface proxy misconfiguration distinctly from other launch errors.
                    if PROXY_URL and "proxy" in str(e).lower():
                        logging.error(f"Invalid proxy URL or proxy connection failed: {PROXY_URL}. Error: {e}")
                        raise PlaywrightError(f"Proxy configuration error: {e}")
                    else:
                        raise

                context = await browser.new_context(user_agent=js_fetch_headers['user-agent'])

                # Add cookies from the account's cookie string
                # (the "name=value; name=value" format of a Cookie header).
                cookies_to_add = []
                cookie_pairs = account.cookie.split('; ')
                for pair in cookie_pairs:
                    if '=' in pair:
                        name, value = pair.split('=', 1)
                        cookies_to_add.append({
                            'name': name.strip(), 'value': value.strip(),
                            'domain': '.notion.so', 'path': '/',
                            'secure': True, 'httpOnly': True, 'sameSite': 'Lax'
                        })
                if cookies_to_add:
                    await context.add_cookies(cookies_to_add)
                else:
                    logging.error("No valid cookies parsed from account's cookie for getSpaces.")
                    logging.error(f"Failing cookie: {account.cookie}")
                    account.is_healthy = False
                    return

                page = await context.new_page()
                logging.info("DEBUG: getSpaces - Navigating to notion.so to warm up context...")
                try:
                    await page.goto("https://www.notion.so/", wait_until="domcontentloaded", timeout=15000)  # 15s timeout
                    logging.info("DEBUG: getSpaces - Warm-up navigation to notion.so complete.")
                except PlaywrightError as nav_err:
                    # Warm-up is best-effort; the fetch below may still succeed.
                    logging.warning(f"DEBUG: getSpaces - Warm-up navigation to notion.so failed: {nav_err}. Proceeding with fetch anyway.")

                # JavaScript to perform the fetch for getSpaces
                javascript_code_get_spaces = """
                async (args) => {
                    const { apiUrl, headers, body } = args;
                    try {
                        const response = await fetch(apiUrl, {
                            method: 'POST',
                            headers: headers,
                            body: JSON.stringify(body) // Ensure body is stringified
                        });
                        if (!response.ok) {
                            console.error('getSpaces Fetch error:', response.status, await response.text());
                            return { success: false, error: `HTTP ${response.status}` };
                        }
                        const data = await response.json();
                        return { success: true, data: data };
                    } catch (error) {
                        console.error('getSpaces JS Exception:', error);
                        return { success: false, error: error.toString() };
                    }
                }
                """
                js_args = {"apiUrl": get_spaces_url, "headers": js_fetch_headers, "body": {}}  # Empty JSON body for getSpaces

                logging.info("Executing Playwright page.evaluate for getSpaces...")
                result = await page.evaluate(javascript_code_get_spaces, js_args)

                if not result or not result.get('success'):
                    error_detail = result.get('error', 'Unknown error during getSpaces JS execution')
                    logging.error(f"Playwright getSpaces call failed for account: {error_detail}")
                    logging.error(f"Failing cookie: {account.cookie}")
                    account.is_healthy = False
                    return

                data = result.get('data')
                if not data:
                    logging.error("No data returned from successful getSpaces call for account.")
                    logging.error(f"Failing cookie: {account.cookie}")
                    account.is_healthy = False
                    return

                # Extract user ID
                # NOTE(review): assumes the first top-level key of the getSpaces
                # response is the current user's ID — confirm against the API.
                user_id_key = next(iter(data), None)
                if not user_id_key:
                    logging.error("Could not extract user ID from getSpaces response for account.")
                    logging.error(f"Failing cookie: {account.cookie}")
                    account.is_healthy = False
                    return
                account.user_id = user_id_key
                logging.info(f"Fetched Notion User ID: {account.user_id}")

                # Extract space ID from the nested user_root record.
                user_root = data.get(user_id_key, {}).get("user_root", {}).get(user_id_key, {})
                space_view_pointers = user_root.get("value", {}).get("value", {}).get("space_view_pointers", [])
                if space_view_pointers and isinstance(space_view_pointers, list) and len(space_view_pointers) > 0:
                    # Uses the first space pointer only.
                    account.space_id = space_view_pointers[0].get("spaceId")

                    # If spaceId is found, proceed to get user email
                    if account.space_id:
                        get_user_analytics_url = "https://www.notion.so/api/v3/getUserAnalyticsSettings"
                        analytics_headers = {
                            'Content-Type': 'application/json',
                            'accept': '*/*',
                            'notion-audit-log-platform': 'web',
                            'notion-client-version': '23.13.0.3833',  # From user's example
                            'x-notion-active-user-header': account.user_id,
                            'x-notion-space-id': account.space_id,
                            'user-agent': js_fetch_headers['user-agent']
                        }

                        # NOTE(review): in this JS, response.text() is awaited twice
                        # on the same response in the !ok branch; the second read of
                        # an already-consumed body may fail — confirm intended.
                        javascript_code_get_analytics = """
                        async (args) => {
                            const { apiUrl, headers, body } = args;
                            try {
                                const response = await fetch(apiUrl, {
                                    method: 'POST',
                                    headers: headers,
                                    body: JSON.stringify(body)
                                });
                                if (!response.ok) {
                                    console.error('getUserAnalyticsSettings Fetch error:', response.status, await response.text());
                                    return { success: false, error: `HTTP ${response.status}: ${await response.text()}` };
                                }
                                const data = await response.json();
                                return { success: true, data: data };
                            } catch (error) {
                                console.error('getUserAnalyticsSettings JS Exception:', error);
                                return { success: false, error: error.toString() };
                            }
                        }
                        """
                        analytics_args = {"apiUrl": get_user_analytics_url, "headers": analytics_headers, "body": {}}

                        logging.info(f"Executing Playwright to get email for User ID: {account.user_id}...")
                        analytics_result = await page.evaluate(javascript_code_get_analytics, analytics_args)

                        if analytics_result and analytics_result.get('success'):
                            analytics_data = analytics_result.get('data')
                            account.user_email = analytics_data.get('user_email')
                            if account.user_email:
                                logging.info(f"Fetched User Email: {account.user_email} for User ID: {account.user_id}")
                                logging.info(f"Fetched Notion Space ID: {account.space_id} for User ID: {account.user_id}")
                                account.is_healthy = True  # Mark as healthy only on complete success
                            else:
                                logging.error(f"Could not extract user_email for User ID: {account.user_id}")
                                logging.error(f"Failing cookie: {account.cookie}")
                                account.is_healthy = False
                        else:
                            error_detail = analytics_result.get('error', 'Unknown error') if analytics_result else 'No result from JS'
                            logging.error(f"getUserAnalyticsSettings call failed for User ID {account.user_id}: {error_detail}")
                            logging.error(f"Failing cookie: {account.cookie}")
                            account.is_healthy = False
                    else:
                        logging.error(f"Could not extract spaceId for User ID: {account.user_id}")
                        logging.error(f"Failing cookie: {account.cookie}")
                        account.is_healthy = False
                else:
                    logging.error(f"Could not find space_view_pointers or spaceId for User ID: {account.user_id}")
                    logging.error(f"Failing cookie: {account.cookie}")
                    account.is_healthy = False

        except PlaywrightError as e:
            logging.error(f"Playwright error during fetch_and_set_notion_ids for account: {e}")
            logging.error(f"Failing cookie: {account.cookie}")
            account.is_healthy = False
        except Exception as e:
            logging.error(f"General error during fetch_and_set_notion_ids for account: {e}")
            logging.error(f"Failing cookie: {account.cookie}")
            account.is_healthy = False
        finally:
            # Prioritize closing the browser, which should handle its contexts/pages.
            # Add checks to prevent errors if already closed.
            if browser and browser.is_connected():
                try:
                    logging.info("DEBUG: fetch_and_set_notion_ids - Closing browser...")
                    await browser.close()
                    logging.info("DEBUG: fetch_and_set_notion_ids - Browser closed.")
                except PlaywrightError as e:
                    logging.warning(f"DEBUG: fetch_and_set_notion_ids - Ignoring error during browser close: {e}")
                except Exception as e:  # Catch potential unexpected errors during close
                    logging.warning(f"DEBUG: fetch_and_set_notion_ids - Ignoring unexpected error during browser close: {e}")
            else:
                # If browser is None or not connected, page/context are likely also invalid or already handled.
                logging.info("DEBUG: fetch_and_set_notion_ids - Browser already closed or not initialized.")

        logging.info(f"fetch_and_set_notion_ids completed for account. Final status: {account}")
366
+
367
+
368
@asynccontextmanager
async def lifespan(app: FastAPI):
    """FastAPI lifespan hook: load cookies into ACCOUNTS and probe each account."""
    # --- Startup ---
    logging.info("Application startup: Initializing Notion accounts...")

    if NOTION_COOKIES_RAW:
        # Cookies are separated by '|' (cookies themselves contain ';' and ',').
        for raw_cookie in NOTION_COOKIES_RAW.split('|'):
            raw_cookie = raw_cookie.strip()
            if raw_cookie:
                ACCOUNTS.append(NotionAccount(cookie=raw_cookie))
        logging.info(f"Loaded {len(ACCOUNTS)} Notion account(s) from environment variable.")

    if not ACCOUNTS:
        logging.error("CRITICAL: No Notion accounts loaded. The application will not be able to process requests.")
    else:
        # Resolve user/space IDs for every account concurrently.
        logging.info("Fetching IDs for all loaded Notion accounts...")
        await asyncio.gather(*(fetch_and_set_notion_ids(acc) for acc in ACCOUNTS))

        healthy_count = sum(1 for acc in ACCOUNTS if acc.is_healthy)
        logging.info(f"Initialization complete. {healthy_count} of {len(ACCOUNTS)} accounts are healthy.")
        if healthy_count == 0:
            logging.error("CRITICAL: No healthy Notion accounts available after initialization.")

    yield  # Application serves requests here.

    # --- Shutdown ---
    logging.info("Application shutdown.")

app = FastAPI(lifespan=lifespan)
399
+
400
+ # --- Helper Functions ---
401
+
402
def _format_notion_timestamp(dt: datetime) -> str:
    """Format an aware datetime exactly as Notion expects: YYYY-MM-DDTHH:MM:SS.fff-HH:MM.

    Extracted helper: this formatting was previously duplicated inline.
    """
    base = dt.strftime("%Y-%m-%dT%H:%M:%S")
    millis = f"{dt.microsecond // 1000:03d}"  # Always 3 digits of milliseconds
    tz_raw = dt.strftime("%z")  # "+HHMM" / "-HHMM"
    return f"{base}.{millis}{tz_raw[:-2]}:{tz_raw[-2:]}"  # Insert colon into offset


def build_notion_request(request_data: ChatCompletionRequest, account: NotionAccount) -> NotionRequestBody:
    """Transform OpenAI-style messages into a Notion transcript request.

    Uses the given account's user/space IDs, fabricates plausible Pacific-time
    "createdAt" timestamps for non-assistant messages (the last one is "now",
    earlier ones are spaced a random 3-20 minutes apart going backwards), and
    always creates a new Notion thread.
    """
    user_id = account.user_id

    # --- Timestamp assignment for non-assistant messages ---
    non_assistant_messages = [msg for msg in request_data.messages if msg.role != "assistant"]
    message_timestamps = {}  # keyed by message id
    if non_assistant_messages:
        pacific_tz = ZoneInfo("America/Los_Angeles")
        now_pacific = datetime.now(timezone.utc).astimezone(pacific_tz)
        # Last non-assistant message is "now"; walk backwards for the rest.
        message_timestamps[non_assistant_messages[-1].id] = now_pacific
        current_timestamp = now_pacific
        for msg in reversed(non_assistant_messages[:-1]):
            current_timestamp -= timedelta(minutes=random.randint(3, 20))
            message_timestamps[msg.id] = current_timestamp

    # --- Build Transcript ---
    pacific_tz = ZoneInfo("America/Los_Angeles")
    now_pacific = datetime.now(timezone.utc).astimezone(pacific_tz)
    current_datetime_iso = _format_notion_timestamp(now_pacific)

    # Randomized user/space names so requests don't look identical.
    random_words = ["Project", "Workspace", "Team", "Studio", "Lab", "Hub", "Zone", "Space"]
    user_name = f"User{random.randint(100, 999)}"
    space_name = f"{random.choice(random_words)} {random.randint(1, 99)}"

    transcript = [
        NotionTranscriptItem(
            type="config",
            value=NotionTranscriptConfigValue(model=request_data.notion_model)
        ),
        NotionTranscriptItem(
            type="context",
            value=NotionTranscriptContextValue(
                userId=user_id or "",  # User ID from the selected account
                spaceId=account.space_id,  # Space ID from the selected account
                surface="home_module",
                timezone="America/Los_Angeles",
                userName=user_name,
                spaceName=space_name,
                spaceViewId=str(uuid.uuid4()),  # Random UUID for spaceViewId
                currentDatetime=current_datetime_iso
            )
        ),
        # No value field needed for agent-integration.
        NotionTranscriptItem(type="agent-integration")
    ]

    for message in request_data.messages:
        if message.role == "assistant":
            # Assistant messages get type="markdown-chat" and a unique traceId.
            transcript.append(NotionTranscriptItem(
                type="markdown-chat",
                value=message.content,
                traceId=str(uuid.uuid4())
            ))
        else:  # Treat all other roles (user, system, etc.) as "user" type
            created_at_dt = message_timestamps.get(message.id)
            created_at_iso = _format_notion_timestamp(created_at_dt) if created_at_dt else None

            content = message.content
            # Normalize OpenAI "parts" lists down to a plain string.
            if isinstance(content, list):
                text_content = ""
                for part in content:
                    if isinstance(part, dict) and part.get("type") == "text":
                        text_part = part.get("text")
                        if isinstance(text_part, str):
                            text_content += text_part
                content = text_content if text_content else ""
            elif not isinstance(content, str):
                content = ""  # Default to empty string if not list or string

            # Notion expects user content wrapped as [[content_string]].
            notion_value = [[content]] if content else [[""]]

            transcript.append(NotionTranscriptItem(
                type="user",
                value=notion_value,
                userId=user_id,
                createdAt=created_at_iso
                # No traceId for user/system messages.
            ))

    return NotionRequestBody(
        spaceId=account.space_id,  # From selected account
        transcript=transcript,
        createThread=True,  # Always create a new thread
        traceId=str(uuid.uuid4()),  # New traceId per request
        debugOverrides=NotionDebugOverrides(
            cachedInferences={},
            annotationInferences={},
            emitInferences=False
        ),
        generateTitle=False,
        saveAllThreadOperations=False
    )
528
+
529
+ # --- Background Playwright Task ---
530
+ async def _run_playwright_fetch(
531
+ chunk_queue: asyncio.Queue,
532
+ notion_request_body: NotionRequestBody,
533
+ headers_template: dict,
534
+ notion_api_url: str,
535
+ account: NotionAccount # Pass the whole account object
536
+ ):
537
+ """Runs Playwright fetch in the background, putting results into a queue."""
538
+ browser = None
539
+ context = None
540
+ page = None
541
+
542
+ # Construct headers for this specific task run
543
+ current_headers = headers_template.copy()
544
+ current_headers['x-notion-space-id'] = account.space_id # Use fetched space_id
545
+ if account.user_id: # Use fetched user_id for active user header
546
+ current_headers['x-notion-active-user-header'] = account.user_id
547
+
548
+ # 'cookie' is handled by context.add_cookies(), so it's not in current_headers for fetch
549
+
550
+ async def handle_chunk(chunk_str: str):
551
+ await chunk_queue.put(chunk_str)
552
+
553
+ async def handle_stream_end():
554
+ await chunk_queue.put(None)
555
+
556
+ try:
557
+ logging.info("DEBUG: Background task starting Playwright.")
558
+ async with async_playwright() as p:
559
+ # Configure browser launch with proxy if PROXY_URL is set
560
+ launch_args = {
561
+ 'headless': True,
562
+ 'args': ['--no-sandbox', '--disable-setuid-sandbox']
563
+ }
564
+ if PROXY_URL:
565
+ launch_args['proxy'] = {'server': PROXY_URL}
566
+ logging.info(f"DEBUG: Background task using proxy: {PROXY_URL}")
567
+
568
+ try:
569
+ browser = await p.chromium.launch(**launch_args)
570
+ except PlaywrightError as e:
571
+ if PROXY_URL and "proxy" in str(e).lower():
572
+ logging.error(f"Invalid proxy URL or proxy connection failed: {PROXY_URL}. Error: {e}")
573
+ await handle_stream_end() # Signal end of stream
574
+ raise PlaywrightError(f"Proxy configuration error: {e}")
575
+ else:
576
+ raise
577
+
578
+ logging.info("DEBUG: Background task browser launched.")
579
+ # Get user-agent from the constructed headers for this task
580
+ user_agent_for_context = current_headers.get('user-agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36') # Default if not in template
581
+ context = await browser.new_context(user_agent=user_agent_for_context)
582
+ logging.info("DEBUG: Background task context created.")
583
+
584
+ if account.cookie: # Use passed account cookie
585
+ cookies_to_add = []
586
+ cookie_pairs = account.cookie.split('; ')
587
+ for pair in cookie_pairs:
588
+ if '=' in pair:
589
+ name, value = pair.split('=', 1)
590
+ cookies_to_add.append({
591
+ 'name': name.strip(), 'value': value.strip(),
592
+ 'domain': '.notion.so', 'path': '/',
593
+ 'secure': True, 'httpOnly': True, 'sameSite': 'Lax'
594
+ })
595
+ if cookies_to_add:
596
+ await context.add_cookies(cookies_to_add)
597
+ logging.info("DEBUG: Background task cookies added.")
598
+ else:
599
+ logging.warning("Warning: No valid cookies found in account cookie for background task.")
600
+ else:
601
+ logging.error("Error: Account cookie is empty for background task.")
602
+ raise ValueError("Server configuration error: Notion cookie not set for background task.")
603
+
604
+ page = await context.new_page()
605
+ logging.info("DEBUG: Background task page created.")
606
+ await page.goto("https://www.notion.so/chat", wait_until="domcontentloaded")
607
+ logging.info("DEBUG: Background task navigation complete.")
608
+
609
+ await page.expose_function("sendChunkToPython", handle_chunk)
610
+ await page.expose_function("signalStreamEnd", handle_stream_end)
611
+ logging.info("DEBUG: Background task functions exposed.")
612
+
613
+ request_body_json_str = notion_request_body.json()
614
+
615
+ # Prepare headers for JS fetch (cookie is handled by context)
616
+ js_fetch_headers = current_headers.copy()
617
+ if 'cookie' in js_fetch_headers: # Should not be there if template is correct
618
+ del js_fetch_headers['cookie']
619
+
620
+
621
+ javascript_code = """
622
+ async (args) => {
623
+ const { apiUrl, headers, body } = args;
624
+ try {
625
+ const response = await fetch(apiUrl, {
626
+ method: 'POST',
627
+ headers: headers,
628
+ body: body
629
+ });
630
+ if (!response.ok) {
631
+ const errorText = await response.text();
632
+ console.error('JS Fetch error:', response.status, errorText);
633
+ await window.signalStreamEnd();
634
+ return { success: false, status: response.status, error: errorText };
635
+ }
636
+ if (!response.body) {
637
+ console.error('JS Response body is null');
638
+ await window.signalStreamEnd();
639
+ return { success: false, error: 'Response body is null' };
640
+ }
641
+ const reader = response.body.getReader();
642
+ const decoder = new TextDecoder();
643
+ while (true) {
644
+ const { done, value } = await reader.read();
645
+ if (done) break;
646
+ await window.sendChunkToPython(decoder.decode(value, { stream: true }));
647
+ }
648
+ await window.signalStreamEnd();
649
+ return { success: true };
650
+ } catch (error) {
651
+ console.error('JS Exception during fetch:', error);
652
+ await window.signalStreamEnd();
653
+ return { success: false, error: error.toString() };
654
+ }
655
+ }
656
+ """
657
+ js_args = {"apiUrl": notion_api_url, "headers": js_fetch_headers, "body": request_body_json_str}
658
+ logging.info("DEBUG: Background task executing page.evaluate()...")
659
+ js_result = await page.evaluate(javascript_code, js_args)
660
+ logging.info(f"DEBUG: Background task page.evaluate() result: {js_result}")
661
+
662
+ if not js_result or not js_result.get('success'):
663
+ error_detail = js_result.get('error', 'Unknown JS execution error')
664
+ logging.error(f"Error in background task JS execution: {error_detail}")
665
+ # Error already signaled to queue by JS calling signalStreamEnd
666
+ # Re-raise to be caught by the task's main try/except
667
+ raise PlaywrightError(f"JS Fetch Error: {error_detail}")
668
+
669
+ except Exception as e:
670
+ logging.error(f"Error in _run_playwright_fetch background task: {e}")
671
+ await chunk_queue.put(None) # Ensure queue is terminated on error
672
+ # Exception will be caught by playwright_task.exception() in the main generator
673
+ finally:
674
+ logging.info("DEBUG: Background task _run_playwright_fetch attempting to close browser.")
675
+ if browser and browser.is_connected():
676
+ try:
677
+ await browser.close()
678
+ logging.info("DEBUG: Background task browser closed.")
679
+ except Exception as e:
680
+ logging.warning(f"Ignoring error during background task browser close: {e}")
681
+ else:
682
+ logging.info("DEBUG: Background task browser already closed or not initialized.")
683
+
684
# --- Main Generator Called by Endpoint ---
async def stream_notion_response(notion_request_body: NotionRequestBody, account: NotionAccount):
    """Stream an OpenAI-style SSE response backed by a Playwright fetch to Notion.

    Spawns `_run_playwright_fetch` as a background task that pushes raw ndjson
    chunks from Notion's API into an asyncio queue; this generator parses those
    chunks and yields OpenAI-compatible `data: {...}` SSE lines, terminated by
    a final stop chunk and a `[DONE]` marker.

    Args:
        notion_request_body: Fully built Notion API request payload.
        account: The (already health-checked) Notion account to use.

    Raises:
        HTTPException: If the background browser-automation task fails.
    """
    chunk_queue = asyncio.Queue()
    playwright_task = None

    # Identifiers shared by every chunk of this single completion stream.
    chunk_id = f"chatcmpl-{uuid.uuid4()}"
    created_time = int(time.time())

    # Header template passed to the background task. Account-specific headers
    # (x-notion-space-id, x-notion-active-user-header) and the cookie are
    # added by _run_playwright_fetch itself.
    headers_template = {
        'accept': 'application/x-ndjson',
        'accept-language': 'en-US,en;q=0.9',
        'content-type': 'application/json',
        'notion-audit-log-platform': 'web',
        'notion-client-version': '23.13.0.3668',
        'origin': 'https://www.notion.so',
        'priority': 'u=1, i',
        'referer': 'https://www.notion.so/chat',
        'sec-ch-ua': '"Chromium";v="136", "Google Chrome";v="136", "Not.A/Brand";v="99"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'sec-fetch-dest': 'empty',
        'sec-fetch-mode': 'cors',
        'sec-fetch-site': 'same-origin',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36',
        # 'cookie' and 'x-notion-space-id' will be handled/added by _run_playwright_fetch
        # using the passed environment variable strings
    }

    try:
        # Health check is now performed by get_next_account() before this function is called.
        logging.info("DEBUG: Main generator starting Playwright background task.")
        playwright_task = asyncio.create_task(
            _run_playwright_fetch(
                chunk_queue,
                notion_request_body,
                headers_template,
                NOTION_API_URL,  # Global constant
                account  # Pass the selected account
            )
        )

        accumulated_line = ""
        logging.info("DEBUG: Main generator starting queue processing loop.")
        while True:
            chunk = await chunk_queue.get()  # Wait for a chunk from the background task
            if chunk is None:
                # Sentinel: background task has no more data (success or failure).
                logging.info("DEBUG: Main generator received None sentinel from queue.")
                break

            accumulated_line += chunk
            # Notion streams ndjson; process every complete line received so far.
            while '\n' in accumulated_line:
                line, accumulated_line = accumulated_line.split('\n', 1)
                if not line.strip():
                    continue
                try:
                    data = json.loads(line)
                    if data.get("type") == "markdown-chat" and isinstance(data.get("value"), str):
                        content_chunk = data["value"]
                        if content_chunk:
                            sse_chunk = ChatCompletionChunk(
                                id=chunk_id, created=created_time,
                                choices=[Choice(delta=ChoiceDelta(content=content_chunk))]
                            )
                            logging.info(f"DEBUG: Main generator yielding chunk: {content_chunk[:50]}...")
                            yield f"data: {sse_chunk.json()}\n\n"
                    elif "recordMap" in data:
                        # Trailing metadata payload from Notion; not part of the chat text.
                        logging.info("DEBUG: Main generator detected recordMap, ignoring.")
                except json.JSONDecodeError:
                    logging.warning(f"Warning: Main generator could not decode JSON line: {line}")
                except Exception as e:
                    logging.error(f"Error processing line in main generator: {line} - {e}")

        # Process any final accumulated data after the None sentinel.
        if accumulated_line.strip():
            try:
                data = json.loads(accumulated_line)
                if data.get("type") == "markdown-chat" and isinstance(data.get("value"), str):
                    content_chunk = data["value"]
                    if content_chunk:
                        sse_chunk = ChatCompletionChunk(
                            id=chunk_id, created=created_time,
                            choices=[Choice(delta=ChoiceDelta(content=content_chunk))]
                        )
                        logging.info(f"DEBUG: Main generator yielding final accumulated chunk: {content_chunk[:50]}...")
                        yield f"data: {sse_chunk.json()}\n\n"
            except json.JSONDecodeError:
                logging.warning(f"Warning: Main generator could not decode final JSON line: {accumulated_line}")
            except Exception as e:
                logging.error(f"Error processing final line in main generator: {accumulated_line} - {e}")

        # FIX: the previous code only *checked* playwright_task.done() here, which
        # races with the task's `finally` block (browser close) that still runs
        # after it queued the None sentinel — failures could be silently dropped
        # and the stream ended with a fake "stop". Awaiting the task guarantees
        # completion and reliably surfaces any exception it raised.
        try:
            await playwright_task
        except Exception as task_exception:
            logging.error(f"Playwright background task failed: {task_exception}")
            raise HTTPException(status_code=500, detail=f"Error during background browser automation: {task_exception}")

        logging.info("DEBUG: Main generator background task completed successfully.")
        final_chunk = ChatCompletionChunk(
            id=chunk_id, created=created_time,
            choices=[Choice(delta=ChoiceDelta(), finish_reason="stop")]
        )
        logging.info("DEBUG: Main generator yielding final stop chunk.")
        yield f"data: {final_chunk.json()}\n\n"
        logging.info("DEBUG: Main generator yielding [DONE] marker.")
        yield "data: [DONE]\n\n"

    except Exception as e:
        logging.error(f"Error in main stream_notion_response generator: {e}")
        if playwright_task and not playwright_task.done():
            logging.info("DEBUG: Main generator cancelling background task due to its own error.")
            playwright_task.cancel()
        raise
    finally:
        logging.info("DEBUG: Main generator finished.")
        if playwright_task and not playwright_task.done():
            logging.info("DEBUG: Main generator ensuring background task is cancelled on exit.")
            playwright_task.cancel()
            try:
                await playwright_task  # Allow cancellation to propagate
            except asyncio.CancelledError:
                logging.info("DEBUG: Background task successfully cancelled.")
            except Exception as e:
                logging.error(f"DEBUG: Error during background task cancellation/await: {e}")
813
+
814
+
815
# --- API Endpoint ---

@app.get("/v1/models", response_model=ModelList)
async def list_models(authenticated: bool = Depends(authenticate)):
    """Expose the Notion-backed model identifiers in OpenAI /v1/models shape."""
    notion_model_ids = (
        "openai-gpt-4.1",
        "anthropic-opus-4",
        "anthropic-sonnet-4",
    )
    # `created` on each Model comes from its default_factory.
    return ModelList(
        data=[Model(id=mid, owned_by="notion") for mid in notion_model_ids]
    )
832
+
833
@app.post("/v1/chat/completions")
async def chat_completions(request_data: ChatCompletionRequest, request: Request, authenticated: bool = Depends(authenticate)):
    """
    Endpoint to mimic OpenAI's chat completions, proxying to Notion.
    It uses round-robin to select a healthy Notion account for each request.

    Streaming requests return an SSE StreamingResponse; non-streaming requests
    drain the same internal stream and return a single chat.completion object.
    """
    account = get_next_account()  # Select a healthy account

    notion_request_body = build_notion_request(request_data, account)

    if request_data.stream:
        # Call the Playwright generator, passing the selected account
        return StreamingResponse(
            stream_notion_response(notion_request_body, account),
            media_type="text/event-stream"
        )
    else:
        # --- Non-Streaming Logic (Optional - Collects stream internally) ---
        # Note: The primary goal is streaming, but a non-streaming version
        # might be useful for testing or simpler clients.
        # This requires collecting all chunks from the async generator.
        full_response_content = ""
        final_finish_reason = None
        chunk_id = f"chatcmpl-{uuid.uuid4()}"  # Generate ID for the non-streamed response
        created_time = int(time.time())

        # --- Non-streaming logic needs to call the generator with the selected account ---
        try:
            # Call the Playwright generator, passing the selected account
            async for line in stream_notion_response(notion_request_body, account):
                if line.startswith("data: ") and "[DONE]" not in line:
                    try:
                        data_json = line[len("data: "):].strip()
                        if data_json:
                            chunk_data = json.loads(data_json)
                            if chunk_data.get("choices"):
                                delta = chunk_data["choices"][0].get("delta", {})
                                content = delta.get("content")
                                if content:
                                    full_response_content += content
                                finish_reason = chunk_data["choices"][0].get("finish_reason")
                                if finish_reason:
                                    final_finish_reason = finish_reason
                    except json.JSONDecodeError:
                        # FIX: was a bare print(); use the module-wide logging like
                        # the rest of the file so this is captured by log handlers.
                        logging.warning(f"Warning: Could not decode JSON line in non-streaming mode: {line}")

            # Construct the final OpenAI-compatible non-streaming response
            return {
                "id": chunk_id,
                "object": "chat.completion",
                "created": created_time,
                "model": request_data.model,  # Return the model requested by the client
                "choices": [
                    {
                        "index": 0,
                        "message": {
                            "role": "assistant",
                            "content": full_response_content,
                        },
                        "finish_reason": final_finish_reason or "stop",  # Default to stop if not explicitly set
                    }
                ],
                "usage": {  # Note: Token usage is not available from Notion
                    "prompt_tokens": None,
                    "completion_tokens": None,
                    "total_tokens": None,
                },
            }
        except HTTPException:
            # Re-raise HTTP exceptions from the streaming function unchanged.
            raise
        except Exception as e:
            # FIX: was a bare print(); log the failure before masking it with a 500.
            logging.error(f"Error during non-streaming processing: {e}")
            raise HTTPException(status_code=500, detail="Internal server error processing Notion response")
907
+
908
+
909
# --- Uvicorn Runner ---
# Allows running with `python main.py` for simple testing,
# but `uvicorn main:app --reload` is recommended for development.
if __name__ == "__main__":
    # Imported lazily so the module can be served by an external uvicorn
    # process without importing uvicorn at module load time.
    import uvicorn
    print("Starting server. Access at http://127.0.0.1:7860")
    print("Ensure NOTION_COOKIES is set in your .env file or environment, separated by '|'.")
    uvicorn.run(app, host="127.0.0.1", port=7860)