Spaces:

Rajhuggingface4253
/

gem

Running

App Files Files Community

gem / app.py

Rajhuggingface4253

Update app.py

ca43214 verified 14 days ago

raw

history blame contribute delete

47.4 kB

	import sys
	import asyncio
	import os
	from contextlib import suppress
	from typing import Optional
	import time
	import uuid
	import re
	# Playwright compatibility: on Windows, switch asyncio to the Proactor event loop policy.
	if sys.platform == 'win32':
	    asyncio.set_event_loop_policy(
	        asyncio.WindowsProactorEventLoopPolicy()
	    )

	from fastapi import FastAPI, HTTPException, Body
	from fastapi.responses import Response
	from fastapi.middleware.cors import CORSMiddleware
	from pydantic import BaseModel, Field, field_validator
	from typing import List, Dict, Any
	import markdown
	from jinja2 import Template, Environment, select_autoescape
	from playwright.async_api import async_playwright, Browser, BrowserContext, Page, Error as PlaywrightError
	from datetime import datetime
	import re
	from urllib.parse import quote
	import gc
	import io
	import uvicorn

	# ==================== APP INITIALIZATION ====================
	# FastAPI application object; the route decorators further down register against it.
	app = FastAPI(
	    title="Chat PDF Export Service",
	    description="Production-grade API for exporting chat conversations to PDF",
	    version="1.0.0",
	)

	# Cross-origin access for web clients.
	# NOTE(review): wildcard origins combined with allow_credentials=True is very
	# permissive — restrict allow_origins to the real front-end domain(s) in production.
	app.add_middleware(
	    CORSMiddleware,
	    allow_origins=["*"],
	    allow_credentials=True,
	    allow_methods=["GET", "POST"],
	    allow_headers=["*"],
	)

	# ==================== GLOBAL CONFIGURATION ====================
	MAX_CONTENT_LENGTH = 50 * 1000  # upper bound on the combined message length, in characters
	PDF_GENERATION_TIMEOUT = 30  # seconds
	MAX_REQUESTS_PER_CONNECTION = 100  # pages served by one browser before it is restarted

	# ==================== PYDANTIC MODELS ====================
	class ExportRequest(BaseModel):
	    """Request body for the chat-to-PDF export endpoint.

	    Attributes:
	        messages: Non-empty list of chat messages; every item must be a dict
	            carrying at least the "role" and "content" keys.
	        language: Two-letter language code, normalised to lower case.
	        font_family: Optional custom font family name.
	    """

	    messages: List[dict] = Field(..., min_length=1)
	    language: str = Field(default="en", description="ISO 639-1 language code")
	    font_family: Optional[str] = Field(default=None, description="Custom font family")

	    @field_validator('messages')
	    @classmethod
	    def validate_messages(cls, v: list) -> list:
	        """Reject messages that are not dicts or that lack "role"/"content".

	        Raises:
	            ValueError: If any item fails either check.
	        """
	        for msg in v:
	            if not isinstance(msg, dict):
	                raise ValueError('Each message must be a dictionary')
	            if 'role' not in msg or 'content' not in msg:
	                raise ValueError('Message must have "role" and "content" keys')
	        return v

	    @field_validator('language')
	    @classmethod
	    def validate_language(cls, v: str) -> str:
	        """Accept exactly two ASCII letters and return them lower-cased.

	        The code ends up in the download filename (and therefore in the
	        Content-Disposition header), so a length check alone is not enough:
	        control characters, quotes or non-ASCII text must be rejected here.

	        Raises:
	            ValueError: If the value is not a 2-letter ASCII alphabetic string.
	        """
	        is_two_ascii_letters = (
	            isinstance(v, str) and len(v) == 2 and v.isascii() and v.isalpha()
	        )
	        if not is_two_ascii_letters:
	            raise ValueError('Language must be a 2-letter ISO code')
	        return v.lower()

	# ==================== PLAYWRIGHT BROWSER MANAGEMENT ====================
	class PlaywrightBrowserPool:
	    """Hand out pages from one shared, lazily launched headless Chromium.

	    Despite the name, a single browser and a single browser context are kept;
	    each call to get_page() opens a new page in that context. The browser is
	    (re)launched when none exists yet, when it is found disconnected, when
	    creating a page fails, or once more than MAX_REQUESTS_PER_CONNECTION pages
	    have been served since the last launch.

	    NOTE(review): a restart closes the whole browser, so pages handed out to
	    requests that are still rendering would be closed with it — confirm this
	    is acceptable under concurrent load.
	    """

	    def __init__(self):
	        self.browser: Optional[Browser] = None
	        self.context: Optional[BrowserContext] = None
	        self.playwright = None  # handle returned by async_playwright().start()
	        self.request_count = 0  # pages handed out since the last launch
	        self._lock = asyncio.Lock()  # serialises launch, restart and page creation
	        self._last_maintenance = time.time()

	    async def get_page(self) -> Page:
	        """Return a new page, launching or restarting the browser if needed.

	        Returns:
	            A fresh Playwright page; the caller is responsible for closing it.

	        Raises:
	            Exception: Whatever Playwright raises if the browser cannot be
	                launched, or if page creation fails again after one restart.
	        """
	        async with self._lock:
	            if self.browser is not None and not self.browser.is_connected():
	                # Crashed browser (OOM, timeout, etc.). Go through a full restart so
	                # the old Playwright driver is stopped rather than leaked.
	                print("[browser_pool] Browser disconnected — restarting…")
	                await self._restart_browser()
	            elif self.browser is None or self.context is None:
	                # First use, or leftovers from close()/a failed launch: the restart
	                # path cleans up whatever is still around before launching.
	                await self._restart_browser()
	            elif self.request_count > MAX_REQUESTS_PER_CONNECTION:
	                # Periodic maintenance restart to free memory and resources.
	                await self._restart_browser()

	            try:
	                page = await self.context.new_page()
	            except Exception:
	                # Browser died between the check and page creation — retry once.
	                print("[browser_pool] Failed to create page — restarting browser…")
	                await self._restart_browser()
	                page = await self.context.new_page()

	            self.request_count += 1
	            return page

	    async def _create_browser(self):
	        """Start Playwright, launch Chromium and open the shared context.

	        If launching fails, the just-started Playwright driver is stopped again
	        and the exception is re-raised, so no half-initialised state remains.
	        """
	        self.playwright = await async_playwright().start()
	        try:
	            self.browser = await self.playwright.chromium.launch(
	                headless=True,
	                args=[
	                    '--no-sandbox',
	                    '--disable-setuid-sandbox',
	                    '--disable-dev-shm-usage',
	                    '--disable-gpu',
	                    '--no-zygote',  # Critical for Docker: skip forking zygote process
	                    '--single-process',  # Critical for Docker: run everything in one process
	                    # NOTE(review): disables same-origin protections for rendered content.
	                    '--disable-web-security',
	                    '--disable-features=VizDisplayCompositor',
	                    '--font-render-hinting=none',  # Prevents blurry PDF text
	                    '--disable-lcd-text',  # Disable subpixel AA (fuzz in PDFs)
	                    '--enable-font-antialiasing',
	                    '--force-color-profile=srgb',
	                ],
	            )
	            self.context = await self.browser.new_context()
	        except Exception:
	            with suppress(Exception):
	                await self.playwright.stop()
	            self.browser = None
	            self.context = None
	            self.playwright = None
	            raise

	        # A fresh browser starts a fresh maintenance window.
	        self.request_count = 0
	        self._last_maintenance = time.time()
	        # NOTE(review): `process` may not exist on the Playwright Python Browser
	        # object — read it defensively so logging can never break a launch.
	        process = getattr(self.browser, 'process', None)
	        print(f"[browser_pool] Browser launched successfully (pid={process.pid if process else '?'})")

	    async def _restart_browser(self):
	        """Close the browser and Playwright driver (best effort), then relaunch."""
	        with suppress(Exception):
	            if self.browser:
	                await self.browser.close()
	        with suppress(Exception):
	            if self.playwright:
	                await self.playwright.stop()
	        self.browser = None
	        self.context = None
	        self.playwright = None
	        gc.collect()
	        await self._create_browser()

	    async def close(self):
	        """Shut down the browser and Playwright driver, ignoring shutdown errors."""
	        with suppress(Exception):
	            if self.browser:
	                await self.browser.close()
	        with suppress(Exception):
	            if self.playwright:
	                await self.playwright.stop()
	        # Drop the stale handles so a later get_page() launches from scratch.
	        self.browser = None
	        self.context = None
	        self.playwright = None
	# Module-level pool instance: the PDF endpoints obtain their pages from it via get_page().
	# Constructing it does not launch Chromium; the browser is started on the first get_page() call.
	browser_pool = PlaywrightBrowserPool()

	# ==================== MULTILINGUAL FONT MAPPING ====================
	# language code -> (display_name, google_font_url_param, is_system_font)
	#   display_name           font family name
	#   google_font_url_param  value for the `family=` parameter of the Google Fonts css2 URL
	#   is_system_font         True means no Google Font link is needed (the font is pre-installed)
	MULTILINGUAL_FONTS = {
	    'en': ('Georgia', '', True),
	    'hi': ('Noto Sans Devanagari', 'Noto+Sans+Devanagari:wght@400;600;700', False),
	    'ar': ('Noto Sans Arabic', 'Noto+Sans+Arabic:wght@400;600;700', False),
	    # Persian is treated as right-to-left by the direction/alignment helpers, so it
	    # needs an Arabic-script font as well instead of falling back to Georgia.
	    'fa': ('Noto Sans Arabic', 'Noto+Sans+Arabic:wght@400;600;700', False),
	    'zh': ('Noto Sans SC', 'Noto+Sans+SC:wght@400;600;700', False),
	    'ja': ('Noto Sans JP', 'Noto+Sans+JP:wght@400;600;700', False),
	    'ko': ('Noto Sans KR', 'Noto+Sans+KR:wght@400;600;700', False),
	    'th': ('Noto Sans Thai', 'Noto+Sans+Thai:wght@400;600;700', False),
	    'he': ('Noto Serif Hebrew', 'Noto+Serif+Hebrew:wght@400;600;700', False),
	    'bn': ('Noto Sans Bengali', 'Noto+Sans+Bengali:wght@400;600;700', False),
	    'ta': ('Noto Sans Tamil', 'Noto+Sans+Tamil:wght@400;600;700', False),
	    'te': ('Noto Serif Telugu', 'Noto+Serif+Telugu:wght@400;600;700', False),
	    'ml': ('Noto Serif Malayalam', 'Noto+Serif+Malayalam:wght@400;600;700', False),
	    'ru': ('Georgia', '', True),
	    'ur': ('Noto Nastaliq Urdu', 'Noto+Nastaliq+Urdu', False),
	}

	def get_font_for_language(lang: str) -> str:
	    """Return the Google Fonts ``family=`` URL parameter for *lang*.

	    The lookup is case-insensitive. Languages mapped to a system font, and
	    languages missing from MULTILINGUAL_FONTS, yield an empty string.
	    """
	    fallback_entry = ('Georgia', '', True)
	    _display_name, url_param, _is_system = MULTILINGUAL_FONTS.get(lang.lower(), fallback_entry)
	    return url_param

	# ==================== HTML TEMPLATE - FIXED VERSION ====================
	# Jinja2 template for the chat export page. Variables read by the template:
	#   language, title, document_title, date, text_direction, text_alignment,
	#   font_family  Google Fonts `family=` parameter (e.g. "Noto+Sans+Thai:wght@400;600;700");
	#                an empty string means no web font is requested,
	#   messages     items exposing `role` and pre-rendered `content_html`.
	# The CSS font name is derived from font_family by dropping the ":wght@..." suffix and
	# turning "+" back into spaces; an empty value falls back to "Inter".
	PDF_HTML_TEMPLATE = """
	<!DOCTYPE html>
	<html lang="{{ language }}">
	<head>
	  <meta charset="UTF-8">
	  <meta name="viewport" content="width=device-width, initial-scale=1.0">
	  <title>{{ title }}</title>

	  {% if font_family %}
	  <link rel="preconnect" href="https://fonts.googleapis.com">
	  <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
	  <link href="https://fonts.googleapis.com/css2?family={{ font_family }}&display=swap" rel="stylesheet">
	  {% endif %}

	  <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.7.0/styles/github-dark.min.css">

	  <style>
	    * {
	      font-kerning: normal;
	      text-rendering: optimizeLegibility;
	      -webkit-font-smoothing: antialiased;
	      -webkit-print-color-adjust: exact !important;
	      print-color-adjust: exact !important;
	      box-sizing: border-box;
	    }

	    body {
	      font-family: '{{ font_family.split(":")[0] | replace("+", " ") | default("Inter", true) }}', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
	      max-width: 100%;
	      margin: 0 auto;
	      padding: 0;
	      color: #000000;
	      direction: {{ text_direction }};
	      font-size: 11pt;
	      line-height: 1.6;
	    }

	    /* HEADER STYLING */
	    .header {
	      text-align: center;
	      border-bottom: 2pt solid #000;
	      padding-bottom: 15pt;
	      margin-bottom: 20pt;
	      page-break-after: avoid;
	    }

	    .logo {
	      font-size: 18pt;
	      font-weight: 800;
	      color: #000;
	      letter-spacing: -0.5px;
	    }

	    .date {
	      color: #6b7280;
	      font-size: 10pt;
	      margin-top: 5pt;
	    }

	    /* MESSAGE CONTAINERS - FIXED SPACING */
	    .message {
	      margin-bottom: 16pt;
	      page-break-inside: auto;
	    }
	    h1, h2, h3, h4, h5, h6, .user .content {
	      page-break-after: avoid;
	    }
	    .message:last-child {
	      margin-bottom: 0;
	    }

	    /* USER MESSAGE - QUESTION HEADER */
	    .user .content {
	      font-weight: 700;
	      font-size: 12pt;
	      color: #000000;
	      margin: 0 0 8pt 0;
	      padding: 0 0 8pt 0;
	      border-bottom: 1px solid #e5e7eb;
	      background: none;
	      border-left: none;
	    }
	    img, svg {
	      max-width: 100%;
	      height: auto;
	      display: block;
	      margin: 12pt auto;
	      page-break-inside: avoid;
	    }
	    /* SVG DIAGRAM STYLING */
	    .svg-diagram-container,
	    .mermaid-diagram-container {
	      max-width: 100%;
	      margin: 16pt 0;
	      padding: 12pt;
	      background: #f8f9fa;
	      border: 1px solid #e9ecef;
	      border-radius: 6pt;
	      page-break-inside: avoid;
	      overflow: hidden;
	    }
	    .svg-diagram-container svg,
	    .mermaid-diagram-container svg {
	      max-width: 100%;
	      max-height: 600px; /* Professional dimension limit */
	      height: auto;
	      margin: 0 auto;
	      display: block;
	    }
	    /* INLINE SVG FROM MARKDOWN */
	    svg:not([class]) {
	      max-width: 100%;
	      height: auto;
	      page-break-inside: avoid;
	    }
	    /* ASSISTANT MESSAGE - ANSWER BODY */
	    .assistant .content {
	      font-weight: 400;
	      color: #000000;
	      padding: 0;
	      margin: 0;
	      font-size: 11pt;
	    }

	    /* CODE BLOCKS - FIXED WRAPPING ISSUES */
	    pre {
	      background: #f8f9fa;
	      border: 1px solid #e9ecef;
	      border-radius: 6pt;
	      margin: 12pt 0;
	      padding: 12pt;
	      page-break-inside: auto;
	      orphans: 3;
	      widows: 3;
	      overflow-x: auto;
	      white-space: pre-wrap;
	      word-wrap: break-word;
	      font-size: 10pt;
	    }

	    pre code {
	      display: block;
	      padding: 0;
	      background: transparent;
	      color: #000000;
	      font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace;
	      font-size: 10pt;
	      line-height: 1.5;
	      white-space: pre-wrap;
	      word-wrap: break-word;
	    }

	    /* INLINE CODE */
	    code {
	      font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace;
	      font-size: 10pt;
	      background-color: #f3f4f6;
	      color: #000000;
	      padding: 2px 4px;
	      border-radius: 4px;
	      white-space: normal;
	    }

	    /* TABLES */
	    table {
	      width: 100%;
	      border-collapse: collapse;
	      margin: 12pt 0;
	      font-size: 10pt;
	      page-break-inside: auto;
	      table-layout: fixed;
	      word-wrap: break-word;
	    }
	    td {
	      word-break: break-word;
	      overflow-wrap: break-word;
	    }
	    th, td {
	      border: 1px solid #d1d5db;
	      padding: 8pt;
	      text-align: {{ text_alignment }};
	    }

	    th {
	      background: #f9fafb;
	      font-weight: 600;
	    }

	    /* LISTS */
	    ul, ol {
	      padding-left: 30px;
	      margin: 8pt 0;
	    }
	    /* Nested unordered list style differentiation */
	    ul { list-style: disc; }
	    ul ul { list-style: circle; }
	    ul ul ul { list-style: square; }
	    /* Nested ordered list style differentiation */
	    ol { list-style: decimal; }
	    ol ol { list-style: lower-alpha; }
	    ol ol ol { list-style: lower-roman; }

	    li {
	      margin: 4pt 0;
	    }

	    /* PARAGRAPHS AND TEXT ELEMENTS */
	    p {
	      margin: 8pt 0;
	    }

	    h1, h2, h3, h4, h5, h6 {
	      margin: 16pt 0 8pt 0;
	      font-weight: 600;
	      line-height: 1.3;
	    }

	    h1 { font-size: 16pt; }
	    h2 { font-size: 14pt; }
	    h3 { font-size: 13pt; }

	    /* PDF PAGE SETUP */
	    @page {
	      size: A4;
	      margin: 20mm;
	    }

	    @media print {
	      body {
	        -webkit-print-color-adjust: exact;
	        print-color-adjust: exact;
	      }
	    }
	  </style>
	</head>
	<body>
	  <div class="header">
	    <div class="logo">{{ document_title }}</div>
	    <div class="date">{{ date }}</div>
	  </div>

	  {% for msg in messages %}
	  <div class="message {{ msg.role }}">
	    <div class="content">
	      {{ msg.content_html | safe }}
	    </div>
	  </div>
	  {% endfor %}

	  <script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.7.0/highlight.min.js"></script>
	  <script>
	    // Enhanced loading strategy for highlight.js
	    (function() {
	      function initHighlighting() {
	        if (window.hljs) {
	          try {
	            hljs.highlightAll();
	          } catch (e) {
	            console.log('Highlight.js error:', e);
	          }
	        } else {
	          console.log('Highlight.js not available, skipping syntax highlighting.');
	        }
	      }

	      // Try to load with timeout
	      Promise.race([
	        new Promise(function(resolve) {
	          if (document.fonts && document.fonts.ready) {
	            document.fonts.ready.then(resolve).catch(resolve);
	          } else {
	            resolve();
	          }
	        }),
	        new Promise(resolve => setTimeout(resolve, 3000))
	      ]).then(initHighlighting);
	    })();
	  </script>
	</body>
	</html>
	"""

	# Shared Jinja2 environment, built with select_autoescape so that template values are
	# HTML-escaped on output unless a template explicitly marks them as safe.
	# NOTE(review): the templates in this file are compiled with from_string(), so escaping
	# relies on select_autoescape's default for string templates — confirm in the Jinja2 docs.
	jinja_env = Environment(autoescape=select_autoescape(['html', 'xml']))

	def get_text_direction(lang: str) -> str:
	    """Return the CSS ``direction`` value for *lang*.

	    Arabic, Hebrew, Urdu and Persian ('ar', 'he', 'ur', 'fa') are written
	    right-to-left; every other code is treated as left-to-right. The
	    comparison is case-sensitive.
	    """
	    right_to_left = ['ar', 'he', 'ur', 'fa']
	    return 'rtl' if lang in right_to_left else 'ltr'

	def get_text_alignment(lang: str) -> str:
	    """Return the CSS ``text-align`` value for *lang*.

	    The right-to-left codes 'ar', 'he', 'ur' and 'fa' align right; all
	    other codes align left. The comparison is case-sensitive.
	    """
	    right_aligned = ['ar', 'he', 'ur', 'fa']
	    return 'right' if lang in right_aligned else 'left'

	# ==================== UTILITY FUNCTIONS ====================
	# Tags removed outright from narrative text. 'link' and 'meta' are included because,
	# outside code blocks, they are likely injection attempts (CSS injection or redirects).
	_DANGEROUS_TAGS = ('script', 'iframe', 'object', 'embed', 'applet', 'form', 'link', 'meta')

	# Per tag: (element together with its content, any remaining opening/closing/self-closing tag).
	# The \b keeps look-alike names such as <metadata> or <formula> untouched.
	_DANGEROUS_TAG_PATTERNS = tuple(
	    (
	        re.compile(rf'<{tag}\b[^>]*>.*?</{tag}\s*>', re.IGNORECASE | re.DOTALL),
	        re.compile(rf'</?{tag}\b[^>]*>', re.IGNORECASE),
	    )
	    for tag in _DANGEROUS_TAGS
	)

	_DANGEROUS_ATTR_PATTERNS = (
	    # Event handlers (onclick, onload, onerror, ...). The value may be double-quoted,
	    # single-quoted, or unquoted; the unquoted form is only removed when a closing
	    # '>' follows before the next '<', so prose like "one = 5" is left alone.
	    re.compile(
	        r'\s+on[a-z]+\s*=\s*(?:"[^"]*"|\'[^\']*\'|[^\s"\'>]+(?=[^<]*>))',
	        re.IGNORECASE,
	    ),
	    # href/src values mentioning the javascript: or data: schemes.
	    re.compile(
	        r'\s+(?:href|src)\s*=\s*'
	        r'(?:"[^"]*(?:javascript|data):[^"]*"'
	        r"|'[^']*(?:javascript|data):[^']*'"
	        r'|[^\s"\'>]*(?:javascript|data):[^\s>]*)',
	        re.IGNORECASE,
	    ),
	)


	def _strip_dangerous_markup(text: str) -> str:
	    """Run one removal pass for dangerous tags and attributes over *text*."""
	    for paired_pattern, single_pattern in _DANGEROUS_TAG_PATTERNS:
	        text = paired_pattern.sub('', text)
	        text = single_pattern.sub('', text)
	    for attr_pattern in _DANGEROUS_ATTR_PATTERNS:
	        text = attr_pattern.sub('', text)
	    return text


	def sanitize_content(content: str) -> str:
	    """
	    Context-Aware Gatekeeper:
	    Sanitizes dangerous HTML from narrative text but PRESERVES it inside code blocks.
	    Strategy: Mask Code -> Sanitize Text -> Unmask Code

	    Args:
	        content: Raw message content; it is converted with str() and stripped first.

	    Returns:
	        The content with dangerous tags and attributes removed from everything
	        outside fenced/inline code spans. Empty input is returned unchanged.

	    NOTE(review): this is a regex-based best-effort filter, not an HTML parser.
	    Masked code spans are restored verbatim, so safety of their content relies on
	    the Markdown renderer escaping them — confirm that this also holds for
	    backticks that appear inside raw HTML blocks.
	    """
	    content = str(content).strip()
	    if not content:
	        return content

	    # Storage for the code spans that are temporarily hidden from the filters.
	    placeholders = {}

	    def mask_match(match):
	        """Swap a code span for a unique token and remember the original text."""
	        token = f"__SAFE_CODE_BLOCK_{uuid.uuid4().hex}__"
	        placeholders[token] = match.group(0)
	        return token

	    # --- PHASE 1: MASKING (protect code so e.g. <link> inside it is never touched) ---
	    # Fenced code blocks (``` ... ```)
	    content = re.sub(r'(```[\s\S]*?```)', mask_match, content)
	    # Inline code (` ... `); newlines are excluded to avoid over-matching broken syntax.
	    content = re.sub(r'(`[^`\n]+`)', mask_match, content)

	    # --- PHASE 2: FILTRATION ---
	    # Repeat until nothing changes: deleting one tag can splice its neighbours into
	    # a new dangerous tag (e.g. "<scr<iframe></iframe>ipt>"). Every pass that changes
	    # the text also shortens it, so the loop terminates.
	    while True:
	        cleaned = _strip_dangerous_markup(content)
	        if cleaned == content:
	            break
	        content = cleaned

	    # --- PHASE 3: UNMASKING ---
	    # Restore in reverse order of masking: an inline span masked later may contain
	    # the token of a fenced block masked earlier.
	    for token, original_code in reversed(list(placeholders.items())):
	        content = content.replace(token, original_code)

	    return content
	# A newline that directly follows text and is itself followed by a table header row
	# ("| a | b |") plus its separator row ("|---|---|"). Group 1 captures both rows.
	_TABLE_HEADER_WITHOUT_GAP = re.compile(r'(?<=\S)\n(\|.*\|.*\n\|[- :|]+\|)')


	def fix_markdown_tables(content: str) -> str:
	    """
	    Heals a common Markdown table issue before parsing: a table header that
	    directly follows a line of text (no blank line in between) gets a blank
	    line inserted before it, which fixes the "missing table" bug.

	    Only header rows that start with a pipe and are followed by a separator
	    row are affected; all other text is returned unchanged.

	    Example: "text\\n| Header |\\n|---|" becomes "text\\n\\n| Header |\\n|---|".
	    """
	    return _TABLE_HEADER_WITHOUT_GAP.sub(r'\n\n\1', content)
	def generate_filename(language: str) -> str:
	    """Generate a safe, timestamped PDF filename tagged with the language code.

	    The result is placed in an HTTP header by the caller, so every character
	    of *language* other than ASCII letters, digits, "_" and "-" is dropped.

	    Returns:
	        A name of the form "NeuralStream_Export_<YYYYmmdd_HHMMSS>_<language>.pdf".
	    """
	    safe_language = re.sub(r'[^A-Za-z0-9_-]', '', language)
	    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
	    return f"NeuralStream_Export_{timestamp}_{safe_language}.pdf"

	def validate_content_size(content_length: int) -> bool:
	    """Return True when *content_length* does not exceed MAX_CONTENT_LENGTH."""
	    exceeds_limit = content_length > MAX_CONTENT_LENGTH
	    return not exceeds_limit

	# ==================== API ENDPOINT ====================
	def _render_messages_to_html(messages):
	    """Sanitize each chat message and convert its Markdown content to HTML."""
	    rendered = []
	    for msg in messages:
	        # Order matters: sanitize the RAW input first, then repair tables, then render.
	        cleaned = sanitize_content(msg.get('content', ''))
	        html_content = markdown.markdown(
	            fix_markdown_tables(cleaned),
	            extensions=['fenced_code', 'tables', 'sane_lists', 'nl2br'],
	        )
	        rendered.append({
	            'role': msg.get('role', 'unknown'),
	            'content_html': html_content,
	        })
	    return rendered


	@app.post("/api/export/pdf")
	async def export_pdf_endpoint(request: ExportRequest):
	    """Export chat conversation to PDF with comprehensive multilingual support.

	    Steps: size check -> sanitize and Markdown-render every message -> fill
	    PDF_HTML_TEMPLATE -> load the HTML in a page from the browser pool -> wait
	    for fonts and highlight.js -> post-process code blocks -> print to PDF.

	    Args:
	        request: Validated export payload (messages and language).

	    Returns:
	        A Response carrying the PDF bytes as an attachment with caching disabled.

	    Raises:
	        HTTPException: 413 if the combined content exceeds MAX_CONTENT_LENGTH;
	            408 if an asyncio timeout fires while waiting on the page; 500 for
	            Playwright errors and for any other failure.
	    """
	    # The request model only checks that "content" exists, so it may be any JSON
	    # value. Measure its string form — the same form sanitize_content() works on —
	    # instead of calling len() on a value that may not support it.
	    total_content = sum(len(str(msg.get('content', ''))) for msg in request.messages)
	    if not validate_content_size(total_content):
	        raise HTTPException(
	            status_code=413,
	            detail=f"Content too large. Max allowed: {MAX_CONTENT_LENGTH} characters"
	        )

	    page = None
	    try:
	        # 1. Process messages to HTML
	        processed_msgs = _render_messages_to_html(request.messages)

	        # 2. Prepare HTML template
	        font_family = get_font_for_language(request.language)
	        template = jinja_env.from_string(PDF_HTML_TEMPLATE)

	        full_html = template.render(
	            messages=processed_msgs,
	            document_title="NeuralStream AI",
	            date=datetime.now().strftime("%B %d, %Y"),
	            language=request.language,
	            font_family=font_family,
	            text_direction=get_text_direction(request.language),
	            text_alignment=get_text_alignment(request.language),
	            title=f"Chat Export {datetime.now().strftime('%m/%d/%Y')}"
	        )

	        # 3. Generate PDF with Playwright
	        page = await browser_pool.get_page()
	        # Playwright timeouts are in milliseconds; derive them from the shared
	        # setting rather than repeating a literal.
	        timeout_ms = PDF_GENERATION_TIMEOUT * 1000
	        page.set_default_timeout(timeout_ms)
	        page.set_default_navigation_timeout(timeout_ms)

	        await page.set_content(full_html, wait_until='load')

	        # 4. Wait for fonts and code highlighting
	        await asyncio.wait_for(
	            page.evaluate('''async () => {
	                try {
	                    await Promise.race([
	                        document.fonts ? document.fonts.ready : Promise.resolve(),
	                        new Promise(resolve => setTimeout(resolve, 1500))
	                    ]);
	                    if (window.hljs) await new Promise(resolve => setTimeout(resolve, 150));
	                } catch (e) { console.log('Font loading error:', e); }
	            }'''),
	            timeout=5.0
	        )

	        # 5. Hydrate links in code blocks and protect short blocks from page splits.
	        # Raw string: the JavaScript regex contains backslashes that must reach the
	        # browser untouched.
	        await page.evaluate(r'''() => {
	            // A. LINK HYDRATION
	            const codeElements = document.querySelectorAll('pre code');
	            codeElements.forEach(block => {
	                const urlRegex = /(https?:\/\/[^\s<"']+)/g;
	                block.innerHTML = block.innerHTML.replace(urlRegex, (url) => {
	                    return `<a href="${url}" style="text-decoration:underline; color:inherit; pointer-events:all;">${url}</a>`;
	                });
	            });

	            // B. SMART LAYOUT PROTECTION
	            // Heuristic: If a block is shorter than ~1/3 of a page (approx 350px),
	            // assume it's a diagram or snippet that should NOT split.
	            const preBlocks = document.querySelectorAll('pre');
	            preBlocks.forEach(pre => {
	                if (pre.offsetHeight < 350) {
	                    pre.style.pageBreakInside = 'avoid';
	                    pre.style.breakInside = 'avoid'; // Modern standard
	                }
	            });
	        }''')

	        # 6. Generate PDF
	        pdf_bytes = await page.pdf(
	            format="A4",
	            margin={"top": "20mm", "bottom": "20mm", "left": "20mm", "right": "20mm"},
	            print_background=True,
	            display_header_footer=True,
	            footer_template='<div style="font-size:9px; margin:0 auto; color:#666; text-align:center;">Page <span class="pageNumber"></span> of <span class="totalPages"></span></div>',
	            header_template='<div></div>',
	            prefer_css_page_size=True
	        )

	        filename = generate_filename(request.language)

	        return Response(
	            content=pdf_bytes,
	            media_type="application/pdf",
	            headers={
	                "Content-Disposition": f"attachment; filename={filename}",
	                "Cache-Control": "no-cache, no-store, must-revalidate",
	                "Pragma": "no-cache",
	                "Expires": "0"
	            }
	        )

	    except asyncio.TimeoutError:
	        raise HTTPException(
	            status_code=408,
	            detail="PDF generation timed out. The document may be too complex."
	        )
	    except PlaywrightError as e:
	        raise HTTPException(
	            status_code=500,
	            detail=f"Browser error during PDF generation: {str(e)[:100]}"
	        )
	    except Exception as e:
	        print(f"PDF Export API Error: {str(e)}")
	        import traceback
	        traceback.print_exc()
	        raise HTTPException(
	            status_code=500,
	            detail="Internal server error during PDF generation. Please try again."
	        )
	    finally:
	        # Always release the page, even on failure; closing errors are ignored.
	        if page:
	            with suppress(Exception):
	                await page.close()

	# ==================== ANT-EDITOR DOCUMENT EXPORT ====================
	class EditorExportRequest(BaseModel):
	    """Request model for AbWrite document PDF export"""

	    html: str = Field(..., description="HTML content from TipTap editor.getHTML()")
	    title: str = Field(default="Untitled Document", description="Document title")
	    language: str = Field(default="en", description="ISO 639-1 language code for font selection")
	    password: Optional[str] = Field(default=None, description="Optional password to encrypt the PDF")
	    watermark_text: Optional[str] = Field(default=None, description="Optional watermark text to overlay on every page")

	    @field_validator('html')
	    @classmethod
	    def validate_html(cls, v: str) -> str:
	        """Require non-blank HTML of at most 10,000,000 characters.

	        Raises:
	            ValueError: If the value is empty/whitespace-only or too long.
	        """
	        has_text = bool(v) and bool(v.strip())
	        if not has_text:
	            raise ValueError('HTML content cannot be empty')
	        # Generous cap — documents with embedded base64 images are large.
	        max_html_length = 10_000_000
	        if len(v) > max_html_length:
	            raise ValueError('HTML content too large (max 10MB)')
	        return v

	    @field_validator('language')
	    @classmethod
	    def validate_language(cls, v: str) -> str:
	        """Lower-case the code and keep its first two characters.

	        Raises:
	            ValueError: If the value is not a string of at least two characters.
	        """
	        if isinstance(v, str) and len(v) >= 2:
	            return v.lower()[:2]
	        raise ValueError('Language must be a valid ISO code')

	# Jinja2 template for the editor-document export page. Variables read by the template:
	#   language, title, text_direction, text_alignment,
	#   is_system_font  when false, the language font is loaded from Google Fonts,
	#   font_url        Google Fonts `family=` parameter for that font,
	#   font_name       CSS family name used as a fallback after Georgia,
	#   watermark_text  optional overlay text (block is omitted when empty),
	#   content         the document HTML, emitted unescaped via `| safe`.
	EDITOR_PDF_TEMPLATE = """
	<!DOCTYPE html>
	<html lang="{{ language }}">
	<head>
	  <meta charset="UTF-8">
	  <meta name="viewport" content="width=device-width, initial-scale=1.0">
	  <title>{{ title }}</title>

	  {% if not is_system_font %}
	  <link rel="preconnect" href="https://fonts.googleapis.com">
	  <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
	  <link href="https://fonts.googleapis.com/css2?family={{ font_url }}&display=swap" rel="stylesheet">
	  {% endif %}

	  <!-- Load all editor Google Fonts for user-applied font-family inline styles -->
	  <link href="https://fonts.googleapis.com/css2?family=Amatic+SC:wght@400;700&family=Bebas+Neue&family=Caveat:wght@400;700&family=Comic+Neue:wght@400;700&family=Cookie&family=Courier+Prime:wght@400;700&family=Dancing+Script:wght@400;700&family=Great+Vibes&family=Indie+Flower&family=Kaushan+Script&family=Lato:wght@300;400;700&family=Libre+Baskerville:ital,wght@0,400;0,700;1,400&family=Lora:ital,wght@0,400;0,600;1,400&family=Merriweather:wght@300;700&family=Montserrat:wght@400;600;800&family=Open+Sans:wght@400;600&family=Oswald:wght@400;500&family=Pacifico&family=Patrick+Hand&family=Permanent+Marker&family=Playfair+Display:wght@400;700&family=Poppins:wght@300;400;600&family=Roboto:wght@300;400;700&family=Sacramento&family=Shadows+Into+Light&family=Tinos:ital,wght@0,400;0,700;1,400&display=swap" rel="stylesheet">

	  <style>
	    *, *::before, *::after {
	      box-sizing: border-box;
	      margin: 0;
	      padding: 0;
	    }

	    body {
	      /* Georgia first = matches the editor's default document font.
	         Language-specific Noto font as fallback = auto-activates for non-Latin
	         scripts (Hindi, Arabic, etc.) because Georgia lacks those glyphs. */
	      font-family: 'Georgia', '{{ font_name }}', 'Times New Roman', serif;
	      color: #1a1a2e;
	      direction: {{ text_direction }};
	      font-size: 13pt;
	      line-height: 1.6;
	      -webkit-print-color-adjust: exact !important;
	      print-color-adjust: exact !important;
	      -webkit-font-smoothing: antialiased;
	      -moz-osx-font-smoothing: grayscale;
	      text-rendering: optimizeLegibility;
	      font-feature-settings: 'kern' 1, 'liga' 1;
	    }

	    /* ── Headings (matched to editor) ── */
	    h1 {
	      font-size: 26pt;
	      font-weight: 700;
	      margin: 5pt 0 3pt 0;
	      line-height: 1.2;
	      color: #1a1a2e;
	      letter-spacing: -0.015em;
	    }
	    h2 {
	      font-size: 20pt;
	      font-weight: 600;
	      margin: 6pt 0 3pt 0;
	      line-height: 1.25;
	      color: #1a1a2e;
	      letter-spacing: -0.01em;
	    }
	    h3 {
	      font-size: 16pt;
	      font-weight: 600;
	      margin: 6pt 0 2pt 0;
	      line-height: 1.3;
	      color: #2a2a3e;
	    }
	    h4 {
	      font-size: 13pt;
	      font-weight: 600;
	      margin: 6pt 0 1pt 0;
	      color: #2a2a3e;
	    }
	    h5 {
	      font-size: 11pt;
	      font-weight: 600;
	      margin: 6pt 0 1pt 0;
	      color: #3a3a4e;
	      text-transform: uppercase;
	      letter-spacing: 0.04em;
	    }
	    h6 {
	      font-size: 10pt;
	      font-weight: 600;
	      margin: 6pt 0 1pt 0;
	      color: #4a4a5e;
	      text-transform: uppercase;
	      letter-spacing: 0.05em;
	    }
	    h1, h2, h3, h4, h5, h6 { page-break-after: avoid; }

	    /* ── Paragraphs ── */
	    p {
	      margin: 0 0 6pt 0;
	      orphans: 3;
	      widows: 3;
	    }
	    p:last-child { margin-bottom: 0; }

	    /* ── Links (matched to editor #4a69bd) ── */
	    a {
	      color: #4a69bd;
	      text-decoration: underline;
	      text-decoration-color: rgba(74, 105, 189, 0.4);
	      text-underline-offset: 2px;
	    }

	    /* ── Lists ── */
	    ul, ol { padding-left: 22pt; margin: 5pt 0; }
	    /* Nested unordered list styles (disc → circle → square) */
	    ul { list-style: disc; }
	    ul ul { list-style: circle; }
	    ul ul ul { list-style: square; }
	    /* Nested ordered list styles (decimal → lower-alpha → lower-roman) */
	    ol { list-style: decimal; }
	    ol ol { list-style: lower-alpha; }
	    ol ol ol { list-style: lower-roman; }
	    li { margin: 2pt 0; }
	    li p { margin: 0; }
	    ul[data-type="taskList"] { list-style: none; padding-left: 0; }
	    ul[data-type="taskList"] li { display: flex; align-items: flex-start; gap: 6pt; }
	    ul[data-type="taskList"] li[data-checked="true"] p {
	      text-decoration: line-through;
	      color: #999;
	    }

	    /* ── Tables (matched to editor) ── */
	    table {
	      width: 100%;
	      border-collapse: collapse;
	      margin: 10pt 0;
	      font-size: 12pt;
	      page-break-inside: auto;
	      table-layout: fixed;
	      word-wrap: break-word;
	    }
	    th, td {
	      border: 1px solid #ddd;
	      padding: 6pt 9pt;
	      text-align: {{ text_alignment }};
	      vertical-align: top;
	    }
	    th {
	      background: #f5f5fa;
	      font-weight: 600;
	      text-align: left;
	    }
	    tr { page-break-inside: avoid; }

	    /* ── Code blocks (dark theme, matched to editor) ── */
	    pre {
	      background: #1e1e2e;
	      color: #e8e8ed;
	      border-radius: 6pt;
	      padding: 12pt;
	      margin: 10pt 0;
	      overflow-x: auto;
	      white-space: pre-wrap;
	      word-wrap: break-word;
	      font-size: 11pt;
	      line-height: 1.5;
	      page-break-inside: auto;
	      orphans: 3;
	      widows: 3;
	    }
	    pre code {
	      display: block;
	      background: transparent;
	      padding: 0;
	      font-family: 'Consolas', 'Fira Code', ui-monospace, SFMono-Regular, Menlo, Monaco, monospace;
	      font-size: 11pt;
	      color: #e8e8ed;
	      white-space: pre-wrap;
	      word-wrap: break-word;
	    }

	    /* ── Inline code (matched to editor) ── */
	    code {
	      font-family: 'Consolas', 'Fira Code', ui-monospace, SFMono-Regular, Menlo, Monaco, monospace;
	      font-size: 0.9em;
	      background: #f0f0f5;
	      padding: 2pt 4pt;
	      border-radius: 3pt;
	      color: #d63384;
	    }

	    /* ── Blockquotes (matched to editor — purple accent, italic) ── */
	    blockquote {
	      border-left: 3pt solid #6c5ce7;
	      padding: 6pt 0 6pt 12pt;
	      margin: 10pt 0;
	      color: #444;
	      font-style: italic;
	      background: rgba(108, 92, 231, 0.03);
	    }

	    /* ── Horizontal rules ── */
	    hr {
	      border: none;
	      border-top: 1pt solid #e0e0e5;
	      margin: 18pt 0;
	    }

	    /* ── Images ── */
	    img {
	      max-width: 100%;
	      height: auto;
	      display: block;
	      margin: 8pt auto;
	      border-radius: 4pt;
	      page-break-inside: avoid;
	    }

	    /* ── Image-Text Block (float-based text wrapping) ── */
	    .image-text-block {
	      overflow: hidden;
	      margin: 8pt 0;
	      page-break-inside: auto;
	    }
	    .image-text-block-image {
	      max-width: 45%;
	      line-height: 0;
	    }
	    .image-text-block[data-image-position="left"] .image-text-block-image {
	      float: left;
	      margin: 0 16px 8px 0;
	    }
	    .image-text-block[data-image-position="right"] .image-text-block-image {
	      float: right;
	      margin: 0 0 8px 16px;
	    }
	    .image-text-block-image img { max-width: 100%; height: auto; margin: 0; }
	    /* Flip support */
	    .image-text-block[data-flip-h="true"] .image-text-block-image img { transform: scaleX(-1); }
	    .image-text-block[data-flip-v="true"] .image-text-block-image img { transform: scaleY(-1); }
	    .image-text-block[data-flip-h="true"][data-flip-v="true"] .image-text-block-image img { transform: scaleX(-1) scaleY(-1); }
	    .image-text-block-content { }

	    /* ── Resizable Image Wrapper (from editor's ResizableImage extension) ── */
	    .resizable-image-wrapper {
	      width: fit-content;
	      max-width: 100%;
	      margin: 8pt 0;
	      page-break-inside: avoid;
	    }
	    .resizable-image-wrapper img {
	      display: block;
	      max-width: 100%;
	      height: auto;
	      border-radius: 4pt;
	      margin: 0; /* parent wrapper handles margins */
	    }
	    /* Preserve explicit width/height from editor resize handles */
	    .resizable-image-wrapper img[width] {
	      width: attr(width px);
	    }
	    /* Alignment variants */
	    .resizable-image-align-left { margin-left: 0; margin-right: auto; }
	    .resizable-image-align-center { margin-left: auto; margin-right: auto; }
	    .resizable-image-align-right { margin-left: auto; margin-right: 0; }
	    /* Flip support for standalone images */
	    .resizable-image-wrapper[data-flip-h="true"] img { transform: scaleX(-1); }
	    .resizable-image-wrapper[data-flip-v="true"] img { transform: scaleY(-1); }
	    .resizable-image-wrapper[data-flip-h="true"][data-flip-v="true"] img { transform: scaleX(-1) scaleY(-1); }

	    /* ── Highlights (warm yellow, matched to editor) ── */
	    mark {
	      background: #fef3c7;
	      padding: 1pt 2pt;
	      border-radius: 2pt;
	    }

	    /* ── Text formatting ── */
	    s { text-decoration: line-through; color: #999; }
	    sub { font-size: 0.75em; }
	    sup { font-size: 0.75em; }

	    /* ── PDF page setup ── */
	    @page {
	      size: A4;
	      margin: 25mm 20mm 25mm 20mm;
	    }

	    @media print {
	      body {
	        -webkit-print-color-adjust: exact;
	        print-color-adjust: exact;
	      }
	      pre {
	        background: #1e1e2e !important;
	        color: #e8e8ed !important;
	      }
	    }
	  </style>
	</head>
	<body>
	  {% if watermark_text %}
	  <div style="
	    position: fixed;
	    top: 0; left: 0; right: 0; bottom: 0;
	    display: flex;
	    align-items: center;
	    justify-content: center;
	    pointer-events: none;
	    z-index: 9999;
	  ">
	    <div style="
	      font-size: 72pt;
	      font-weight: 800;
	      color: rgba(0, 0, 0, 0.06);
	      transform: rotate(-35deg);
	      white-space: nowrap;
	      user-select: none;
	      letter-spacing: 0.08em;
	      text-transform: uppercase;
	    ">{{ watermark_text }}</div>
	  </div>
	  {% endif %}
	  {{ content | safe }}

	  <script>
	    // Wait for fonts to load
	    (function() {
	      if (document.fonts && document.fonts.ready) {
	        Promise.race([
	          document.fonts.ready,
	          new Promise(resolve => setTimeout(resolve, 3000))
	        ]).catch(() => {});
	      }
	    })();
	  </script>
	</body>
	</html>
	"""

	@app.post("/api/editor/export-pdf")
	async def editor_export_pdf(request: EditorExportRequest):
	    """Export Ant-Editor document content to PDF via Playwright.

	    Pipeline: pick the font for ``request.language``, render
	    ``request.html`` into ``EDITOR_PDF_TEMPLATE``, load the result in a page
	    obtained from ``browser_pool``, wait for fonts and images, print an A4
	    PDF, optionally encrypt it with ``request.password``, and return it as a
	    download.

	    Args:
	        request: Export payload; this handler reads ``language``, ``html``,
	            ``title``, ``watermark_text`` and ``password`` from it.

	    Returns:
	        A ``Response`` with media type ``application/pdf`` and a
	        ``Content-Disposition`` header carrying both an ASCII ``filename``
	        and an RFC 5987 ``filename*`` value.

	    Raises:
	        HTTPException: 408 when an ``asyncio.wait_for`` step times out,
	            500 on a Playwright error or any other failure.
	    """
	    page = None
	    try:
	        # 1. Determine font settings (fall back to the English entry).
	        lang = request.language
	        font_info = MULTILINGUAL_FONTS.get(lang, MULTILINGUAL_FONTS['en'])
	        font_name = font_info[0]
	        font_url = font_info[1]
	        # The third element is optional in the font table.
	        is_system_font = font_info[2] if len(font_info) > 2 else False

	        # 2. Render HTML template.
	        # NOTE(review): request.html appears to be emitted unescaped by the
	        # template, so the headless browser will load whatever markup and
	        # external resources the caller supplies — confirm this endpoint is
	        # only reachable by trusted clients or sanitize upstream.
	        template = jinja_env.from_string(EDITOR_PDF_TEMPLATE)
	        full_html = template.render(
	            content=request.html,
	            title=request.title,
	            language=lang,
	            font_name=font_name,
	            font_url=font_url,
	            is_system_font=is_system_font,
	            text_direction=get_text_direction(lang),
	            text_alignment=get_text_alignment(lang),
	            watermark_text=request.watermark_text or '',
	        )

	        # 3. Generate PDF via Playwright
	        page = await browser_pool.get_page()
	        page.set_default_timeout(30000)
	        page.set_default_navigation_timeout(30000)

	        await page.set_content(full_html, wait_until='networkidle')

	        # 4. Wait for fonts to load (JS side gives up after 2s, Python
	        #    side after 5s).
	        await asyncio.wait_for(
	            page.evaluate('''async () => {
	                try {
	                    await Promise.race([
	                        document.fonts ? document.fonts.ready : Promise.resolve(),
	                        new Promise(resolve => setTimeout(resolve, 2000))
	                    ]);
	                } catch (e) {}
	            }'''),
	            timeout=5.0
	        )

	        # 5. Wait for all images to finish loading (base64 + external URLs).
	        #    A failed image resolves too, so one broken URL does not block.
	        await asyncio.wait_for(
	            page.evaluate('''() => {
	                const imgs = document.querySelectorAll('img');
	                return Promise.all(Array.from(imgs).map(img => {
	                    if (img.complete) return Promise.resolve();
	                    return new Promise(resolve => {
	                        img.onload = resolve;
	                        img.onerror = resolve;
	                    });
	                }));
	            }'''),
	            timeout=10.0
	        )

	        # 6. Apply explicit width from HTML attributes (CSS attr() fallback)
	        await page.evaluate('''() => {
	            document.querySelectorAll('.resizable-image-wrapper img').forEach(img => {
	                const w = img.getAttribute('width');
	                if (w) {
	                    img.style.width = w + (w.includes('%') ? '' : 'px');
	                    img.style.maxWidth = '100%';
	                    img.style.height = 'auto';
	                }
	            });
	        }''')

	        # 7. Smart layout: keep short code blocks together
	        await page.evaluate('''() => {
	            const preBlocks = document.querySelectorAll('pre');
	            preBlocks.forEach(pre => {
	                if (pre.offsetHeight < 400) {
	                    pre.style.pageBreakInside = 'avoid';
	                    pre.style.breakInside = 'avoid';
	                }
	            });
	        }''')

	        # 8. Generate PDF bytes
	        pdf_bytes = await page.pdf(
	            format="A4",
	            scale=1,
	            margin={"top": "25mm", "bottom": "25mm", "left": "20mm", "right": "20mm"},
	            print_background=True,
	            display_header_footer=True,
	            footer_template='<div style="font-size:9px; margin:0 auto; color:#999; text-align:center; width:100%;">Page <span class="pageNumber"></span> of <span class="totalPages"></span></div>',
	            header_template='<div></div>',
	            prefer_css_page_size=True,
	        )

	        # 9. Encrypt PDF with password if provided
	        if request.password:
	            import pikepdf
	            encrypted_buf = io.BytesIO()
	            # The context manager closes the source PDF even if save()
	            # raises, so the handle is never leaked on the error path.
	            with pikepdf.open(io.BytesIO(pdf_bytes)) as src:
	                src.save(
	                    encrypted_buf,
	                    encryption=pikepdf.Encryption(
	                        owner=request.password,
	                        user=request.password,
	                        R=6,  # AES-256
	                    ),
	                )
	            pdf_bytes = encrypted_buf.getvalue()

	        # 10. Generate safe filename — ASCII-only for HTTP header safety
	        # re.ASCII ensures \w matches only [a-zA-Z0-9_], preventing
	        # Unicode chars that Starlette can't encode as latin-1 headers.
	        safe_title = re.sub(r'[^\w\s-]', '', request.title, flags=re.ASCII)[:50].strip() or 'Document'
	        safe_title = re.sub(r'\s+', '_', safe_title)
	        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
	        filename = f"{safe_title}_{timestamp}.pdf"

	        # Build Content-Disposition with RFC 5987 filename* for Unicode display.
	        cd_value = f'attachment; filename="{filename}"'
	        # filename* is always appended (percent-encoded UTF-8) so browsers
	        # can show the real Unicode name while the ASCII fallback stays safe.
	        # Only characters that are invalid in file names are stripped here.
	        unicode_title = re.sub(r'[<>:"/\\\|?*]', '', request.title)[:80].strip() or 'Document'
	        unicode_title = re.sub(r'\s+', '_', unicode_title)
	        unicode_filename = f"{unicode_title}_{timestamp}.pdf"
	        encoded_unicode = quote(unicode_filename, safe='')
	        cd_value += f"; filename*=UTF-8''{encoded_unicode}"

	        return Response(
	            content=pdf_bytes,
	            media_type="application/pdf",
	            headers={
	                "Content-Disposition": cd_value,
	                "Cache-Control": "no-cache, no-store, must-revalidate",
	            }
	        )

	    except asyncio.TimeoutError:
	        # Raised by the asyncio.wait_for() guards above.
	        # NOTE(review): Playwright's own timeout error presumably derives
	        # from PlaywrightError and would land in the next handler (500) —
	        # confirm whether navigation timeouts should also map to 408.
	        raise HTTPException(status_code=408, detail="PDF generation timed out")
	    except PlaywrightError as e:
	        # Truncate the browser message so the response stays small.
	        raise HTTPException(status_code=500, detail=f"Browser error: {str(e)[:200]}")
	    except Exception as e:
	        print(f"Editor PDF Export Error: {str(e)}")
	        import traceback
	        traceback.print_exc()
	        raise HTTPException(status_code=500, detail="PDF generation failed")
	    finally:
	        # Always hand the page back; closing is best-effort.
	        if page:
	            with suppress(Exception):
	                await page.close()

	# Health check endpoint
	@app.get("/health")
	async def health():
	    """Report service liveness for load balancers and monitoring.

	    Returns:
	        A dict with a fixed status, the current local time in ISO 8601
	        format, and the service name.
	    """
	    checked_at = datetime.now().isoformat()
	    payload = {
	        "status": "healthy",
	        "timestamp": checked_at,
	        "service": "pdf-exporter",
	    }
	    return payload

	# Test endpoint (for debugging)
	@app.post("/api/export/html")
	async def export_html_endpoint(request: ExportRequest):
	    """Render the chat export template and return the raw HTML.

	    Debugging aid: each message's ``content`` is converted from Markdown to
	    HTML, the messages are rendered through ``PDF_HTML_TEMPLATE``, and the
	    resulting page is returned as ``text/html`` instead of being printed
	    to PDF.
	    """
	    lang = request.language

	    # Convert every message body from Markdown; missing fields fall back
	    # to an empty body and an 'unknown' role.
	    rendered_messages = [
	        {
	            'role': entry.get('role', 'unknown'),
	            'content_html': markdown.markdown(
	                str(entry.get('content', '')),
	                extensions=['fenced_code', 'tables', 'sane_lists'],
	            ),
	        }
	        for entry in request.messages
	    ]

	    font_family = get_font_for_language(lang)
	    preview_template = jinja_env.from_string(PDF_HTML_TEMPLATE)

	    context = {
	        'messages': rendered_messages,
	        'document_title': "NeuralStream AI (HTML Preview)",
	        'date': datetime.now().strftime("%B %d, %Y"),
	        'language': lang,
	        'font_family': font_family,
	        'text_direction': get_text_direction(lang),
	        'text_alignment': get_text_alignment(lang),
	        'title': "Chat HTML Preview",
	    }
	    page_html = preview_template.render(**context)

	    return Response(
	        content=page_html,
	        media_type="text/html",
	        headers={"X-Debug": "html-preview"},
	    )

	# Startup/Shutdown events
	@app.on_event("startup")
	async def startup_event():
	    """Initialize browser pool on startup to warm up resources.

	    Requests one page from ``browser_pool`` so that browser launch problems
	    surface at deploy time rather than on the first user request, then
	    closes that page again. Failures are logged, not raised, so the server
	    still starts and individual requests can retry.
	    """
	    print("🚀 Pre-warming browser pool...")
	    try:
	        # Launch browser immediately so we catch any errors at deploy time
	        # instead of failing on the first user request.
	        warmup_page = await browser_pool.get_page()
	    except Exception as e:
	        print(f"⚠️ Warning: Failed to pre-warm browser pool: {e}")
	        # We don't raise here so the server still starts; individual requests will retry.
	        return

	    # The page was only needed to force the launch; close it (best-effort)
	    # so the warm-up does not leave an open page behind for the lifetime of
	    # the process.
	    with suppress(Exception):
	        await warmup_page.close()
	    print("✅ Browser pool warmed up successfully")

	@app.on_event("shutdown")
	async def shutdown_event():
	    """Close the shared browser pool when the application shuts down."""
	    pool = browser_pool
	    await pool.close()

	if __name__ == "__main__":
	    # Playwright needs the Proactor event loop on Windows, so force that
	    # policy before uvicorn creates its loop.
	    running_on_windows = sys.platform == 'win32'
	    if running_on_windows:
	        proactor_policy = asyncio.WindowsProactorEventLoopPolicy()
	        asyncio.set_event_loop_policy(proactor_policy)

	    print("🚀 Starting NeuralStream PDF Backend...")
	    # Listen on all interfaces, port 7860, without the auto-reloader.
	    uvicorn.run(
	        app,
	        host="0.0.0.0",
	        port=7860,
	        reload=False,
	    )