""" Compile PentaNet_NeurIPS_Draft.md + figures into a single publication PDF. Uses WeasyPrint for HTML→PDF rendering. """ import markdown import os import re from weasyprint import HTML MD_FILE = 'PentaNet_NeurIPS_Draft.md' OUTPUT = 'paper/PentaNet_Technical_Report.pdf' with open(MD_FILE, 'r') as f: md_text = f.read() # --- Strip LaTeX math (WeasyPrint cannot render it) --- def delatex(text): """Convert LaTeX math notation to readable Unicode text.""" # Remove display math blocks $$ ... $$ first def replace_display(m): content = m.group(1).strip() # Common display equations content = content.replace('\\max', 'max') content = content.replace('\\min', 'min') content = content.replace('\\sum', 'Σ') content = content.replace('\\left(', '(').replace('\\right)', ')') content = content.replace('\\left\\{', '{').replace('\\right\\}', '}') content = content.replace('\\frac{1}{d}', '(1/d)') content = content.replace('\\frac{1}{n}', '(1/n)') content = content.replace('\\text{Round}', 'Round') content = content.replace('\\text{Clip}', 'Clip') content = content.replace('\\text{detach}', 'detach') content = content.replace('\\bar{W}', 'W̄') content = content.replace('\\gamma', 'γ') content = content.replace('\\epsilon', 'ε') content = content.replace('\\mathbb{R}', 'ℝ') content = content.replace('\\in', '∈') content = content.replace('\\times', '×') content = content.replace('\\cdot', '·') content = content.replace('\\pm', '±') content = content.replace('\\sigma', 'σ') content = content.replace('\\lambda', 'λ') content = content.replace('\\sim', '~') content = content.replace('\\approx', '≈') content = content.replace('\\log_2(3)', 'log₂(3)') content = content.replace('\\log_2(5)', 'log₂(5)') content = content.replace('\\leftrightarrow', '↔') content = content.replace('|', '|') content = re.sub(r'_\{([^}]+)\}', r'_\1', content) # subscripts content = re.sub(r'\^T', 'ᵀ', content) content = re.sub(r'\\[a-zA-Z]+', '', content) # remove remaining commands content = re.sub(r'[{}]', '', content) # remove braces return content text = re.sub(r'\$\$(.*?)\$\$', replace_display, text, flags=re.DOTALL) # Inline math $...$ def replace_inline(m): c = m.group(1) c = c.replace('\\{', '{').replace('\\}', '}') c = c.replace('\\pm', '±') c = c.replace('\\bar{W}', 'W̄') c = c.replace('\\gamma', 'γ') c = c.replace('\\epsilon', 'ε') c = c.replace('\\sigma', 'σ') c = c.replace('\\lambda', 'λ') c = c.replace('\\sim', '~') c = c.replace('\\approx', '≈') c = c.replace('\\log_2(3)', 'log₂(3)') c = c.replace('\\log_2(5)', 'log₂(5)') c = c.replace('\\leftrightarrow', '↔') c = c.replace('\\mathbb{R}', 'ℝ') c = c.replace('\\text{detach}', 'detach') c = c.replace('\\text{Round}', 'Round') c = c.replace('\\text{Clip}', 'Clip') c = c.replace('\\times', '×') c = c.replace('\\cdot', '·') c = re.sub(r'\^T', 'ᵀ', c) c = re.sub(r'_\{([^}]+)\}', r'_\1', c) c = re.sub(r'\\[a-zA-Z]+', '', c) c = re.sub(r'[{}]', '', c) return c text = re.sub(r'\$([^$]+?)\$', replace_inline, text) return text md_text = delatex(md_text) # Convert markdown to HTML html_body = markdown.markdown(md_text, extensions=['tables', 'fenced_code', 'codehilite']) # Inject figure references (replace placeholders or insert after relevant sections) # We insert Figure 1 after "4.2 Convergence Dynamics" and Figure 2 after "4.3 Weight Distribution" fig1_path = os.path.abspath('paper/figures/figure1_ppl_convergence.png') fig2_path = os.path.abspath('paper/figures/figure2_weight_distribution.png') fig1_html = f'''
Figure 1. Validation perplexity convergence on WikiText-103. Solid lines: PentaNet (pentanary). Dashed lines: BitNet (ternary). Three independent seeds per architecture. PentaNet consistently achieves lower PPL from iteration ~2,000 onward.
Figure 2. PentaNet quantized weight distribution over training (Seed 42). All five buckets maintain stable occupancy throughout 10,000 iterations, with ±2 states at ~11%. No collapse toward ternary is observed.