""" Convert knowledge.md → interactive index.html for each Group folder. Uses Tailwind CSS (CDN), Mermaid.js, Dark Mode, collapsible sections. """ import re import html import sys from pathlib import Path WORKSPACE = Path(r"d:\WORK\AI_ML小組新進人員教育訓練簡報") GROUPS = [ ("Group1_Weather_Climate_Prediction", "天氣與氣候預報模型開發", "🌦️"), ("Group2_Marine_Climate_Tech", "海象與氣候應用技術", "🌊"), ("Group3_Warning_Forecasting", "天氣預報與預警應用技術", "⚡"), ("Group4_Earthquake_Warning", "地震預警應用技術", "🔔"), ("Group5_Weather_Monitoring", "天氣監測與應用技術", "📡"), ("Group7_IT_OpenSource", "資訊系統與開源環境", "🖥️"), ("Group8_AI_Talent_Cultivation", "AI 人才培育", "🎓"), ] HTML_TEMPLATE = r""" {title} — CWA AI Knowledge Hub

{content}

""" def md_to_html_content(md_text: str) -> str: """Convert markdown to HTML content (lightweight parser).""" lines = md_text.split('\n') html_parts = [] in_table = False in_code = False in_mermaid = False in_list = False in_blockquote = False code_lang = "" mermaid_lines = [] table_rows = [] list_items = [] bq_lines = [] def flush_table(): nonlocal table_rows, in_table if not table_rows: return out = '

\n' for ri, row in enumerate(table_rows): cells = [c.strip() for c in row.strip('|').split('|')] tag = 'th' if ri == 0 else 'td' if ri == 1 and all(set(c.strip()) <= set('-: ') for c in cells): continue out += '' + ''.join(f'<{tag}>{inline(c)}' for c in cells) + '\n' out += '

\n' html_parts.append(out) table_rows = [] in_table = False def flush_list(): nonlocal list_items, in_list if not list_items: return html_parts.append('

{inline(li)}

\n') list_items = [] in_list = False def flush_blockquote(): nonlocal bq_lines, in_blockquote if not bq_lines: return content = '
'.join(inline(l) for l in bq_lines) html_parts.append(f'

{content}

\n') bq_lines = [] in_blockquote = False def inline(text: str) -> str: """Handle inline markdown: bold, italic, code, links, KaTeX.""" t = html.escape(text) # bold t = re.sub(r'\*\*(.+?)\*\*', r'\1', t) # italic t = re.sub(r'(?\1', t) # inline code t = re.sub(r'`([^`]+)`', r'\1', t) # links t = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', r'\1', t) # strikethrough t = re.sub(r'~~(.+?)~~', r'\1', t) return t section_counter = 0 for line in lines: stripped = line.strip() # Code fence if stripped.startswith('```'): if not in_code: lang = stripped[3:].strip() if lang == 'mermaid': in_mermaid = True mermaid_lines = [] else: in_code = True code_lang = lang html_parts.append(flush_list.__call__() or '') flush_list() flush_table() flush_blockquote() html_parts.append(f'

')
            else:
                if in_mermaid:
                    mermaid_content = '\n'.join(mermaid_lines)
                    html_parts.append(f'\n{mermaid_content}\n\n')
                    in_mermaid = False
                else:
                    html_parts.append('

\n') in_code = False continue if in_code: html_parts.append(html.escape(line) + '\n') continue if in_mermaid: mermaid_lines.append(line) continue # Table if '|' in stripped and stripped.startswith('|'): if not in_table: flush_list() flush_blockquote() in_table = True table_rows.append(stripped) continue elif in_table: flush_table() # List if re.match(r'^[-*+]\s', stripped) or re.match(r'^\d+\.\s', stripped): if not in_list: flush_table() flush_blockquote() in_list = True item = re.sub(r'^[-*+\d.]+\s*', '', stripped) list_items.append(item) continue elif in_list and stripped == '': flush_list() continue elif in_list and stripped: flush_list() # Blockquote if stripped.startswith('>'): if not in_blockquote: flush_table() flush_list() in_blockquote = True bq_lines.append(stripped.lstrip('> ')) continue elif in_blockquote: flush_blockquote() # Headings if stripped.startswith('# ') and not stripped.startswith('## '): flush_table(); flush_list(); flush_blockquote() text = stripped[2:] html_parts.append(f'

{inline(text)}

\n') continue if stripped.startswith('## '): flush_table(); flush_list(); flush_blockquote() text = stripped[3:] section_counter += 1 sid = f"section-{section_counter}" html_parts.append(f'''

{inline(text)}

\n''') continue if stripped.startswith('### '): flush_table(); flush_list(); flush_blockquote() text = stripped[4:] html_parts.append(f'

{inline(text)}

\n') continue if stripped.startswith('#### '): flush_table(); flush_list(); flush_blockquote() text = stripped[5:] html_parts.append(f'

{inline(text)}

\n') continue # Horizontal rule if stripped in ('---', '***', '___'): flush_table(); flush_list(); flush_blockquote() html_parts.append('

\n') continue # Empty line if stripped == '': continue # Paragraph html_parts.append(f'

{inline(stripped)}

\n') # Flush remaining flush_table() flush_list() flush_blockquote() return ''.join(html_parts) def build_html(folder_name: str, title: str, emoji: str): md_path = WORKSPACE / folder_name / "knowledge.md" if not md_path.exists(): print(f" SKIP {folder_name} (no knowledge.md)") return False md_text = md_path.read_text(encoding='utf-8') content = md_to_html_content(md_text) short_title = title page_html = HTML_TEMPLATE.format( title=title, short_title=short_title, emoji=emoji, content=content, ) out_path = WORKSPACE / folder_name / "index.html" out_path.write_text(page_html, encoding='utf-8') print(f" ✓ {out_path}") return True def main(): print("=== Phase 3: knowledge.md → index.html ===") count = 0 for folder, title, emoji in GROUPS: if build_html(folder, title, emoji): count += 1 print(f"\nDone: {count} HTML files generated.") if __name__ == "__main__": main()