Prathamesh Sarjerao Vaidya committed on
Commit
aaaa4c6
·
1 Parent(s): 796f486

update main & check.yml for robust md to pdf conversion

Browse files
Files changed (2) hide show
  1. .github/workflows/check.yml +417 -56
  2. .github/workflows/main.yml +417 -57
.github/workflows/check.yml CHANGED
@@ -47,64 +47,425 @@ jobs:
47
  # Install weasyprint as fallback
48
  pip install weasyprint
49
 
50
- # Convert MD files to PDF
51
- - name: Convert MD to PDF
52
- run: |
53
- find . -name "*.md" -not -path "./.git/*" | while read file; do
54
- # Get the directory and filename
55
- dir="$(dirname "$file")"
56
- filename="$(basename "$file" .md)"
57
- pdf_path="$dir/$filename.pdf"
58
-
59
- echo "Converting $file to $pdf_path"
60
-
61
- # Convert MD to PDF with proper Unicode support and image handling
62
- pandoc "$file" \
63
- -o "$pdf_path" \
64
- --pdf-engine=xelatex \
65
- --variable mainfont="DejaVu Sans" \
66
- --variable sansfont="DejaVu Sans" \
67
- --variable monofont="DejaVu Sans Mono" \
68
- --include-in-header=<(echo '\usepackage{graphicx}') \
69
- --include-in-header=<(echo '\usepackage{float}') \
70
- --include-in-header=<(echo '\floatplacement{figure}{H}') \
71
- --resource-path="$dir" \
72
- --standalone \
73
- --toc \
74
- --number-sections \
75
- --highlight-style=github \
76
- -V geometry:margin=1in \
77
- -V colorlinks=true \
78
- -V linkcolor=blue \
79
- -V urlcolor=blue \
80
- -V toccolor=gray \
81
- --wrap=auto \
82
- --dpi=300 \
83
- --verbose || {
84
- echo "Error converting $file with xelatex, trying with weasyprint..."
85
- # Fallback to HTML->PDF conversion for complex documents
86
- pandoc "$file" \
87
- -t html5 \
88
- --standalone \
89
- --self-contained \
90
- --css=<(echo 'body{font-family:Arial,sans-serif;max-width:800px;margin:0 auto;padding:20px;line-height:1.6}img{max-width:100%;height:auto}pre{background:#f5f5f5;padding:10px;border-radius:5px;overflow-x:auto}') \
91
- -o "$dir/$filename.html"
92
 
93
- # Use weasyprint for HTML to PDF conversion
94
- weasyprint "$dir/$filename.html" "$pdf_path" || {
95
- echo "Both conversions failed for $file, skipping..."
96
- continue
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
  }
98
- rm -f "$dir/$filename.html"
99
- }
100
-
101
- if [ -f "$pdf_path" ]; then
102
- echo "Successfully converted $file to $pdf_path"
103
- else
104
- echo "Failed to convert $file"
105
- fi
106
- done
107
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
  # Set up Python for Google Drive upload
109
  - name: Set up Python
110
  uses: actions/setup-python@v4
 
47
  # Install weasyprint as fallback
48
  pip install weasyprint
49
 
50
+ # Improved MD to PDF Conversion with proper image handling and mermaid support
51
+ name: Convert Markdown to PDF
52
+
53
+ on:
54
+ push:
55
+ branches: [ main ]
56
+ pull_request:
57
+ branches: [ main ]
58
+
59
+ jobs:
60
+ convert-to-pdf:
61
+ runs-on: ubuntu-latest
62
+
63
+ steps:
64
+ - name: Checkout repository
65
+ uses: actions/checkout@v4
66
+
67
+ - name: Setup Python
68
+ uses: actions/setup-python@v4
69
+ with:
70
+ python-version: '3.11'
71
+
72
+ - name: Install system dependencies
73
+ run: |
74
+ sudo apt-get update
75
+ sudo apt-get install -y \
76
+ texlive-full \
77
+ texlive-xetex \
78
+ texlive-luatex \
79
+ pandoc \
80
+ librsvg2-bin \
81
+ python3-pip \
82
+ nodejs \
83
+ npm \
84
+ imagemagick \
85
+ ghostscript \
86
+ wkhtmltopdf
87
+
88
+ - name: Install Node.js dependencies for Mermaid
89
+ run: |
90
+ npm install -g @mermaid-js/mermaid-cli
91
+ npm install -g puppeteer
92
 
93
+ - name: Install Python dependencies
94
+ run: |
95
+ pip install --upgrade pip
96
+ pip install \
97
+ weasyprint \
98
+ markdown \
99
+ pymdown-extensions \
100
+ pillow \
101
+ cairosvg \
102
+ pdfkit
103
+
104
+ - name: Create LaTeX header for better image handling
105
+ run: |
106
+ cat > latex-header.tex << 'EOF'
107
+ \usepackage{graphicx}
108
+ \usepackage{float}
109
+ \usepackage{adjustbox}
110
+ \usepackage{caption}
111
+ \usepackage{subcaption}
112
+ \usepackage{geometry}
113
+ \usepackage{fancyhdr}
114
+ \usepackage{xcolor}
115
+ \usepackage{hyperref}
116
+
117
+ % Better image positioning and scaling
118
+ \floatplacement{figure}{H}
119
+ \renewcommand{\includegraphics}[2][]{\adjustbox{max width=\textwidth,center}{\oldincludegraphics[#1]{#2}}}
120
+ \let\oldincludegraphics\includegraphics
121
+
122
+ % Set margins
123
+ \geometry{margin=1in}
124
+
125
+ % Hyperlink colors
126
+ \hypersetup{
127
+ colorlinks=true,
128
+ linkcolor=blue,
129
+ urlcolor=blue,
130
+ citecolor=blue
131
  }
132
+ EOF
133
+
134
+ - name: Create enhanced CSS for HTML conversion
135
+ run: |
136
+ cat > styles.css << 'EOF'
137
+ body {
138
+ font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
139
+ max-width: 210mm;
140
+ margin: 0 auto;
141
+ padding: 20mm;
142
+ line-height: 1.6;
143
+ color: #333;
144
+ background: white;
145
+ }
146
+
147
+ img {
148
+ max-width: 100%;
149
+ height: auto;
150
+ display: block;
151
+ margin: 1em auto;
152
+ border-radius: 4px;
153
+ box-shadow: 0 2px 8px rgba(0,0,0,0.1);
154
+ }
155
+
156
+ pre {
157
+ background: #f8f9fa;
158
+ padding: 1em;
159
+ border-radius: 6px;
160
+ border-left: 4px solid #007acc;
161
+ overflow-x: auto;
162
+ font-family: 'Consolas', 'Monaco', 'Courier New', monospace;
163
+ font-size: 0.9em;
164
+ }
165
+
166
+ code {
167
+ background: #f1f3f4;
168
+ padding: 0.2em 0.4em;
169
+ border-radius: 3px;
170
+ font-family: 'Consolas', 'Monaco', 'Courier New', monospace;
171
+ font-size: 0.9em;
172
+ }
173
+
174
+ h1, h2, h3, h4, h5, h6 {
175
+ color: #2c3e50;
176
+ margin-top: 2em;
177
+ margin-bottom: 1em;
178
+ page-break-after: avoid;
179
+ }
180
+
181
+ h1 {
182
+ border-bottom: 3px solid #3498db;
183
+ padding-bottom: 0.5em;
184
+ }
185
+
186
+ h2 {
187
+ border-bottom: 2px solid #95a5a6;
188
+ padding-bottom: 0.3em;
189
+ }
190
+
191
+ table {
192
+ border-collapse: collapse;
193
+ width: 100%;
194
+ margin: 1em 0;
195
+ }
196
+
197
+ th, td {
198
+ border: 1px solid #ddd;
199
+ padding: 0.75em;
200
+ text-align: left;
201
+ }
202
+
203
+ th {
204
+ background-color: #f8f9fa;
205
+ font-weight: bold;
206
+ }
207
+
208
+ blockquote {
209
+ border-left: 4px solid #3498db;
210
+ margin: 1em 0;
211
+ padding: 0.5em 1em;
212
+ background: #f8f9fa;
213
+ border-radius: 0 4px 4px 0;
214
+ }
215
+
216
+ .mermaid-container {
217
+ text-align: center;
218
+ margin: 2em 0;
219
+ page-break-inside: avoid;
220
+ }
221
+
222
+ .mermaid-container img {
223
+ max-width: 100%;
224
+ height: auto;
225
+ }
226
+
227
+ @media print {
228
+ body {
229
+ margin: 0;
230
+ padding: 15mm;
231
+ }
232
+
233
+ img {
234
+ max-height: 80vh;
235
+ page-break-inside: avoid;
236
+ }
237
+
238
+ h1, h2, h3, h4, h5, h6 {
239
+ page-break-after: avoid;
240
+ }
241
+
242
+ pre, blockquote {
243
+ page-break-inside: avoid;
244
+ }
245
+ }
246
+ EOF
247
+
248
+ - name: Create preprocessing script
249
+ run: |
250
+ cat > preprocess_markdown.py << 'EOF'
251
+ #!/usr/bin/env python3
252
+ import re
253
+ import os
254
+ import sys
255
+ import subprocess
256
+ from pathlib import Path
257
+
258
+ def process_mermaid_diagrams(content, file_dir):
259
+ """Convert mermaid diagrams to images"""
260
+ mermaid_pattern = r'```mermaid\n(.*?)\n```'
261
+
262
+ def replace_mermaid(match):
263
+ mermaid_code = match.group(1)
264
+ # Create a unique filename for this diagram
265
+ diagram_hash = str(abs(hash(mermaid_code)))
266
+ mermaid_file = f"{file_dir}/mermaid_{diagram_hash}.mmd"
267
+ svg_file = f"{file_dir}/mermaid_{diagram_hash}.svg"
268
+ png_file = f"{file_dir}/mermaid_{diagram_hash}.png"
269
+
270
+ # Write mermaid code to file
271
+ with open(mermaid_file, 'w') as f:
272
+ f.write(mermaid_code)
273
+
274
+ try:
275
+ # Convert to SVG first
276
+ subprocess.run([
277
+ 'mmdc', '-i', mermaid_file, '-o', svg_file,
278
+ '--theme', 'default', '--backgroundColor', 'white'
279
+ ], check=True, capture_output=True)
280
+
281
+ # Convert SVG to PNG for better PDF compatibility
282
+ subprocess.run([
283
+ 'rsvg-convert', '-f', 'png', '-o', png_file,
284
+ '--width', '1200', '--height', '800', svg_file
285
+ ], check=True, capture_output=True)
286
+
287
+ # Clean up intermediate files
288
+ os.remove(mermaid_file)
289
+ if os.path.exists(svg_file):
290
+ os.remove(svg_file)
291
+
292
+ # Return markdown image syntax
293
+ return f'\n<div class="mermaid-container">\n\n![Architecture Diagram]({os.path.basename(png_file)})\n\n</div>\n'
294
+
295
+ except subprocess.CalledProcessError as e:
296
+ print(f"Error converting mermaid diagram: {e}")
297
+ return f'\n```\n{mermaid_code}\n```\n'
298
+ except Exception as e:
299
+ print(f"Unexpected error with mermaid: {e}")
300
+ return f'\n```\n{mermaid_code}\n```\n'
301
+
302
+ return re.sub(mermaid_pattern, replace_mermaid, content, flags=re.DOTALL)
303
+
304
+ def fix_image_paths(content, file_dir):
305
+ """Fix image paths and add proper sizing"""
306
+ # Pattern to match markdown images
307
+ img_pattern = r'!\[([^\]]*)\]\(([^)]+)\)'
308
+
309
+ def replace_image(match):
310
+ alt_text = match.group(1)
311
+ img_path = match.group(2)
312
+
313
+ # Handle relative paths
314
+ if not img_path.startswith(('http://', 'https://', '/')):
315
+ # Make path relative to the markdown file
316
+ abs_img_path = os.path.join(file_dir, img_path)
317
+ if os.path.exists(abs_img_path):
318
+ img_path = os.path.relpath(abs_img_path, file_dir)
319
+
320
+ # Add HTML img tag with better control
321
+ return f'<img src="{img_path}" alt="{alt_text}" style="max-width: 100%; height: auto; display: block; margin: 1em auto;" />'
322
+
323
+ # Also handle HTML img tags and ensure they have proper styling
324
+ content = re.sub(img_pattern, replace_image, content)
325
+
326
+ # Fix existing HTML img tags
327
+ content = re.sub(
328
+ r'<img\s+([^>]*?)\s*/?>',
329
+ lambda m: f'<img {m.group(1)} style="max-width: 100%; height: auto; display: block; margin: 1em auto;" />',
330
+ content
331
+ )
332
+
333
+ return content
334
+
335
+ def main():
336
+ if len(sys.argv) != 2:
337
+ print("Usage: python preprocess_markdown.py <markdown_file>")
338
+ sys.exit(1)
339
+
340
+ md_file = sys.argv[1]
341
+ file_dir = os.path.dirname(os.path.abspath(md_file))
342
+
343
+ with open(md_file, 'r', encoding='utf-8') as f:
344
+ content = f.read()
345
+
346
+ # Process mermaid diagrams
347
+ content = process_mermaid_diagrams(content, file_dir)
348
+
349
+ # Fix image paths and sizing
350
+ content = fix_image_paths(content, file_dir)
351
+
352
+ # Write processed content
353
+ processed_file = md_file.replace('.md', '_processed.md')
354
+ with open(processed_file, 'w', encoding='utf-8') as f:
355
+ f.write(content)
356
+
357
+ print(f"Processed file saved as: {processed_file}")
358
+ return processed_file
359
+
360
+ if __name__ == "__main__":
361
+ main()
362
+ EOF
363
+
364
+ chmod +x preprocess_markdown.py
365
+
366
+ - name: Convert MD to PDF with enhanced processing
367
+ run: |
368
+ find . -name "*.md" -not -path "./.git/*" | while read file; do
369
+ # Get the directory and filename
370
+ dir="$(dirname "$file")"
371
+ filename="$(basename "$file" .md)"
372
+ pdf_path="$dir/$filename.pdf"
373
+
374
+ echo "Processing $file..."
375
+
376
+ # Preprocess the markdown file
377
+ cd "$dir"
378
+ processed_file=$(python3 "$GITHUB_WORKSPACE/preprocess_markdown.py" "$(basename "$file")")
379
+
380
+ if [ ! -f "$processed_file" ]; then
381
+ echo "Preprocessing failed for $file, using original"
382
+ processed_file="$(basename "$file")"
383
+ fi
384
+
385
+ echo "Converting $processed_file to $pdf_path"
386
+
387
+ # Method 1: Try XeLaTeX with enhanced settings
388
+ pandoc "$processed_file" \
389
+ -o "$pdf_path" \
390
+ --pdf-engine=xelatex \
391
+ --include-in-header="$GITHUB_WORKSPACE/latex-header.tex" \
392
+ --variable mainfont="DejaVu Sans" \
393
+ --variable sansfont="DejaVu Sans" \
394
+ --variable monofont="DejaVu Sans Mono" \
395
+ --variable geometry:margin=1in \
396
+ --variable colorlinks=true \
397
+ --variable linkcolor=blue \
398
+ --variable urlcolor=blue \
399
+ --variable toccolor=gray \
400
+ --resource-path="$dir:$GITHUB_WORKSPACE" \
401
+ --standalone \
402
+ --toc \
403
+ --number-sections \
404
+ --highlight-style=github \
405
+ --wrap=auto \
406
+ --dpi=300 \
407
+ --verbose 2>/dev/null || {
408
+
409
+ echo "XeLaTeX failed, trying HTML->PDF conversion..."
410
+
411
+ # Method 2: HTML to PDF conversion with WeasyPrint
412
+ pandoc "$processed_file" \
413
+ -t html5 \
414
+ --standalone \
415
+ --embed-resources \
416
+ --css="$GITHUB_WORKSPACE/styles.css" \
417
+ --toc \
418
+ --number-sections \
419
+ --highlight-style=github \
420
+ -o "$dir/$filename.html"
421
+
422
+ if [ -f "$dir/$filename.html" ]; then
423
+ weasyprint "$dir/$filename.html" "$pdf_path" --presentational-hints || {
424
+ echo "WeasyPrint failed, trying wkhtmltopdf..."
425
+
426
+ # Method 3: wkhtmltopdf as final fallback
427
+ wkhtmltopdf \
428
+ --page-size A4 \
429
+ --margin-top 0.75in \
430
+ --margin-right 0.75in \
431
+ --margin-bottom 0.75in \
432
+ --margin-left 0.75in \
433
+ --encoding UTF-8 \
434
+ --no-outline \
435
+ --enable-local-file-access \
436
+ "$dir/$filename.html" "$pdf_path" || {
437
+ echo "All conversion methods failed for $file"
438
+ continue
439
+ }
440
+ }
441
+
442
+ # Clean up HTML file
443
+ rm -f "$dir/$filename.html"
444
+ fi
445
+ }
446
+
447
+ # Clean up processed file
448
+ if [ "$processed_file" != "$(basename "$file")" ]; then
449
+ rm -f "$processed_file"
450
+ fi
451
+
452
+ # Clean up generated mermaid images
453
+ rm -f mermaid_*.png mermaid_*.svg mermaid_*.mmd
454
+
455
+ if [ -f "$pdf_path" ]; then
456
+ echo "✅ Successfully converted $file to $pdf_path"
457
+ else
458
+ echo "❌ Failed to convert $file"
459
+ fi
460
+ done
461
+
462
+ - name: Upload PDF artifacts
463
+ uses: actions/upload-artifact@v4
464
+ with:
465
+ name: converted-pdfs
466
+ path: "**/*.pdf"
467
+ retention-days: 30
468
+
469
  # Set up Python for Google Drive upload
470
  - name: Set up Python
471
  uses: actions/setup-python@v4
.github/workflows/main.yml CHANGED
@@ -35,65 +35,425 @@ jobs:
35
  # Install weasyprint as fallback
36
  pip install weasyprint
37
 
38
- # Convert MD files to PDF
39
- - name: Convert MD to PDF
40
- run: |
41
- find . -name "*.md" -not -path "./.git/*" | while read file; do
42
- # Get the directory and filename
43
- dir="$(dirname "$file")"
44
- filename="$(basename "$file" .md)"
45
- pdf_path="$dir/$filename.pdf"
46
-
47
- echo "Converting $file to $pdf_path"
48
-
49
- # Convert MD to PDF with proper Unicode support and image handling
50
- pandoc "$file" \
51
- -o "$pdf_path" \
52
- --pdf-engine=xelatex \
53
- --variable mainfont="DejaVu Sans" \
54
- --variable sansfont="DejaVu Sans" \
55
- --variable monofont="DejaVu Sans Mono" \
56
- --include-in-header=<(echo '\usepackage{graphicx}') \
57
- --include-in-header=<(echo '\usepackage{float}') \
58
- --include-in-header=<(echo '\floatplacement{figure}{H}') \
59
- --resource-path="$dir" \
60
- --standalone \
61
- --toc \
62
- --number-sections \
63
- --highlight-style=github \
64
- -V geometry:margin=1in \
65
- -V colorlinks=true \
66
- -V linkcolor=blue \
67
- -V urlcolor=blue \
68
- -V toccolor=gray \
69
- --wrap=auto \
70
- --dpi=300 \
71
- --verbose || {
72
- echo "Error converting $file with xelatex, trying with weasyprint..."
73
- # Fallback to HTML->PDF conversion for complex documents
74
- pandoc "$file" \
75
- -t html5 \
76
- --standalone \
77
- --self-contained \
78
- --css=<(echo 'body{font-family:Arial,sans-serif;max-width:800px;margin:0 auto;padding:20px;line-height:1.6}img{max-width:100%;height:auto}pre{background:#f5f5f5;padding:10px;border-radius:5px;overflow-x:auto}') \
79
- -o "$dir/$filename.html"
80
 
81
- # Install weasyprint for HTML to PDF conversion
82
- pip install weasyprint
83
- weasyprint "$dir/$filename.html" "$pdf_path" || {
84
- echo "Both conversions failed for $file, skipping..."
85
- continue
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
  }
87
- rm -f "$dir/$filename.html"
88
- }
89
-
90
- if [ -f "$pdf_path" ]; then
91
- echo "Successfully converted $file to $pdf_path"
92
- else
93
- echo "Failed to convert $file"
94
- fi
95
- done
96
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
  # Set up Python for Google Drive upload
98
  - name: Set up Python
99
  uses: actions/setup-python@v4
 
35
  # Install weasyprint as fallback
36
  pip install weasyprint
37
 
38
+ # Improved MD to PDF Conversion with proper image handling and mermaid support
39
+ name: Convert Markdown to PDF
40
+
41
+ on:
42
+ push:
43
+ branches: [ main ]
44
+ pull_request:
45
+ branches: [ main ]
46
+
47
+ jobs:
48
+ convert-to-pdf:
49
+ runs-on: ubuntu-latest
50
+
51
+ steps:
52
+ - name: Checkout repository
53
+ uses: actions/checkout@v4
54
+
55
+ - name: Setup Python
56
+ uses: actions/setup-python@v4
57
+ with:
58
+ python-version: '3.11'
59
+
60
+ - name: Install system dependencies
61
+ run: |
62
+ sudo apt-get update
63
+ sudo apt-get install -y \
64
+ texlive-full \
65
+ texlive-xetex \
66
+ texlive-luatex \
67
+ pandoc \
68
+ librsvg2-bin \
69
+ python3-pip \
70
+ nodejs \
71
+ npm \
72
+ imagemagick \
73
+ ghostscript \
74
+ wkhtmltopdf
75
+
76
+ - name: Install Node.js dependencies for Mermaid
77
+ run: |
78
+ npm install -g @mermaid-js/mermaid-cli
79
+ npm install -g puppeteer
80
 
81
+ - name: Install Python dependencies
82
+ run: |
83
+ pip install --upgrade pip
84
+ pip install \
85
+ weasyprint \
86
+ markdown \
87
+ pymdown-extensions \
88
+ pillow \
89
+ cairosvg \
90
+ pdfkit
91
+
92
+ - name: Create LaTeX header for better image handling
93
+ run: |
94
+ cat > latex-header.tex << 'EOF'
95
+ \usepackage{graphicx}
96
+ \usepackage{float}
97
+ \usepackage{adjustbox}
98
+ \usepackage{caption}
99
+ \usepackage{subcaption}
100
+ \usepackage{geometry}
101
+ \usepackage{fancyhdr}
102
+ \usepackage{xcolor}
103
+ \usepackage{hyperref}
104
+
105
+ % Better image positioning and scaling
106
+ \floatplacement{figure}{H}
107
+ \renewcommand{\includegraphics}[2][]{\adjustbox{max width=\textwidth,center}{\oldincludegraphics[#1]{#2}}}
108
+ \let\oldincludegraphics\includegraphics
109
+
110
+ % Set margins
111
+ \geometry{margin=1in}
112
+
113
+ % Hyperlink colors
114
+ \hypersetup{
115
+ colorlinks=true,
116
+ linkcolor=blue,
117
+ urlcolor=blue,
118
+ citecolor=blue
119
  }
120
+ EOF
121
+
122
+ - name: Create enhanced CSS for HTML conversion
123
+ run: |
124
+ cat > styles.css << 'EOF'
125
+ body {
126
+ font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
127
+ max-width: 210mm;
128
+ margin: 0 auto;
129
+ padding: 20mm;
130
+ line-height: 1.6;
131
+ color: #333;
132
+ background: white;
133
+ }
134
+
135
+ img {
136
+ max-width: 100%;
137
+ height: auto;
138
+ display: block;
139
+ margin: 1em auto;
140
+ border-radius: 4px;
141
+ box-shadow: 0 2px 8px rgba(0,0,0,0.1);
142
+ }
143
+
144
+ pre {
145
+ background: #f8f9fa;
146
+ padding: 1em;
147
+ border-radius: 6px;
148
+ border-left: 4px solid #007acc;
149
+ overflow-x: auto;
150
+ font-family: 'Consolas', 'Monaco', 'Courier New', monospace;
151
+ font-size: 0.9em;
152
+ }
153
+
154
+ code {
155
+ background: #f1f3f4;
156
+ padding: 0.2em 0.4em;
157
+ border-radius: 3px;
158
+ font-family: 'Consolas', 'Monaco', 'Courier New', monospace;
159
+ font-size: 0.9em;
160
+ }
161
+
162
+ h1, h2, h3, h4, h5, h6 {
163
+ color: #2c3e50;
164
+ margin-top: 2em;
165
+ margin-bottom: 1em;
166
+ page-break-after: avoid;
167
+ }
168
+
169
+ h1 {
170
+ border-bottom: 3px solid #3498db;
171
+ padding-bottom: 0.5em;
172
+ }
173
+
174
+ h2 {
175
+ border-bottom: 2px solid #95a5a6;
176
+ padding-bottom: 0.3em;
177
+ }
178
+
179
+ table {
180
+ border-collapse: collapse;
181
+ width: 100%;
182
+ margin: 1em 0;
183
+ }
184
+
185
+ th, td {
186
+ border: 1px solid #ddd;
187
+ padding: 0.75em;
188
+ text-align: left;
189
+ }
190
+
191
+ th {
192
+ background-color: #f8f9fa;
193
+ font-weight: bold;
194
+ }
195
+
196
+ blockquote {
197
+ border-left: 4px solid #3498db;
198
+ margin: 1em 0;
199
+ padding: 0.5em 1em;
200
+ background: #f8f9fa;
201
+ border-radius: 0 4px 4px 0;
202
+ }
203
+
204
+ .mermaid-container {
205
+ text-align: center;
206
+ margin: 2em 0;
207
+ page-break-inside: avoid;
208
+ }
209
+
210
+ .mermaid-container img {
211
+ max-width: 100%;
212
+ height: auto;
213
+ }
214
+
215
+ @media print {
216
+ body {
217
+ margin: 0;
218
+ padding: 15mm;
219
+ }
220
+
221
+ img {
222
+ max-height: 80vh;
223
+ page-break-inside: avoid;
224
+ }
225
+
226
+ h1, h2, h3, h4, h5, h6 {
227
+ page-break-after: avoid;
228
+ }
229
+
230
+ pre, blockquote {
231
+ page-break-inside: avoid;
232
+ }
233
+ }
234
+ EOF
235
+
236
+ - name: Create preprocessing script
237
+ run: |
238
+ cat > preprocess_markdown.py << 'EOF'
239
+ #!/usr/bin/env python3
240
+ import re
241
+ import os
242
+ import sys
243
+ import subprocess
244
+ from pathlib import Path
245
+
246
+ def process_mermaid_diagrams(content, file_dir):
247
+ """Convert mermaid diagrams to images"""
248
+ mermaid_pattern = r'```mermaid\n(.*?)\n```'
249
+
250
+ def replace_mermaid(match):
251
+ mermaid_code = match.group(1)
252
+ # Create a unique filename for this diagram
253
+ diagram_hash = str(abs(hash(mermaid_code)))
254
+ mermaid_file = f"{file_dir}/mermaid_{diagram_hash}.mmd"
255
+ svg_file = f"{file_dir}/mermaid_{diagram_hash}.svg"
256
+ png_file = f"{file_dir}/mermaid_{diagram_hash}.png"
257
+
258
+ # Write mermaid code to file
259
+ with open(mermaid_file, 'w') as f:
260
+ f.write(mermaid_code)
261
+
262
+ try:
263
+ # Convert to SVG first
264
+ subprocess.run([
265
+ 'mmdc', '-i', mermaid_file, '-o', svg_file,
266
+ '--theme', 'default', '--backgroundColor', 'white'
267
+ ], check=True, capture_output=True)
268
+
269
+ # Convert SVG to PNG for better PDF compatibility
270
+ subprocess.run([
271
+ 'rsvg-convert', '-f', 'png', '-o', png_file,
272
+ '--width', '1200', '--height', '800', svg_file
273
+ ], check=True, capture_output=True)
274
+
275
+ # Clean up intermediate files
276
+ os.remove(mermaid_file)
277
+ if os.path.exists(svg_file):
278
+ os.remove(svg_file)
279
+
280
+ # Return markdown image syntax
281
+ return f'\n<div class="mermaid-container">\n\n![Architecture Diagram]({os.path.basename(png_file)})\n\n</div>\n'
282
+
283
+ except subprocess.CalledProcessError as e:
284
+ print(f"Error converting mermaid diagram: {e}")
285
+ return f'\n```\n{mermaid_code}\n```\n'
286
+ except Exception as e:
287
+ print(f"Unexpected error with mermaid: {e}")
288
+ return f'\n```\n{mermaid_code}\n```\n'
289
+
290
+ return re.sub(mermaid_pattern, replace_mermaid, content, flags=re.DOTALL)
291
+
292
+ def fix_image_paths(content, file_dir):
293
+ """Fix image paths and add proper sizing"""
294
+ # Pattern to match markdown images
295
+ img_pattern = r'!\[([^\]]*)\]\(([^)]+)\)'
296
+
297
+ def replace_image(match):
298
+ alt_text = match.group(1)
299
+ img_path = match.group(2)
300
+
301
+ # Handle relative paths
302
+ if not img_path.startswith(('http://', 'https://', '/')):
303
+ # Make path relative to the markdown file
304
+ abs_img_path = os.path.join(file_dir, img_path)
305
+ if os.path.exists(abs_img_path):
306
+ img_path = os.path.relpath(abs_img_path, file_dir)
307
+
308
+ # Add HTML img tag with better control
309
+ return f'<img src="{img_path}" alt="{alt_text}" style="max-width: 100%; height: auto; display: block; margin: 1em auto;" />'
310
+
311
+ # Also handle HTML img tags and ensure they have proper styling
312
+ content = re.sub(img_pattern, replace_image, content)
313
+
314
+ # Fix existing HTML img tags
315
+ content = re.sub(
316
+ r'<img\s+([^>]*?)\s*/?>',
317
+ lambda m: f'<img {m.group(1)} style="max-width: 100%; height: auto; display: block; margin: 1em auto;" />',
318
+ content
319
+ )
320
+
321
+ return content
322
+
323
+ def main():
324
+ if len(sys.argv) != 2:
325
+ print("Usage: python preprocess_markdown.py <markdown_file>")
326
+ sys.exit(1)
327
+
328
+ md_file = sys.argv[1]
329
+ file_dir = os.path.dirname(os.path.abspath(md_file))
330
+
331
+ with open(md_file, 'r', encoding='utf-8') as f:
332
+ content = f.read()
333
+
334
+ # Process mermaid diagrams
335
+ content = process_mermaid_diagrams(content, file_dir)
336
+
337
+ # Fix image paths and sizing
338
+ content = fix_image_paths(content, file_dir)
339
+
340
+ # Write processed content
341
+ processed_file = md_file.replace('.md', '_processed.md')
342
+ with open(processed_file, 'w', encoding='utf-8') as f:
343
+ f.write(content)
344
+
345
+ print(f"Processed file saved as: {processed_file}")
346
+ return processed_file
347
+
348
+ if __name__ == "__main__":
349
+ main()
350
+ EOF
351
+
352
+ chmod +x preprocess_markdown.py
353
+
354
+ - name: Convert MD to PDF with enhanced processing
355
+ run: |
356
+ find . -name "*.md" -not -path "./.git/*" | while read file; do
357
+ # Get the directory and filename
358
+ dir="$(dirname "$file")"
359
+ filename="$(basename "$file" .md)"
360
+ pdf_path="$dir/$filename.pdf"
361
+
362
+ echo "Processing $file..."
363
+
364
+ # Preprocess the markdown file
365
+ cd "$dir"
366
+ processed_file=$(python3 "$GITHUB_WORKSPACE/preprocess_markdown.py" "$(basename "$file")")
367
+
368
+ if [ ! -f "$processed_file" ]; then
369
+ echo "Preprocessing failed for $file, using original"
370
+ processed_file="$(basename "$file")"
371
+ fi
372
+
373
+ echo "Converting $processed_file to $pdf_path"
374
+
375
+ # Method 1: Try XeLaTeX with enhanced settings
376
+ pandoc "$processed_file" \
377
+ -o "$pdf_path" \
378
+ --pdf-engine=xelatex \
379
+ --include-in-header="$GITHUB_WORKSPACE/latex-header.tex" \
380
+ --variable mainfont="DejaVu Sans" \
381
+ --variable sansfont="DejaVu Sans" \
382
+ --variable monofont="DejaVu Sans Mono" \
383
+ --variable geometry:margin=1in \
384
+ --variable colorlinks=true \
385
+ --variable linkcolor=blue \
386
+ --variable urlcolor=blue \
387
+ --variable toccolor=gray \
388
+ --resource-path="$dir:$GITHUB_WORKSPACE" \
389
+ --standalone \
390
+ --toc \
391
+ --number-sections \
392
+ --highlight-style=github \
393
+ --wrap=auto \
394
+ --dpi=300 \
395
+ --verbose 2>/dev/null || {
396
+
397
+ echo "XeLaTeX failed, trying HTML->PDF conversion..."
398
+
399
+ # Method 2: HTML to PDF conversion with WeasyPrint
400
+ pandoc "$processed_file" \
401
+ -t html5 \
402
+ --standalone \
403
+ --embed-resources \
404
+ --css="$GITHUB_WORKSPACE/styles.css" \
405
+ --toc \
406
+ --number-sections \
407
+ --highlight-style=github \
408
+ -o "$dir/$filename.html"
409
+
410
+ if [ -f "$dir/$filename.html" ]; then
411
+ weasyprint "$dir/$filename.html" "$pdf_path" --presentational-hints || {
412
+ echo "WeasyPrint failed, trying wkhtmltopdf..."
413
+
414
+ # Method 3: wkhtmltopdf as final fallback
415
+ wkhtmltopdf \
416
+ --page-size A4 \
417
+ --margin-top 0.75in \
418
+ --margin-right 0.75in \
419
+ --margin-bottom 0.75in \
420
+ --margin-left 0.75in \
421
+ --encoding UTF-8 \
422
+ --no-outline \
423
+ --enable-local-file-access \
424
+ "$dir/$filename.html" "$pdf_path" || {
425
+ echo "All conversion methods failed for $file"
426
+ continue
427
+ }
428
+ }
429
+
430
+ # Clean up HTML file
431
+ rm -f "$dir/$filename.html"
432
+ fi
433
+ }
434
+
435
+ # Clean up processed file
436
+ if [ "$processed_file" != "$(basename "$file")" ]; then
437
+ rm -f "$processed_file"
438
+ fi
439
+
440
+ # Clean up generated mermaid images
441
+ rm -f mermaid_*.png mermaid_*.svg mermaid_*.mmd
442
+
443
+ if [ -f "$pdf_path" ]; then
444
+ echo "✅ Successfully converted $file to $pdf_path"
445
+ else
446
+ echo "❌ Failed to convert $file"
447
+ fi
448
+ done
449
+
450
+ - name: Upload PDF artifacts
451
+ uses: actions/upload-artifact@v4
452
+ with:
453
+ name: converted-pdfs
454
+ path: "**/*.pdf"
455
+ retention-days: 30
456
+
457
  # Set up Python for Google Drive upload
458
  - name: Set up Python
459
  uses: actions/setup-python@v4