import sys
import time
import os
import logging
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
import markdown
from xhtml2pdf import pisa
import threading
# Watched folder and destination folder, both resolved to absolute paths
# relative to the working directory the script is started from.
INPUT_DIR, OUTPUT_DIR = map(os.path.abspath, ("input", "output"))

# Create both folders up front; exist_ok makes this a no-op when they
# are already present.
os.makedirs(INPUT_DIR, exist_ok=True)
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Timestamped, INFO-level log lines for the whole script.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
)
def _build_html_document(body_html):
    """Wrap an HTML fragment in a full document carrying the PDF stylesheet.

    Args:
        body_html: HTML produced from the Markdown source; inserted verbatim
            inside ``<body>``.

    Returns:
        The complete HTML document as a string.
    """
    # xhtml2pdf supports some CSS. Borders are added to tables so that they
    # are visible in the rendered PDF.
    return f"""
<html>
<head>
<style>
    @page {{
        size: A4;
        margin: 2cm;
    }}
    body {{
        font-family: Helvetica, sans-serif;
        font-size: 12pt;
    }}
    table {{
        border: 1px solid black;
        border-collapse: collapse;
        width: 100%;
        margin-bottom: 1em;
    }}
    th, td {{
        border: 1px solid black;
        padding: 5px;
        text-align: left;
    }}
    th {{
        background-color: #f2f2f2;
        font-weight: bold;
    }}
    code {{
        font-family: Courier;
        background-color: #f5f5f5;
    }}
    pre {{
        background-color: #f5f5f5;
        padding: 10px;
        border: 1px solid #ccc;
    }}
</style>
</head>
<body>
{body_html}
</body>
</html>
"""


def convert_md_to_pdf(input_path):
    """Read a Markdown file and write it as a PDF into OUTPUT_DIR.

    The PDF is named after the input file with its extension replaced by
    ``.pdf``. Any failure is logged rather than raised, so a single bad file
    does not propagate an exception to the caller.

    Args:
        input_path: Path of the Markdown file to convert.

    Returns:
        None. The outcome is reported through the logging module.
    """
    try:
        filename = os.path.basename(input_path)
        name, _ = os.path.splitext(filename)
        output_path = os.path.join(OUTPUT_DIR, f"{name}.pdf")
        logging.info(f"Processing: {filename}")

        # 1. Read Markdown.
        # utf-8-sig decodes plain UTF-8 as well, but drops a leading
        # byte-order mark so it does not end up in front of the first
        # Markdown line (where it would stop e.g. a heading from parsing).
        with open(input_path, 'r', encoding='utf-8-sig') as f:
            md_content = f.read()

        # 2. Convert to HTML; 'tables' adds table support and 'fenced_code'
        # adds fenced code blocks.
        html_content = markdown.markdown(md_content, extensions=['tables', 'fenced_code'])

        # 3. Add basic styling for the PDF.
        full_html = _build_html_document(html_content)

        # 4. Generate PDF.
        with open(output_path, "wb") as pdf_file:
            pisa_status = pisa.CreatePDF(
                full_html,      # the HTML to convert
                dest=pdf_file   # file handle to receive result
            )

        if pisa_status.err:
            logging.error(f"Error converting {filename}: {pisa_status.err}")
        else:
            logging.info(f"Successfully created: {output_path}")
    except Exception as e:
        # Deliberately broad: this is the boundary for one conversion and the
        # caller must keep running. logging.exception keeps the traceback so
        # unexpected errors remain diagnosable.
        logging.exception(f"Failed to convert {input_path}: {e}")
class MDHandler(FileSystemEventHandler):
    """Convert Markdown files that are created in, or moved into, the watched folder.

    Only ``created`` and ``moved`` events are handled, which covers the
    "drop a file in" workflow. ``modified`` events are deliberately ignored:
    they may fire several times while a file is being saved and would cause
    duplicate conversions.
    """

    # Seconds to wait after an event before reading the file, giving the
    # writer a brief moment to finish. Override on a subclass or instance
    # to tune it.
    settle_delay = 0.5

    def on_created(self, event):
        """Handle a newly created filesystem entry."""
        self._convert_if_markdown(event, event.src_path)

    def on_moved(self, event):
        """Handle an entry that was moved or renamed; its new path is used."""
        self._convert_if_markdown(event, event.dest_path)

    def _convert_if_markdown(self, event, path):
        """Convert ``path`` when the event is for a file ending in ``.md``.

        The extension check is case-insensitive; directories and other file
        types are ignored.
        """
        if event.is_directory or not path.lower().endswith('.md'):
            return
        time.sleep(self.settle_delay)
        convert_md_to_pdf(path)
def main():
    """Watch INPUT_DIR and convert Markdown files until interrupted.

    Starts a watchdog observer on INPUT_DIR (non-recursive) with an
    MDHandler, then idles until Ctrl-C. The observer is stopped and joined
    however the wait loop ends.
    """
    logging.info("Starting Watchdog for MD -> PDF conversion...")
    logging.info(f"Monitoring: {INPUT_DIR}")
    logging.info(f"Output to: {OUTPUT_DIR}")

    event_handler = MDHandler()
    observer = Observer()
    observer.schedule(event_handler, INPUT_DIR, recursive=False)
    observer.start()
    try:
        # The observer does its work on its own thread; the main thread only
        # has to stay alive.
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        # Ctrl-C is the normal way to stop the watcher.
        pass
    finally:
        # Always shut the observer down, not only on KeyboardInterrupt.
        observer.stop()
        observer.join()


if __name__ == "__main__":
    main()