HTMLviewer2_API

Paused

App Files Files Community

HTMLviewer2_API / app.py

tomo2chin2

Update app.py

da755ee verified 12 months ago

raw

history blame

4.47 kB

	import os, tempfile, time
	import gradio as gr
	from selenium import webdriver
	from selenium.webdriver.chrome.options import Options
	from PIL import Image
	from io import BytesIO

	def html_to_screenshot(html_code: str) -> Image.Image:
	    """Render HTML in headless Chrome and return one stitched full-page image.

	    The HTML is written to a temporary ``.html`` file, opened in a headless
	    Chrome session, and captured one viewport at a time while scrolling
	    down.  Elements whose computed position is ``fixed`` or ``sticky`` are
	    visible in the first capture only and hidden for the rest, so they do
	    not repeat in every tile.

	    Args:
	        html_code: HTML source to render.  ``None`` is treated as an empty
	            document.

	    Returns:
	        A new RGB image as wide as the page's scroll width, with the
	        captured tiles pasted top to bottom.

	    Raises:
	        Nothing is caught here: any exception raised by Selenium or Pillow
	        propagates to the caller after the browser has been shut down and
	        the temporary file removed.
	    """
	    from pathlib import Path  # local import: only needed to build the file URI

	    requested_viewport_height = 1000  # px
	    width_js = (
	        "return Math.max(document.body.scrollWidth, "
	        "document.documentElement.scrollWidth);"
	    )
	    height_js = (
	        "return Math.max(document.body.scrollHeight, "
	        "document.documentElement.scrollHeight);"
	    )

	    # Configure Selenium to use headless Chrome
	    chrome_options = Options()
	    for flag in (
	        "--headless",
	        "--no-sandbox",
	        "--disable-dev-shm-usage",
	        "--disable-gpu",
	    ):
	        chrome_options.add_argument(flag)

	    tmp_path = None
	    driver = webdriver.Chrome(options=chrome_options)
	    try:
	        # Write the HTML to a temporary file; delete=False so the file is
	        # still there for the browser after we close it (removed in finally).
	        with tempfile.NamedTemporaryFile(suffix=".html", delete=False) as tmp_file:
	            tmp_path = tmp_file.name
	            tmp_file.write((html_code or "").encode("utf-8"))

	        # as_uri() builds a correctly escaped file:// URL on every platform.
	        driver.get(Path(tmp_path).as_uri())
	        time.sleep(1)  # allow any dynamic content to load if needed

	        # Widen the window to the full content width first ...
	        total_width = driver.execute_script(width_js)
	        driver.set_window_size(total_width, requested_viewport_height)

	        # ... and only then measure heights: resizing can reflow the page,
	        # and the usable viewport may differ from the requested window size.
	        total_height = driver.execute_script(height_js)
	        viewport_height = int(
	            driver.execute_script("return window.innerHeight;")
	            or requested_viewport_height
	        )

	        # Mark all fixed or sticky elements
	        driver.execute_script(
	            "document.querySelectorAll('*').forEach(el => {"
	            " const pos = window.getComputedStyle(el).position;"
	            " if(pos === 'fixed' || pos === 'sticky') { el.setAttribute('data-fixed', 'true'); }"
	            "});")

	        # Screenshot the top of the page (with sticky elements visible)
	        screenshots = [Image.open(BytesIO(driver.get_screenshot_as_png()))]

	        # Hide sticky/fixed elements before taking further screenshots
	        driver.execute_script(
	            "document.querySelectorAll('[data-fixed=\"true\"]').forEach(el => el.style.visibility='hidden');")

	        # Scroll and capture until the bottom edge of the captured area
	        # reaches the page height.
	        covered = viewport_height
	        while covered < total_height:
	            driver.execute_script(f"window.scrollTo(0, {covered});")
	            time.sleep(0.2)
	            # The browser clamps the scroll at the page bottom, so trust the
	            # offset it reports rather than the one we asked for.
	            current_offset = int(round(
	                driver.execute_script("return window.pageYOffset;")))
	            new_bottom = current_offset + viewport_height
	            if new_bottom <= covered:
	                # No new content came into view; stop instead of looping.
	                break

	            tile = Image.open(BytesIO(driver.get_screenshot_as_png()))
	            overlap = covered - current_offset
	            if overlap > 0:
	                # Drop the rows that the previous tile already contains.
	                tile = tile.crop((0, overlap, tile.width, tile.height))
	            screenshots.append(tile)
	            covered = new_bottom

	        # Combine images vertically
	        total_combined_height = sum(img.height for img in screenshots)
	        combined_img = Image.new("RGB", (total_width, total_combined_height))
	        y = 0
	        for img in screenshots:
	            combined_img.paste(img, (0, y))
	            y += img.height

	        return combined_img
	    finally:
	        try:
	            driver.quit()
	        finally:
	            if tmp_path is not None:
	                try:
	                    os.remove(tmp_path)
	                except OSError:
	                    # Best-effort cleanup: a leftover temp file must not
	                    # mask the real result or error.
	                    pass

	# Gradio UI: a multi-line textbox for the HTML in, a PIL image out.
	_html_input = gr.Textbox(label="HTML Code", lines=15)
	_screenshot_output = gr.Image(type="pil")

	interface = gr.Interface(
	    fn=html_to_screenshot,
	    inputs=_html_input,
	    outputs=_screenshot_output,
	    title="HTML Full-Page Screenshot",
	    description="Enter HTML code and generate a full-page screenshot image.",
	)

	if __name__ == "__main__":
	    # Launch the Gradio app only when this file is run as a script.
	    interface.launch()