| import time | |
| import base64 | |
| import io | |
| from PIL import Image | |
| from bs4 import BeautifulSoup | |
| from langchain_google_genai import ChatGoogleGenerativeAI | |
| from langchain_core.messages import HumanMessage | |
def resize_and_encode_image(image_file, max_long_side=1024):
    """Downscale an image to fit ``max_long_side`` and return a PNG data URL.

    The aspect ratio is preserved. Images whose longer side is already
    within ``max_long_side`` are re-encoded without resizing.

    Args:
        image_file: Whatever ``PIL.Image.open`` accepts (a path or a binary
            file-like object).
        max_long_side: Maximum length, in pixels, of the longer image side.

    Returns:
        A ``data:image/png;base64,...`` string containing the PNG bytes.

    Raises:
        ValueError: If ``max_long_side`` is smaller than 1.
    """
    if max_long_side < 1:
        raise ValueError("max_long_side must be at least 1")

    # The context manager releases the file handle Pillow opens when it is
    # given a path; everything that reads pixel data stays inside the block
    # because Pillow loads image data lazily.
    with Image.open(image_file) as source:
        img = source
        width, height = img.size

        if max(width, height) > max_long_side:
            if width > height:
                new_width = max_long_side
                new_height = int(max_long_side * (height / width))
            else:
                new_height = max_long_side
                new_width = int(max_long_side * (width / height))
            # Very elongated images would otherwise truncate the short side
            # to 0 pixels, which resize() rejects.
            new_size = (max(1, new_width), max(1, new_height))
            img = img.resize(new_size, Image.Resampling.LANCZOS)

        # The PNG writer cannot store CMYK pixels (typical for print JPEGs).
        if img.mode == "CMYK":
            img = img.convert("RGB")

        buffered = io.BytesIO()
        img.save(buffered, format="PNG")

    base64_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
    return f"data:image/png;base64,{base64_str}"
def beautify_html(html_code):
    """Return ``html_code`` re-indented via BeautifulSoup's ``prettify``.

    The markup is parsed with the ``html.parser`` backend and serialised
    back to a string.
    """
    return BeautifulSoup(html_code, "html.parser").prettify()
# Instruction text sent to the model together with the screenshot.
_SCREENSHOT_TO_HTML_PROMPT = """
You are an expert front-end developer.
The input is a screenshot of a website UI. Carefully analyze its layout and generate accurate, semantic, and maintainable HTML and CSS.
Follow these professional guidelines:
1) Structure & Semantics:
- Use HTML5 semantic tags that match the visual hierarchy (e.g., <header>, <nav>, <main>, <section>, <article>, <aside>, <footer>)
- Use HTML5 non-semantic tags that match the visual hierarchy (e.g., <div>, <span>)
- Reflect layout grouping using appropriate containers and divs where needed
2) Layout & Responsiveness:
- Use Flexbox or CSS Grid for layout
- Include responsive breakpoints (laptop and desktop-first) with at least one media query
- Ensure layout adapts well to **laptop and desktop screen sizes (min-width: 1024px)** using responsive design techniques like media queries
3) CSS Practices:
- Keep CSS in a <style> block or separate file (no inline styles)
- Use class names that follow a clean naming convention (e.g., BEM or descriptive naming)
- Group CSS rules logically (layout, typography, components)
- Use `max-width` + `white-space` + `line-height` for text containers to ensure headings/subheadings **wrap exactly** as in screenshot (e.g., 1 line or 2 lines).
4) Accessibility & UX:
- Add accessible markup: alt text, ARIA roles, labels
- Ensure good contrast and keyboard navigability
5) Content & Comments:
- Use meaningful placeholder text (not lorem ipsum)
- Add short code comments to explain each major section
- Accurately match the **font-size, font-weight, text-size and line-height** of all visible text, especially major headings and hero titles.
- Use **CSS font-size in rem or px** to replicate the exact visual size of headings and subheadings from the screenshot.
- Ensure text blocks wrap correctly as in the screenshot (e.g., long headings on one line, subheadings on two lines if seen). Use **max-width, white-space, and line-height** to control wrapping.
6) Output:
- The output should be a complete single HTML file with embedded CSS
- Preserve the visual structure and content flow of the original screenshot as closely as possible
- Do not skip or summarize any sections
Assume this is for real production-ready front-end code generation from a web UI screenshot.
"""


def _response_text(content):
    """Flatten a chat-message ``content`` payload into one string.

    A plain string is returned unchanged. For a list, string items and the
    ``"text"`` value of dict items tagged ``{"type": "text"}`` are
    concatenated; anything else is ignored.
    NOTE(review): the list/dict shape follows LangChain's documented message
    content type -- confirm against the installed version.
    """
    if isinstance(content, str):
        return content
    pieces = []
    for part in content or []:
        if isinstance(part, str):
            pieces.append(part)
        elif isinstance(part, dict) and part.get("type") == "text":
            pieces.append(str(part.get("text", "")))
    return "".join(pieces)


def _extract_html_document(raw_text):
    """Cut the HTML document out of a model reply.

    Removes surrounding Markdown code fences and any commentary before the
    doctype (or ``<html>``) or after the last ``</html>``. Tag matching is
    case-insensitive.
    """
    import re  # local import: keeps this block independent of the file header

    text = raw_text.strip()
    start = re.search(r"<!doctype\s+html", text, re.IGNORECASE) or re.search(
        r"<html\b", text, re.IGNORECASE
    )
    if start is None:
        # No recognisable document start: only peel off a wrapping fence.
        text = re.sub(r"\A```[\w-]*\s*", "", text)
        return re.sub(r"\s*```\Z", "", text)

    text = text[start.start():]
    last_close = None
    for last_close in re.finditer(r"</html\s*>", text, re.IGNORECASE):
        pass
    if last_close is not None:
        return text[: last_close.end()]
    # Reply without a closing tag (e.g. truncated): still drop a trailing fence.
    return re.sub(r"\s*```\s*\Z", "", text)


def generate_html_css_from_image(image_file, max_retries=5):
    """Ask Gemini to turn a UI screenshot into a single HTML/CSS document.

    The screenshot is resized and embedded as a PNG data URL, sent with the
    prompt to ``gemini-2.5-flash``, and the reply is cleaned, validated and
    pretty-printed. The call is retried when it raises or when the reply has
    no ``<html``/``<body`` tags.

    Args:
        image_file: Screenshot, passed through to ``resize_and_encode_image``.
        max_retries: Maximum number of model calls to make.

    Returns:
        The prettified HTML, starting with ``<!DOCTYPE html>``, or ``None``
        when no attempt produced any text. If every attempt fails validation,
        the most recent non-empty reply is still returned (best effort).
    """
    image_data_url = resize_and_encode_image(image_file)
    prompt = [
        HumanMessage(
            content=[
                {"type": "text", "text": _SCREENSHOT_TO_HTML_PROMPT},
                {
                    "type": "image_url",
                    "image_url": {"url": image_data_url, "mime_type": "image/png"},
                },
            ]
        )
    ]
    llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash", temperature=0)

    generated_code = None
    for attempt in range(max_retries):
        retry_note = "Retrying..."
        delay = 5
        try:
            response = llm.invoke(prompt)
        except Exception as e:  # service boundary: any failure is retried
            error_text = str(e)
            if (
                "ResourceExhausted" in error_text
                or "429" in error_text
                or "500" in error_text
            ):
                # Back off progressively on rate limits and server errors.
                delay = 30 * (attempt + 1)
                failure = "Rate limit or server error."
                retry_note = f"Retrying in {delay} seconds..."
            else:
                failure = f"Unexpected error: {e}."
        else:
            generated_code = _extract_html_document(_response_text(response.content))
            lowered = generated_code.lower()
            if "<html" in lowered and "<body" in lowered:
                break
            failure = "Generated code missing HTML/BODY tags."

        if attempt + 1 < max_retries:
            print(f"Attempt {attempt+1}: {failure} {retry_note}")
            time.sleep(delay)
        else:
            # Sleeping after the final attempt would only delay the return.
            print(f"Attempt {attempt+1}: {failure} Giving up.")

    if not generated_code:
        return None

    final_output = beautify_html(generated_code)
    if not final_output.strip().startswith("<!DOCTYPE html>"):
        final_output = "<!DOCTYPE html>\n" + final_output
    return final_output