Spaces:

AkashKumarave
/

editableweb

Sleeping

App Files Community

AkashKumarave committed on May 5

Commit

59f237a

verified ·

1 Parent(s): 916861c

Update app.py

Browse files

Files changed (1) hide show

app.py +53 -195

app.py CHANGED Viewed

@@ -1,200 +1,58 @@
-import os
-import json
-import requests
-from bs4 import BeautifulSoup
-from flask import Flask, request, jsonify
-from flask_cors import CORS
-import cssutils
-import re
-from urllib.parse import urlparse, urljoin
-from playwright.sync_api import sync_playwright
-app = Flask(__name__)
-CORS(app)  # Enable CORS for all routes
-@app.route('/')
-def home():
-    return "Website to Figma Converter API - Use POST /api/convert"
-@app.route('/api/convert', methods=['POST'])
-def convert_website():
-    data = request.json
-    if not data or 'url' not in data:
-        return jsonify({"success": False, "error": "URL is required"}), 400
-    url = data['url']
     try:
-        # Fetch the website content with Playwright for dynamic content
-        website_data = fetch_website(url)
-        # Parse and convert website to Figma-compatible format
-        figma_data = convert_to_figma_format(website_data, url)
-        print("Figma data:", json.dumps(figma_data, indent=2))  # Debug log
-        return jsonify({
-            "success": True,
-            "data": figma_data
-        })
-    except Exception as e:
-        print(f"Error converting website: {e}")
-        return jsonify({
-            "success": False,
-            "error": str(e)
-        }), 500
-def fetch_website(url):
-    """Fetch website HTML content using Playwright for rendered DOM"""
-    with sync_playwright() as p:
-        browser = p.chromium.launch(headless=True)
-        page = browser.new_page()
-        page.goto(url, wait_until="networkidle")
-        html_content = page.content()
-        browser.close()
-    return html_content
-def convert_to_figma_format(html_content, base_url):
-    """Convert website HTML to Figma-compatible format"""
-    soup = BeautifulSoup(html_content, 'html.parser')
-    # Extract viewport dimensions
-    viewport_meta = soup.find('meta', attrs={'name': 'viewport'})
-    viewport_width = 1440  # Default width
-    viewport_height = 900  # Default height
-    if viewport_meta and 'content' in viewport_meta.attrs:
-        viewport_content = viewport_meta['content']
-        width_match = re.search(r'width=(\d+)', viewport_content)
-        if width_match:
-            viewport_width = int(width_match.group(1))
-    # Initialize the result data structure
-    result = {
-        "width": viewport_width,
-        "height": viewport_height,
-        "elements": []
-    }
-    # Process the body element and its children
-    body = soup.body
-    if body:
-        result["elements"] = parse_element(body, base_url)
-    return result
-def parse_element(element, base_url, depth=0):
-    """Recursively parse HTML elements into Figma-compatible format"""
-    if depth > 10:  # Limit recursion depth
-        return []
-    elements = []
-    for child in element.children:
-        if not hasattr(child, 'name') or not child.name:
-            continue
-        # Skip invisible elements
-        if child.name in ['script', 'style', 'meta', 'link', 'noscript']:
-            continue
-        # Get element style (inline and computed)
-        style = extract_style(child)
-        element_data = {
-            "type": determine_element_type(child),
-            "tag": child.name,
-            "style": style
         }
-        # Handle text content
-        if element_data["type"] == "text":
-            text_content = child.get_text().strip()
-            if text_content:
-                element_data["content"] = text_content
-        # Handle image elements
-        if child.name == 'img' and child.get('src'):
-            element_data["type"] = "image"
-            src = child.get('src')
-            if not src.startswith(('http://', 'https://')):
-                src = urljoin(base_url, src)
-            element_data["src"] = src
-        # Handle input elements
-        if child.name == 'input':
-            input_type = child.get('type', 'text')
-            if input_type in ['submit', 'button']:
-                element_data["type"] = "button"
-                element_data["content"] = child.get('value', 'Button')
-        # Handle button elements
-        if child.name == 'button':
-            element_data["type"] = "button"
-            element_data["content"] = child.get_text().strip() or 'Button'
-        # Recursively process children
-        if list(child.children):
-            element_data["children"] = parse_element(child, base_url, depth + 1)
-        elements.append(element_data)
-    return elements
-def determine_element_type(element):
-    """Determine the Figma element type based on HTML element"""
-    if element.name in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'span', 'a', 'label', 'li']:
-        return "text"
-    if element.name == 'img':
-        return "image"
-    if element.name == 'button' or (element.name == 'input' and element.get('type') in ['submit', 'button']):
-        return "button"
-    return "container"
-def extract_style(element):
-    """Extract CSS styles from an HTML element"""
-    style = {}
-    # Extract inline styles
-    if element.get('style'):
-        try:
-            inline_styles = cssutils.parseStyle(element['style'])
-            for prop in inline_styles:
-                style[prop.name] = prop.value
-        except Exception as e:
-            print(f"Error parsing inline styles for {element.name}: {e}")
-    # Add default styles for specific elements
-    if element.name in ['div', 'section', 'article', 'main']:
-        style.setdefault('width', '100%')
-        style.setdefault('height', 'auto')
-        style.setdefault('display', 'block')
-    # Extract colors and backgrounds
-    if element.get('color'):
-        style['color'] = element['color']
-    elif element.name in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']:
-        style.setdefault('color', '#000000')
-    # Extract font sizes
-    if element.name == 'h1':
-        style.setdefault('fontSize', '32px')
-    elif element.name == 'h2':
-        style.setdefault('fontSize', '24px')
-    elif element.name == 'h3':
-        style.setdefault('fontSize', '18px')
-    elif element.name in ['p', 'span', 'a', 'li']:
-        style.setdefault('fontSize', '16px')
-    # Add layout-related styles
-    style.setdefault('position', 'relative')
-    style.setdefault('margin', '0')
-    style.setdefault('padding', '0')
-    style.setdefault('boxSizing', 'border-box')
-    # Add computed styles (simulated)
-    if element.name in ['div', 'section', 'article']:
-        style.setdefault('backgroundColor', 'transparent')
-    return style

+from fastapi import FastAPI, HTTPException
+from fastapi.responses import JSONResponse
+from pdf2json import Pdf2Json
+from io import BytesIO
+import base64
+app = FastAPI()
+@app.post("/api/convert")
+async def convert_pdf(file: bytes = File(...)):
     try:
+        # Parse PDF
+        pdf_parser = Pdf2Json(BytesIO(file))
+        pdf_data = pdf_parser.get_json()
+        # Process PDF data
+        result = {
+            "width": pdf_data["width"],  # Page width in pixels
+            "height": pdf_data["height"],  # Page height in pixels
+            "texts": [],
+            "images": [],
+            "shapes": []
         }
+        # Extract text
+        for text in pdf_data["texts"]:
+            result["texts"].append({
+                "content": text["content"],
+                "x": text["x"],
+                "y": text["y"],
+                "fontFamily": text["font"] or "Arial",
+                "fontStyle": text["style"] or "Regular",
+                "fontSize": text["size"],
+                "color": {"r": text["color"]["r"]/255, "g": text["color"]["g"]/255, "b": text["color"]["b"]/255}
+            })
+        # Extract images
+        for img in pdf_data["images"]:
+            result["images"].append({
+                "data": base64.b64encode(img["data"]).decode('utf-8'),
+                "x": img["x"],
+                "y": img["y"],
+                "width": img["width"],
+                "height": img["height"]
+            })
+        # Extract shapes
+        for shape in pdf_data["shapes"]:
+            result["shapes"].append({
+                "path": shape["path"],
+                "x": shape["x"],
+                "y": shape["y"],
+                "color": {"r": shape["color"]["r"]/255, "g": shape["color"]["g"]/255, "b": shape["color"]["b"]/255}
+            })
+        return JSONResponse(content=result)
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))