Spaces:
Runtime error
Runtime error
Commit
·
4ee0746
1
Parent(s):
17bef4b
task: refines scraping and analysis
Browse files- analyze_designs.py +18 -19
- notebook.ipynb +18 -36
- scraper.py +2 -24
analyze_designs.py
CHANGED
|
@@ -13,7 +13,7 @@ VISION_MODEL = "gpt-4o-2024-08-06"
|
|
| 13 |
client = AsyncOpenAI()
|
| 14 |
|
| 15 |
async def analyze_screenshot(design_id: str, design_path: Path):
|
| 16 |
-
"""Analyze screenshots and return description and
|
| 17 |
try:
|
| 18 |
# Check files exist
|
| 19 |
metadata_path = design_path / "metadata.json"
|
|
@@ -22,7 +22,7 @@ async def analyze_screenshot(design_id: str, design_path: Path):
|
|
| 22 |
|
| 23 |
if not all(f.exists() for f in [metadata_path, desktop_img, mobile_img]):
|
| 24 |
print(f"Missing required files for design {design_id}")
|
| 25 |
-
return design_id, None, None
|
| 26 |
|
| 27 |
# Load existing metadata
|
| 28 |
with open(metadata_path, "r") as f:
|
|
@@ -36,11 +36,11 @@ async def analyze_screenshot(design_id: str, design_path: Path):
|
|
| 36 |
mobile_base64 = base64.b64encode(f.read()).decode('utf-8')
|
| 37 |
except Exception as e:
|
| 38 |
print(f"Error reading images for design {design_id}: {str(e)}")
|
| 39 |
-
return design_id, None, None
|
| 40 |
|
| 41 |
print(f"Analyzing design {design_id}...")
|
| 42 |
|
| 43 |
-
#
|
| 44 |
response = await client.chat.completions.create(
|
| 45 |
model=VISION_MODEL,
|
| 46 |
messages=[
|
|
@@ -48,20 +48,18 @@ async def analyze_screenshot(design_id: str, design_path: Path):
|
|
| 48 |
"role": "system",
|
| 49 |
"content": """You are an expert graphic designer analyzing print and web designs for aesthetics, functionality, audience appeal, and potential applications.
|
| 50 |
|
| 51 |
-
The design should be considered from a visual standpoint
|
| 52 |
-
Consider gradients, texture, background effects, and the use of images.
|
| 53 |
|
| 54 |
Treat all text content as placeholder Lorem Ipsum.
|
| 55 |
|
| 56 |
Provide analysis in clean JSON format with these exact keys:
|
| 57 |
{
|
| 58 |
-
"description": "
|
| 59 |
"categories": ["category1", "category2"],
|
| 60 |
"visual_characteristics": ["characteristic1", "characteristic2"]
|
| 61 |
}
|
| 62 |
-
Provide 4-6 categories and 4-6 visual characteristics.
|
| 63 |
-
Categories should only refer to categories of design styling.
|
| 64 |
-
This data will be consumed by another LLM, so provide enough categories and visual characteristics to explain the design.
|
| 65 |
"""
|
| 66 |
},
|
| 67 |
{
|
|
@@ -90,10 +88,14 @@ async def analyze_screenshot(design_id: str, design_path: Path):
|
|
| 90 |
],
|
| 91 |
max_tokens=1000
|
| 92 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
# Ensure the response is not empty
|
| 94 |
if not response_content:
|
| 95 |
print(f"Empty response for design {design_id}")
|
| 96 |
-
return design_id, None, None
|
| 97 |
|
| 98 |
# Extract JSON content from markdown code block
|
| 99 |
if "```json" in response_content:
|
|
@@ -102,11 +104,7 @@ async def analyze_screenshot(design_id: str, design_path: Path):
|
|
| 102 |
|
| 103 |
# Parse the JSON response
|
| 104 |
try:
|
| 105 |
-
print(f"Cleaned response for design {design_id}: {response_content}")
|
| 106 |
analysis = json.loads(response_content)
|
| 107 |
-
print(f"Description: {analysis['description']}")
|
| 108 |
-
print(f"Categories: {analysis['categories']}")
|
| 109 |
-
print(f"Visual Characteristics: {analysis['visual_characteristics']}")
|
| 110 |
|
| 111 |
# Update metadata with all fields
|
| 112 |
metadata.update(analysis)
|
|
@@ -116,15 +114,16 @@ async def analyze_screenshot(design_id: str, design_path: Path):
|
|
| 116 |
json.dump(metadata, f, indent=2)
|
| 117 |
|
| 118 |
print(f"Successfully analyzed design {design_id}")
|
| 119 |
-
|
|
|
|
| 120 |
|
| 121 |
except json.JSONDecodeError as e:
|
| 122 |
print(f"Error parsing JSON response for design {design_id}: {str(e)}")
|
| 123 |
-
return design_id, None, None
|
| 124 |
|
| 125 |
except Exception as e:
|
| 126 |
print(f"Error processing design {design_id}: {str(e)}")
|
| 127 |
-
return design_id, None, None
|
| 128 |
|
| 129 |
async def main():
|
| 130 |
designs_dir = Path("designs")
|
|
@@ -156,7 +155,7 @@ async def main():
|
|
| 156 |
|
| 157 |
# Print summary
|
| 158 |
successful = 0
|
| 159 |
-
for design_id, desc, cats in results:
|
| 160 |
if desc is not None:
|
| 161 |
successful += 1
|
| 162 |
print(f"\nDesign {design_id}:")
|
|
|
|
| 13 |
client = AsyncOpenAI()
|
| 14 |
|
| 15 |
async def analyze_screenshot(design_id: str, design_path: Path):
|
| 16 |
+
"""Analyze screenshots and return description, categories, and visual characteristics"""
|
| 17 |
try:
|
| 18 |
# Check files exist
|
| 19 |
metadata_path = design_path / "metadata.json"
|
|
|
|
| 22 |
|
| 23 |
if not all(f.exists() for f in [metadata_path, desktop_img, mobile_img]):
|
| 24 |
print(f"Missing required files for design {design_id}")
|
| 25 |
+
return design_id, None, None, None
|
| 26 |
|
| 27 |
# Load existing metadata
|
| 28 |
with open(metadata_path, "r") as f:
|
|
|
|
| 36 |
mobile_base64 = base64.b64encode(f.read()).decode('utf-8')
|
| 37 |
except Exception as e:
|
| 38 |
print(f"Error reading images for design {design_id}: {str(e)}")
|
| 39 |
+
return design_id, None, None, None
|
| 40 |
|
| 41 |
print(f"Analyzing design {design_id}...")
|
| 42 |
|
| 43 |
+
# Get response first
|
| 44 |
response = await client.chat.completions.create(
|
| 45 |
model=VISION_MODEL,
|
| 46 |
messages=[
|
|
|
|
| 48 |
"role": "system",
|
| 49 |
"content": """You are an expert graphic designer analyzing print and web designs for aesthetics, functionality, audience appeal, and potential applications.
|
| 50 |
|
| 51 |
+
The design should be considered from a visual standpoint. Use chain of thought to consider color palette, visual layout, typography, artistic style, mood, and potential applications.
|
| 52 |
+
Consider gradients, texture, background effects, and the use of images.
|
| 53 |
|
| 54 |
Treat all text content as placeholder Lorem Ipsum.
|
| 55 |
|
| 56 |
Provide analysis in clean JSON format with these exact keys:
|
| 57 |
{
|
| 58 |
+
"description": "A one-paragraph summary highlighting exceptional features of the design",
|
| 59 |
"categories": ["category1", "category2"],
|
| 60 |
"visual_characteristics": ["characteristic1", "characteristic2"]
|
| 61 |
}
|
| 62 |
+
Provide 4-6 categories and 4-6 visual characteristics most relevant to the style and feel of the design. Do not reference css or web design directly because this analysis is primarily about design. These lists should describe the design to another LLM that will use this data to generate a UI.
|
|
|
|
|
|
|
| 63 |
"""
|
| 64 |
},
|
| 65 |
{
|
|
|
|
| 88 |
],
|
| 89 |
max_tokens=1000
|
| 90 |
)
|
| 91 |
+
|
| 92 |
+
# Then get the content
|
| 93 |
+
response_content = response.choices[0].message.content.strip()
|
| 94 |
+
|
| 95 |
# Ensure the response is not empty
|
| 96 |
if not response_content:
|
| 97 |
print(f"Empty response for design {design_id}")
|
| 98 |
+
return design_id, None, None, None
|
| 99 |
|
| 100 |
# Extract JSON content from markdown code block
|
| 101 |
if "```json" in response_content:
|
|
|
|
| 104 |
|
| 105 |
# Parse the JSON response
|
| 106 |
try:
|
|
|
|
| 107 |
analysis = json.loads(response_content)
|
|
|
|
|
|
|
|
|
|
| 108 |
|
| 109 |
# Update metadata with all fields
|
| 110 |
metadata.update(analysis)
|
|
|
|
| 114 |
json.dump(metadata, f, indent=2)
|
| 115 |
|
| 116 |
print(f"Successfully analyzed design {design_id}")
|
| 117 |
+
# Return visual_characteristics as fourth element
|
| 118 |
+
return design_id, analysis["description"], analysis["categories"], analysis["visual_characteristics"]
|
| 119 |
|
| 120 |
except json.JSONDecodeError as e:
|
| 121 |
print(f"Error parsing JSON response for design {design_id}: {str(e)}")
|
| 122 |
+
return design_id, None, None, None
|
| 123 |
|
| 124 |
except Exception as e:
|
| 125 |
print(f"Error processing design {design_id}: {str(e)}")
|
| 126 |
+
return design_id, None, None, None
|
| 127 |
|
| 128 |
async def main():
|
| 129 |
designs_dir = Path("designs")
|
|
|
|
| 155 |
|
| 156 |
# Print summary
|
| 157 |
successful = 0
|
| 158 |
+
for design_id, desc, cats, _ in results:
|
| 159 |
if desc is not None:
|
| 160 |
successful += 1
|
| 161 |
print(f"\nDesign {design_id}:")
|
notebook.ipynb
CHANGED
|
@@ -6,6 +6,8 @@
|
|
| 6 |
"source": [
|
| 7 |
"# Zen Garden Design Analysis\n",
|
| 8 |
"\n",
|
|
|
|
|
|
|
| 9 |
"To collect our design data, we scrape csszengarden.com for design screenshots and associated styles. With over 200 designs, this should give us a good training set on how to apply different styles and techniques."
|
| 10 |
]
|
| 11 |
},
|
|
@@ -43,51 +45,31 @@
|
|
| 43 |
"await test_scraper(test_set)"
|
| 44 |
]
|
| 45 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
{
|
| 47 |
"cell_type": "code",
|
| 48 |
-
"execution_count":
|
| 49 |
"metadata": {},
|
| 50 |
"outputs": [
|
| 51 |
{
|
| 52 |
"name": "stdout",
|
| 53 |
"output_type": "stream",
|
| 54 |
"text": [
|
| 55 |
-
"Analyzing design
|
| 56 |
-
"
|
| 57 |
-
"Length of response content: 450\n",
|
| 58 |
-
"Cleaned response for design 220: {\n",
|
| 59 |
-
" \"description\": \"The design is minimalistic with a focus on typography, subtle background elements, and a neutral color palette. It emphasizes a clean and organized layout, suitable for showcasing content-heavy pages such as articles or educational materials.\",\n",
|
| 60 |
-
" \"categories\": [\"Minimalism\", \"Typography-focused\"],\n",
|
| 61 |
-
" \"visual_characteristics\": [\"Neutral color palette\", \"Clean layout\", \"Subtle typography\", \"Organized content\"]\n",
|
| 62 |
-
"}\n",
|
| 63 |
-
"JSON parsed\n",
|
| 64 |
-
"Successfully analyzed design 220\n",
|
| 65 |
-
"\n",
|
| 66 |
-
"Analysis for design 220:\n",
|
| 67 |
-
"Description: The design is minimalistic with a focus on typography, subtle background elements, and a neutral color palette. It emphasizes a clean and organized layout, suitable for showcasing content-heavy pages such as articles or educational materials.\n",
|
| 68 |
-
"Categories: Minimalism, Typography-focused\n",
|
| 69 |
-
"Analyzing design 221...\n",
|
| 70 |
-
"Type of response content: <class 'str'>\n",
|
| 71 |
-
"Length of response content: 680\n",
|
| 72 |
-
"Cleaned response for design 221: {\n",
|
| 73 |
-
" \"description\": \"The visual design features a bold, colorful layout with a mix of geometric shapes and high-contrast text blocks. It uses a playful color palette of purples, blues, oranges, and cream tones. The design is structured to guide the viewer's eye vertically and horizontally across sections with clear typography. It appears modern and suited for showcasing CSS design capabilities in a playful yet professional manner.\",\n",
|
| 74 |
-
" \"categories\": [\n",
|
| 75 |
-
" \"Modern\",\n",
|
| 76 |
-
" \"Playful\"\n",
|
| 77 |
-
" ],\n",
|
| 78 |
-
" \"visual_characteristics\": [\n",
|
| 79 |
-
" \"Bold colors\",\n",
|
| 80 |
-
" \"Geometric shapes\",\n",
|
| 81 |
-
" \"High-contrast typography\",\n",
|
| 82 |
-
" \"Vertical and horizontal layout\"\n",
|
| 83 |
-
" ]\n",
|
| 84 |
-
"}\n",
|
| 85 |
-
"JSON parsed\n",
|
| 86 |
-
"Successfully analyzed design 221\n",
|
| 87 |
"\n",
|
| 88 |
-
"Analysis for design
|
| 89 |
-
"Description: The
|
| 90 |
-
"Categories:
|
|
|
|
| 91 |
]
|
| 92 |
}
|
| 93 |
],
|
|
|
|
| 6 |
"source": [
|
| 7 |
"# Zen Garden Design Analysis\n",
|
| 8 |
"\n",
|
| 9 |
+
"## 1. Scrape\n",
|
| 10 |
+
"\n",
|
| 11 |
"To collect our design data, we scrape csszengarden.com for design screenshots and associated styles. With over 200 designs, this should give us a good training set on how to apply different styles and techniques."
|
| 12 |
]
|
| 13 |
},
|
|
|
|
| 45 |
"await test_scraper(test_set)"
|
| 46 |
]
|
| 47 |
},
|
| 48 |
+
{
|
| 49 |
+
"cell_type": "markdown",
|
| 50 |
+
"metadata": {},
|
| 51 |
+
"source": [
|
| 52 |
+
"## 2. Analyze\n",
|
| 53 |
+
"\n",
|
| 54 |
+
"Now, using the screenshots and styles we downloaded, we analyze the design for characteristics that will be useful for retrieval."
|
| 55 |
+
]
|
| 56 |
+
},
|
| 57 |
{
|
| 58 |
"cell_type": "code",
|
| 59 |
+
"execution_count": 2,
|
| 60 |
"metadata": {},
|
| 61 |
"outputs": [
|
| 62 |
{
|
| 63 |
"name": "stdout",
|
| 64 |
"output_type": "stream",
|
| 65 |
"text": [
|
| 66 |
+
"Analyzing design 010...\n",
|
| 67 |
+
"Successfully analyzed design 010\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
"\n",
|
| 69 |
+
"Analysis for design 010:\n",
|
| 70 |
+
"Description: The design features a harmonious combination of earthy tones and soft gradients, creating a serene and inviting atmosphere. The layout leverages clear headings and subheadings for readability while integrating subtle decorative elements for visual interest. Functionally, the design is divided into a main content area and a sidebar, enhancing navigability.\n",
|
| 71 |
+
"Categories: Web Design, Educational, Inspirational, Aesthetic, Textual\n",
|
| 72 |
+
"Visual Characteristics: Earthy Color Palette, Soft Gradients, Informative Layout, Balanced Composition, Text-focused Design\n"
|
| 73 |
]
|
| 74 |
}
|
| 75 |
],
|
scraper.py
CHANGED
|
@@ -34,18 +34,7 @@ async def take_screenshot(url, directory):
|
|
| 34 |
page = await browser.new_page(viewport={'width': 1920, 'height': 1080})
|
| 35 |
await page.goto(url)
|
| 36 |
# Wait for network to be idle (no requests for at least 500ms)
|
| 37 |
-
await page.wait_for_load_state(
|
| 38 |
-
|
| 39 |
-
# Wait for all images to be loaded
|
| 40 |
-
await page.evaluate("""() => {
|
| 41 |
-
return Promise.all(
|
| 42 |
-
Array.from(document.images)
|
| 43 |
-
.filter(img => !img.complete)
|
| 44 |
-
.map(img => new Promise(resolve => {
|
| 45 |
-
img.onload = img.onerror = resolve;
|
| 46 |
-
}))
|
| 47 |
-
);
|
| 48 |
-
}""")
|
| 49 |
|
| 50 |
# Additional wait to ensure any animations/transitions complete
|
| 51 |
#await page.wait_for_timeout(2000) # 2 second delay
|
|
@@ -59,18 +48,7 @@ async def take_screenshot(url, directory):
|
|
| 59 |
page = await browser.new_page(viewport={'width': 480, 'height': 1080})
|
| 60 |
await page.goto(url)
|
| 61 |
# Wait for network to be idle (no requests for at least 500ms)
|
| 62 |
-
await page.wait_for_load_state(
|
| 63 |
-
|
| 64 |
-
# Wait for all images to be loaded
|
| 65 |
-
await page.evaluate("""() => {
|
| 66 |
-
return Promise.all(
|
| 67 |
-
Array.from(document.images)
|
| 68 |
-
.filter(img => !img.complete)
|
| 69 |
-
.map(img => new Promise(resolve => {
|
| 70 |
-
img.onload = img.onerror = resolve;
|
| 71 |
-
}))
|
| 72 |
-
);
|
| 73 |
-
}""")
|
| 74 |
|
| 75 |
# Additional wait to ensure any animations/transitions complete
|
| 76 |
#await page.wait_for_timeout(2000) # 2 second delay
|
|
|
|
| 34 |
page = await browser.new_page(viewport={'width': 1920, 'height': 1080})
|
| 35 |
await page.goto(url)
|
| 36 |
# Wait for network to be idle (no requests for at least 500ms)
|
| 37 |
+
await page.wait_for_load_state()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
|
| 39 |
# Additional wait to ensure any animations/transitions complete
|
| 40 |
#await page.wait_for_timeout(2000) # 2 second delay
|
|
|
|
| 48 |
page = await browser.new_page(viewport={'width': 480, 'height': 1080})
|
| 49 |
await page.goto(url)
|
| 50 |
# Wait for network to be idle (no requests for at least 500ms)
|
| 51 |
+
await page.wait_for_load_state()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
|
| 53 |
# Additional wait to ensure any animations/transitions complete
|
| 54 |
#await page.wait_for_timeout(2000) # 2 second delay
|