Lcmind committed on
Commit
13024bf
·
1 Parent(s): f1c2407

refactor: 10-year prompt engineer redesign - Flux.1-schnell optimal params + smart text overlay

Browse files
app/api/routes/poster.py CHANGED
@@ -41,17 +41,17 @@ async def create_poster(request: PosterRequest):
41
 
42
  # Step 4: Add brand name overlay with Pillow
43
  brand_name = analysis.get('brand_name', 'BRAND')
44
- color_palette = analysis.get('color_palette', {})
45
- primary_color = color_palette.get('primary', '#FFFFFF')
46
 
47
- final_poster_path = add_brand_overlay(poster_path, brand_name, primary_color)
48
 
49
  # Step 5: Upload to ImgBB
50
  poster_url = await upload_to_imgbb(final_poster_path)
51
 
52
  return PosterResponse(
53
  poster_url=poster_url,
54
- analysis=f"{brand_name}: {analysis.get('what_they_do', '')}"
55
  )
56
 
57
  except Exception as e:
 
41
 
42
  # Step 4: Add brand name overlay with Pillow
43
  brand_name = analysis.get('brand_name', 'BRAND')
44
+ primary_color = analysis.get('primary_color', '#FFFFFF')
45
+ mood = analysis.get('mood', 'Clean')
46
 
47
+ final_poster_path = add_brand_overlay(poster_path, brand_name, primary_color, mood)
48
 
49
  # Step 5: Upload to ImgBB
50
  poster_url = await upload_to_imgbb(final_poster_path)
51
 
52
  return PosterResponse(
53
  poster_url=poster_url,
54
+ analysis=f"{brand_name}: {analysis.get('what_they_provide', '')}"
55
  )
56
 
57
  except Exception as e:
app/core/config.py CHANGED
@@ -29,7 +29,7 @@ class Settings(BaseSettings):
29
  debug: bool = False
30
 
31
  # Hugging Face Models - use stable-diffusion instead of FLUX
32
- flux_model: str = "stabilityai/stable-diffusion-xl-base-1.0"
33
  flux_steps: int = 20
34
 
35
  # Gemini Model - 2.5-flash
 
29
  debug: bool = False
30
 
31
  # Hugging Face Models - use stable-diffusion instead of FLUX
32
+ flux_model: str = "black-forest-labs/FLUX.1-schnell"
33
  flux_steps: int = 20
34
 
35
  # Gemini Model - 2.5-flash
app/services/flux.py CHANGED
@@ -8,7 +8,7 @@ from app.core.config import settings
8
 
9
  async def generate_poster(analysis: dict) -> str:
10
  """
11
- Generate a poster using Flux AI based on analysis.
12
 
13
  Args:
14
  analysis: Analysis results from Gemini
@@ -19,71 +19,52 @@ async def generate_poster(analysis: dict) -> str:
19
  Raises:
20
  Exception: If generation fails
21
  """
22
- # Construct detailed prompt for Flux
23
- brand_name = analysis.get('brand_name', 'BRAND')
24
- what_they_do = analysis.get('what_they_do', 'service')
25
- poster_scene = analysis.get('poster_scene', 'modern clean design')
26
- color_palette = analysis.get('color_palette', {'primary': '#4A90D9', 'secondary': []})
27
- mood = analysis.get('mood', 'professional')
28
- objects_list = analysis.get('objects_list', ['modern design elements'])
29
 
30
- primary_color = color_palette.get('primary', '#4A90D9')
31
- secondary_colors = color_palette.get('secondary', [])
 
 
 
 
 
32
 
33
- # Build specific color instruction
34
- if secondary_colors and len(secondary_colors) > 1:
35
- all_colors = [primary_color] + secondary_colors
36
- color_instruction = f"Color scheme using {', '.join(all_colors)} as accent colors throughout the scene"
37
- else:
38
- color_instruction = f"Color scheme dominated by {primary_color} with white and dark accents"
39
-
40
- # Build objects description
41
- objects_desc = ', '.join(objects_list) if objects_list else 'clean design elements'
42
-
43
- prompt = f"""
44
- Commercial brand poster, vertical 9:16 aspect ratio.
45
 
46
- SCENE DESCRIPTION:
47
- {poster_scene}
48
 
49
- KEY OBJECTS TO INCLUDE:
50
- {objects_desc}
51
 
52
- COLOR AND LIGHTING:
53
- {color_instruction}
54
- {mood} atmosphere
55
- Professional studio lighting, soft shadows, cinematic depth
56
 
57
- STYLE REQUIREMENTS:
58
- - Clean, modern, minimalist composition
59
- - Professional advertising photography quality
60
- - High-end brand commercial aesthetic
61
- - NO text, NO words, NO letters in the image
62
- - Leave empty space at top center for logo placement
63
 
64
- TECHNICAL:
65
- - 8K resolution quality
66
- - Sharp focus throughout
67
- - Professional color grading
68
- - Magazine advertisement quality
69
- """
70
 
71
- negative_prompt = "text, words, letters, numbers, typography, watermark, signature, logo, brand name, writing, caption, label, blurry, low quality, amateur, messy, cluttered, chaotic, ugly, distorted, deformed, oversaturated, planets, space, galaxy, cosmos, moons, stars, universe"
 
72
 
73
- # Use new HF router endpoint directly
74
  headers = {
75
  "Authorization": f"Bearer {settings.hf_token}",
76
  "Content-Type": "application/json"
77
  }
78
 
 
79
  payload = {
80
  "inputs": prompt,
81
  "parameters": {
82
  "negative_prompt": negative_prompt,
83
- "num_inference_steps": 28, # Higher steps for better quality
84
- "guidance_scale": 3.5, # Optimal balance for detail and prompt adherence
85
- "width": 768, # 9:16 ratio width
86
- "height": 1344, # 9:16 ratio height (optimal for Flux Dev)
87
  }
88
  }
89
 
 
8
 
9
  async def generate_poster(analysis: dict) -> str:
10
  """
11
+ Generate a poster using Flux.1-schnell based on analysis.
12
 
13
  Args:
14
  analysis: Analysis results from Gemini
 
19
  Raises:
20
  Exception: If generation fails
21
  """
22
+ # Extract analysis data with new schema
23
+ business_type = analysis.get('business_type', 'Productivity')
24
+ poster_objects = analysis.get('poster_objects', 'modern workspace elements')
25
+ background_style = analysis.get('background_style', 'Clean gradient')
26
+ primary_color = analysis.get('primary_color', '#4A90D9')
27
+ mood = analysis.get('mood', 'Clean')
 
28
 
29
+ # === 10-YEAR PROMPT ENGINEER'S PERFECT PROMPT STRUCTURE ===
30
+ #
31
+ # Rule 1: Start with medium and format
32
+ # Rule 2: Subject first, then style
33
+ # Rule 3: Be concrete, not abstract
34
+ # Rule 4: Quality tags at the end
35
+ # Rule 5: Negative prompt blocks unwanted elements
36
 
37
+ prompt = f"""Commercial photography poster, vertical composition, 9:16 aspect ratio.
38
+
39
+ Subject: {poster_objects}, arranged aesthetically in frame.
 
 
 
 
 
 
 
 
 
40
 
41
+ Environment: {background_style} background, professional studio setup, {mood.lower()} atmosphere.
 
42
 
43
+ Lighting: Soft diffused studio lighting, subtle shadows, {primary_color} color accents in the scene.
 
44
 
45
+ Style: High-end advertising campaign, clean minimalist design, modern corporate aesthetic, magazine quality.
 
 
 
46
 
47
+ Composition: Empty space at top 15% of frame for text overlay, centered focal point, balanced layout.
 
 
 
 
 
48
 
49
+ Quality: 8k resolution, sharp focus, professional color grading, commercial photography."""
 
 
 
 
 
50
 
51
+ # Optimized negative prompt - specific and targeted
52
+ negative_prompt = "text, words, letters, typography, watermark, logo, signature, blurry, noise, grainy, amateur, low quality, distorted, ugly, planets, space, galaxy, stars, cosmos, abstract art, random patterns, cluttered, messy, chaotic"
53
 
 
54
  headers = {
55
  "Authorization": f"Bearer {settings.hf_token}",
56
  "Content-Type": "application/json"
57
  }
58
 
59
+ # Flux.1-schnell optimal parameters
60
  payload = {
61
  "inputs": prompt,
62
  "parameters": {
63
  "negative_prompt": negative_prompt,
64
+ "num_inference_steps": 4, # Schnell is optimized for 4 steps
65
+ "guidance_scale": 0.0, # Schnell uses guidance_scale 0
66
+ "width": 768,
67
+ "height": 1344,
68
  }
69
  }
70
 
app/services/gemini.py CHANGED
@@ -27,73 +27,44 @@ async def analyze_with_gemini(screenshot_path: str) -> dict:
27
  img = Image.open(screenshot_path)
28
 
29
  prompt = """
30
- You are a **Commercial Poster Art Director**.
31
- Analyze this website screenshot and design a visual concept for a brand poster.
32
 
33
- === STEP 1: READ THE WEBSITE (MANDATORY) ===
 
34
 
35
- 1. **What text do you see?**
36
- - Headlines, menu items, buttons, taglines
37
-
38
- 2. **What is this website FOR?** (Be SPECIFIC)
39
- - ❌ WRONG: "It's a tech company"
40
- - ✅ RIGHT: "Team collaboration tool with chat, file sharing, task management, and scheduling"
41
-
42
- 3. **Brand name?** (Romanize Korean: 무신사→MUSINSA, 포커스허브→FOCUSHUB)
43
-
44
- 4. **Main color?** (Extract the dominant color from the logo/design)
45
-
46
- === STEP 2: DESIGN THE POSTER SCENE ===
47
-
48
- **CRITICAL RULE**: The poster must VISUALLY REPRESENT what the company DOES.
49
-
50
- **Examples of CORRECT visual translation:**
51
-
52
- 📁 **Productivity Tool (FocusHub, Notion, Slack):**
53
- - SHOW: Floating glass panels with app icons (folder, chat bubble, checklist, calendar)
54
- - SHOW: Clean desk setup with organized digital screens
55
- - SHOW: Minimalist workspace with transparent UI elements
56
- - COLOR: Blue/white/gray tones (professional, clean)
57
- - MOOD: Organized, calm, productive
58
 
59
- 👕 **Fashion E-commerce (Musinsa, Zara):**
60
- - SHOW: Clothing items spread on floor or hanging on racks
61
- - SHOW: Sneakers, hoodies, jackets arranged artistically
62
- - SHOW: Fashion photography studio setup
63
- - COLOR: Black/white with accent colors
64
- - MOOD: Trendy, stylish, premium
65
 
66
- 🔍 **Search Engine / Tech (Google, Naver):**
67
- - SHOW: Holographic search interfaces, data visualization
68
- - SHOW: Flowing data streams, connected nodes
69
- - SHOW: Futuristic command center with screens
70
- - COLOR: Brand's signature colors (Google: blue/red/yellow/green)
71
- - MOOD: Intelligent, connected, powerful
72
-
73
- 📦 **Delivery / Logistics (Coupang, Amazon):**
74
- - SHOW: Flying boxes with motion blur
75
- - SHOW: Futuristic warehouse conveyor systems
76
- - SHOW: Speed lines, delivery trucks, packages
77
- - COLOR: Brand colors with dynamic lighting
78
- - MOOD: Fast, efficient, massive scale
79
-
80
- === STEP 3: OUTPUT ===
81
-
82
- Think carefully about what the website actually PROVIDES, then describe a scene that SHOWS it.
83
 
 
84
  {
85
- "brand_name": "ENGLISH name (romanized if Korean)",
86
- "what_they_do": "Specific description of their service/product (20 words)",
87
- "poster_scene": "CONCRETE description of objects in the poster (40+ words). List actual objects: 'floating glass panels showing folder icon, chat bubble icon, checklist icon, calendar icon, arranged in a 2x2 grid, deep blue gradient background, soft white glow'",
88
- "color_palette": {
89
- "primary": "#hexcode",
90
- "secondary": ["#hex1", "#hex2"]
91
- },
92
- "mood": "2-3 word mood description",
93
- "objects_list": ["object1", "object2", "object3", "object4"]
94
  }
95
-
96
- **JSON ONLY. No other text.**
97
  """
98
 
99
  response = model.generate_content([prompt, img])
 
27
  img = Image.open(screenshot_path)
28
 
29
  prompt = """
30
+ You are a Senior Creative Director analyzing a website screenshot for a commercial poster design.
 
31
 
32
+ === TASK ===
33
+ Extract key information to create a poster that VISUALLY REPRESENTS what this company does.
34
 
35
+ === ANALYSIS STEPS ===
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
+ 1. **WHAT IS THIS?** (Read the screen carefully)
38
+ - Company/Brand name (if Korean, romanize: 무신사→MUSINSA)
39
+ - What do they sell or provide? Be SPECIFIC.
40
+ - Who is the target user?
 
 
41
 
42
+ 2. **VISUAL TRANSLATION** (Convert business to imagery)
43
+
44
+ The poster must show OBJECTS that represent the business:
45
+
46
+ | Business Type | What to Show |
47
+ |--------------|--------------|
48
+ | Productivity Tool | Organized workspace, floating UI panels, clean desk, glass screens with icons |
49
+ | Fashion Store | Clothes on racks, sneakers, fashion photography studio |
50
+ | Search/Tech | Holographic interfaces, data streams, futuristic screens |
51
+ | Delivery | Flying boxes, warehouse, conveyor belts |
52
+ | Food | The food items, kitchen, restaurant interior |
53
+
54
+ 3. **COLOR EXTRACTION**
55
+ - What is the main brand color from the logo/design?
56
+ - Is it single color or multi-color brand?
 
 
57
 
58
+ === OUTPUT (JSON) ===
59
  {
60
+ "brand_name": "ENGLISH brand name",
61
+ "business_type": "Productivity/Fashion/Tech/Delivery/Food/Other",
62
+ "what_they_provide": "Specific description in 15 words",
63
+ "poster_objects": "List concrete objects: 'glass panels, folder icon, chat icon, checklist, modern desk, soft lighting'",
64
+ "background_style": "Clean gradient/Studio/Futuristic/Warehouse/Minimal",
65
+ "primary_color": "#hexcode",
66
+ "mood": "Clean/Premium/Energetic/Calm"
 
 
67
  }
 
 
68
  """
69
 
70
  response = model.generate_content([prompt, img])
app/services/overlay.py CHANGED
@@ -1,42 +1,75 @@
1
- """Brand overlay service using Pillow."""
2
 
3
- from PIL import Image, ImageDraw, ImageFont
4
  import os
5
 
6
 
7
- def add_brand_overlay(image_path: str, brand_name: str, primary_color: str = "#FFFFFF") -> str:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  """
9
- Add brand name text overlay to the generated poster image.
10
 
11
  Args:
12
  image_path: Path to the generated poster image
13
  brand_name: Brand name to overlay (in ENGLISH)
14
- primary_color: Hex color for the text
 
15
 
16
  Returns:
17
  str: Path to the image with overlay
18
  """
19
  img = Image.open(image_path).convert('RGBA')
20
 
 
 
 
21
  # Create overlay layer
22
  overlay = Image.new('RGBA', img.size, (0, 0, 0, 0))
23
  draw = ImageDraw.Draw(overlay)
24
 
25
- # Convert hex to RGB
26
- hex_color = primary_color.lstrip('#')
27
- rgb_color = tuple(int(hex_color[i:i+2], 16) for i in (0, 2, 4))
28
-
29
- # Try to load a nice font, fallback to default
30
- font_size = int(img.width * 0.12) # 12% of image width
 
 
 
 
 
 
31
  font = None
32
 
33
- # Try common font paths
34
  font_paths = [
35
  "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
36
  "/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf",
 
37
  "/usr/share/fonts/TTF/DejaVuSans-Bold.ttf",
38
  "/System/Library/Fonts/Helvetica.ttc",
39
- "/usr/share/fonts/truetype/freefont/FreeSansBold.ttf",
40
  ]
41
 
42
  for font_path in font_paths:
@@ -48,50 +81,58 @@ def add_brand_overlay(image_path: str, brand_name: str, primary_color: str = "#F
48
  continue
49
 
50
  if font is None:
51
- # Use default font with larger size workaround
52
  font = ImageFont.load_default()
53
- font_size = 20 # Default font is small
54
 
55
- # Calculate text position (center-top)
56
- text_bbox = draw.textbbox((0, 0), brand_name.upper(), font=font)
 
57
  text_width = text_bbox[2] - text_bbox[0]
58
  text_height = text_bbox[3] - text_bbox[1]
59
 
60
  x = (img.width - text_width) // 2
61
- y = int(img.height * 0.08) # 8% from top
62
-
63
- # Draw shadow/outline for better visibility
64
- shadow_offset = max(3, font_size // 25)
65
- for offset_x in range(-shadow_offset, shadow_offset + 1):
66
- for offset_y in range(-shadow_offset, shadow_offset + 1):
67
- if offset_x != 0 or offset_y != 0:
68
- draw.text(
69
- (x + offset_x, y + offset_y),
70
- brand_name.upper(),
71
- font=font,
72
- fill=(0, 0, 0, 180)
73
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
 
75
  # Draw main text
76
- draw.text((x, y), brand_name.upper(), font=font, fill=(*rgb_color, 255))
77
-
78
- # Add subtle tagline area (optional gradient bar)
79
- bar_height = int(img.height * 0.02)
80
- bar_y = y + text_height + int(img.height * 0.03)
81
- bar_width = int(text_width * 0.6)
82
- bar_x = (img.width - bar_width) // 2
83
-
84
- for i in range(bar_width):
85
- alpha = int(255 * (1 - abs(i - bar_width/2) / (bar_width/2)))
86
- draw.rectangle(
87
- [bar_x + i, bar_y, bar_x + i + 1, bar_y + bar_height],
88
- fill=(*rgb_color, alpha)
89
- )
90
 
91
  # Composite overlay onto original image
92
  result = Image.alpha_composite(img, overlay)
93
 
94
- # Convert back to RGB for saving as PNG/JPEG
95
  result = result.convert('RGB')
96
 
97
  output_path = '/tmp/poster_final.png'
 
1
+ """Brand overlay service using Pillow - Professional Grade."""
2
 
3
+ from PIL import Image, ImageDraw, ImageFont, ImageFilter
4
  import os
5
 
6
 
7
+ def analyze_image_colors(img: Image.Image) -> dict:
8
+ """Analyze the top portion of image to determine best text color."""
9
+ # Crop top 20% of image where text will be placed
10
+ top_region = img.crop((0, 0, img.width, int(img.height * 0.2)))
11
+
12
+ # Resize for faster processing
13
+ small = top_region.resize((50, 10))
14
+
15
+ # Get average color
16
+ pixels = list(small.getdata())
17
+ avg_r = sum(p[0] for p in pixels) // len(pixels)
18
+ avg_g = sum(p[1] for p in pixels) // len(pixels)
19
+ avg_b = sum(p[2] for p in pixels) // len(pixels)
20
+
21
+ avg_luminance = (0.299 * avg_r + 0.587 * avg_g + 0.114 * avg_b) / 255
22
+
23
+ return {
24
+ 'avg_color': (avg_r, avg_g, avg_b),
25
+ 'luminance': avg_luminance,
26
+ 'is_dark': avg_luminance < 0.5
27
+ }
28
+
29
+
30
+ def add_brand_overlay(image_path: str, brand_name: str, primary_color: str = "#FFFFFF", mood: str = "professional") -> str:
31
  """
32
+ Add brand name text overlay with professional styling that matches the image.
33
 
34
  Args:
35
  image_path: Path to the generated poster image
36
  brand_name: Brand name to overlay (in ENGLISH)
37
+ primary_color: Hex color for the text (brand color)
38
+ mood: The mood/style of the poster
39
 
40
  Returns:
41
  str: Path to the image with overlay
42
  """
43
  img = Image.open(image_path).convert('RGBA')
44
 
45
+ # Analyze image to determine best text approach
46
+ color_analysis = analyze_image_colors(img)
47
+
48
  # Create overlay layer
49
  overlay = Image.new('RGBA', img.size, (0, 0, 0, 0))
50
  draw = ImageDraw.Draw(overlay)
51
 
52
+ # Determine text color based on image analysis
53
+ if color_analysis['is_dark']:
54
+ text_color = (255, 255, 255) # White text on dark
55
+ shadow_color = (0, 0, 0, 200)
56
+ glass_fill = (0, 0, 0, 80)
57
+ else:
58
+ text_color = (20, 20, 30) # Dark text on light
59
+ shadow_color = (255, 255, 255, 200)
60
+ glass_fill = (255, 255, 255, 120)
61
+
62
+ # Font sizing - responsive to image width
63
+ font_size = int(img.width * 0.08) # 8% of width (more elegant)
64
  font = None
65
 
66
+ # Try to load professional fonts
67
  font_paths = [
68
  "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
69
  "/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf",
70
+ "/usr/share/fonts/truetype/freefont/FreeSansBold.ttf",
71
  "/usr/share/fonts/TTF/DejaVuSans-Bold.ttf",
72
  "/System/Library/Fonts/Helvetica.ttc",
 
73
  ]
74
 
75
  for font_path in font_paths:
 
81
  continue
82
 
83
  if font is None:
 
84
  font = ImageFont.load_default()
85
+ font_size = 20
86
 
87
+ # Calculate text position
88
+ text = brand_name.upper()
89
+ text_bbox = draw.textbbox((0, 0), text, font=font)
90
  text_width = text_bbox[2] - text_bbox[0]
91
  text_height = text_bbox[3] - text_bbox[1]
92
 
93
  x = (img.width - text_width) // 2
94
+ y = int(img.height * 0.05) # 5% from top
95
+
96
+ # Create frosted glass background for text
97
+ glass_padding_x = int(text_width * 0.12)
98
+ glass_padding_y = int(text_height * 0.4)
99
+
100
+ glass_left = x - glass_padding_x
101
+ glass_top = y - glass_padding_y
102
+ glass_right = x + text_width + glass_padding_x
103
+ glass_bottom = y + text_height + glass_padding_y
104
+
105
+ # Draw rounded rectangle background
106
+ draw.rounded_rectangle(
107
+ [glass_left, glass_top, glass_right, glass_bottom],
108
+ radius=10,
109
+ fill=glass_fill
110
+ )
111
+
112
+ # Subtle border
113
+ draw.rounded_rectangle(
114
+ [glass_left, glass_top, glass_right, glass_bottom],
115
+ radius=10,
116
+ outline=(*text_color[:3], 40),
117
+ width=1
118
+ )
119
+
120
+ # Draw text shadow
121
+ shadow_offset = max(1, font_size // 50)
122
+ draw.text(
123
+ (x + shadow_offset, y + shadow_offset),
124
+ text,
125
+ font=font,
126
+ fill=shadow_color
127
+ )
128
 
129
  # Draw main text
130
+ draw.text((x, y), text, font=font, fill=(*text_color, 255))
 
 
 
 
 
 
 
 
 
 
 
 
 
131
 
132
  # Composite overlay onto original image
133
  result = Image.alpha_composite(img, overlay)
134
 
135
+ # Convert back to RGB for saving
136
  result = result.convert('RGB')
137
 
138
  output_path = '/tmp/poster_final.png'