Spaces:

lcmind
/

vibe-link-server

Sleeping

App Files Files Community

Lcmind committed on Jan 15

Commit

13024bf

1 Parent(s): f1c2407

refactor: 10-year prompt engineer redesign - Flux.1-schnell optimal params + smart text overlay

Browse files

Files changed (5) hide show

app/api/routes/poster.py +4 -4
app/core/config.py +1 -1
app/services/flux.py +29 -48
app/services/gemini.py +31 -60
app/services/overlay.py +86 -45

app/api/routes/poster.py CHANGED Viewed

@@ -41,17 +41,17 @@ async def create_poster(request: PosterRequest):
         # Step 4: Add brand name overlay with Pillow
         brand_name = analysis.get('brand_name', 'BRAND')
-        color_palette = analysis.get('color_palette', {})
-        primary_color = color_palette.get('primary', '#FFFFFF')
-        final_poster_path = add_brand_overlay(poster_path, brand_name, primary_color)
         # Step 5: Upload to ImgBB
         poster_url = await upload_to_imgbb(final_poster_path)
         return PosterResponse(
             poster_url=poster_url,
-            analysis=f"{brand_name}: {analysis.get('what_they_do', '')}"
         )
     except Exception as e:

         # Step 4: Add brand name overlay with Pillow
         brand_name = analysis.get('brand_name', 'BRAND')
+        primary_color = analysis.get('primary_color', '#FFFFFF')
+        mood = analysis.get('mood', 'Clean')
+        final_poster_path = add_brand_overlay(poster_path, brand_name, primary_color, mood)
         # Step 5: Upload to ImgBB
         poster_url = await upload_to_imgbb(final_poster_path)
         return PosterResponse(
             poster_url=poster_url,
+            analysis=f"{brand_name}: {analysis.get('what_they_provide', '')}"
         )
     except Exception as e:

app/core/config.py CHANGED Viewed

@@ -29,7 +29,7 @@ class Settings(BaseSettings):
     debug: bool = False
     # Hugging Face Models - use stable-diffusion instead of FLUX
-    flux_model: str = "stabilityai/stable-diffusion-xl-base-1.0"
     flux_steps: int = 20
     # Gemini Model - 2.5-flash

     debug: bool = False
     # Hugging Face Models - use stable-diffusion instead of FLUX
+    flux_model: str = "black-forest-labs/FLUX.1-schnell"
     flux_steps: int = 20
     # Gemini Model - 2.5-flash

app/services/flux.py CHANGED Viewed

@@ -8,7 +8,7 @@ from app.core.config import settings
 async def generate_poster(analysis: dict) -> str:
     """
-    Generate a poster using Flux AI based on analysis.
     Args:
         analysis: Analysis results from Gemini
@@ -19,71 +19,52 @@ async def generate_poster(analysis: dict) -> str:
     Raises:
         Exception: If generation fails
     """
-    # Construct detailed prompt for Flux
-    brand_name = analysis.get('brand_name', 'BRAND')
-    what_they_do = analysis.get('what_they_do', 'service')
-    poster_scene = analysis.get('poster_scene', 'modern clean design')
-    color_palette = analysis.get('color_palette', {'primary': '#4A90D9', 'secondary': []})
-    mood = analysis.get('mood', 'professional')
-    objects_list = analysis.get('objects_list', ['modern design elements'])
-    primary_color = color_palette.get('primary', '#4A90D9')
-    secondary_colors = color_palette.get('secondary', [])
-    # Build specific color instruction
-    if secondary_colors and len(secondary_colors) > 1:
-        all_colors = [primary_color] + secondary_colors
-        color_instruction = f"Color scheme using {', '.join(all_colors)} as accent colors throughout the scene"
-    else:
-        color_instruction = f"Color scheme dominated by {primary_color} with white and dark accents"
-    # Build objects description
-    objects_desc = ', '.join(objects_list) if objects_list else 'clean design elements'
-    prompt = f"""
-Commercial brand poster, vertical 9:16 aspect ratio.
-SCENE DESCRIPTION:
-{poster_scene}
-KEY OBJECTS TO INCLUDE:
-{objects_desc}
-COLOR AND LIGHTING:
-{color_instruction}
-{mood} atmosphere
-Professional studio lighting, soft shadows, cinematic depth
-STYLE REQUIREMENTS:
-- Clean, modern, minimalist composition
-- Professional advertising photography quality
-- High-end brand commercial aesthetic
-- NO text, NO words, NO letters in the image
-- Leave empty space at top center for logo placement
-TECHNICAL:
-- 8K resolution quality
-- Sharp focus throughout
-- Professional color grading
-- Magazine advertisement quality
-"""
-    negative_prompt = "text, words, letters, numbers, typography, watermark, signature, logo, brand name, writing, caption, label, blurry, low quality, amateur, messy, cluttered, chaotic, ugly, distorted, deformed, oversaturated, planets, space, galaxy, cosmos, moons, stars, universe"
-    # Use new HF router endpoint directly
     headers = {
         "Authorization": f"Bearer {settings.hf_token}",
         "Content-Type": "application/json"
     }
     payload = {
         "inputs": prompt,
         "parameters": {
             "negative_prompt": negative_prompt,
-            "num_inference_steps": 28,  # Higher steps for better quality
-            "guidance_scale": 3.5,  # Optimal balance for detail and prompt adherence
-            "width": 768,   # 9:16 ratio width
-            "height": 1344, # 9:16 ratio height (optimal for Flux Dev)
         }
     }

 async def generate_poster(analysis: dict) -> str:
     """
+    Generate a poster using Flux.1-schnell based on analysis.
     Args:
         analysis: Analysis results from Gemini
     Raises:
         Exception: If generation fails
     """
+    # Extract analysis data with new schema
+    business_type = analysis.get('business_type', 'Productivity')
+    poster_objects = analysis.get('poster_objects', 'modern workspace elements')
+    background_style = analysis.get('background_style', 'Clean gradient')
+    primary_color = analysis.get('primary_color', '#4A90D9')
+    mood = analysis.get('mood', 'Clean')
+    # === 10-YEAR PROMPT ENGINEER'S PERFECT PROMPT STRUCTURE ===
+    #
+    # Rule 1: Start with medium and format
+    # Rule 2: Subject first, then style
+    # Rule 3: Be concrete, not abstract
+    # Rule 4: Quality tags at the end
+    # Rule 5: Negative prompt blocks unwanted elements
+    prompt = f"""Commercial photography poster, vertical composition, 9:16 aspect ratio.
+Subject: {poster_objects}, arranged aesthetically in frame.
+Environment: {background_style} background, professional studio setup, {mood.lower()} atmosphere.
+Lighting: Soft diffused studio lighting, subtle shadows, {primary_color} color accents in the scene.
+Style: High-end advertising campaign, clean minimalist design, modern corporate aesthetic, magazine quality.
+Composition: Empty space at top 15% of frame for text overlay, centered focal point, balanced layout.
+Quality: 8k resolution, sharp focus, professional color grading, commercial photography."""
+    # Optimized negative prompt - specific and targeted
+    negative_prompt = "text, words, letters, typography, watermark, logo, signature, blurry, noise, grainy, amateur, low quality, distorted, ugly, planets, space, galaxy, stars, cosmos, abstract art, random patterns, cluttered, messy, chaotic"
     headers = {
         "Authorization": f"Bearer {settings.hf_token}",
         "Content-Type": "application/json"
     }
+    # Flux.1-schnell optimal parameters
     payload = {
         "inputs": prompt,
         "parameters": {
             "negative_prompt": negative_prompt,
+            "num_inference_steps": 4,  # Schnell is optimized for 4 steps
+            "guidance_scale": 0.0,     # Schnell uses guidance_scale 0
+            "width": 768,
+            "height": 1344,
         }
     }

app/services/gemini.py CHANGED Viewed

@@ -27,73 +27,44 @@ async def analyze_with_gemini(screenshot_path: str) -> dict:
     img = Image.open(screenshot_path)
     prompt = """
-You are a **Commercial Poster Art Director**.
-Analyze this website screenshot and design a visual concept for a brand poster.
-=== STEP 1: READ THE WEBSITE (MANDATORY) ===
-1. **What text do you see?**
-   - Headlines, menu items, buttons, taglines
-2. **What is this website FOR?** (Be SPECIFIC)
-   - ❌ WRONG: "It's a tech company"
-   - ✅ RIGHT: "Team collaboration tool with chat, file sharing, task management, and scheduling"
-3. **Brand name?** (Romanize Korean: 무신사→MUSINSA, 포커스허브→FOCUSHUB)
-4. **Main color?** (Extract the dominant color from the logo/design)
-=== STEP 2: DESIGN THE POSTER SCENE ===
-**CRITICAL RULE**: The poster must VISUALLY REPRESENT what the company DOES.
-**Examples of CORRECT visual translation:**
-📁 **Productivity Tool (FocusHub, Notion, Slack):**
-- SHOW: Floating glass panels with app icons (folder, chat bubble, checklist, calendar)
-- SHOW: Clean desk setup with organized digital screens
-- SHOW: Minimalist workspace with transparent UI elements
-- COLOR: Blue/white/gray tones (professional, clean)
-- MOOD: Organized, calm, productive
-👕 **Fashion E-commerce (Musinsa, Zara):**
-- SHOW: Clothing items spread on floor or hanging on racks
-- SHOW: Sneakers, hoodies, jackets arranged artistically
-- SHOW: Fashion photography studio setup
-- COLOR: Black/white with accent colors
-- MOOD: Trendy, stylish, premium
-🔍 **Search Engine / Tech (Google, Naver):**
-- SHOW: Holographic search interfaces, data visualization
-- SHOW: Flowing data streams, connected nodes
-- SHOW: Futuristic command center with screens
-- COLOR: Brand's signature colors (Google: blue/red/yellow/green)
-- MOOD: Intelligent, connected, powerful
-📦 **Delivery / Logistics (Coupang, Amazon):**
-- SHOW: Flying boxes with motion blur
-- SHOW: Futuristic warehouse conveyor systems
-- SHOW: Speed lines, delivery trucks, packages
-- COLOR: Brand colors with dynamic lighting
-- MOOD: Fast, efficient, massive scale
-=== STEP 3: OUTPUT ===
-Think carefully about what the website actually PROVIDES, then describe a scene that SHOWS it.
 {
-  "brand_name": "ENGLISH name (romanized if Korean)",
-  "what_they_do": "Specific description of their service/product (20 words)",
-  "poster_scene": "CONCRETE description of objects in the poster (40+ words). List actual objects: 'floating glass panels showing folder icon, chat bubble icon, checklist icon, calendar icon, arranged in a 2x2 grid, deep blue gradient background, soft white glow'",
-  "color_palette": {
-    "primary": "#hexcode",
-    "secondary": ["#hex1", "#hex2"]
-  },
-  "mood": "2-3 word mood description",
-  "objects_list": ["object1", "object2", "object3", "object4"]
 }
-**JSON ONLY. No other text.**
 """
     response = model.generate_content([prompt, img])

     img = Image.open(screenshot_path)
     prompt = """
+You are a Senior Creative Director analyzing a website screenshot for a commercial poster design.
+=== TASK ===
+Extract key information to create a poster that VISUALLY REPRESENTS what this company does.
+=== ANALYSIS STEPS ===
+1. **WHAT IS THIS?** (Read the screen carefully)
+   - Company/Brand name (if Korean, romanize: 무신사→MUSINSA)
+   - What do they sell or provide? Be SPECIFIC.
+   - Who is the target user?
+2. **VISUAL TRANSLATION** (Convert business to imagery)
+   The poster must show OBJECTS that represent the business:
+   | Business Type | What to Show |
+   |--------------|--------------|
+   | Productivity Tool | Organized workspace, floating UI panels, clean desk, glass screens with icons |
+   | Fashion Store | Clothes on racks, sneakers, fashion photography studio |
+   | Search/Tech | Holographic interfaces, data streams, futuristic screens |
+   | Delivery | Flying boxes, warehouse, conveyor belts |
+   | Food | The food items, kitchen, restaurant interior |
+3. **COLOR EXTRACTION**
+   - What is the main brand color from the logo/design?
+   - Is it single color or multi-color brand?
+=== OUTPUT (JSON) ===
 {
+  "brand_name": "ENGLISH brand name",
+  "business_type": "Productivity/Fashion/Tech/Delivery/Food/Other",
+  "what_they_provide": "Specific description in 15 words",
+  "poster_objects": "List concrete objects: 'glass panels, folder icon, chat icon, checklist, modern desk, soft lighting'",
+  "background_style": "Clean gradient/Studio/Futuristic/Warehouse/Minimal",
+  "primary_color": "#hexcode",
+  "mood": "Clean/Premium/Energetic/Calm"
 }
 """
     response = model.generate_content([prompt, img])

app/services/overlay.py CHANGED Viewed

@@ -1,42 +1,75 @@
-"""Brand overlay service using Pillow."""
-from PIL import Image, ImageDraw, ImageFont
 import os
-def add_brand_overlay(image_path: str, brand_name: str, primary_color: str = "#FFFFFF") -> str:
     """
-    Add brand name text overlay to the generated poster image.
     Args:
         image_path: Path to the generated poster image
         brand_name: Brand name to overlay (in ENGLISH)
-        primary_color: Hex color for the text
     Returns:
         str: Path to the image with overlay
     """
     img = Image.open(image_path).convert('RGBA')
     # Create overlay layer
     overlay = Image.new('RGBA', img.size, (0, 0, 0, 0))
     draw = ImageDraw.Draw(overlay)
-    # Convert hex to RGB
-    hex_color = primary_color.lstrip('#')
-    rgb_color = tuple(int(hex_color[i:i+2], 16) for i in (0, 2, 4))
-    # Try to load a nice font, fallback to default
-    font_size = int(img.width * 0.12)  # 12% of image width
     font = None
-    # Try common font paths
     font_paths = [
         "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
         "/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf",
         "/usr/share/fonts/TTF/DejaVuSans-Bold.ttf",
         "/System/Library/Fonts/Helvetica.ttc",
-        "/usr/share/fonts/truetype/freefont/FreeSansBold.ttf",
     ]
     for font_path in font_paths:
@@ -48,50 +81,58 @@ def add_brand_overlay(image_path: str, brand_name: str, primary_color: str = "#F
                 continue
     if font is None:
-        # Use default font with larger size workaround
         font = ImageFont.load_default()
-        font_size = 20  # Default font is small
-    # Calculate text position (center-top)
-    text_bbox = draw.textbbox((0, 0), brand_name.upper(), font=font)
     text_width = text_bbox[2] - text_bbox[0]
     text_height = text_bbox[3] - text_bbox[1]
     x = (img.width - text_width) // 2
-    y = int(img.height * 0.08)  # 8% from top
-    # Draw shadow/outline for better visibility
-    shadow_offset = max(3, font_size // 25)
-    for offset_x in range(-shadow_offset, shadow_offset + 1):
-        for offset_y in range(-shadow_offset, shadow_offset + 1):
-            if offset_x != 0 or offset_y != 0:
-                draw.text(
-                    (x + offset_x, y + offset_y),
-                    brand_name.upper(),
-                    font=font,
-                    fill=(0, 0, 0, 180)
-                )
     # Draw main text
-    draw.text((x, y), brand_name.upper(), font=font, fill=(*rgb_color, 255))
-    # Add subtle tagline area (optional gradient bar)
-    bar_height = int(img.height * 0.02)
-    bar_y = y + text_height + int(img.height * 0.03)
-    bar_width = int(text_width * 0.6)
-    bar_x = (img.width - bar_width) // 2
-    for i in range(bar_width):
-        alpha = int(255 * (1 - abs(i - bar_width/2) / (bar_width/2)))
-        draw.rectangle(
-            [bar_x + i, bar_y, bar_x + i + 1, bar_y + bar_height],
-            fill=(*rgb_color, alpha)
-        )
     # Composite overlay onto original image
     result = Image.alpha_composite(img, overlay)
-    # Convert back to RGB for saving as PNG/JPEG
     result = result.convert('RGB')
     output_path = '/tmp/poster_final.png'

+"""Brand overlay service using Pillow - Professional Grade."""
+from PIL import Image, ImageDraw, ImageFont, ImageFilter
 import os
+def analyze_image_colors(img: Image.Image) -> dict:
+    """Analyze the top portion of image to determine best text color."""
+    # Crop top 20% of image where text will be placed
+    top_region = img.crop((0, 0, img.width, int(img.height * 0.2)))
+    # Resize for faster processing
+    small = top_region.resize((50, 10))
+    # Get average color
+    pixels = list(small.getdata())
+    avg_r = sum(p[0] for p in pixels) // len(pixels)
+    avg_g = sum(p[1] for p in pixels) // len(pixels)
+    avg_b = sum(p[2] for p in pixels) // len(pixels)
+    avg_luminance = (0.299 * avg_r + 0.587 * avg_g + 0.114 * avg_b) / 255
+    return {
+        'avg_color': (avg_r, avg_g, avg_b),
+        'luminance': avg_luminance,
+        'is_dark': avg_luminance < 0.5
+    }
+def add_brand_overlay(image_path: str, brand_name: str, primary_color: str = "#FFFFFF", mood: str = "professional") -> str:
     """
+    Add brand name text overlay with professional styling that matches the image.
     Args:
         image_path: Path to the generated poster image
         brand_name: Brand name to overlay (in ENGLISH)
+        primary_color: Hex color for the text (brand color)
+        mood: The mood/style of the poster
     Returns:
         str: Path to the image with overlay
     """
     img = Image.open(image_path).convert('RGBA')
+    # Analyze image to determine best text approach
+    color_analysis = analyze_image_colors(img)
     # Create overlay layer
     overlay = Image.new('RGBA', img.size, (0, 0, 0, 0))
     draw = ImageDraw.Draw(overlay)
+    # Determine text color based on image analysis
+    if color_analysis['is_dark']:
+        text_color = (255, 255, 255)  # White text on dark
+        shadow_color = (0, 0, 0, 200)
+        glass_fill = (0, 0, 0, 80)
+    else:
+        text_color = (20, 20, 30)  # Dark text on light
+        shadow_color = (255, 255, 255, 200)
+        glass_fill = (255, 255, 255, 120)
+    # Font sizing - responsive to image width
+    font_size = int(img.width * 0.08)  # 8% of width (more elegant)
     font = None
+    # Try to load professional fonts
     font_paths = [
         "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
         "/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf",
+        "/usr/share/fonts/truetype/freefont/FreeSansBold.ttf",
         "/usr/share/fonts/TTF/DejaVuSans-Bold.ttf",
         "/System/Library/Fonts/Helvetica.ttc",
     ]
     for font_path in font_paths:
                 continue
     if font is None:
         font = ImageFont.load_default()
+        font_size = 20
+    # Calculate text position
+    text = brand_name.upper()
+    text_bbox = draw.textbbox((0, 0), text, font=font)
     text_width = text_bbox[2] - text_bbox[0]
     text_height = text_bbox[3] - text_bbox[1]
     x = (img.width - text_width) // 2
+    y = int(img.height * 0.05)  # 5% from top
+    # Create frosted glass background for text
+    glass_padding_x = int(text_width * 0.12)
+    glass_padding_y = int(text_height * 0.4)
+    glass_left = x - glass_padding_x
+    glass_top = y - glass_padding_y
+    glass_right = x + text_width + glass_padding_x
+    glass_bottom = y + text_height + glass_padding_y
+    # Draw rounded rectangle background
+    draw.rounded_rectangle(
+        [glass_left, glass_top, glass_right, glass_bottom],
+        radius=10,
+        fill=glass_fill
+    )
+    # Subtle border
+    draw.rounded_rectangle(
+        [glass_left, glass_top, glass_right, glass_bottom],
+        radius=10,
+        outline=(*text_color[:3], 40),
+        width=1
+    )
+    # Draw text shadow
+    shadow_offset = max(1, font_size // 50)
+    draw.text(
+        (x + shadow_offset, y + shadow_offset),
+        text,
+        font=font,
+        fill=shadow_color
+    )
     # Draw main text
+    draw.text((x, y), text, font=font, fill=(*text_color, 255))
     # Composite overlay onto original image
     result = Image.alpha_composite(img, overlay)
+    # Convert back to RGB for saving
     result = result.convert('RGB')
     output_path = '/tmp/poster_final.png'