Lcmind committed on
Commit
b895841
·
1 Parent(s): f94d4dc

fix: S-tier prompt - hex to color name, blur UI text, remove watermarks

Browse files
Files changed (2) hide show
  1. app/core/config.py +5 -2
  2. app/services/qwen.py +94 -0
app/core/config.py CHANGED
@@ -37,6 +37,9 @@ class Settings(BaseSettings):
37
  gemini_model: str = "gemma-3-27b-it"
38
  groq_model: str = "meta-llama/llama-4-maverick-17b-128e-instruct"
39
 
 
 
 
40
  # Pyppeteer Settings
41
  puppeteer_executable_path: str = "/usr/bin/chromium"
42
  puppeteer_args: List[str] = [
@@ -46,8 +49,8 @@ class Settings(BaseSettings):
46
  "--disable-gpu",
47
  ]
48
 
49
- # 분석 모델 선택: "gemini" 또는 "groq"
50
- analysis_model: str = "groq" # 또는 "gemini"
51
 
52
  class Config:
53
  env_file = ".env"
 
37
  gemini_model: str = "gemma-3-27b-it"
38
  groq_model: str = "meta-llama/llama-4-maverick-17b-128e-instruct"
39
 
40
+ # Qwen Model (멀티모달)
41
+ qwen_model: str = "Qwen/Qwen3-VL-32B-Instruct" # 32B 모델로 교체
42
+
43
  # Pyppeteer Settings
44
  puppeteer_executable_path: str = "/usr/bin/chromium"
45
  puppeteer_args: List[str] = [
 
49
  "--disable-gpu",
50
  ]
51
 
52
+ # 분석 모델 선택: "gemini", "groq", "qwen"
53
+ analysis_model: str = "qwen" # "gemini", "groq", "qwen" 중 선택
54
 
55
  class Config:
56
  env_file = ".env"
app/services/qwen.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Qwen-VL 멀티모달 분석 서비스."""
2
+
3
+ import httpx
4
+ import base64
5
+ import json
6
+ from app.core.config import settings
7
+
8
# Instruction prompt sent alongside the screenshot. It asks the model to answer
# with a single JSON object describing the brand and the poster to generate.
_QWEN_ANALYSIS_PROMPT = """
You are a Senior Creative Director analyzing a website screenshot for a commercial poster design.

=== TASK ===
Extract key information to create a poster that VISUALLY REPRESENTS what this company does.

=== ANALYSIS STEPS ===

1. **WHAT IS THIS?** (Read the screen carefully)
- Company/Brand name (if Korean, romanize: 무신사→MUSINSA)
- What do they sell or provide? Be SPECIFIC.
- Who is the target user?

2. **VISUAL TRANSLATION** (Convert business to imagery)
The poster must show OBJECTS that represent the business:
| Business Type | What to Show |
|--------------|--------------|
| Productivity Tool | Organized workspace, floating UI panels, clean desk, glass screens with icons |
| Fashion Store | Clothes on racks, sneakers, fashion photography studio |
| Search/Tech | Holographic interfaces, data streams, futuristic screens |
| Delivery | Flying boxes, warehouse, conveyor belts |
| Food | The food items, kitchen, restaurant interior |

3. **COLOR EXTRACTION**
- What is the main brand color from the logo/design?
- Is it single color or multi-color brand?

=== OUTPUT (JSON) ===
{
"brand_name": "ENGLISH brand name",
"business_type": "Productivity/Fashion/Tech/Delivery/Food/Other",
"what_they_provide": "Specific description in 15 words",
"poster_objects": "List concrete objects: 'glass panels, folder icon, chat icon, checklist, modern desk, soft lighting'",
"background_style": "Clean gradient/Studio/Futuristic/Warehouse/Minimal",
"primary_color": "#hexcode",
"mood": "Clean/Premium/Energetic/Calm"
}
"""


def _extract_json_object(text: str) -> str:
    """Return the outermost ``{...}`` span of *text*, or ``""`` if there is none.

    A Markdown code fence (```json or a bare ```) is unwrapped first, because
    chat models frequently wrap their JSON answer in one.
    """
    if "```json" in text:
        text = text.split("```json", 1)[1].split("```", 1)[0].strip()
    elif "```" in text:
        text = text.split("```", 1)[1].split("```", 1)[0].strip()

    start = text.find("{")
    end = text.rfind("}")
    if start == -1 or end < start:
        return ""
    return text[start:end + 1]


def _parse_analysis_json(answer: str) -> dict:
    """Parse the model's textual answer into a dict.

    Strict JSON is tried first; on failure a best-effort repair is attempted.

    Raises:
        ValueError: If the answer cannot be parsed even after the repair.
    """
    candidate = _extract_json_object(answer)
    try:
        return json.loads(candidate)
    except json.JSONDecodeError:
        # Best-effort repair for single-quoted pseudo-JSON. This also rewrites
        # apostrophes inside values, so it is only used as a fallback.
        repaired = candidate.replace("'", '"').replace("\n", " ")

    try:
        return json.loads(repaired)
    except json.JSONDecodeError as err:
        # Show the raw answer when no JSON-looking span was found at all, so
        # the error message is never an empty excerpt.
        shown = repaired or answer
        raise ValueError(
            f"Failed to parse Qwen response as JSON: {shown[:200]}"
        ) from err


async def analyze_with_qwen(screenshot_path: str) -> dict:
    """Analyze a website screenshot with the Qwen-VL model.

    The screenshot is base64-encoded, posted together with the analysis prompt
    to the Hugging Face inference endpoint of ``settings.qwen_model``, and the
    JSON object contained in the model's answer is returned.

    Args:
        screenshot_path: Path to the screenshot file.

    Returns:
        dict: The parsed JSON object from the model's answer. The prompt asks
        for the keys ``brand_name``, ``business_type``, ``what_they_provide``,
        ``poster_objects``, ``background_style``, ``primary_color`` and
        ``mood``; the result is not validated against that list.

    Raises:
        OSError: If the screenshot file cannot be read.
        httpx.HTTPStatusError: If the endpoint answers with an error status.
        ValueError: If the response has an unexpected shape or the answer
            cannot be parsed as JSON.
    """
    # Encode the image file as base64 (the payload embeds it as a data URL).
    with open(screenshot_path, "rb") as img_file:
        img_b64 = base64.b64encode(img_file.read()).decode("ascii")

    # Use the configured model id instead of a hard-coded one so that the
    # `qwen_model` setting actually takes effect. Replace the host with the
    # address of a self-hosted server if one is deployed.
    url = f"https://api-inference.huggingface.co/models/{settings.qwen_model}"
    headers = {
        "Authorization": f"Bearer {settings.hf_token}",
        "Content-Type": "application/json",
    }
    payload = {
        "inputs": {
            "image": f"data:image/png;base64,{img_b64}",
            "question": _QWEN_ANALYSIS_PROMPT,
        }
    }

    async with httpx.AsyncClient(timeout=60.0) as client:
        response = await client.post(url, headers=headers, json=payload)
        response.raise_for_status()
        result = response.json()

    # The answer is expected under result["answer"]; guard against other
    # shapes (e.g. a list or a null answer) instead of crashing with an
    # AttributeError.
    if not isinstance(result, dict):
        raise ValueError(
            f"Unexpected Qwen response type: {type(result).__name__}"
        )
    answer = result.get("answer") or ""
    if not isinstance(answer, str):
        raise ValueError(
            f"Unexpected Qwen answer type: {type(answer).__name__}"
        )

    return _parse_analysis_json(answer.strip())