Spaces:

lcmind
/

vibe-link-server

Sleeping

App Files Files Community

Lcmind committed on Jan 15

Commit

b895841

1 Parent(s): f94d4dc

fix: S-tier prompt - hex to color name, blur UI text, remove watermarks

Browse files

Files changed (2) hide show

app/core/config.py +5 -2
app/services/qwen.py +94 -0

app/core/config.py CHANGED Viewed

@@ -37,6 +37,9 @@ class Settings(BaseSettings):
     gemini_model: str = "gemma-3-27b-it"
     groq_model: str = "meta-llama/llama-4-maverick-17b-128e-instruct"
     # Pyppeteer Settings
     puppeteer_executable_path: str = "/usr/bin/chromium"
     puppeteer_args: List[str] = [
@@ -46,8 +49,8 @@ class Settings(BaseSettings):
         "--disable-gpu",
     ]
-    # 분석 모델 선택: "gemini" 또는 "groq"
-    analysis_model: str = "groq"  # 또는 "gemini"
     class Config:
         env_file = ".env"

     gemini_model: str = "gemma-3-27b-it"
     groq_model: str = "meta-llama/llama-4-maverick-17b-128e-instruct"
+    # Qwen Model (멀티모달)
+    qwen_model: str = "Qwen/Qwen3-VL-32B-Instruct"  # 32B 모델로 교체
     # Pyppeteer Settings
     puppeteer_executable_path: str = "/usr/bin/chromium"
     puppeteer_args: List[str] = [
         "--disable-gpu",
     ]
+    # 분석 모델 선택: "gemini", "groq", "qwen"
+    analysis_model: str = "qwen"  # "gemini", "groq", "qwen" 중 선택
     class Config:
         env_file = ".env"

app/services/qwen.py ADDED Viewed

	@@ -0,0 +1,94 @@

+"""Qwen-VL 멀티모달 분석 서비스."""
+import httpx
+import base64
+import json
+from app.core.config import settings
+async def analyze_with_qwen(screenshot_path: str) -> dict:
+    """
+    Analyze website screenshot using Qwen-VL API.
+    Args:
+        screenshot_path: Path to the screenshot file
+    Returns:
+        dict: Analysis results with title, atmosphere, colors, and keywords
+    Raises:
+        Exception: If analysis fails
+    """
+    # 이미지 파일을 base64로 인코딩
+    with open(screenshot_path, "rb") as img_file:
+        img_b64 = base64.b64encode(img_file.read()).decode("utf-8")
+    prompt = """
+You are a Senior Creative Director analyzing a website screenshot for a commercial poster design.
+=== TASK ===
+Extract key information to create a poster that VISUALLY REPRESENTS what this company does.
+=== ANALYSIS STEPS ===
+1. **WHAT IS THIS?** (Read the screen carefully)
+   - Company/Brand name (if Korean, romanize: 무신사→MUSINSA)
+   - What do they sell or provide? Be SPECIFIC.
+   - Who is the target user?
+2. **VISUAL TRANSLATION** (Convert business to imagery)
+   The poster must show OBJECTS that represent the business:
+   | Business Type | What to Show |
+   |--------------|--------------|
+   | Productivity Tool | Organized workspace, floating UI panels, clean desk, glass screens with icons |
+   | Fashion Store | Clothes on racks, sneakers, fashion photography studio |
+   | Search/Tech | Holographic interfaces, data streams, futuristic screens |
+   | Delivery | Flying boxes, warehouse, conveyor belts |
+   | Food | The food items, kitchen, restaurant interior |
+3. **COLOR EXTRACTION**
+   - What is the main brand color from the logo/design?
+   - Is it single color or multi-color brand?
+=== OUTPUT (JSON) ===
+{
+  "brand_name": "ENGLISH brand name",
+  "business_type": "Productivity/Fashion/Tech/Delivery/Food/Other",
+  "what_they_provide": "Specific description in 15 words",
+  "poster_objects": "List concrete objects: 'glass panels, folder icon, chat icon, checklist, modern desk, soft lighting'",
+  "background_style": "Clean gradient/Studio/Futuristic/Warehouse/Minimal",
+  "primary_color": "#hexcode",
+  "mood": "Clean/Premium/Energetic/Calm"
+}
+"""
+    url = "https://api-inference.huggingface.co/models/Qwen/Qwen-VL-Chat"  # 또는 직접 띄운 서버 주소
+    headers = {
+        "Authorization": f"Bearer {settings.hf_token}",
+        "Content-Type": "application/json"
+    }
+    payload = {
+        "inputs": {
+            "image": f"data:image/png;base64,{img_b64}",
+            "question": prompt
+        }
+    }
+    async with httpx.AsyncClient(timeout=60.0) as client:
+        response = await client.post(url, headers=headers, json=payload)
+        response.raise_for_status()
+        result = response.json()
+        # Qwen-VL은 답변이 result['answer']에 들어있음
+        text = result.get("answer", "").strip()
+    # 기존 JSON 파싱 로직 재사용
+    if "```json" in text:
+        text = text.split("```json")[1].split("```", 1)[0].strip()
+    elif "```" in text:
+        text = text.split("```", 1)[1].split("```", 1)[0].strip()
+    text = text[text.find('{'):text.rfind('}')+1]
+    try:
+        analysis = json.loads(text)
+        return analysis
+    except json.JSONDecodeError as e:
+        text = text.replace("'", '"').replace('\n', ' ')
+        try:
+            analysis = json.loads(text)
+            return analysis
+        except:
+            raise Exception(f"Failed to parse Qwen response as JSON: {text[:200]}")