sushilideaclan01 commited on
Commit
76c3397
·
1 Parent(s): f8af65f

Add product scraping functionality and AI concept filling

Browse files

- Introduced a new `.gitignore` file to exclude environment and build files.
- Added `scraper.py` to fetch product data from Amalfa product pages.
- Implemented `ai_filler.py` to suggest target audience, competitors, and psychological triggers based on scraped data.
- Updated `requirements.txt` to include `requests` and `beautifulsoup4` for web scraping.
- Enhanced `main.py` to support a new API endpoint for scraping and filling product data.
- Modified frontend to include a URL input for scraping product details and auto-filling form fields.

.gitignore ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Environment and secrets
2
+ .env
3
+ .env.local
4
+ .env.*.local
5
+ *.pem
6
+
7
+ # Python
8
+ __pycache__/
9
+ *.py[cod]
10
+ *$py.class
11
+ *.so
12
+ .Python
13
+ build/
14
+ develop-eggs/
15
+ dist/
16
+ downloads/
17
+ eggs/
18
+ .eggs/
19
+ lib/
20
+ lib64/
21
+ parts/
22
+ sdist/
23
+ var/
24
+ wheels/
25
+ *.egg-info/
26
+ .installed.cfg
27
+ *.egg
28
+
29
+ # Virtual environments
30
+ venv/
31
+ .venv/
32
+ env/
33
+ .env/
34
+
35
+ # IDE and editors
36
+ .idea/
37
+ .vscode/
38
+ *.swp
39
+ *.swo
40
+ *~
41
+ .project
42
+ .settings/
43
+
44
+ # OS
45
+ .DS_Store
46
+ Thumbs.db
47
+
48
+ # Logs and debug
49
+ *.log
50
+ .pytest_cache/
51
+ .coverage
52
+ htmlcov/
53
+
54
+ # Optional
55
+ *.bak
56
+ *.tmp
backend/ai_filler.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ AI module to fill concept fields (target_audience, competitors, psychological_triggers)
3
+ based on scraped product data.
4
+ """
5
+ from openai import OpenAI
6
+ from dotenv import load_dotenv
7
+ import os
8
+ import json
9
+
10
+ load_dotenv()
11
+
12
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
13
+
14
+ from backend.pydantic_schema import TARGET_AUDIENCE_OPTIONS
15
+
16
+
17
+ def _normalize_audience(s: str) -> str:
18
+ """Normalize for matching: lowercase, strip, normalize dashes/hyphens."""
19
+ if not s or not isinstance(s, str):
20
+ return ""
21
+ s = s.strip().lower()
22
+ # Normalize various dash/hyphen characters to a single hyphen
23
+ for c in ("–", "—", "−", "‑"):
24
+ s = s.replace(c, "-")
25
+ return s
26
+
27
+
28
def _match_audience(ai_value: str) -> str | None:
    """Map an AI-suggested audience string onto the canonical option list.

    For each option (in list order) an exact normalised match is tried
    first, then a loose containment match in either direction. Returns the
    exact entry from TARGET_AUDIENCE_OPTIONS, or None when nothing
    plausibly matches.
    """
    if not ai_value:
        return None
    wanted = _normalize_audience(ai_value)
    if not wanted:
        return None
    for option in TARGET_AUDIENCE_OPTIONS:
        candidate = _normalize_audience(option)
        if candidate == wanted:
            return option
        # Fuzzy fallback: either string containing the other counts.
        if wanted in candidate or candidate in wanted:
            return option
    return None
41
+
42
+
43
def fill_concept_fields(product_data: dict) -> dict:
    """
    Use AI to suggest target_audience, competitors, and psychological_triggers
    based on scraped product data.

    Args:
        product_data: Dictionary with product_name, description, price, category, etc.

    Returns:
        Dictionary with:
            target_audience: list[str] — up to 5 entries, each guaranteed to
                be an exact member of TARGET_AUDIENCE_OPTIONS (unmatched AI
                suggestions are dropped).
            competitors: list — up to 5 competitor names.
            psychological_triggers: str — comma-separated triggers.
        On any failure (missing API key, network error, malformed reply) all
        three fields are returned empty rather than raising, so the scraping
        flow still succeeds without AI fills.
    """
    system_prompt = """You are an expert marketing researcher for Amalfa, a contemporary jewellery brand in India.
Your task is to analyze product data and suggest:
1. Target Audience: Select 3-5 most relevant audiences from the provided list
2. Competitors: List 3-5 direct competitors or similar brands
3. Psychological Triggers: Suggest 3-5 psychological triggers that would resonate with the target audience for this product

Be specific and data-driven. Consider the product category, price point, and description."""

    available_audiences = ", ".join(TARGET_AUDIENCE_OPTIONS)

    user_prompt = f"""Product Data:
- Product Name: {product_data.get('product_name', 'N/A')}
- Category: {product_data.get('category', 'N/A')}
- Description: {product_data.get('description', 'N/A')}
- Price: {product_data.get('price', 'N/A')}
- Brand: {product_data.get('brand', 'Amalfa')}

Available Target Audience Options:
{available_audiences}

Please provide a JSON response with the following structure:
{{
    "target_audience": ["audience1", "audience2", "audience3"],
    "competitors": ["competitor1", "competitor2", "competitor3"],
    "psychological_triggers": "trigger1, trigger2, trigger3"
}}

Make sure target_audience values exactly match the available options."""

    try:
        client = OpenAI(api_key=OPENAI_API_KEY)
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ],
            response_format={"type": "json_object"},
            temperature=0.7
        )

        result = json.loads(response.choices[0].message.content)

        # Validate target_audience: map each AI suggestion to the exact
        # option string, de-duplicating while preserving the AI's ranking.
        validated_audiences = []
        seen = set()
        for audience in result.get("target_audience", []):
            matched = _match_audience(audience if isinstance(audience, str) else str(audience))
            if matched and matched not in seen:
                seen.add(matched)
                validated_audiences.append(matched)

        # Models sometimes return a list here despite the prompt asking for
        # a comma-separated string — coerce either shape to a string.
        triggers = result.get("psychological_triggers", "")
        if isinstance(triggers, list):
            triggers = ", ".join(str(t) for t in triggers)

        # Guard against a non-list competitors value before slicing.
        competitors = result.get("competitors", [])
        if not isinstance(competitors, list):
            competitors = [str(competitors)] if competitors else []

        return {
            "target_audience": validated_audiences[:5],
            "competitors": competitors[:5],
            "psychological_triggers": str(triggers)
        }
    except Exception:
        # Best-effort fallback: never let an AI failure break the endpoint.
        return {
            "target_audience": [],
            "competitors": [],
            "psychological_triggers": ""
        }
backend/claude_method.py CHANGED
@@ -33,7 +33,12 @@ def _add_additional_properties_false(schema: dict) -> dict:
33
  return schema
34
 
35
 
36
- def researcher_claude(target_audience: str, product_category: str, product_description: str):
 
 
 
 
 
37
  """
38
  Claude-based researcher function using native structured outputs.
39
 
@@ -41,16 +46,21 @@ def researcher_claude(target_audience: str, product_category: str, product_descr
41
  target_audience: Target audience from the predefined list
42
  product_category: Product category (e.g., "ring", "bangles")
43
  product_description: Description of the product
 
44
 
45
  Returns:
46
  list[ImageAdEssentials]: List of psychology triggers, angles, and concepts
47
  """
48
- # Initialize Claude client
 
 
49
  claude_client = Anthropic(api_key=ANTHROPIC_API_KEY)
50
 
51
  # Get prompts
52
  system_prompt = get_system_prompt()
53
- user_prompt = get_user_prompt(target_audience, product_category, product_description)
 
 
54
 
55
  # Build JSON schema from Pydantic model and add required additionalProperties: false
56
  json_schema = ImageAdEssentialsOutput.model_json_schema()
@@ -59,7 +69,7 @@ def researcher_claude(target_audience: str, product_category: str, product_descr
59
  # Use Claude's native structured outputs via output_config.format
60
  message = claude_client.messages.create(
61
  model="claude-opus-4-6",
62
- max_tokens=1024,
63
  system=system_prompt,
64
  messages=[
65
  {
 
33
  return schema
34
 
35
 
36
+ def researcher_claude(
37
+ target_audience: str,
38
+ product_category: str,
39
+ product_description: str,
40
+ count: int = 5,
41
+ ):
42
  """
43
  Claude-based researcher function using native structured outputs.
44
 
 
46
  target_audience: Target audience from the predefined list
47
  product_category: Product category (e.g., "ring", "bangles")
48
  product_description: Description of the product
49
+ count: Number of psychology triggers (concepts/angles) to generate
50
 
51
  Returns:
52
  list[ImageAdEssentials]: List of psychology triggers, angles, and concepts
53
  """
54
+ if not ANTHROPIC_API_KEY:
55
+ raise ValueError("ANTHROPIC_API_KEY is not set in the environment.")
56
+
57
  claude_client = Anthropic(api_key=ANTHROPIC_API_KEY)
58
 
59
  # Get prompts
60
  system_prompt = get_system_prompt()
61
+ user_prompt = get_user_prompt(
62
+ target_audience, product_category, product_description, count
63
+ )
64
 
65
  # Build JSON schema from Pydantic model and add required additionalProperties: false
66
  json_schema = ImageAdEssentialsOutput.model_json_schema()
 
69
  # Use Claude's native structured outputs via output_config.format
70
  message = claude_client.messages.create(
71
  model="claude-opus-4-6",
72
+ max_tokens=4096,
73
  system=system_prompt,
74
  messages=[
75
  {
backend/gpt_method.py CHANGED
@@ -1,7 +1,8 @@
1
  """
2
  GPT-based researcher implementation.
3
- Uses the latest Responses API with text_format for structured outputs.
4
  """
 
5
  from openai import OpenAI
6
  from backend.pydantic_schema import ImageAdEssentialsOutput
7
  from backend.prompt import get_system_prompt, get_user_prompt
@@ -12,41 +13,80 @@ load_dotenv()
12
 
13
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
- def researcher_gpt(target_audience: str, product_category: str, product_description: str):
 
 
 
 
 
 
17
  """
18
- GPT-based researcher function using the Responses API.
19
 
20
  Args:
21
  target_audience: Target audience from the predefined list
22
  product_category: Product category (e.g., "ring", "bangles")
23
  product_description: Description of the product
 
24
 
25
  Returns:
26
  list[ImageAdEssentials]: List of psychology triggers, angles, and concepts
27
  """
28
- # Initialize GPT client
29
- gpt_client = OpenAI(api_key=OPENAI_API_KEY)
30
 
31
- # Get prompts
32
  system_prompt = get_system_prompt()
33
- user_prompt = get_user_prompt(target_audience, product_category, product_description)
 
 
34
 
35
- # Use the Responses API with text_format for structured output
36
- response = gpt_client.responses.parse(
37
  model="gpt-4o",
38
- instructions=system_prompt,
39
- input=[
40
- {
41
- "role": "user",
42
- "content": user_prompt
43
- }
44
  ],
45
- text_format=ImageAdEssentialsOutput,
 
 
 
 
 
 
 
 
46
  )
47
 
48
- # output_parsed returns the Pydantic model directly
49
- if response.output_parsed:
50
- return response.output_parsed.output
51
- else:
52
- raise ValueError("GPT returned an unparseable response.")
 
 
 
 
 
 
1
  """
2
  GPT-based researcher implementation.
3
+ Uses Chat Completions API with response_format for structured JSON output.
4
  """
5
+ import json
6
  from openai import OpenAI
7
  from backend.pydantic_schema import ImageAdEssentialsOutput
8
  from backend.prompt import get_system_prompt, get_user_prompt
 
13
 
14
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
15
 
16
# JSON schema for strict structured output (matches ImageAdEssentialsOutput).
# NOTE(review): the key "phsychologyTriggers" looks misspelled but presumably
# mirrors the field name on the ImageAdEssentials pydantic model — confirm
# against backend/pydantic_schema.py before renaming it here.
RESEARCH_RESPONSE_SCHEMA = {
    "type": "object",
    "properties": {
        "output": {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "phsychologyTriggers": {"type": "string"},
                    "angles": {"type": "array", "items": {"type": "string"}},
                    "concepts": {"type": "array", "items": {"type": "string"}},
                },
                # strict mode requires every property listed and no extras.
                "required": ["phsychologyTriggers", "angles", "concepts"],
                "additionalProperties": False,
            },
        }
    },
    "required": ["output"],
    "additionalProperties": False,
}
37
 
38
+
39
def researcher_gpt(
    target_audience: str,
    product_category: str,
    product_description: str,
    count: int = 5,
):
    """
    GPT-based researcher function using Chat Completions with structured output.

    Args:
        target_audience: Target audience from the predefined list
        product_category: Product category (e.g., "ring", "bangles")
        product_description: Description of the product
        count: Number of psychology triggers (concepts/angles) to generate

    Returns:
        list[ImageAdEssentials]: List of psychology triggers, angles, and concepts

    Raises:
        ValueError: when the API key is missing, the model returns an empty
            message, or the reply is not valid JSON.
    """
    if not OPENAI_API_KEY:
        raise ValueError("OPENAI_API_KEY is not set in the environment.")

    client = OpenAI(api_key=OPENAI_API_KEY)
    conversation = [
        {"role": "system", "content": get_system_prompt()},
        {
            "role": "user",
            "content": get_user_prompt(
                target_audience, product_category, product_description, count
            ),
        },
    ]
    # strict=True pins the model to RESEARCH_RESPONSE_SCHEMA exactly.
    structured_format = {
        "type": "json_schema",
        "json_schema": {
            "name": "image_ad_essentials_output",
            "strict": True,
            "schema": RESEARCH_RESPONSE_SCHEMA,
        },
    }

    completion = client.chat.completions.create(
        model="gpt-4o",
        messages=conversation,
        response_format=structured_format,
        temperature=0.7,
    )

    reply = completion.choices[0].message
    if not reply.content:
        raise ValueError("GPT returned an empty response.")

    try:
        payload = json.loads(reply.content)
        return ImageAdEssentialsOutput(**payload).output
    except (json.JSONDecodeError, TypeError) as e:
        raise ValueError(f"GPT returned invalid JSON: {e}") from e
backend/main.py CHANGED
@@ -11,6 +11,8 @@ from pydantic import BaseModel
11
  from backend.pydantic_schema import ImageAdEssentials, TARGET_AUDIENCE_OPTIONS
12
  from backend.gpt_method import researcher_gpt
13
  from backend.claude_method import researcher_claude
 
 
14
 
15
  app = FastAPI(title="Image Ad Essentials Researcher")
16
 
@@ -30,9 +32,14 @@ class ResearchRequest(BaseModel):
30
  target_audience: list[str]
31
  product_category: str
32
  product_description: str
 
33
  method: Literal["gpt", "claude"]
34
 
35
 
 
 
 
 
36
  class AudienceResult(BaseModel):
37
  target_audience: str
38
  output: list[ImageAdEssentials]
@@ -50,6 +57,35 @@ def get_target_audiences():
50
  return {"audiences": TARGET_AUDIENCE_OPTIONS}
51
 
52
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  @app.post("/api/research", response_model=ResearchResponse)
54
  def run_research(req: ResearchRequest):
55
  """
@@ -62,11 +98,11 @@ def run_research(req: ResearchRequest):
62
  for audience in req.target_audience:
63
  if req.method == "gpt":
64
  result = researcher_gpt(
65
- audience, req.product_category, req.product_description
66
  )
67
  elif req.method == "claude":
68
  result = researcher_claude(
69
- audience, req.product_category, req.product_description
70
  )
71
  else:
72
  raise HTTPException(status_code=400, detail="Invalid method. Use 'gpt' or 'claude'.")
@@ -78,7 +114,10 @@ def run_research(req: ResearchRequest):
78
  except ValueError as e:
79
  raise HTTPException(status_code=500, detail=str(e))
80
  except Exception as e:
81
- raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")
 
 
 
82
 
83
 
84
  # --- Serve frontend static files (MUST be after API routes) ---
 
11
  from backend.pydantic_schema import ImageAdEssentials, TARGET_AUDIENCE_OPTIONS
12
  from backend.gpt_method import researcher_gpt
13
  from backend.claude_method import researcher_claude
14
+ from backend.scraper import scrape_product
15
+ from backend.ai_filler import fill_concept_fields
16
 
17
  app = FastAPI(title="Image Ad Essentials Researcher")
18
 
 
32
  target_audience: list[str]
33
  product_category: str
34
  product_description: str
35
+ count: int = 5 # number of concepts/angles (psychology triggers) to generate
36
  method: Literal["gpt", "claude"]
37
 
38
 
39
+ class ScrapeProductRequest(BaseModel):
40
+ url: str
41
+
42
+
43
  class AudienceResult(BaseModel):
44
  target_audience: str
45
  output: list[ImageAdEssentials]
 
57
  return {"audiences": TARGET_AUDIENCE_OPTIONS}
58
 
59
 
60
@app.post("/api/scrape-product")
def scrape_and_fill_product(req: ScrapeProductRequest):
    """
    Scrape product data from URL and use AI to fill concept fields.
    Returns product data with suggested target_audience, competitors, and psychological_triggers.
    """
    try:
        # Scraped fields provide the product facts; AI fills the strategy
        # fields the scraper intentionally leaves blank.
        scraped = scrape_product(req.url)
        suggestions = fill_concept_fields(scraped)
        return {
            **scraped,
            "target_audience": suggestions["target_audience"],
            "competitors": suggestions["competitors"],
            "psychological_triggers": suggestions["psychological_triggers"],
        }
    except ValueError as e:
        # Invalid URL (and similar input problems) → client error.
        raise HTTPException(status_code=400, detail=str(e))
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"An error occurred while scraping: {str(e)}")
87
+
88
+
89
  @app.post("/api/research", response_model=ResearchResponse)
90
  def run_research(req: ResearchRequest):
91
  """
 
98
  for audience in req.target_audience:
99
  if req.method == "gpt":
100
  result = researcher_gpt(
101
+ audience, req.product_category, req.product_description, req.count
102
  )
103
  elif req.method == "claude":
104
  result = researcher_claude(
105
+ audience, req.product_category, req.product_description, req.count
106
  )
107
  else:
108
  raise HTTPException(status_code=400, detail="Invalid method. Use 'gpt' or 'claude'.")
 
114
  except ValueError as e:
115
  raise HTTPException(status_code=500, detail=str(e))
116
  except Exception as e:
117
+ detail = str(e)
118
+ if not detail.strip():
119
+ detail = repr(e)
120
+ raise HTTPException(status_code=500, detail=detail)
121
 
122
 
123
  # --- Serve frontend static files (MUST be after API routes) ---
backend/prompt.py CHANGED
@@ -17,18 +17,25 @@ def get_system_prompt() -> str:
17
  User will provide you the category on which he needs to run the ads, his requirement, product description and what is target audience."""
18
 
19
 
20
- def get_user_prompt(target_audience: str, product_category: str, product_description: str) -> str:
 
 
 
 
 
21
  """
22
  Returns the user prompt with the provided inputs.
23
-
24
  Args:
25
  target_audience: Target audience(s), comma-separated
26
  product_category: Product category (e.g., "ring", "bangles")
27
  product_description: Description of the product
 
28
  """
29
  return f"""Following are the inputs:
30
  Product Category: {product_category}
31
  Target Audience: {target_audience}
32
  Product Description: {product_description}
33
-
34
- Provide the different psychology triggers, angles and concept based on the given input."""
 
 
17
  User will provide you the category on which he needs to run the ads, his requirement, product description and what is target audience."""
18
 
19
 
20
def get_user_prompt(
    target_audience: str,
    product_category: str,
    product_description: str,
    count: int = 5,
) -> str:
    """
    Returns the user prompt with the provided inputs.

    Args:
        target_audience: Target audience(s), comma-separated
        product_category: Product category (e.g., "ring", "bangles")
        product_description: Description of the product
        count: Number of psychology triggers (each with angles and concepts) to generate
    """
    # The count is stated twice to pin the model to exactly `count` items
    # in the structured output.
    return f"""Following are the inputs:
Product Category: {product_category}
Target Audience: {target_audience}
Product Description: {product_description}

Provide exactly {count} psychology triggers. For each trigger, provide multiple ad angles and ad concepts.
Output exactly {count} items in the required format, each with one psychology trigger, and a list of angles and a list of concepts."""
backend/scraper.py ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Scrape product data from an Amalfa product page URL.
3
+ """
4
+ import json
5
+ import re
6
+ from typing import Any
7
+ from urllib.parse import urlparse
8
+
9
+ import requests
10
+ from bs4 import BeautifulSoup
11
+
12
+
13
+ def _clean_text(s: str) -> str:
14
+ if not s:
15
+ return ""
16
+ return " ".join(s.split()).strip()
17
+
18
+
19
+ def _extract_price_from_text(text: str) -> str:
20
+ """Find first price like Rs 1,299 or ₹1299."""
21
+ if not text:
22
+ return ""
23
+ # Rs 1,299.00 or ₹1,299 or Rs. 1299
24
+ m = re.search(r"(?:Rs\.?|₹)\s*([\d,]+(?:\.\d{2})?)", text, re.I)
25
+ if m:
26
+ return m.group(0).strip()
27
+ m = re.search(r"[\d,]+(?:\.\d{2})?", text)
28
+ if m:
29
+ return m.group(0)
30
+ return ""
31
+
32
+
33
def scrape_product(url: str) -> dict[str, Any]:
    """
    Fetch an Amalfa product page and extract product_name, description, price, offers, product_images, brand, category.
    Strategy fields (target_audience, competitors, psychological_triggers) and show_product are left empty for AI / user.

    Extraction strategy, in priority order:
      1. JSON-LD <script type="application/ld+json"> Product blocks
      2. Open Graph / meta tags
      3. Heuristic fallbacks (h1, price-ish CSS classes, gallery <img> tags)

    Raises:
        ValueError: if *url* has no scheme or host.
        requests.HTTPError: if the page fetch fails (via raise_for_status).
    """
    parsed = urlparse(url)
    if not parsed.scheme or not parsed.netloc:
        raise ValueError(f"Invalid URL: {url}")

    # Browser-like headers — some storefronts reject the default requests UA.
    headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Language": "en-IN,en;q=0.9",
    }
    resp = requests.get(url, headers=headers, timeout=15)
    resp.raise_for_status()
    html = resp.text
    soup = BeautifulSoup(html, "html.parser")

    # Result skeleton; string fields stay "" when nothing is found.
    product: dict[str, Any] = {
        "product_name": "",
        "description": "",
        "price": "",
        "offers": "",
        "product_images": "",
        "brand": "Amalfa",
        "category": "",
        "target_audience": "",
        "competitors": "",
        "psychological_triggers": "",
        "show_product": None,
    }

    # 1. JSON-LD (Shopify and many stores)
    for script in soup.find_all("script", type="application/ld+json"):
        try:
            data = json.loads(script.string or "{}")
            # NOTE(review): only a top-level {"@type": "Product"} dict is
            # handled; @graph wrappers and list payloads are skipped.
            if isinstance(data, dict) and data.get("@type") == "Product":
                product["product_name"] = _clean_text(data.get("name") or "")
                product["description"] = _clean_text(data.get("description") or "")
                if data.get("offers") and isinstance(data["offers"], dict):
                    product["price"] = str(data["offers"].get("price", ""))
                elif isinstance(data.get("offers"), list) and data["offers"]:
                    product["price"] = str(data["offers"][0].get("price", ""))
                if data.get("image"):
                    imgs = data["image"] if isinstance(data["image"], list) else [data["image"]]
                    # Collect up to 10 image URLs (product gallery)
                    product["product_images"] = ", ".join(str(u).strip() for u in imgs[:10] if u)
                # Stop at the first block that yields both name and price.
                if product["product_name"] and product["price"]:
                    break
        except (json.JSONDecodeError, TypeError):
            continue

    # 2. Meta tags (og:title, og:description, og:image)
    if not product["product_name"]:
        meta = soup.find("meta", property="og:title")
        if meta and meta.get("content"):
            # og:title is often "Name | Store" — keep only the name part.
            product["product_name"] = _clean_text(meta["content"].split("|")[0].strip())
    if not product["description"]:
        meta = soup.find("meta", property="og:description") or soup.find("meta", attrs={"name": "description"})
        if meta and meta.get("content"):
            product["description"] = _clean_text(meta["content"])
    if not product["product_images"]:
        meta = soup.find("meta", property="og:image")
        if meta and meta.get("content"):
            product["product_images"] = meta["content"].strip()

    # 3. Fallback: H1, price in body, description section
    if not product["product_name"]:
        h1 = soup.find("h1")
        if h1:
            product["product_name"] = _clean_text(h1.get_text())

    if not product["price"]:
        # Common Shopify / Amalfa price classes
        for sel in ["[class*='price']", ".product__price", "[data-product-price]", ".price-item"]:
            el = soup.select_one(sel)
            if el:
                product["price"] = _extract_price_from_text(el.get_text())
                if product["price"]:
                    break
        if not product["price"]:
            # Last resort: first price-looking token anywhere in the page text.
            product["price"] = _extract_price_from_text(soup.get_text())

    if not product["description"]:
        desc_el = (
            soup.find("div", class_=re.compile(r"description|product-description|product__description", re.I))
            or soup.find("meta", attrs={"name": "description"})
        )
        if desc_el:
            # desc_el may be a tag (get_text) or a <meta> (content attr).
            product["description"] = _clean_text(desc_el.get_text() if hasattr(desc_el, "get_text") else (desc_el.get("content") or ""))

    if not product["product_images"]:
        # Product gallery images: collect up to 10 URLs (no break after first)
        seen = set()
        for img in soup.select("img[src*='cdn.shopify'], img[data-src*='shopify'], img[src*='amalfa']")[:20]:
            if len(seen) >= 10:
                break
            # Strip query params so size variants dedupe to one URL.
            src = (img.get("data-src") or img.get("src") or "").split("?")[0].strip()
            if src and src.startswith("http") and src not in seen:
                seen.add(src)
                product["product_images"] = (product["product_images"] + ", " + src).strip(", ")

    # Infer category from URL path (e.g. /collections/earrings/...) or leave for AI
    # NOTE: "earring" is checked before "ring" because "earring" contains "ring".
    path = (parsed.path or "").lower()
    if "earring" in path:
        product["category"] = product["category"] or "Earrings"
    elif "necklace" in path or "pendant" in path or "choker" in path:
        product["category"] = product["category"] or "Necklaces"
    elif "ring" in path:
        product["category"] = product["category"] or "Rings"
    elif "bracelet" in path or "bangle" in path:
        product["category"] = product["category"] or "Bracelets"
    elif "anklet" in path:
        product["category"] = product["category"] or "Anklets"

    if not product["category"]:
        product["category"] = "Jewellery"

    return product
frontend/index.html CHANGED
@@ -31,9 +31,28 @@
31
  <!-- Form Card -->
32
  <div class="card form-card">
33
  <form id="researchForm">
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  <!-- Target Audience (multi-select) -->
35
  <div class="field">
36
- <label>Target Audience <span class="label-hint">(select one or more)</span></label>
37
  <div class="multiselect" id="audienceMultiselect">
38
  <div class="multiselect-selected" id="selectedAudiences">
39
  <span class="multiselect-placeholder">Loading audiences…</span>
@@ -67,6 +86,19 @@
67
  ></textarea>
68
  </div>
69
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
  <!-- Method Toggle -->
71
  <div class="field">
72
  <label>AI Method</label>
 
31
  <!-- Form Card -->
32
  <div class="card form-card">
33
  <form id="researchForm">
34
+ <!-- Product URL Scraper -->
35
+ <div class="field">
36
+ <label for="productUrl">Product URL <span class="label-hint">(optional — fills category, description &amp; target audience)</span></label>
37
+ <div class="url-input-group">
38
+ <input
39
+ type="url"
40
+ id="productUrl"
41
+ placeholder="https://amalfa.in/products/..."
42
+ class="url-input"
43
+ />
44
+ <button type="button" class="scrape-btn" id="scrapeBtn">
45
+ <span class="scrape-btn-text">Scrape & Fill</span>
46
+ <span class="scrape-btn-loader hidden">
47
+ <span class="spinner"></span>
48
+ </span>
49
+ </button>
50
+ </div>
51
+ </div>
52
+
53
  <!-- Target Audience (multi-select) -->
54
  <div class="field">
55
+ <label>Target Audience <span class="label-hint">(select one or more — or use Scrape &amp; Fill above)</span></label>
56
  <div class="multiselect" id="audienceMultiselect">
57
  <div class="multiselect-selected" id="selectedAudiences">
58
  <span class="multiselect-placeholder">Loading audiences…</span>
 
86
  ></textarea>
87
  </div>
88
 
89
+ <!-- Number of concepts & angles -->
90
+ <div class="field">
91
+ <label for="conceptsCount">Number of concepts &amp; angles <span class="label-hint">(triggers to generate)</span></label>
92
+ <input
93
+ type="number"
94
+ id="conceptsCount"
95
+ min="1"
96
+ max="15"
97
+ value="5"
98
+ placeholder="e.g. 5"
99
+ />
100
+ </div>
101
+
102
  <!-- Method Toggle -->
103
  <div class="field">
104
  <label>AI Method</label>
frontend/script.js CHANGED
@@ -5,6 +5,11 @@ const API_BASE = "";
5
  const form = document.getElementById("researchForm");
6
  const categoryInput = document.getElementById("productCategory");
7
  const descriptionInput = document.getElementById("productDescription");
 
 
 
 
 
8
  const submitBtn = document.getElementById("submitBtn");
9
  const btnText = submitBtn.querySelector(".btn-text");
10
  const btnLoader = submitBtn.querySelector(".btn-loader");
@@ -128,16 +133,88 @@ toggleBtns.forEach((btn) => {
128
  });
129
  });
130
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
  // ===== Form Submit =====
132
  form.addEventListener("submit", async (e) => {
133
  e.preventDefault();
134
  hideError();
135
  hideResults();
136
 
 
 
137
  const payload = {
138
  target_audience: selectedAudiences,
139
  product_category: categoryInput.value.trim(),
140
  product_description: descriptionInput.value.trim(),
 
141
  method: selectedMethod,
142
  };
143
 
@@ -161,7 +238,11 @@ form.addEventListener("submit", async (e) => {
161
  body: JSON.stringify(payload),
162
  });
163
 
164
- if (!res.ok) throw new Error("Server error");
 
 
 
 
165
 
166
  const data = await res.json();
167
  renderResults(data.results, selectedMethod);
@@ -278,9 +359,20 @@ function setLoading(isLoading) {
278
  btnLoader.classList.toggle("hidden", !isLoading);
279
  }
280
 
281
- function showError(msg) {
282
  errorBanner.textContent = msg;
283
  errorBanner.classList.remove("hidden");
 
 
 
 
 
 
 
 
 
 
 
284
  }
285
 
286
  function hideError() {
 
5
  const form = document.getElementById("researchForm");
6
  const categoryInput = document.getElementById("productCategory");
7
  const descriptionInput = document.getElementById("productDescription");
8
+ const productUrlInput = document.getElementById("productUrl");
9
+ const conceptsCountInput = document.getElementById("conceptsCount");
10
+ const scrapeBtn = document.getElementById("scrapeBtn");
11
+ const scrapeBtnText = scrapeBtn.querySelector(".scrape-btn-text");
12
+ const scrapeBtnLoader = scrapeBtn.querySelector(".scrape-btn-loader");
13
  const submitBtn = document.getElementById("submitBtn");
14
  const btnText = submitBtn.querySelector(".btn-text");
15
  const btnLoader = submitBtn.querySelector(".btn-loader");
 
133
  });
134
  });
135
 
136
+ // ===== Scrape Product =====
137
+ scrapeBtn.addEventListener("click", async () => {
138
+ const url = productUrlInput.value.trim();
139
+
140
+ if (!url) {
141
+ showError("Please enter a product URL.");
142
+ return;
143
+ }
144
+
145
+ // Basic URL validation
146
+ try {
147
+ new URL(url);
148
+ } catch (e) {
149
+ showError("Please enter a valid URL.");
150
+ return;
151
+ }
152
+
153
+ hideError();
154
+ setScrapeLoading(true);
155
+
156
+ try {
157
+ const res = await fetch(`${API_BASE}/api/scrape-product`, {
158
+ method: "POST",
159
+ headers: { "Content-Type": "application/json" },
160
+ body: JSON.stringify({ url }),
161
+ });
162
+
163
+ if (!res.ok) {
164
+ const errorData = await res.json().catch(() => ({}));
165
+ throw new Error(errorData.detail || "Failed to scrape product data.");
166
+ }
167
+
168
+ const data = await res.json();
169
+
170
+ // Auto-fill form fields: category, description, and target audience
171
+ if (data.category) {
172
+ categoryInput.value = data.category;
173
+ }
174
+ if (data.description) {
175
+ descriptionInput.value = data.description;
176
+ }
177
+ // Scrape & Fill also fills target audience from AI suggestions
178
+ if (data.target_audience && data.target_audience.length > 0) {
179
+ selectedAudiences = [...data.target_audience];
180
+ renderOptions(searchInput.value);
181
+ renderSelected();
182
+ }
183
+
184
+ const audienceNote = (data.target_audience && data.target_audience.length > 0)
185
+ ? ` Target audience filled (${data.target_audience.length} selected).`
186
+ : "";
187
+ showError(`✓ Product data scraped successfully!${data.product_name ? ` Found: ${data.product_name}.` : ""}${audienceNote}`, "success");
188
+
189
+ // Clear URL input after successful scrape
190
+ productUrlInput.value = "";
191
+
192
+ } catch (err) {
193
+ showError(err.message || "Something went wrong while scraping the product.");
194
+ } finally {
195
+ setScrapeLoading(false);
196
+ }
197
+ });
198
+
199
// Toggle the Scrape & Fill button between idle and loading states:
// disables the button and swaps the label for the spinner while loading.
function setScrapeLoading(isLoading) {
  scrapeBtn.disabled = isLoading;
  scrapeBtnText.classList.toggle("hidden", isLoading);
  scrapeBtnLoader.classList.toggle("hidden", !isLoading);
}
204
+
205
  // ===== Form Submit =====
206
  form.addEventListener("submit", async (e) => {
207
  e.preventDefault();
208
  hideError();
209
  hideResults();
210
 
211
+ const count = Math.min(15, Math.max(1, parseInt(conceptsCountInput.value, 10) || 5));
212
+
213
  const payload = {
214
  target_audience: selectedAudiences,
215
  product_category: categoryInput.value.trim(),
216
  product_description: descriptionInput.value.trim(),
217
+ count,
218
  method: selectedMethod,
219
  };
220
 
 
238
  body: JSON.stringify(payload),
239
  });
240
 
241
+ if (!res.ok) {
242
+ const errData = await res.json().catch(() => ({}));
243
+ const msg = Array.isArray(errData.detail) ? errData.detail.map((e) => e.msg || e).join("; ") : (errData.detail || "Server error");
244
+ throw new Error(msg);
245
+ }
246
 
247
  const data = await res.json();
248
  renderResults(data.results, selectedMethod);
 
359
  btnLoader.classList.toggle("hidden", !isLoading);
360
  }
361
 
362
+ function showError(msg, type = "error") {
363
  errorBanner.textContent = msg;
364
  errorBanner.classList.remove("hidden");
365
+
366
+ // Update styling based on type
367
+ if (type === "success") {
368
+ errorBanner.style.background = "rgba(16, 163, 127, 0.1)";
369
+ errorBanner.style.borderColor = "rgba(16, 163, 127, 0.3)";
370
+ errorBanner.style.color = "#10a37f";
371
+ } else {
372
+ errorBanner.style.background = "rgba(232, 84, 84, 0.1)";
373
+ errorBanner.style.borderColor = "rgba(232, 84, 84, 0.3)";
374
+ errorBanner.style.color = "var(--danger)";
375
+ }
376
  }
377
 
378
  function hideError() {
frontend/styles.css CHANGED
@@ -115,6 +115,47 @@ body {
115
  min-height: 80px;
116
  }
117
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
  /* ===== Toggle Group ===== */
119
  .toggle-group {
120
  display: flex;
 
115
  min-height: 80px;
116
  }
117
 
118
+ /* ===== URL Input Group ===== */
119
+ .url-input-group {
120
+ display: flex;
121
+ gap: 12px;
122
+ }
123
+
124
+ .url-input {
125
+ flex: 1;
126
+ }
127
+
128
+ .scrape-btn {
129
+ padding: 12px 20px;
130
+ border: 1px solid var(--accent);
131
+ border-radius: var(--radius-sm);
132
+ background: rgba(201, 164, 108, 0.1);
133
+ color: var(--accent-light);
134
+ font-size: 0.95rem;
135
+ font-weight: 600;
136
+ cursor: pointer;
137
+ transition: all 0.2s;
138
+ white-space: nowrap;
139
+ display: flex;
140
+ align-items: center;
141
+ gap: 8px;
142
+ }
143
+
144
+ .scrape-btn:hover {
145
+ background: rgba(201, 164, 108, 0.2);
146
+ border-color: var(--accent-light);
147
+ }
148
+
149
+ .scrape-btn:disabled {
150
+ opacity: 0.6;
151
+ cursor: not-allowed;
152
+ }
153
+
154
+ .scrape-btn-loader {
155
+ display: inline-flex;
156
+ align-items: center;
157
+ }
158
+
159
  /* ===== Toggle Group ===== */
160
  .toggle-group {
161
  display: flex;
requirements.txt CHANGED
@@ -4,3 +4,5 @@ openai
4
  anthropic
5
  pydantic
6
  python-dotenv
 
 
 
4
  anthropic
5
  pydantic
6
  python-dotenv
7
+ requests
8
+ beautifulsoup4