Spaces:
Running
Running
Added new version of scrapers
Browse files- code/web_scraper_allegro.py +119 -110
- code/web_scraper_ebay.py +133 -73
- requirements.txt +0 -2
code/web_scraper_allegro.py
CHANGED
|
@@ -1,16 +1,33 @@
|
|
| 1 |
-
from apify_client import ApifyClient
|
| 2 |
import os
|
| 3 |
-
import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
|
|
|
|
| 5 |
|
| 6 |
-
|
| 7 |
-
|
|
|
|
| 8 |
polish_chars = {
|
| 9 |
-
"ą": "a",
|
| 10 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
}
|
|
|
|
| 12 |
text = text.lower()
|
| 13 |
result = ""
|
|
|
|
| 14 |
for char in text:
|
| 15 |
if char in polish_chars:
|
| 16 |
result += polish_chars[char]
|
|
@@ -18,125 +35,117 @@ def sanitize_folder_name(text):
|
|
| 18 |
result += char
|
| 19 |
else:
|
| 20 |
result += "_"
|
|
|
|
|
|
|
| 21 |
while "__" in result:
|
| 22 |
result = result.replace("__", "_")
|
| 23 |
-
return result.strip("_")
|
| 24 |
|
|
|
|
| 25 |
|
| 26 |
-
def
|
| 27 |
-
"""
|
| 28 |
-
if not
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
return
|
| 34 |
-
|
| 35 |
|
| 36 |
-
def
|
| 37 |
-
"""
|
| 38 |
-
|
| 39 |
-
|
|
|
|
|
|
|
|
|
|
| 40 |
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
image_sources.extend(item_data['images'])
|
| 45 |
-
else:
|
| 46 |
-
image_sources.append(item_data['images'])
|
| 47 |
-
|
| 48 |
-
if 'image' in item_data and item_data['image']:
|
| 49 |
-
image_sources.append(item_data['image'])
|
| 50 |
-
|
| 51 |
-
if 'imageUrl' in item_data and item_data['imageUrl']:
|
| 52 |
-
image_sources.append(item_data['imageUrl'])
|
| 53 |
-
|
| 54 |
-
for img_url in image_sources:
|
| 55 |
-
if img_url and isinstance(img_url, str):
|
| 56 |
-
if "allegroimg.com" in img_url or "img" in img_url:
|
| 57 |
-
for size in allowed_sizes:
|
| 58 |
-
img_url = img_url.replace(size, "/original/")
|
| 59 |
-
unique_links.add(img_url)
|
| 60 |
|
| 61 |
-
|
|
|
|
|
|
|
| 62 |
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
"""Scrape single Allegro product using Apify E-commerce Tool"""
|
| 66 |
-
|
| 67 |
-
api_token = os.getenv('APIFY_API_TOKEN')
|
| 68 |
-
if not api_token:
|
| 69 |
-
raise ValueError("APIFY_API_TOKEN environment variable not set")
|
| 70 |
-
|
| 71 |
-
client = ApifyClient(api_token)
|
| 72 |
-
|
| 73 |
-
# Correct input format for E-commerce Scraping Tool
|
| 74 |
-
run_input = {
|
| 75 |
-
"startUrls": [
|
| 76 |
-
url
|
| 77 |
-
]
|
| 78 |
-
}
|
| 79 |
|
| 80 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
|
|
|
|
|
|
|
| 82 |
try:
|
| 83 |
-
|
| 84 |
-
run_input=run_input
|
| 85 |
-
)
|
| 86 |
-
dataset_client = client.dataset(actor_call['defaultDatasetId'])
|
| 87 |
-
items = list(dataset_client.iterate_items())
|
| 88 |
|
| 89 |
-
|
| 90 |
-
print("⚠️ No data returned from Apify")
|
| 91 |
-
return {
|
| 92 |
-
"platform": "allegro",
|
| 93 |
-
"url": url,
|
| 94 |
-
"title": "untitled",
|
| 95 |
-
"description": "No description",
|
| 96 |
-
"price": None,
|
| 97 |
-
"image_urls": []
|
| 98 |
-
}
|
| 99 |
|
| 100 |
-
|
| 101 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
|
| 103 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
if thumbnail:
|
| 108 |
-
image_urls = [thumbnail]
|
| 109 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
return {
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
|
|
|
|
|
|
| 119 |
except Exception as e:
|
| 120 |
-
print(f"
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
"price": None,
|
| 127 |
-
"image_urls": []
|
| 128 |
-
}
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
# Example usage
|
| 132 |
-
if __name__ == "__main__":
|
| 133 |
-
url = input("Allegro URL: ")
|
| 134 |
-
result = scrape_allegro_offer(url)
|
| 135 |
-
|
| 136 |
-
print("\n✅ Scraping result:")
|
| 137 |
-
print(f"Title: {result['title']}")
|
| 138 |
-
print(f"Price: {result['price']}")
|
| 139 |
-
print(f"Description: {result['description'][:100]}..." if len(result['description']) > 100 else f"Description: {result['description']}")
|
| 140 |
-
print(f"Images: {len(result['image_urls'])} found")
|
| 141 |
-
for img in result['image_urls'][:3]:
|
| 142 |
-
print(f" - {img}")
|
|
|
|
|
|
|
| 1 |
import os
|
| 2 |
+
import requests
|
| 3 |
+
from apify_client import ApifyClient
|
| 4 |
+
from dotenv import load_dotenv
|
| 5 |
+
import json
|
| 6 |
+
|
| 7 |
+
# --- CONFIGURATION ---
|
| 8 |
+
# Load environment variables from the .env file (if it exists)
|
| 9 |
+
load_dotenv()
|
| 10 |
|
| 11 |
+
ACTOR_ID = "e-commerce/allegro-product-detail-scraper"
|
| 12 |
|
| 13 |
+
# --- HELPER FUNCTIONS ---
|
| 14 |
+
def sanitize_name(text):
    """Turn arbitrary text into a lowercase, underscore-separated folder name.

    Polish diacritics are transliterated to their ASCII base letters, other
    alphanumeric characters are kept, and every run of remaining characters
    collapses to a single underscore. Leading and trailing underscores are
    removed.

    Args:
        text: Value to sanitize. ``None`` is treated as an empty string and
            any other non-string value is converted with ``str()``.

    Returns:
        The sanitized name; an empty string when nothing usable remains.
    """
    polish_chars = {
        "ą": "a",
        "ć": "c",
        "ę": "e",
        "ł": "l",
        "ń": "n",
        "ó": "o",
        "ś": "s",
        "ź": "z",
        "ż": "z",
    }

    # Scraped titles are not guaranteed to be strings; never crash on them.
    text = "" if text is None else str(text)

    pieces = []
    for char in text.lower():
        if char in polish_chars:
            pieces.append(polish_chars[char])
        elif char.isalnum():
            pieces.append(char)
        elif not pieces or pieces[-1] != "_":
            # Emit at most one underscore per run of separators, which is
            # equivalent to collapsing "__" afterwards.
            pieces.append("_")

    return "".join(pieces).strip("_")
|
| 44 |
|
| 45 |
+
def get_high_res_image(url):
    """Convert a resized Allegro image link to its ``/original/`` variant.

    Args:
        url: Image URL, possibly containing a size segment such as ``/s360/``.

    Returns:
        ``None`` for an empty/missing URL; otherwise the URL with its size
        segment replaced by ``/original/``, or the URL unchanged when it has
        no recognised size segment.
    """
    if not url:
        return None

    # Local import keeps this helper self-contained.
    import re

    if "allegroimg.com" in url:
        # On the Allegro image host any "/s<digits>/" segment is a resize
        # marker, so sizes outside the short list below are upgraded as well.
        return re.sub(r"/s\d+/", "/original/", url)

    # For other hosts only rewrite the explicitly known size segments.
    sizes = ["/s128/", "/s360/", "/s720/", "/s1024/", "/s1440/"]
    for size in sizes:
        if size in url:
            return url.replace(size, "/original/")
    return url
|
|
|
|
| 53 |
|
| 54 |
+
def get_api_token():
    """Return the Apify API token.

    Priority 1: the ``APIFY_TOKEN`` environment variable.
    Priority 2: interactive console input.

    Returns:
        The token with surrounding whitespace removed, or an empty string
        when the variable is unset/blank and no input can be read.
    """
    token = (os.getenv("APIFY_TOKEN") or "").strip()

    if token:
        print("Info: API Token loaded from .env file.")
        return token

    print("Warning: APIFY_TOKEN not found in .env file.")
    try:
        return input("Please enter your Apify API Token: ").strip()
    except EOFError:
        # No interactive stdin (e.g. running as a service): report "no token"
        # to the caller instead of crashing.
        return ""
|
| 69 |
|
| 70 |
+
def get_allegro_data(url):
    """Fetch one Allegro offer through the Apify actor and map it to a dict.

    Args:
        url: Link to the Allegro offer; sent to the actor as its only start URL.

    Returns:
        A dict with the keys ``title``, ``sanitized_title``, ``url``,
        ``description``, ``parameters``, ``image_urls``, ``image_count`` and
        ``price``. ``None`` is returned when no API token is available, the
        actor returns no items, or any step raises (the error is printed).
    """
    apify_token = get_api_token()

    if not apify_token:
        print("ERROR: API Token is required to run the script.")
        return None

    client = ApifyClient(apify_token)
    run_input = {"startUrls": [url]}

    try:
        print("--- GATHERING DATA ---")

        run = client.actor(ACTOR_ID).call(run_input=run_input)
        dataset_items = list(client.dataset(run["defaultDatasetId"]).iterate_items())

        if not dataset_items:
            print("Apify finished the job but returned no data.")
            return None

        # Only the first dataset item is used.
        item = dataset_items[0]

        # --- DATA MAPPING ---

        # TITLE
        title = item.get("productTitle") or item.get("title") or "untitled"

        # DESCRIPTION (a present-but-empty value also falls back to the default)
        description = item.get("description") or "No description"

        # PARAMETERS
        parameter_list = []
        specs = item.get("productSpecifications")

        if isinstance(specs, dict) and specs:
            parameter_list.extend(f"{key}: {value}" for key, value in specs.items())
        elif not specs:
            # Missing or empty specifications: fall back to the list-shaped
            # fields. A missing key must reach this branch too, otherwise the
            # fallback is dead code.
            raw_params = item.get("parameters") or item.get("attributes") or []
            for p in raw_params:
                if not isinstance(p, dict):
                    continue
                name = p.get("name") or p.get("key")
                val = p.get("value")
                if name and val:
                    parameter_list.append(f"{name}: {val}")

        # IMAGES
        # A dict is used as an insertion-ordered set so duplicates are dropped
        # while the gallery order from the API is preserved.
        unique_links = {}

        for img in item.get("images") or []:
            if isinstance(img, dict):
                img = img.get("url")
            if isinstance(img, str) and img:
                unique_links[get_high_res_image(img)] = None

        if not unique_links:
            thumb = item.get("thumbnail")
            if thumb:
                unique_links[get_high_res_image(thumb)] = None
                print("Info: Retrieved main image from thumbnail (gallery was empty in API).")

        print(f"Found {len(unique_links)} images.")

        # PRICE (avoid rendering missing values as the literal text "None")
        price = item.get("price")
        if price is None:
            price = "N/A"
        currency = item.get("currency") or ""

        return {
            "title": title,
            "sanitized_title": sanitize_name(title),
            "url": url,
            "description": description,
            "parameters": parameter_list,
            "image_urls": list(unique_links),
            "image_count": len(unique_links),
            "price": f"{price} {currency}".strip(),
        }

    except Exception as e:
        # Top-level boundary for the scraper: report the failure and signal it
        # to the caller with None.
        print(f"Main error occurred: {e}")
        return None
|
| 147 |
+
|
| 148 |
+
# --- USAGE ---
|
| 149 |
+
if __name__ == "__main__":
    # Guarded so that importing this module (e.g. from a web app) does not
    # block on console input; running the file directly behaves as before.
    link = input("Enter the Allegro offer link: ").strip()
    data = get_allegro_data(link)
    print(json.dumps(data, indent=4, ensure_ascii=False))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
code/web_scraper_ebay.py
CHANGED
|
@@ -1,93 +1,153 @@
|
|
| 1 |
-
# scrape_ebay_offer.py
|
| 2 |
-
import undetected_chromedriver as uc
|
| 3 |
-
from selenium.webdriver.common.by import By
|
| 4 |
-
from webdriver_manager.chrome import ChromeDriverManager
|
| 5 |
-
from selenium.webdriver.chrome.service import Service
|
| 6 |
-
import time
|
| 7 |
-
import requests
|
| 8 |
import os
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
-
|
| 11 |
-
""
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
|
|
|
|
|
|
| 18 |
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
|
|
|
|
|
|
|
| 29 |
try:
|
| 30 |
-
|
| 31 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
|
| 33 |
# TITLE
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
|
|
|
|
|
|
|
|
|
|
| 40 |
# PARAMETERS
|
| 41 |
parameter_list = []
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
for
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
parameter_list.append(f"{label}: {value}")
|
| 50 |
-
except:
|
| 51 |
-
continue
|
| 52 |
-
except:
|
| 53 |
-
pass
|
| 54 |
-
|
| 55 |
-
# DESCRIPTION
|
| 56 |
-
description_content = "No description"
|
| 57 |
-
try:
|
| 58 |
-
frame = driver.find_element(By.ID, "desc_ifr")
|
| 59 |
-
driver.switch_to.frame(frame)
|
| 60 |
-
description_content = driver.find_element(By.TAG_NAME, "body").text.strip()
|
| 61 |
-
driver.switch_to.default_content()
|
| 62 |
-
except:
|
| 63 |
-
pass
|
| 64 |
|
|
|
|
|
|
|
| 65 |
# IMAGES
|
| 66 |
unique_links = set()
|
| 67 |
-
try:
|
| 68 |
-
thumbnails = driver.find_elements(By.CSS_SELECTOR, ".ux-image-grid-item img")
|
| 69 |
-
for img in thumbnails:
|
| 70 |
-
src = img.get_attribute("src") or img.get_attribute("data-src")
|
| 71 |
-
if src and "ebayimg.com" in src:
|
| 72 |
-
# Zamień na HD
|
| 73 |
-
hd_link = src.replace("/s-l64/", "/s-l1600").replace("/s-l140/", "/s-l1600")
|
| 74 |
-
unique_links.add(hd_link)
|
| 75 |
-
except:
|
| 76 |
-
pass
|
| 77 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
return {
|
| 79 |
-
"
|
|
|
|
| 80 |
"url": url,
|
| 81 |
-
"
|
| 82 |
-
"description": description_content,
|
| 83 |
"parameters": parameter_list,
|
| 84 |
-
"image_urls": list(unique_links)
|
|
|
|
|
|
|
| 85 |
}
|
| 86 |
-
|
| 87 |
-
finally:
|
| 88 |
-
driver.quit()
|
| 89 |
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import os
|
| 2 |
+
import requests
|
| 3 |
+
from apify_client import ApifyClient
|
| 4 |
+
from dotenv import load_dotenv
|
| 5 |
+
import json
|
| 6 |
+
|
| 7 |
+
# --- CONFIGURATION ---
|
| 8 |
+
# Load environment variables from the .env file (if it exists)
|
| 9 |
+
load_dotenv()
|
| 10 |
+
|
| 11 |
+
ACTOR_ID = "vulnv/ebay-product-scraper"
|
| 12 |
+
|
| 13 |
+
# --- HELPER FUNCTIONS ---
|
| 14 |
+
def sanitize_name(text):
    """Turn arbitrary text into a lowercase, underscore-separated folder name.

    Polish diacritics are transliterated to their ASCII base letters, other
    alphanumeric characters are kept, and every run of remaining characters
    collapses to a single underscore. Leading and trailing underscores are
    removed.

    Args:
        text: Value to sanitize. ``None`` is treated as an empty string and
            any other non-string value is converted with ``str()``.

    Returns:
        The sanitized name; an empty string when nothing usable remains.
    """
    polish_chars = {
        "ą": "a",
        "ć": "c",
        "ę": "e",
        "ł": "l",
        "ń": "n",
        "ó": "o",
        "ś": "s",
        "ź": "z",
        "ż": "z",
    }

    # Scraped titles are not guaranteed to be strings; never crash on them.
    text = "" if text is None else str(text)

    pieces = []
    for char in text.lower():
        if char in polish_chars:
            pieces.append(polish_chars[char])
        elif char.isalnum():
            pieces.append(char)
        elif not pieces or pieces[-1] != "_":
            # Emit at most one underscore per run of separators, which is
            # equivalent to collapsing "__" afterwards.
            pieces.append("_")

    return "".join(pieces).strip("_")
|
| 44 |
+
|
| 45 |
+
def get_high_res_ebay_image(url):
    """Replace the size code in an eBay image link with ``s-l1600``.

    Args:
        url: Image URL, possibly containing a size code such as ``s-l500``.

    Returns:
        ``None`` for an empty/missing URL; otherwise the URL with its size
        code upgraded to ``s-l1600``, or the URL unchanged when no applicable
        size code is present.
    """
    if not url:
        return None

    # Local import keeps this helper self-contained.
    import re

    # Size codes that are rewritten on any host.
    known_sizes = {"64", "140", "300", "400", "500", "960"}
    # On the eBay image host every "s-l<digits>" code is a size marker.
    hosted_on_ebay = "ebayimg.com" in url

    def _upgrade(match):
        if hosted_on_ebay or match.group(1) in known_sizes:
            return "s-l1600"
        return match.group(0)

    # Matching the whole digit run prevents a short code (e.g. "s-l140") from
    # rewriting the prefix of a longer one (e.g. "s-l1400"), and works whether
    # or not the last path segment carries a file extension.
    return re.sub(r"s-l(\d+)", _upgrade, url)
|
| 61 |
+
|
| 62 |
+
def get_api_token():
    """Retrieve the Apify token from ``APIFY_TOKEN`` or ask the user.

    Returns:
        The token with surrounding whitespace removed, or an empty string
        when the variable is unset/blank and no input can be read.
    """
    token = (os.getenv("APIFY_TOKEN") or "").strip()
    if token:
        print("Info: API Token loaded from .env file.")
        return token

    print("Warning: APIFY_TOKEN not found in .env file.")
    try:
        return input("Please enter your Apify API Token: ").strip()
    except EOFError:
        # No interactive stdin (e.g. running as a service): report "no token"
        # to the caller instead of crashing.
        return ""
|
| 71 |
+
|
| 72 |
+
def get_ebay_data(url):
    """Fetch one eBay offer through the Apify actor and map it to a dict.

    Args:
        url: Link to the eBay offer; sent to the actor in ``product_urls``.

    Returns:
        A dict with the keys ``title``, ``sanitized_title``, ``url``,
        ``description``, ``parameters``, ``image_urls``, ``image_count`` and
        ``price``. ``None`` is returned when no API token is available, the
        actor returns no items, or any step raises (the error is printed).
    """
    apify_token = get_api_token()
    if not apify_token:
        print("ERROR: API Token is required.")
        return None

    print("\n--- SENDING REQUEST TO APIFY ---")
    client = ApifyClient(apify_token)

    run_input = {"product_urls": [url]}

    try:
        run = client.actor(ACTOR_ID).call(run_input=run_input)
        dataset_items = list(client.dataset(run["defaultDatasetId"]).iterate_items())

        if not dataset_items:
            print("Apify finished the job but returned no data.")
            return None

        # Only the first dataset item is used.
        item = dataset_items[0]

        # --- DATA MAPPING ---

        # TITLE
        title = item.get("name") or item.get("title") or "untitled_ebay"

        # PRICE (avoid rendering a null price as the literal text "None")
        price = item.get("price")
        if price is None:
            price = "N/A"
        currency = item.get("currency") or ""

        # DESCRIPTION (a present-but-empty value also falls back to the default)
        description = item.get("description") or "No text description available."

        # PARAMETERS
        parameter_list = []
        raw_props = item.get("additionalProperties")
        if isinstance(raw_props, list):
            for prop in raw_props:
                if not isinstance(prop, dict):
                    continue
                p_name = prop.get("name")
                p_val = prop.get("value")
                if p_name and p_val:
                    parameter_list.append(f"{p_name}: {p_val}")

        sku = item.get("sku")
        if sku:
            parameter_list.insert(0, f"SKU: {sku}")

        # IMAGES
        # A dict is used as an insertion-ordered set so duplicates are dropped
        # while the main image stays first, followed by the gallery order.
        unique_links = {}

        main_img = item.get("mainImage")
        if isinstance(main_img, dict):
            main_img = main_img.get("url")
        if isinstance(main_img, str) and main_img:
            unique_links[get_high_res_ebay_image(main_img)] = None

        for img_entry in item.get("images") or []:
            if isinstance(img_entry, dict):
                img_entry = img_entry.get("url")
            if isinstance(img_entry, str) and img_entry:
                unique_links[get_high_res_ebay_image(img_entry)] = None

        print(f"Found {len(unique_links)} unique images (High-Res).")

        return {
            "title": title,
            "sanitized_title": sanitize_name(title),
            "url": url,
            "description": description,
            "parameters": parameter_list,
            "image_urls": list(unique_links),
            "image_count": len(unique_links),
            "price": f"{price} {currency}".strip(),
        }

    except Exception as e:
        # Top-level boundary for the scraper: report the failure and signal it
        # to the caller with None.
        print(f"Critical error occurred: {e}")
        return None
|
| 149 |
+
|
| 150 |
+
# --- USAGE ---
|
| 151 |
+
if __name__ == "__main__":
    # Guarded so that importing this module (e.g. from a web app) does not
    # block on console input; running the file directly behaves as before.
    link = input("Enter the eBay offer link: ").strip()
    data = get_ebay_data(link)
    print(json.dumps(data, indent=4, ensure_ascii=False))
|
requirements.txt
CHANGED
|
@@ -8,8 +8,6 @@ tqdm
|
|
| 8 |
fastapi
|
| 9 |
uvicorn
|
| 10 |
python-multipart
|
| 11 |
-
undetected_chromedriver
|
| 12 |
-
webdriver-manager
|
| 13 |
bs4
|
| 14 |
requests
|
| 15 |
flask
|
|
|
|
| 8 |
fastapi
|
| 9 |
uvicorn
|
| 10 |
python-multipart
|
|
|
|
|
|
|
| 11 |
bs4
|
| 12 |
requests
|
| 13 |
flask
|