Spaces:
Running
Running
File size: 1,774 Bytes
718c4ae |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 |
# scrape_olx_offer.py
import requests
from bs4 import BeautifulSoup
def scrape_olx_offer(url: str) -> dict:
    """Return auction data scraped from an OLX offer page (nothing is written to disk).

    Args:
        url: Full URL of the OLX offer page.

    Returns:
        dict with keys: ``platform``, ``url``, ``title``, ``description``,
        ``parameters`` (list of str) and ``image_urls`` (deduplicated list of str).

    Raises:
        ValueError: if the HTTP response status is not 200.
        requests.RequestException: on network failure or timeout.
    """
    headers = {
        # Browser-like UA so OLX serves the normal HTML page.
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36"
        )
    }
    print(f"🔍 OLX: {url}")
    # timeout added: without it a stalled server would hang the scraper forever.
    response = requests.get(url, headers=headers, timeout=30)
    if response.status_code != 200:
        raise ValueError(f"OLX error: {response.status_code}")

    soup = BeautifulSoup(response.content, "html.parser")

    # TITLE — NOTE(review): the css-* class names are generated by OLX's
    # frontend build and are likely to change; selectors may need updating.
    title_element = soup.find("h4", class_="css-1au435n")
    title = title_element.get_text().strip() if title_element else "untitled"

    # DESCRIPTION
    description_element = soup.find("div", class_="css-19duwlz")
    description = (
        description_element.get_text(separator="\n").strip()
        if description_element
        else "No description"
    )

    # PARAMETERS — each <p> inside the parameters container is one attribute line.
    parameters_container = soup.find(
        "div", attrs={"data-testid": "ad-parameters-container"}
    )
    if parameters_container:
        parameter_list = [
            p.get_text().strip()
            for p in parameters_container.find_all("p", class_="css-13x8d99")
        ]
    else:
        parameter_list = []

    # IMAGES — gallery images; a set removes the duplicate src values the
    # swiper widget repeats.
    images = soup.select('img[data-testid^="swiper-image"]')
    unique_links = {img.get("src") for img in images if img.get("src")}

    return {
        "platform": "olx",
        "url": url,
        "title": title,
        "description": description,
        "parameters": parameter_list,
        "image_urls": list(unique_links),
    }
if __name__ == "__main__":
    # Interactive entry point: prompt for an offer URL and dump the scraped dict.
    offer_url = input("OLX URL: ")
    print(scrape_olx_offer(offer_url))
|