File size: 6,915 Bytes
0277ad1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3acf0f3
 
 
 
 
 
 
0277ad1
 
 
3acf0f3
 
 
 
 
 
0277ad1
3acf0f3
 
 
0277ad1
 
 
 
3acf0f3
0277ad1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import requests
from bs4 import BeautifulSoup
import re
import csv
from .DatabaseDataSaver import save_product

class SephoraScraper:
    """Collect product name, id and ingredient text from Sephora's JSON endpoints.

    ``run`` walks every slug in ``self.categories``: it asks the category
    endpoint for the product count, pages through the category listing,
    fetches each listed product from the product endpoint and hands the parsed
    fields to ``save_product`` (imported from ``.DatabaseDataSaver``).

    The request headers below were captured from a browser session and are
    sent unchanged with every request.
    """

    # Page size sent as ``pageSize`` to the category endpoint; also used to
    # work out how many pages a category has.
    PAGE_SIZE = 60
    # Seconds to wait on any single HTTP request, so a stalled connection
    # raises instead of blocking the whole scrape forever.
    REQUEST_TIMEOUT = 30

    # Product ids look like "-P482303" and sit at the end of the URL path,
    # i.e. right before the query string, a fragment, or the end of the URL.
    _PRODUCT_ID_RE = re.compile(r'-(P\d+)(?=[?#]|$)')

    # Markup fragments stripped (in this order) from the ingredient text.
    _INGREDIENT_MARKUP = ('<b>', '</b>', '<br>', '</p>', '<p>', '<br/>')

    # Headers for the product-detail endpoint.
    _PRODUCT_HEADERS = {
        "Sec-Ch-Ua": '"Chromium";v="117", "Not;A=Brand";v="8"',
        "X-Ufe-Request": "true",
        "Sec-Ch-Ua-Mobile": "?0",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.5938.132 Safari/537.36",
        "X-Dtpc": "5$505172501_268h23vBLLCMFBNGOLFAPFGHKUDBVTGKMEPJULD-0e0",
        "X-Dtreferer": "https://www.sephora.com/shop/makeup-cosmetics?currentPage=2",
        "Exclude_personalized_content": "true",
        "X-Requested-Source": "rwd",
        "Sec-Ch-Ua-Platform": '"Windows"',
        "Accept": "*/*",
        "Sec-Fetch-Site": "same-origin",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Dest": "empty",
        "Referer": "https://www.sephora.com/product/sephora-collection-total-coverage-blending-sponge-set-60-plant-based-P482303?skuId=2497220&icid2=products%20grid:p482303:product",
        "Accept-Encoding": "gzip, deflate, br",
        "Accept-Language": "en-US,en;q=0.9",
    }

    # Headers for the category-listing endpoint.
    _CATEGORY_HEADERS = {
        "Sec-Ch-Ua": '"Chromium";v="117", "Not;A=Brand";v="8"',
        "Sec-Ch-Ua-Mobile": "?0",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.5938.132 Safari/537.36",
        "X-Dtpc": "5$505172501_268h16vBLLCMFBNGOLFAPFGHKUDBVTGKMEPJULD-0e0",
        "X-Dtreferer": "https://www.sephora.com/shop/makeup-cosmetics",
        "X-Timestamp": "1697306065014",
        "Exclude_personalized_content": "true",
        "X-Requested-Source": "rwd",
        "Sec-Ch-Ua-Platform": '"Windows"',
        "Accept": "*/*",
        "Sec-Fetch-Site": "same-origin",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Dest": "empty",
        "Referer": "https://www.sephora.com/shop/makeup-cosmetics?currentPage=2",
        "Accept-Encoding": "gzip, deflate, br",
        "Accept-Language": "en-US,en;q=0.9",
    }

    def __init__(self):
        # NOTE(review): ``self.headers`` is not read by any method of this
        # class; kept in case external code relies on it.
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) ',
        }
        # Category slugs interpolated into the category endpoint URL.
        self.categories = [
            "makeup-cosmetics", "skincare", "hair-products", "fragrance", "makeup-tools", "bath-body", "travel-size-toiletries", "gifts"
        ]

    @classmethod
    def _extract_product_id(cls, url):
        """Return the ``P<digits>`` product id embedded in *url*, or ``None``."""
        match = cls._PRODUCT_ID_RE.search(url)
        return match.group(1) if match else None

    @staticmethod
    def _product_api_url(product_id):
        """Build the product-detail endpoint URL for *product_id*."""
        return (
            f"https://www.sephora.com:443/api2/catalog/products/{product_id}"
            "?addCurrentSkuToProductChildSkus=true&includeRegionsMap=true"
            "&showContent=true&includeConfigurableSku=true&countryCode=US"
            "&removePersonalizedData=true&includeReviewFilters=true"
            "&includeReviewImages=true&sentiments=6"
        )

    @classmethod
    def _category_page_url(cls, category, page):
        """Build the category-listing endpoint URL for one page of *category*."""
        return (
            f"https://www.sephora.com:443/api/v2/catalog/categories/{category}/seo"
            f"?targetSearchEngine=NLP&currentPage={page}&pageSize={cls.PAGE_SIZE}"
            "&content=true&includeRegionsMap=true&headers=%5Bobject%20Object%5D"
            "&pickupRampup=true&sddRampup=true&loc=en-US&ch=rwd"
        )

    @classmethod
    def _page_count(cls, total_results):
        """Return how many pages of ``PAGE_SIZE`` items hold *total_results*."""
        if total_results <= 0:
            return 0
        # Ceiling division: an exact multiple of PAGE_SIZE must not add an
        # extra (empty) page.
        return -(-total_results // cls.PAGE_SIZE)

    def get_response(self, url):
        """Fetch the product-detail JSON for the product page *url*.

        Args:
            url: Product page URL containing an id such as ``-P482303``,
                with or without a trailing query string.

        Returns:
            The decoded JSON payload, or ``False`` when the URL carries no
            product id or the request/decoding fails.
        """
        product_id = self._extract_product_id(url)
        if product_id is None:
            return False
        try:
            response = requests.get(
                self._product_api_url(product_id),
                headers=self._PRODUCT_HEADERS,
                timeout=self.REQUEST_TIMEOUT,
            )
            return response.json()
        except Exception as e:
            # Deliberate best-effort boundary: one bad product must not stop
            # the scrape, so report the problem and signal failure.
            print(e)
            return False

    def parse_response(self, response, url):
        """Extract ``[name, product_id, ingredients]`` from a product payload.

        Args:
            response: Value returned by ``get_response`` (a dict, or ``False``).
            url: Product URL; accepted for interface compatibility, not used.

        Returns:
            A three-item list, or ``False`` when the name or id is missing.
            ``ingredients`` is ``''`` when the payload has no usable
            ingredient text.
        """
        try:
            details = response['productDetails']
            name = details['displayName']
            product_id = details['productId']
        except (KeyError, TypeError, IndexError):
            return False

        try:
            ingredients = response['currentSku']['ingredientDesc'].replace('\n', ' ')
        except (KeyError, TypeError, IndexError, AttributeError):
            # Missing key, or a non-string value such as None.
            ingredients = ''
        else:
            for fragment in self._INGREDIENT_MARKUP:
                ingredients = ingredients.replace(fragment, '')

        return [name, product_id, ingredients]

    def search_category_total_results(self, category):
        """Return the ``totalProducts`` count reported for *category*.

        Request, JSON-decoding and missing-key errors propagate to the caller.
        """
        # NOTE(review): the count is read from a page-2 request, as in the
        # originally captured request — confirm page 1 would not be better.
        response = requests.get(
            self._category_page_url(category, 2),
            headers=self._CATEGORY_HEADERS,
            timeout=self.REQUEST_TIMEOUT,
        )
        return response.json()['totalProducts']

    def get_urls_of_category_from_all_results(self, category, total_results):
        """Page through *category*, scraping and saving every listed product.

        Args:
            category: Category slug used in the endpoint URL.
            total_results: Product count for the category; determines how
                many pages are requested.

        Returns:
            The list of all product page URLs found across the pages.
            Request, JSON-decoding and missing-key errors propagate.
        """
        all_urls = []
        for page in range(1, self._page_count(total_results) + 1):
            response = requests.get(
                self._category_page_url(category, page),
                headers=self._CATEGORY_HEADERS,
                timeout=self.REQUEST_TIMEOUT,
            )
            products = response.json()['products']
            urls = [f'https://www.sephora.com{product["targetUrl"]}' for product in products]
            print(urls)
            # Products are processed page by page so results are saved as the
            # scrape progresses rather than only at the very end.
            self.get_all_products_from_category_page(urls)
            all_urls.extend(urls)
        return all_urls

    def get_all_products_from_category_page(self, urls):
        """Fetch, parse and save every product in *urls*; always returns ``True``.

        Products that cannot be fetched or parsed are skipped.
        """
        for url in urls:
            product = self.parse_response(self.get_response(url), url=url)
            if not product:
                continue
            title, product_id, ingredients = product
            save_product({
                'title': title,
                'product_id': product_id,
                'ingredients': ingredients,
                'url': url,
                'store_name': 'Sephora'
            })
        return True

    def save_product_to_csv(self, product, path='sep.csv'):
        """Append *product* to a CSV file unless its id is already present.

        Args:
            product: Sequence whose second item is the product id.
            path: CSV file to append to; created if it does not exist.

        Returns:
            ``True`` when a row was written, ``False`` when a row with the
            same id (second column) already exists.
        """
        with open(path, 'a+', encoding='utf-8', newline='') as file:
            # 'a+' positions the stream at the end; rewind to read what is
            # already stored.
            file.seek(0)
            # Blank or single-column rows carry no id and are ignored.
            known_ids = {row[1] for row in csv.reader(file) if len(row) > 1}
            # CSV values are read back as strings, so compare as strings.
            if str(product[1]) in known_ids:
                return False

            file.seek(0, 2)
            csv.writer(file).writerow(product)
        return True

    def run(self):
        """Scrape every category in ``self.categories``; returns ``True``.

        Errors raised while querying a category propagate and stop the run.
        """
        for category in self.categories:
            total_results = self.search_category_total_results(category)
            self.get_urls_of_category_from_all_results(category, total_results)
            print(f'Finished {category}')
        return True