dramp77 commited on
Commit
30ad672
·
verified ·
1 Parent(s): 23be87a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +502 -501
app.py CHANGED
@@ -1,501 +1,502 @@
1
- import requests
2
- import re
3
- import csv
4
- import datetime
5
- import gradio as gr
6
- import os
7
- from openai import OpenAI
8
- from PIL import Image
9
- from io import BytesIO
10
- from dotenv import load_dotenv
11
- import json
12
-
13
- # Load environment variables
14
- load_dotenv()
15
-
16
- # Initialize OpenAI client
17
- client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
18
-
19
- # Define reference images directory
20
- REFERENCE_IMAGES_DIR = 'reference_images'
21
- os.makedirs(REFERENCE_IMAGES_DIR, exist_ok=True)
22
-
23
- def load_reference_images():
24
- """Load all reference images from the reference directory"""
25
- reference_data = {}
26
- for category in os.listdir(REFERENCE_IMAGES_DIR):
27
- category_path = os.path.join(REFERENCE_IMAGES_DIR, category)
28
- if os.path.isdir(category_path):
29
- reference_data[category] = []
30
- for img_file in os.listdir(category_path):
31
- if img_file.lower().endswith(('.png', '.jpg', '.jpeg')):
32
- img_path = os.path.join(category_path, img_file)
33
- reference_data[category].append(img_path)
34
- return reference_data
35
-
36
- def compare_with_reference(image_url, product_category):
37
- """Compare product image with reference images using OpenAI Vision"""
38
- reference_images = load_reference_images().get(product_category, [])
39
-
40
- if not reference_images:
41
- return "Error: No reference images found for this category", 0
42
-
43
- try:
44
- messages = [
45
- {
46
- "role": "user",
47
- "content": [
48
- {
49
- "type": "text",
50
- "text": """Compare these images and determine if the product appears to be authentic.
51
- Consider:
52
- 1. Logo placement and quality
53
- 2. Product design details
54
- 3. Material quality appearance
55
- 4. Color accuracy
56
- 5. Overall build quality
57
-
58
- The first image is the reference (authentic product).
59
- The second image is the product to verify.
60
-
61
- Respond with 'Pass' if it appears authentic or 'Not Pass' if it shows signs of being counterfeit.
62
- """
63
- },
64
- {
65
- "type": "image_url",
66
- "image_url": {"url": reference_images[0]} # Using first reference image
67
- },
68
- {
69
- "type": "image_url",
70
- "image_url": {"url": image_url}
71
- }
72
- ]
73
- }
74
- ]
75
-
76
- response = client.chat.completions.create(
77
- model="gpt-4o-mini",
78
- messages=messages,
79
- max_tokens=10
80
- )
81
-
82
- result = response.choices[0].message.content.strip()
83
- confidence = 1.0 if result == "Pass" else 0.0
84
-
85
- return result, confidence
86
-
87
- except Exception as e:
88
- print(f"Error in comparison: {e}")
89
- return "Error", 0
90
-
91
- def scrape_tokopedia(product_url, product_category):
92
- """Scrape product data from Tokopedia"""
93
- try:
94
- # Validasi URL Tokopedia
95
- match = re.search(r'tokopedia\.com/([^/]+)/([^/?]+)', product_url)
96
- if not match:
97
- return "Error: Invalid Tokopedia URL format.", None
98
-
99
- headers = {
100
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
101
- 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
102
- 'Accept-Language': 'en-US,en;q=0.9',
103
- 'Accept-Encoding': 'gzip, deflate, br',
104
- 'Connection': 'keep-alive',
105
- 'Upgrade-Insecure-Requests': '1',
106
- 'sec-ch-ua': '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
107
- 'sec-ch-ua-platform': '"Windows"'
108
- }
109
-
110
- session = requests.Session()
111
- print(f"Fetching product page: {product_url}")
112
-
113
- # Langsung mengakses halaman produk
114
- response = session.get(product_url, headers=headers, timeout=10)
115
- response.raise_for_status() # Raise exception for bad status codes
116
-
117
- print(f"Response status: {response.status_code}")
118
-
119
- # Multiple patterns untuk mencari URL gambar
120
- image_patterns = [
121
- r'https://images\.tokopedia\.net/img/[^"\']+\.(jpg|jpeg|png)',
122
- r'https://[^"\']+\.tokopedia\.net/[^"\']+\.(jpg|jpeg|png)',
123
- r'"imageUrl":"(https://[^"]+)"',
124
- r'"url":"(https://images[^"]+)"',
125
- r'content="(https://images\.tokopedia\.net[^"]+)"'
126
- ]
127
-
128
- all_images = []
129
- for pattern in image_patterns:
130
- matches = re.findall(pattern, response.text)
131
- if matches:
132
- if isinstance(matches[0], tuple):
133
- # If the pattern contains groups, take the full match
134
- images = [m[0] if isinstance(m, tuple) else m for m in matches]
135
- else:
136
- images = matches
137
- all_images.extend(images)
138
-
139
- # Remove duplicates and clean URLs
140
- unique_images = list(set(all_images))
141
- print(f"Found {len(unique_images)} unique images")
142
-
143
- if not unique_images:
144
- # Try to extract from JSON-LD
145
- json_ld_pattern = r'<script type="application/ld\+json">(.*?)</script>'
146
- json_matches = re.findall(json_ld_pattern, response.text, re.DOTALL)
147
- for json_str in json_matches:
148
- try:
149
- json_data = json.loads(json_str)
150
- if 'image' in json_data:
151
- if isinstance(json_data['image'], list):
152
- unique_images.extend(json_data['image'])
153
- else:
154
- unique_images.append(json_data['image'])
155
- except:
156
- continue
157
-
158
- if not unique_images:
159
- return "Error: No product images found.", None
160
-
161
- # Filter and verify images
162
- valid_images = []
163
- for img_url in unique_images[:10]: # Try first 10 images
164
- try:
165
- print(f"Verifying image URL: {img_url}")
166
- img_response = session.head(img_url, headers=headers, timeout=5)
167
- content_type = img_response.headers.get('content-type', '')
168
-
169
- if img_response.status_code == 200 and 'image' in content_type.lower():
170
- valid_images.append(img_url)
171
- if len(valid_images) >= 5: # Stop after getting 5 valid images
172
- break
173
- except Exception as e:
174
- print(f"Error verifying image {img_url}: {str(e)}")
175
- continue
176
-
177
- if not valid_images:
178
- return "Error: Could not verify any product images.", None
179
-
180
- results = []
181
- for img_url in valid_images:
182
- try:
183
- print(f"Processing image: {img_url}")
184
- classification_result, confidence = compare_with_reference(img_url, product_category)
185
- results.append({
186
- 'image_url': img_url,
187
- 'classification': classification_result,
188
- 'confidence': confidence
189
- })
190
- except Exception as e:
191
- print(f"Error processing image {img_url}: {str(e)}")
192
- continue
193
-
194
- if not results:
195
- return "Error: Could not process any product images.", None
196
-
197
- output_file = 'tokopedia_authenticity_check.csv'
198
- with open(output_file, 'w', newline='', encoding='utf-8') as file:
199
- writer = csv.writer(file)
200
- writer.writerow(['image_url', 'authenticity_result', 'confidence'])
201
- for result in results:
202
- writer.writerow([
203
- result['image_url'],
204
- result['classification'],
205
- f"{result['confidence']:.2%}"
206
- ])
207
-
208
- pass_count = sum(1 for r in results if r['classification'] == 'Pass')
209
- total_images = len(results)
210
- summary = f"""
211
- Tokopedia Authenticity Check Results:
212
- Total Images Analyzed: {total_images}
213
- Appears Authentic: {pass_count}
214
- Potentially Counterfeit: {total_images - pass_count}
215
-
216
- Detailed results saved to {output_file}
217
- """
218
-
219
- return summary, results[0]['image_url']
220
-
221
- except Exception as e:
222
- print(f"Error in scrape_tokopedia: {str(e)}")
223
- return f"Error scraping Tokopedia: {str(e)}", None
224
-
225
- def scrape_shopee(product_url, product_category):
226
- """Scrape product data from Shopee"""
227
- try:
228
- # Extract shop_id and item_id from URL
229
- match = re.search(r'i\.(\d+)\.(\d+)', product_url)
230
- if not match:
231
- return "Error: Invalid Shopee URL format.", None
232
-
233
- shop_id, item_id = match.groups()
234
- api_url = f'https://shopee.co.id/api/v4/item/get?itemid={item_id}&shopid={shop_id}'
235
-
236
- headers = {
237
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
238
- 'Accept': 'application/json',
239
- 'X-Requested-With': 'XMLHttpRequest',
240
- 'Referer': 'https://shopee.co.id/',
241
- 'AF-AC-Encoding-Version': '3',
242
- }
243
-
244
- session = requests.Session()
245
- # First visit the main page to get cookies
246
- session.get(f'https://shopee.co.id/product/{shop_id}/{item_id}', headers=headers)
247
-
248
- response = session.get(api_url, headers=headers)
249
-
250
- if response.status_code != 200:
251
- return f"Error: Failed to fetch product data (HTTP {response.status_code}).", None
252
-
253
- product_data = response.json()
254
- images = product_data.get('data', {}).get('images', [])
255
-
256
- if not images:
257
- return "Error: No product images found.", None
258
-
259
- results = []
260
- for img_id in images[:5]:
261
- image_url = f"https://cf.shopee.co.id/file/{img_id}"
262
- classification_result, confidence = compare_with_reference(image_url, product_category)
263
- results.append({
264
- 'image_url': image_url,
265
- 'classification': classification_result,
266
- 'confidence': confidence
267
- })
268
-
269
- output_file = 'shopee_authenticity_check.csv'
270
- with open(output_file, 'w', newline='', encoding='utf-8') as file:
271
- writer = csv.writer(file)
272
- writer.writerow(['image_url', 'authenticity_result', 'confidence'])
273
- for result in results:
274
- writer.writerow([
275
- result['image_url'],
276
- result['classification'],
277
- f"{result['confidence']:.2%}"
278
- ])
279
-
280
- pass_count = sum(1 for r in results if r['classification'] == 'Pass')
281
- total_images = len(results)
282
- summary = f"""
283
- Shopee Authenticity Check Results:
284
- Total Images Analyzed: {total_images}
285
- Appears Authentic: {pass_count}
286
- Potentially Counterfeit: {total_images - pass_count}
287
-
288
- Detailed results saved to {output_file}
289
- """
290
-
291
- return summary, results[0]['image_url']
292
-
293
- except Exception as e:
294
- return f"Error scraping Shopee: {str(e)}", None
295
-
296
- def scrape_blibli(product_url, product_category):
297
- """Scrape product data from Blibli"""
298
- try:
299
- # Extract product ID from URL
300
- match = re.search(r'p/([^/\?]+)', product_url)
301
- if not match:
302
- return "Error: Invalid Blibli URL format.", None
303
-
304
- product_id = match.group(1)
305
- api_url = f"https://www.blibli.com/backend/product-detail/products/{product_id}"
306
-
307
- headers = {
308
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
309
- 'Accept': 'application/json',
310
- 'X-Requested-With': 'XMLHttpRequest',
311
- 'Referer': 'https://www.blibli.com/',
312
- }
313
-
314
- session = requests.Session()
315
- response = session.get(api_url, headers=headers)
316
-
317
- if response.status_code != 200:
318
- return f"Error: Failed to fetch product data (HTTP {response.status_code}).", None
319
-
320
- product_data = response.json()
321
- images = product_data.get('data', {}).get('images', [])
322
-
323
- if not images:
324
- return "Error: No product images found.", None
325
-
326
- results = []
327
- for img_url in images[:5]:
328
- classification_result, confidence = compare_with_reference(img_url, product_category)
329
- results.append({
330
- 'image_url': img_url,
331
- 'classification': classification_result,
332
- 'confidence': confidence
333
- })
334
-
335
- output_file = 'blibli_authenticity_check.csv'
336
- with open(output_file, 'w', newline='', encoding='utf-8') as file:
337
- writer = csv.writer(file)
338
- writer.writerow(['image_url', 'authenticity_result', 'confidence'])
339
- for result in results:
340
- writer.writerow([
341
- result['image_url'],
342
- result['classification'],
343
- f"{result['confidence']:.2%}"
344
- ])
345
-
346
- pass_count = sum(1 for r in results if r['classification'] == 'Pass')
347
- total_images = len(results)
348
- summary = f"""
349
- Blibli Authenticity Check Results:
350
- Total Images Analyzed: {total_images}
351
- Appears Authentic: {pass_count}
352
- Potentially Counterfeit: {total_images - pass_count}
353
-
354
- Detailed results saved to {output_file}
355
- """
356
-
357
- return summary, results[0]['image_url']
358
-
359
- except Exception as e:
360
- return f"Error scraping Blibli: {str(e)}", None
361
-
362
- def scrape_bukalapak(product_url, product_category):
363
- """Scrape product data from Bukalapak"""
364
- try:
365
- # Extract product ID from URL
366
- match = re.search(r'p/([^/\?]+)', product_url)
367
- if not match:
368
- return "Error: Invalid Bukalapak URL format.", None
369
-
370
- product_slug = match.group(1)
371
- api_url = f"https://api.bukalapak.com/products/{product_slug}"
372
-
373
- headers = {
374
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
375
- 'Accept': 'application/json',
376
- 'X-Requested-With': 'XMLHttpRequest',
377
- 'Referer': 'https://www.bukalapak.com/',
378
- }
379
-
380
- session = requests.Session()
381
- response = session.get(api_url, headers=headers)
382
-
383
- if response.status_code != 200:
384
- return f"Error: Failed to fetch product data (HTTP {response.status_code}).", None
385
-
386
- product_data = response.json()
387
- images = product_data.get('data', {}).get('images', [])
388
-
389
- if not images:
390
- return "Error: No product images found.", None
391
-
392
- results = []
393
- for img_data in images[:5]:
394
- img_url = img_data.get('large_url')
395
- if img_url:
396
- classification_result, confidence = compare_with_reference(img_url, product_category)
397
- results.append({
398
- 'image_url': img_url,
399
- 'classification': classification_result,
400
- 'confidence': confidence
401
- })
402
-
403
- output_file = 'bukalapak_authenticity_check.csv'
404
- with open(output_file, 'w', newline='', encoding='utf-8') as file:
405
- writer = csv.writer(file)
406
- writer.writerow(['image_url', 'authenticity_result', 'confidence'])
407
- for result in results:
408
- writer.writerow([
409
- result['image_url'],
410
- result['classification'],
411
- f"{result['confidence']:.2%}"
412
- ])
413
-
414
- pass_count = sum(1 for r in results if r['classification'] == 'Pass')
415
- total_images = len(results)
416
- summary = f"""
417
- Bukalapak Authenticity Check Results:
418
- Total Images Analyzed: {total_images}
419
- Appears Authentic: {pass_count}
420
- Potentially Counterfeit: {total_images - pass_count}
421
-
422
- Detailed results saved to {output_file}
423
- """
424
-
425
- return summary, results[0]['image_url']
426
-
427
- except Exception as e:
428
- return f"Error scraping Bukalapak: {str(e)}", None
429
-
430
- def gradio_scrape(marketplace_choice, product_url, product_category):
431
- """Updated gradio function with direct marketplace selection"""
432
- if not product_url:
433
- return "Error: Please enter a product URL", None
434
-
435
- # Validate URL based on selected marketplace
436
- url_patterns = {
437
- 'Shopee': r'shopee\.co\.id',
438
- 'Tokopedia': r'tokopedia\.com',
439
- 'Blibli': r'blibli\.com',
440
- 'Bukalapak': r'bukalapak\.com'
441
- }
442
-
443
- if not re.search(url_patterns[marketplace_choice], product_url):
444
- return f"Error: URL doesn't match selected marketplace ({marketplace_choice}). Please check your URL.", None
445
-
446
- # Call appropriate scraping function based on marketplace
447
- scraping_functions = {
448
- 'Shopee': scrape_shopee,
449
- 'Tokopedia': scrape_tokopedia,
450
- 'Blibli': scrape_blibli,
451
- 'Bukalapak': scrape_bukalapak
452
- }
453
-
454
- result, image_url = scraping_functions[marketplace_choice](product_url, product_category)
455
-
456
- if image_url:
457
- img = Image.open(BytesIO(requests.get(image_url).content))
458
- return result, img
459
- return result, None
460
-
461
- # Get available categories from reference_images directory
462
- categories = [d for d in os.listdir(REFERENCE_IMAGES_DIR)
463
- if os.path.isdir(os.path.join(REFERENCE_IMAGES_DIR, d))]
464
-
465
- # Define marketplace choices
466
- marketplace_choices = ['Shopee', 'Tokopedia', 'Blibli', 'Bukalapak']
467
-
468
- # Update Gradio Interface
469
- interface = gr.Interface(
470
- fn=gradio_scrape,
471
- inputs=[
472
- gr.Dropdown(
473
- choices=marketplace_choices,
474
- label="Select Marketplace",
475
- value="Shopee"
476
- ),
477
- gr.Textbox(
478
- label="Product URL",
479
- placeholder="Paste your product URL here"
480
- ),
481
- gr.Dropdown(
482
- choices=categories,
483
- label="Product Category"
484
- )
485
- ],
486
- outputs=[
487
- gr.Textbox(label="Authenticity Check Results"),
488
- gr.Image(label="Product Image Sample")
489
- ],
490
- title="E-commerce Product Authenticity Checker",
491
- description="""
492
- How to use:
493
- 1. Select your marketplace (Shopee/Tokopedia/Blibli/Bukalapak)
494
- 2. Paste the product URL
495
- 3. Select the product category
496
- 4. Click submit to check authenticity
497
- """,
498
- )
499
-
500
- if __name__ == "__main__":
501
- interface.launch()
 
 
1
import base64
import csv
import datetime
import json
import os
import re
from io import BytesIO

import gradio as gr
import openai
import requests
from dotenv import load_dotenv
from openai import OpenAI
from PIL import Image
13
+
14
# Load variables from a local .env file into the process environment
# (expects OPENAI_API_KEY to be defined there or in the real environment).
load_dotenv()

# Module-level OpenAI client used by compare_with_reference().
client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))

# Directory holding authentic reference images, one subfolder per product
# category; created on startup so first run does not crash on listdir.
REFERENCE_IMAGES_DIR = 'reference_images'
os.makedirs(REFERENCE_IMAGES_DIR, exist_ok=True)
23
+
24
def load_reference_images():
    """Build a mapping of category name -> list of reference image paths.

    Each subdirectory of REFERENCE_IMAGES_DIR is treated as one product
    category; its .png/.jpg/.jpeg files are collected as that category's
    authentic reference images.
    """
    catalog = {}
    for entry in os.listdir(REFERENCE_IMAGES_DIR):
        folder = os.path.join(REFERENCE_IMAGES_DIR, entry)
        if not os.path.isdir(folder):
            continue
        catalog[entry] = [
            os.path.join(folder, name)
            for name in os.listdir(folder)
            if name.lower().endswith(('.png', '.jpg', '.jpeg'))
        ]
    return catalog
36
+
37
def encode_image_as_data_url(path):
    """Base64-encode a local image file into a ``data:`` URL.

    The OpenAI vision API can only consume URLs (or base64 data URLs), not
    local filesystem paths, so reference images stored on disk must be
    inlined before being sent.
    """
    ext = os.path.splitext(path)[1].lower().lstrip('.')
    # .jpg files use the image/jpeg MIME type; default to png when unknown.
    mime = 'image/jpeg' if ext in ('jpg', 'jpeg') else f"image/{ext or 'png'}"
    with open(path, 'rb') as fh:
        payload = base64.b64encode(fh.read()).decode('ascii')
    return f"data:{mime};base64,{payload}"

def compare_with_reference(image_url, product_category):
    """Compare a product image with a reference image using OpenAI Vision.

    Sends the first reference image (authentic product) plus the candidate
    image to the model and asks for a Pass / Not Pass verdict.

    Args:
        image_url: Publicly reachable URL of the product image to verify.
        product_category: Category folder name under REFERENCE_IMAGES_DIR.

    Returns:
        (result, confidence): result is 'Pass', 'Not Pass', or an error
        string; confidence is 1.0 only for an exact 'Pass' verdict.
    """
    reference_images = load_reference_images().get(product_category, [])

    if not reference_images:
        return "Error: No reference images found for this category", 0

    try:
        # BUG FIX: reference_images[0] is a local path; the API cannot fetch
        # local files, so inline it as a base64 data URL instead.
        reference_url = encode_image_as_data_url(reference_images[0])

        messages = [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": """Compare these images and determine if the product appears to be authentic.
                        Consider:
                        1. Logo placement and quality
                        2. Product design details
                        3. Material quality appearance
                        4. Color accuracy
                        5. Overall build quality

                        The first image is the reference (authentic product).
                        The second image is the product to verify.

                        Respond with 'Pass' if it appears authentic or 'Not Pass' if it shows signs of being counterfeit.
                        """
                    },
                    {
                        "type": "image_url",
                        "image_url": {"url": reference_url}  # Using first reference image
                    },
                    {
                        "type": "image_url",
                        "image_url": {"url": image_url}
                    }
                ]
            }
        ]

        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=messages,
            max_tokens=10
        )

        result = response.choices[0].message.content.strip()
        # Binary confidence: only an exact 'Pass' counts as authentic.
        confidence = 1.0 if result == "Pass" else 0.0

        return result, confidence

    except Exception as e:
        print(f"Error in comparison: {e}")
        return "Error", 0
91
+
92
def scrape_tokopedia(product_url, product_category):
    """Scrape product images from a Tokopedia page and run authenticity checks.

    Args:
        product_url: Full Tokopedia product URL (tokopedia.com/<shop>/<item>).
        product_category: Category used to pick reference images.

    Returns:
        (summary_text, first_image_url) on success, or (error_string, None).
    """
    try:
        # Validate the Tokopedia URL format
        match = re.search(r'tokopedia\.com/([^/]+)/([^/?]+)', product_url)
        if not match:
            return "Error: Invalid Tokopedia URL format.", None

        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.9',
            'Accept-Encoding': 'gzip, deflate, br',
            'Connection': 'keep-alive',
            'Upgrade-Insecure-Requests': '1',
            'sec-ch-ua': '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
            'sec-ch-ua-platform': '"Windows"'
        }

        session = requests.Session()
        print(f"Fetching product page: {product_url}")

        # Fetch the product page directly
        response = session.get(product_url, headers=headers, timeout=10)
        response.raise_for_status()  # Raise exception for bad status codes

        print(f"Response status: {response.status_code}")

        # Multiple patterns to locate image URLs.
        # BUG FIX: the extension alternation must be non-capturing (?:...),
        # otherwise re.findall returns just the extension ('jpg') instead of
        # the full matched URL.
        image_patterns = [
            r'https://images\.tokopedia\.net/img/[^"\']+\.(?:jpg|jpeg|png)',
            r'https://[^"\']+\.tokopedia\.net/[^"\']+\.(?:jpg|jpeg|png)',
            r'"imageUrl":"(https://[^"]+)"',
            r'"url":"(https://images[^"]+)"',
            r'content="(https://images\.tokopedia\.net[^"]+)"'
        ]

        # With at most one capture group per pattern, findall always yields
        # plain URL strings, so no tuple handling is needed.
        all_images = []
        for pattern in image_patterns:
            all_images.extend(re.findall(pattern, response.text))

        # Remove duplicates
        unique_images = list(set(all_images))
        print(f"Found {len(unique_images)} unique images")

        if not unique_images:
            # Fall back to JSON-LD metadata embedded in the page
            json_ld_pattern = r'<script type="application/ld\+json">(.*?)</script>'
            json_matches = re.findall(json_ld_pattern, response.text, re.DOTALL)
            for json_str in json_matches:
                try:
                    json_data = json.loads(json_str)
                    if 'image' in json_data:
                        if isinstance(json_data['image'], list):
                            unique_images.extend(json_data['image'])
                        else:
                            unique_images.append(json_data['image'])
                except (json.JSONDecodeError, TypeError):
                    # Malformed JSON-LD block; skip it
                    continue

        if not unique_images:
            return "Error: No product images found.", None

        # Verify candidate URLs actually serve images (HEAD request only)
        valid_images = []
        for img_url in unique_images[:10]:  # Try first 10 images
            try:
                print(f"Verifying image URL: {img_url}")
                img_response = session.head(img_url, headers=headers, timeout=5)
                content_type = img_response.headers.get('content-type', '')

                if img_response.status_code == 200 and 'image' in content_type.lower():
                    valid_images.append(img_url)
                    if len(valid_images) >= 5:  # Stop after getting 5 valid images
                        break
            except Exception as e:
                print(f"Error verifying image {img_url}: {str(e)}")
                continue

        if not valid_images:
            return "Error: Could not verify any product images.", None

        results = []
        for img_url in valid_images:
            try:
                print(f"Processing image: {img_url}")
                classification_result, confidence = compare_with_reference(img_url, product_category)
                results.append({
                    'image_url': img_url,
                    'classification': classification_result,
                    'confidence': confidence
                })
            except Exception as e:
                print(f"Error processing image {img_url}: {str(e)}")
                continue

        if not results:
            return "Error: Could not process any product images.", None

        # Persist per-image results as CSV
        output_file = 'tokopedia_authenticity_check.csv'
        with open(output_file, 'w', newline='', encoding='utf-8') as file:
            writer = csv.writer(file)
            writer.writerow(['image_url', 'authenticity_result', 'confidence'])
            for result in results:
                writer.writerow([
                    result['image_url'],
                    result['classification'],
                    f"{result['confidence']:.2%}"
                ])

        pass_count = sum(1 for r in results if r['classification'] == 'Pass')
        total_images = len(results)
        summary = f"""
Tokopedia Authenticity Check Results:
Total Images Analyzed: {total_images}
Appears Authentic: {pass_count}
Potentially Counterfeit: {total_images - pass_count}

Detailed results saved to {output_file}
"""

        return summary, results[0]['image_url']

    except Exception as e:
        print(f"Error in scrape_tokopedia: {str(e)}")
        return f"Error scraping Tokopedia: {str(e)}", None
225
+
226
def scrape_shopee(product_url, product_category):
    """Scrape product images from Shopee's item API and run authenticity checks.

    Args:
        product_url: Shopee product URL containing 'i.<shopid>.<itemid>'.
        product_category: Category used to pick reference images.

    Returns:
        (summary_text, first_image_url) on success, or (error_string, None).
    """
    try:
        # Extract shop_id and item_id from URL
        match = re.search(r'i\.(\d+)\.(\d+)', product_url)
        if not match:
            return "Error: Invalid Shopee URL format.", None

        shop_id, item_id = match.groups()
        api_url = f'https://shopee.co.id/api/v4/item/get?itemid={item_id}&shopid={shop_id}'

        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
            'Accept': 'application/json',
            'X-Requested-With': 'XMLHttpRequest',
            'Referer': 'https://shopee.co.id/',
            'AF-AC-Encoding-Version': '3',
        }

        session = requests.Session()
        # First visit the product page to pick up session cookies.
        # Timeouts prevent the Gradio callback from hanging indefinitely.
        session.get(f'https://shopee.co.id/product/{shop_id}/{item_id}', headers=headers, timeout=10)

        response = session.get(api_url, headers=headers, timeout=10)

        if response.status_code != 200:
            return f"Error: Failed to fetch product data (HTTP {response.status_code}).", None

        product_data = response.json()
        images = product_data.get('data', {}).get('images', [])

        if not images:
            return "Error: No product images found.", None

        results = []
        for img_id in images[:5]:
            # API returns image hashes; build the CDN URL from each hash.
            image_url = f"https://cf.shopee.co.id/file/{img_id}"
            classification_result, confidence = compare_with_reference(image_url, product_category)
            results.append({
                'image_url': image_url,
                'classification': classification_result,
                'confidence': confidence
            })

        # Guard before indexing results[0] below (consistent with scrape_tokopedia)
        if not results:
            return "Error: Could not process any product images.", None

        # Persist per-image results as CSV
        output_file = 'shopee_authenticity_check.csv'
        with open(output_file, 'w', newline='', encoding='utf-8') as file:
            writer = csv.writer(file)
            writer.writerow(['image_url', 'authenticity_result', 'confidence'])
            for result in results:
                writer.writerow([
                    result['image_url'],
                    result['classification'],
                    f"{result['confidence']:.2%}"
                ])

        pass_count = sum(1 for r in results if r['classification'] == 'Pass')
        total_images = len(results)
        summary = f"""
Shopee Authenticity Check Results:
Total Images Analyzed: {total_images}
Appears Authentic: {pass_count}
Potentially Counterfeit: {total_images - pass_count}

Detailed results saved to {output_file}
"""

        return summary, results[0]['image_url']

    except Exception as e:
        return f"Error scraping Shopee: {str(e)}", None
296
+
297
def scrape_blibli(product_url, product_category):
    """Scrape product images from Blibli's backend API and run authenticity checks.

    Args:
        product_url: Blibli product URL containing 'p/<product-id>'.
        product_category: Category used to pick reference images.

    Returns:
        (summary_text, first_image_url) on success, or (error_string, None).
    """
    try:
        # Extract product ID from URL
        match = re.search(r'p/([^/\?]+)', product_url)
        if not match:
            return "Error: Invalid Blibli URL format.", None

        product_id = match.group(1)
        api_url = f"https://www.blibli.com/backend/product-detail/products/{product_id}"

        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
            'Accept': 'application/json',
            'X-Requested-With': 'XMLHttpRequest',
            'Referer': 'https://www.blibli.com/',
        }

        session = requests.Session()
        # Timeout prevents the Gradio callback from hanging indefinitely.
        response = session.get(api_url, headers=headers, timeout=10)

        if response.status_code != 200:
            return f"Error: Failed to fetch product data (HTTP {response.status_code}).", None

        product_data = response.json()
        images = product_data.get('data', {}).get('images', [])

        if not images:
            return "Error: No product images found.", None

        results = []
        for img_url in images[:5]:
            classification_result, confidence = compare_with_reference(img_url, product_category)
            results.append({
                'image_url': img_url,
                'classification': classification_result,
                'confidence': confidence
            })

        # Guard before indexing results[0] below (consistent with scrape_tokopedia)
        if not results:
            return "Error: Could not process any product images.", None

        # Persist per-image results as CSV
        output_file = 'blibli_authenticity_check.csv'
        with open(output_file, 'w', newline='', encoding='utf-8') as file:
            writer = csv.writer(file)
            writer.writerow(['image_url', 'authenticity_result', 'confidence'])
            for result in results:
                writer.writerow([
                    result['image_url'],
                    result['classification'],
                    f"{result['confidence']:.2%}"
                ])

        pass_count = sum(1 for r in results if r['classification'] == 'Pass')
        total_images = len(results)
        summary = f"""
Blibli Authenticity Check Results:
Total Images Analyzed: {total_images}
Appears Authentic: {pass_count}
Potentially Counterfeit: {total_images - pass_count}

Detailed results saved to {output_file}
"""

        return summary, results[0]['image_url']

    except Exception as e:
        return f"Error scraping Blibli: {str(e)}", None
362
+
363
def scrape_bukalapak(product_url, product_category):
    """Scrape product images from Bukalapak's API and run authenticity checks.

    Args:
        product_url: Bukalapak product URL containing 'p/<product-slug>'.
        product_category: Category used to pick reference images.

    Returns:
        (summary_text, first_image_url) on success, or (error_string, None).
    """
    try:
        # Extract product slug from URL
        match = re.search(r'p/([^/\?]+)', product_url)
        if not match:
            return "Error: Invalid Bukalapak URL format.", None

        product_slug = match.group(1)
        api_url = f"https://api.bukalapak.com/products/{product_slug}"

        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
            'Accept': 'application/json',
            'X-Requested-With': 'XMLHttpRequest',
            'Referer': 'https://www.bukalapak.com/',
        }

        session = requests.Session()
        # Timeout prevents the Gradio callback from hanging indefinitely.
        response = session.get(api_url, headers=headers, timeout=10)

        if response.status_code != 200:
            return f"Error: Failed to fetch product data (HTTP {response.status_code}).", None

        product_data = response.json()
        images = product_data.get('data', {}).get('images', [])

        if not images:
            return "Error: No product images found.", None

        results = []
        for img_data in images[:5]:
            # Each entry is a dict; only process entries that expose a large_url.
            img_url = img_data.get('large_url')
            if img_url:
                classification_result, confidence = compare_with_reference(img_url, product_category)
                results.append({
                    'image_url': img_url,
                    'classification': classification_result,
                    'confidence': confidence
                })

        # BUG FIX: results can be empty when no image entry had a 'large_url';
        # without this guard, results[0] below raises IndexError.
        if not results:
            return "Error: Could not process any product images.", None

        # Persist per-image results as CSV
        output_file = 'bukalapak_authenticity_check.csv'
        with open(output_file, 'w', newline='', encoding='utf-8') as file:
            writer = csv.writer(file)
            writer.writerow(['image_url', 'authenticity_result', 'confidence'])
            for result in results:
                writer.writerow([
                    result['image_url'],
                    result['classification'],
                    f"{result['confidence']:.2%}"
                ])

        pass_count = sum(1 for r in results if r['classification'] == 'Pass')
        total_images = len(results)
        summary = f"""
Bukalapak Authenticity Check Results:
Total Images Analyzed: {total_images}
Appears Authentic: {pass_count}
Potentially Counterfeit: {total_images - pass_count}

Detailed results saved to {output_file}
"""

        return summary, results[0]['image_url']

    except Exception as e:
        return f"Error scraping Bukalapak: {str(e)}", None
430
+
431
def gradio_scrape(marketplace_choice, product_url, product_category):
    """Gradio callback: validate the URL, dispatch to the marketplace scraper,
    and download a preview image for the UI.

    Args:
        marketplace_choice: One of 'Shopee', 'Tokopedia', 'Blibli', 'Bukalapak'.
        product_url: Product page URL pasted by the user.
        product_category: Reference-image category selected by the user.

    Returns:
        (result_text, PIL.Image or None).
    """
    if not product_url:
        return "Error: Please enter a product URL", None

    # Validate URL based on selected marketplace
    url_patterns = {
        'Shopee': r'shopee\.co\.id',
        'Tokopedia': r'tokopedia\.com',
        'Blibli': r'blibli\.com',
        'Bukalapak': r'bukalapak\.com'
    }

    if not re.search(url_patterns[marketplace_choice], product_url):
        return f"Error: URL doesn't match selected marketplace ({marketplace_choice}). Please check your URL.", None

    # Call appropriate scraping function based on marketplace
    scraping_functions = {
        'Shopee': scrape_shopee,
        'Tokopedia': scrape_tokopedia,
        'Blibli': scrape_blibli,
        'Bukalapak': scrape_bukalapak
    }

    result, image_url = scraping_functions[marketplace_choice](product_url, product_category)

    if image_url:
        # BUG FIX: the preview download had no timeout and no error handling,
        # so a slow or broken CDN link crashed the whole Gradio callback.
        try:
            img_response = requests.get(image_url, timeout=10)
            img_response.raise_for_status()
            img = Image.open(BytesIO(img_response.content))
            return result, img
        except Exception as e:
            print(f"Error downloading preview image {image_url}: {e}")
            return result, None
    return result, None
461
+
462
# Product categories come from the subdirectories of the reference-image
# folder; each subdirectory name becomes a dropdown choice.
categories = [d for d in os.listdir(REFERENCE_IMAGES_DIR)
              if os.path.isdir(os.path.join(REFERENCE_IMAGES_DIR, d))]

# Marketplaces supported by gradio_scrape(); keys must match its dispatch table.
marketplace_choices = ['Shopee', 'Tokopedia', 'Blibli', 'Bukalapak']

# Gradio UI: (marketplace, URL, category) in -> (summary text, sample image) out.
interface = gr.Interface(
    fn=gradio_scrape,
    inputs=[
        gr.Dropdown(
            choices=marketplace_choices,
            label="Select Marketplace",
            value="Shopee"
        ),
        gr.Textbox(
            label="Product URL",
            placeholder="Paste your product URL here"
        ),
        gr.Dropdown(
            choices=categories,
            label="Product Category"
        )
    ],
    outputs=[
        gr.Textbox(label="Authenticity Check Results"),
        gr.Image(label="Product Image Sample")
    ],
    title="E-commerce Product Authenticity Checker",
    description="""
    How to use:
    1. Select your marketplace (Shopee/Tokopedia/Blibli/Bukalapak)
    2. Paste the product URL
    3. Select the product category
    4. Click submit to check authenticity
    """,
)

# Launch the web UI only when run as a script (not on import).
if __name__ == "__main__":
    interface.launch()