dramp77 commited on
Commit
8b114cb
·
1 Parent(s): 8fdd88f
.env ADDED
@@ -0,0 +1 @@
 
 
1
+ # SECURITY: a live OpenAI API key was committed here. Revoke/rotate that key
+ # immediately and never commit .env — add it to .gitignore and set the value
+ # locally or via deployment secrets instead.
+ OPENAI_API_KEY = "REDACTED"
app.py ADDED
@@ -0,0 +1,501 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import re
3
+ import csv
4
+ import datetime
5
+ import gradio as gr
6
+ import os
7
+ from openai import OpenAI
8
+ from PIL import Image
9
+ from io import BytesIO
10
+ from dotenv import load_dotenv
11
+ import json
12
+
13
# Load environment variables
# (reads .env via python-dotenv so OPENAI_API_KEY is available below)
load_dotenv()

# Initialize OpenAI client
# NOTE(review): os.getenv returns None when the key is missing; the failure
# will then surface later on the first API call — confirm this is acceptable.
client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))

# Define reference images directory
# Each subdirectory is one product category holding authentic reference photos.
REFERENCE_IMAGES_DIR = 'reference_images'
os.makedirs(REFERENCE_IMAGES_DIR, exist_ok=True)
22
+
23
def load_reference_images():
    """Scan REFERENCE_IMAGES_DIR and map each category folder to its image paths.

    Returns:
        dict[str, list[str]]: category name -> list of image file paths
        (only .png/.jpg/.jpeg files are included).
    """
    catalog = {}
    for entry in os.listdir(REFERENCE_IMAGES_DIR):
        folder = os.path.join(REFERENCE_IMAGES_DIR, entry)
        if not os.path.isdir(folder):
            continue  # ignore stray files sitting next to the category folders
        catalog[entry] = [
            os.path.join(folder, name)
            for name in os.listdir(folder)
            if name.lower().endswith(('.png', '.jpg', '.jpeg'))
        ]
    return catalog
35
+
36
def compare_with_reference(image_url, product_category):
    """Compare a product image with a reference image using OpenAI Vision.

    Args:
        image_url: Publicly reachable URL of the product image to verify.
        product_category: Category name used to look up reference images.

    Returns:
        tuple[str, float]: (result, confidence) where result is the model's
        verdict ('Pass' / 'Not Pass') or an error string, and confidence is
        1.0 when the verdict is Pass, else 0.0.
    """
    import base64  # local import: only needed here to inline the reference image

    reference_images = load_reference_images().get(product_category, [])

    if not reference_images:
        return "Error: No reference images found for this category", 0

    try:
        # Bug fix: reference_images[0] is a *local file path*. The OpenAI API
        # cannot fetch local paths as image URLs, so the reference image is
        # read from disk and inlined as a base64 data URL instead.
        ref_path = reference_images[0]  # using first reference image
        with open(ref_path, 'rb') as ref_file:
            ref_b64 = base64.b64encode(ref_file.read()).decode('ascii')
        mime = 'image/png' if ref_path.lower().endswith('.png') else 'image/jpeg'
        ref_data_url = f"data:{mime};base64,{ref_b64}"

        messages = [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": """Compare these images and determine if the product appears to be authentic.
                        Consider:
                        1. Logo placement and quality
                        2. Product design details
                        3. Material quality appearance
                        4. Color accuracy
                        5. Overall build quality

                        The first image is the reference (authentic product).
                        The second image is the product to verify.

                        Respond with 'Pass' if it appears authentic or 'Not Pass' if it shows signs of being counterfeit.
                        """
                    },
                    {
                        "type": "image_url",
                        "image_url": {"url": ref_data_url}
                    },
                    {
                        "type": "image_url",
                        "image_url": {"url": image_url}
                    }
                ]
            }
        ]

        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=messages,
            max_tokens=10
        )

        result = response.choices[0].message.content.strip()
        # Tolerate minor phrasing/punctuation from the model: only an answer
        # that *starts with* "Pass" counts as authentic ("Not Pass" does not).
        confidence = 1.0 if result.lower().startswith("pass") else 0.0

        return result, confidence

    except Exception as e:
        # Best-effort: any failure (file read, API error) is reported as
        # a generic error result rather than crashing the scrape loop.
        print(f"Error in comparison: {e}")
        return "Error", 0
90
+
91
def scrape_tokopedia(product_url, product_category):
    """Scrape product images from a Tokopedia page and run the authenticity check.

    Args:
        product_url: Full Tokopedia product URL (tokopedia.com/<shop>/<slug>).
        product_category: Reference-image category to compare against.

    Returns:
        tuple[str, str | None]: (summary text, first analyzed image URL), or
        (error message, None) on failure. Also writes per-image results to
        tokopedia_authenticity_check.csv.
    """
    try:
        # Validate the Tokopedia URL shape (shop-name/product-slug).
        match = re.search(r'tokopedia\.com/([^/]+)/([^/?]+)', product_url)
        if not match:
            return "Error: Invalid Tokopedia URL format.", None

        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.9',
            'Accept-Encoding': 'gzip, deflate, br',
            'Connection': 'keep-alive',
            'Upgrade-Insecure-Requests': '1',
            'sec-ch-ua': '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
            'sec-ch-ua-platform': '"Windows"'
        }

        session = requests.Session()
        print(f"Fetching product page: {product_url}")

        # Fetch the product page directly.
        response = session.get(product_url, headers=headers, timeout=10)
        response.raise_for_status()  # Raise exception for bad status codes

        print(f"Response status: {response.status_code}")

        # Multiple patterns to locate image URLs in the page markup.
        # Bug fix: extension alternations use non-capturing groups (?:...).
        # With capturing groups, re.findall returns only the captured group
        # ('jpg'/'png'), not the full URL, so no valid image was ever found.
        image_patterns = [
            r'https://images\.tokopedia\.net/img/[^"\']+\.(?:jpg|jpeg|png)',
            r'https://[^"\']+\.tokopedia\.net/[^"\']+\.(?:jpg|jpeg|png)',
            r'"imageUrl":"(https://[^"]+)"',
            r'"url":"(https://images[^"]+)"',
            r'content="(https://images\.tokopedia\.net[^"]+)"'
        ]

        all_images = []
        for pattern in image_patterns:
            for m in re.findall(pattern, response.text):
                # Normalize: a pattern with multiple groups yields tuples.
                all_images.append(m[0] if isinstance(m, tuple) else m)

        # Remove duplicates.
        unique_images = list(set(all_images))
        print(f"Found {len(unique_images)} unique images")

        if not unique_images:
            # Fall back to JSON-LD metadata embedded in the page.
            json_ld_pattern = r'<script type="application/ld\+json">(.*?)</script>'
            json_matches = re.findall(json_ld_pattern, response.text, re.DOTALL)
            for json_str in json_matches:
                try:
                    json_data = json.loads(json_str)
                    if 'image' in json_data:
                        if isinstance(json_data['image'], list):
                            unique_images.extend(json_data['image'])
                        else:
                            unique_images.append(json_data['image'])
                except (json.JSONDecodeError, TypeError):
                    # Malformed JSON-LD block; skip it rather than abort.
                    continue

        if not unique_images:
            return "Error: No product images found.", None

        # Verify candidate URLs actually serve an image (cheap HEAD request).
        valid_images = []
        for img_url in unique_images[:10]:  # Try first 10 images
            try:
                print(f"Verifying image URL: {img_url}")
                img_response = session.head(img_url, headers=headers, timeout=5)
                content_type = img_response.headers.get('content-type', '')

                if img_response.status_code == 200 and 'image' in content_type.lower():
                    valid_images.append(img_url)
                    if len(valid_images) >= 5:  # Stop after getting 5 valid images
                        break
            except Exception as e:
                print(f"Error verifying image {img_url}: {str(e)}")
                continue

        if not valid_images:
            return "Error: Could not verify any product images.", None

        # Classify each verified image against the reference set.
        results = []
        for img_url in valid_images:
            try:
                print(f"Processing image: {img_url}")
                classification_result, confidence = compare_with_reference(img_url, product_category)
                results.append({
                    'image_url': img_url,
                    'classification': classification_result,
                    'confidence': confidence
                })
            except Exception as e:
                print(f"Error processing image {img_url}: {str(e)}")
                continue

        if not results:
            return "Error: Could not process any product images.", None

        # Persist per-image results to CSV for later inspection.
        output_file = 'tokopedia_authenticity_check.csv'
        with open(output_file, 'w', newline='', encoding='utf-8') as file:
            writer = csv.writer(file)
            writer.writerow(['image_url', 'authenticity_result', 'confidence'])
            for result in results:
                writer.writerow([
                    result['image_url'],
                    result['classification'],
                    f"{result['confidence']:.2%}"
                ])

        pass_count = sum(1 for r in results if r['classification'] == 'Pass')
        total_images = len(results)
        summary = f"""
        Tokopedia Authenticity Check Results:
        Total Images Analyzed: {total_images}
        Appears Authentic: {pass_count}
        Potentially Counterfeit: {total_images - pass_count}

        Detailed results saved to {output_file}
        """

        return summary, results[0]['image_url']

    except Exception as e:
        print(f"Error in scrape_tokopedia: {str(e)}")
        return f"Error scraping Tokopedia: {str(e)}", None
224
+
225
def scrape_shopee(product_url, product_category):
    """Scrape product images from Shopee's item API and run the authenticity check.

    Args:
        product_url: Shopee product URL containing the "i.<shopid>.<itemid>" token.
        product_category: Reference-image category to compare against.

    Returns:
        tuple[str, str | None]: (summary text, first analyzed image URL), or
        (error message, None) on failure. Also writes per-image results to
        shopee_authenticity_check.csv.
    """
    try:
        # Extract shop_id and item_id from URL.
        match = re.search(r'i\.(\d+)\.(\d+)', product_url)
        if not match:
            return "Error: Invalid Shopee URL format.", None

        shop_id, item_id = match.groups()
        api_url = f'https://shopee.co.id/api/v4/item/get?itemid={item_id}&shopid={shop_id}'

        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
            'Accept': 'application/json',
            'X-Requested-With': 'XMLHttpRequest',
            'Referer': 'https://shopee.co.id/',
            'AF-AC-Encoding-Version': '3',
        }

        session = requests.Session()
        # First visit the main page to get cookies.
        # Fix: explicit timeouts so a stalled connection cannot hang the UI.
        session.get(f'https://shopee.co.id/product/{shop_id}/{item_id}', headers=headers, timeout=10)

        response = session.get(api_url, headers=headers, timeout=10)

        if response.status_code != 200:
            return f"Error: Failed to fetch product data (HTTP {response.status_code}).", None

        product_data = response.json()
        images = product_data.get('data', {}).get('images', [])

        if not images:
            return "Error: No product images found.", None

        # Shopee returns bare image hashes; build CDN URLs and classify each.
        results = []
        for img_id in images[:5]:
            image_url = f"https://cf.shopee.co.id/file/{img_id}"
            classification_result, confidence = compare_with_reference(image_url, product_category)
            results.append({
                'image_url': image_url,
                'classification': classification_result,
                'confidence': confidence
            })

        # Persist per-image results to CSV for later inspection.
        output_file = 'shopee_authenticity_check.csv'
        with open(output_file, 'w', newline='', encoding='utf-8') as file:
            writer = csv.writer(file)
            writer.writerow(['image_url', 'authenticity_result', 'confidence'])
            for result in results:
                writer.writerow([
                    result['image_url'],
                    result['classification'],
                    f"{result['confidence']:.2%}"
                ])

        pass_count = sum(1 for r in results if r['classification'] == 'Pass')
        total_images = len(results)
        summary = f"""
        Shopee Authenticity Check Results:
        Total Images Analyzed: {total_images}
        Appears Authentic: {pass_count}
        Potentially Counterfeit: {total_images - pass_count}

        Detailed results saved to {output_file}
        """

        return summary, results[0]['image_url']

    except Exception as e:
        return f"Error scraping Shopee: {str(e)}", None
295
+
296
def scrape_blibli(product_url, product_category):
    """Scrape product images from Blibli's product-detail API and check authenticity.

    Args:
        product_url: Blibli product URL containing "p/<product-id>".
        product_category: Reference-image category to compare against.

    Returns:
        tuple[str, str | None]: (summary text, first analyzed image URL), or
        (error message, None) on failure. Also writes per-image results to
        blibli_authenticity_check.csv.
    """
    try:
        # Extract product ID from URL.
        match = re.search(r'p/([^/\?]+)', product_url)
        if not match:
            return "Error: Invalid Blibli URL format.", None

        product_id = match.group(1)
        api_url = f"https://www.blibli.com/backend/product-detail/products/{product_id}"

        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
            'Accept': 'application/json',
            'X-Requested-With': 'XMLHttpRequest',
            'Referer': 'https://www.blibli.com/',
        }

        session = requests.Session()
        # Fix: explicit timeout so a stalled connection cannot hang the UI.
        response = session.get(api_url, headers=headers, timeout=10)

        if response.status_code != 200:
            return f"Error: Failed to fetch product data (HTTP {response.status_code}).", None

        product_data = response.json()
        images = product_data.get('data', {}).get('images', [])

        if not images:
            return "Error: No product images found.", None

        # Classify up to five product images against the reference set.
        results = []
        for img_url in images[:5]:
            classification_result, confidence = compare_with_reference(img_url, product_category)
            results.append({
                'image_url': img_url,
                'classification': classification_result,
                'confidence': confidence
            })

        # Persist per-image results to CSV for later inspection.
        output_file = 'blibli_authenticity_check.csv'
        with open(output_file, 'w', newline='', encoding='utf-8') as file:
            writer = csv.writer(file)
            writer.writerow(['image_url', 'authenticity_result', 'confidence'])
            for result in results:
                writer.writerow([
                    result['image_url'],
                    result['classification'],
                    f"{result['confidence']:.2%}"
                ])

        pass_count = sum(1 for r in results if r['classification'] == 'Pass')
        total_images = len(results)
        summary = f"""
        Blibli Authenticity Check Results:
        Total Images Analyzed: {total_images}
        Appears Authentic: {pass_count}
        Potentially Counterfeit: {total_images - pass_count}

        Detailed results saved to {output_file}
        """

        return summary, results[0]['image_url']

    except Exception as e:
        return f"Error scraping Blibli: {str(e)}", None
361
+
362
def scrape_bukalapak(product_url, product_category):
    """Scrape product images from Bukalapak's product API and check authenticity.

    Args:
        product_url: Bukalapak product URL containing "p/<product-slug>".
        product_category: Reference-image category to compare against.

    Returns:
        tuple[str, str | None]: (summary text, first analyzed image URL), or
        (error message, None) on failure. Also writes per-image results to
        bukalapak_authenticity_check.csv.
    """
    try:
        # Extract product slug from URL.
        match = re.search(r'p/([^/\?]+)', product_url)
        if not match:
            return "Error: Invalid Bukalapak URL format.", None

        product_slug = match.group(1)
        api_url = f"https://api.bukalapak.com/products/{product_slug}"

        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
            'Accept': 'application/json',
            'X-Requested-With': 'XMLHttpRequest',
            'Referer': 'https://www.bukalapak.com/',
        }

        session = requests.Session()
        # Fix: explicit timeout so a stalled connection cannot hang the UI.
        response = session.get(api_url, headers=headers, timeout=10)

        if response.status_code != 200:
            return f"Error: Failed to fetch product data (HTTP {response.status_code}).", None

        product_data = response.json()
        images = product_data.get('data', {}).get('images', [])

        if not images:
            return "Error: No product images found.", None

        # Classify up to five product images; entries without a 'large_url'
        # are skipped.
        results = []
        for img_data in images[:5]:
            img_url = img_data.get('large_url')
            if img_url:
                classification_result, confidence = compare_with_reference(img_url, product_category)
                results.append({
                    'image_url': img_url,
                    'classification': classification_result,
                    'confidence': confidence
                })

        # Bug fix: if every entry lacked 'large_url', results is empty and
        # results[0] below would raise IndexError — bail out explicitly.
        if not results:
            return "Error: Could not process any product images.", None

        # Persist per-image results to CSV for later inspection.
        output_file = 'bukalapak_authenticity_check.csv'
        with open(output_file, 'w', newline='', encoding='utf-8') as file:
            writer = csv.writer(file)
            writer.writerow(['image_url', 'authenticity_result', 'confidence'])
            for result in results:
                writer.writerow([
                    result['image_url'],
                    result['classification'],
                    f"{result['confidence']:.2%}"
                ])

        pass_count = sum(1 for r in results if r['classification'] == 'Pass')
        total_images = len(results)
        summary = f"""
        Bukalapak Authenticity Check Results:
        Total Images Analyzed: {total_images}
        Appears Authentic: {pass_count}
        Potentially Counterfeit: {total_images - pass_count}

        Detailed results saved to {output_file}
        """

        return summary, results[0]['image_url']

    except Exception as e:
        return f"Error scraping Bukalapak: {str(e)}", None
429
+
430
def gradio_scrape(marketplace_choice, product_url, product_category):
    """Gradio entry point: validate the URL, dispatch to the right scraper.

    Args:
        marketplace_choice: One of 'Shopee', 'Tokopedia', 'Blibli', 'Bukalapak'.
        product_url: Product URL pasted by the user.
        product_category: Reference-image category selected by the user.

    Returns:
        tuple[str, PIL.Image.Image | None]: the result text and a preview
        image of the first analyzed product photo (None on failure).
    """
    if not product_url:
        return "Error: Please enter a product URL", None

    # Validate URL based on selected marketplace.
    url_patterns = {
        'Shopee': r'shopee\.co\.id',
        'Tokopedia': r'tokopedia\.com',
        'Blibli': r'blibli\.com',
        'Bukalapak': r'bukalapak\.com'
    }

    if not re.search(url_patterns[marketplace_choice], product_url):
        return f"Error: URL doesn't match selected marketplace ({marketplace_choice}). Please check your URL.", None

    # Dispatch to the marketplace-specific scraper.
    scraping_functions = {
        'Shopee': scrape_shopee,
        'Tokopedia': scrape_tokopedia,
        'Blibli': scrape_blibli,
        'Bukalapak': scrape_bukalapak
    }

    result, image_url = scraping_functions[marketplace_choice](product_url, product_category)

    if image_url:
        # Fix: the preview download previously had no timeout and no error
        # handling — a bad image URL crashed the whole UI callback.
        try:
            img_response = requests.get(image_url, timeout=10)
            img_response.raise_for_status()
            return result, Image.open(BytesIO(img_response.content))
        except Exception as e:
            print(f"Error downloading preview image: {e}")
            return result, None
    return result, None
460
+
461
# Get available categories from reference_images directory
# (each subdirectory of reference_images is one selectable product category)
categories = [d for d in os.listdir(REFERENCE_IMAGES_DIR)
              if os.path.isdir(os.path.join(REFERENCE_IMAGES_DIR, d))]

# Define marketplace choices
marketplace_choices = ['Shopee', 'Tokopedia', 'Blibli', 'Bukalapak']

# Update Gradio Interface
# Wires gradio_scrape to three inputs (marketplace dropdown, URL textbox,
# category dropdown) and two outputs (summary text, sample product image).
interface = gr.Interface(
    fn=gradio_scrape,
    inputs=[
        gr.Dropdown(
            choices=marketplace_choices,
            label="Select Marketplace",
            value="Shopee"
        ),
        gr.Textbox(
            label="Product URL",
            placeholder="Paste your product URL here"
        ),
        gr.Dropdown(
            choices=categories,
            label="Product Category"
        )
    ],
    outputs=[
        gr.Textbox(label="Authenticity Check Results"),
        gr.Image(label="Product Image Sample")
    ],
    title="E-commerce Product Authenticity Checker",
    description="""
    How to use:
    1. Select your marketplace (Shopee/Tokopedia/Blibli/Bukalapak)
    2. Paste the product URL
    3. Select the product category
    4. Click submit to check authenticity
    """,
)

# Launch the web UI only when run as a script (not when imported).
if __name__ == "__main__":
    interface.launch()
cek.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Scrape all ratings for one Shopee item via the public ratings API into CSV."""
import requests
import pandas as pd
from datetime import datetime, timezone

# Shopee ratings API endpoint and query parameters.
base_url = "https://shopee.co.id/api/v2/item/get_ratings"
params = {
    "exclude_filter": 1,
    "filter": 0,
    "filter_size": 0,
    "flag": 1,
    "fold_filter": 0,
    "itemid": 5283031042,  # item ID to scrape
    "limit": 20,  # reviews per request
    "offset": 0,  # pagination start
    "relevant_reviews": "false",
    "request_source": 2,
    "shopid": 52733860,  # associated shop ID
    "tag_filter": "",
    "type": 0,
    "variation_filters": ""
}

# Column-wise accumulator for the scraped review fields.
reviews = {"username": [], "rating": [], "comment": [], "date": [], "images": []}

# Paginate until the API runs out of ratings.
while True:
    # Fix: explicit timeout so the scraper cannot hang forever on a stall.
    response = requests.get(base_url, params=params, timeout=10)

    if response.status_code != 200:
        print("Error: Failed to fetch data.")
        break

    data = response.json()

    # Fix: the API can return "data": null or "ratings": null, which the
    # previous membership test did not guard against — normalize to a list.
    ratings = (data.get("data") or {}).get("ratings")
    if not ratings:
        print("No more ratings found.")
        break

    for rating in ratings:
        reviews["username"].append(rating.get("author_username", "Unknown"))
        reviews["rating"].append(rating.get("rating_star", "N/A"))
        reviews["comment"].append(rating.get("comment", "No comment"))
        # Fix: datetime.utcfromtimestamp is deprecated (Python 3.12+);
        # an aware UTC datetime formats to the identical string.
        reviews["date"].append(
            datetime.fromtimestamp(rating.get("ctime", 0), tz=timezone.utc).strftime("%Y-%m-%d %H:%M")
        )
        # "images" may be null rather than an empty list.
        reviews["images"].append(", ".join(rating.get("images") or []))

    # Fewer rows than the page limit means this was the last page.
    if len(ratings) < params["limit"]:
        break

    # Advance pagination.
    params["offset"] += params["limit"]

# Persist the collected reviews to CSV.
df = pd.DataFrame(reviews)
print(df)
df.to_csv("shopee_reviews.csv", index=False)
classification-test/.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
classification-test/README.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Classification Test
3
+ emoji: 🐨
4
+ colorFrom: indigo
5
+ colorTo: pink
6
+ sdk: gradio
7
+ sdk_version: 5.6.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: gpl-3.0
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
data.csv ADDED
@@ -0,0 +1 @@
 
 
1
+ username,rating,comment
shopee_reviews.csv ADDED
@@ -0,0 +1 @@
 
 
1
+ username,rating,comment,date,images
tokopedia_authenticity_check.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ image_url,authenticity_result,confidence
2
+ https://images.tokopedia.net/img/cache/700/VqbcmM/2024/10/23/d7bd79c8-3614-4d71-8aba-5e9a28aae180.jpg,Error,0.00%
3
+ https://images.tokopedia.net/img/cache/500-square/VqbcmM/2024/10/23/d7bd79c8-3614-4d71-8aba-5e9a28aae180.jpg,Error,0.00%