# (stray "Loading..." placeholder left by page extraction — not part of the program)
from flask import Flask, request, render_template_string, jsonify, send_from_directory
import requests
import pandas as pd
import re
import time
from random import randint, choice
import os
from transformers import XLMRobertaForSequenceClassification, XLMRobertaTokenizer
from peft import PeftModel, PeftConfig # Ensure peft library is installed
import torch
from collections import defaultdict
flask_app = Flask(__name__)

# List of account credentials (only cookies shown here).
# NOTE(review): hard-coded session cookies are secrets — prefer loading them
# from environment variables or an untracked config file before deploying.
# get_headers() picks one entry at random per request to spread load.
ACCOUNTS = [
    {
        "cookie": "SPC_F=hmR34sqS9gRUgA35BL857RAqy0Hn0sU8; REC_T_ID=64c7c97b-cc02-11ef-bac2-8a756e8ab50a; _gcl_au=1.1.834093345.1744027851"
    },
    {
        "cookie": "SPC_F=6nZYVWCtsBQBzqW8DPio55dDmfqxKFdM; REC_T_ID=14d45038-03a9-11f0-877a-2a88f69ba114; _gcl_au=1.1.938658524.1742268522"
    }
    # Add more accounts as needed, e.g.:
    # {
    #     "cookie": "SPC_F=...; REC_T_ID=...; _gcl_au=..."
    # },
]
def get_headers(shop_id=None, item_id=None):
    """Build a Shopee request-header set using a randomly rotated account.

    Args:
        shop_id: Optional shop identifier used to build a product Referer.
        item_id: Optional item identifier used to build a product Referer.

    Returns:
        dict: HTTP headers including the chosen account's cookie. The
        Referer points at the product page when both IDs are given,
        otherwise at the site root.
    """
    picked = choice(ACCOUNTS)
    product_referer = (
        f"https://shopee.ph/product/{shop_id}/{item_id}"
        if shop_id and item_id
        else "https://shopee.ph"
    )
    return {
        "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1",
        "Cookie": picked["cookie"],
        "X-Api-Source": "rweb",
        "X-Requested-With": "XMLHttpRequest",
        "Accept": "application/json",
        "Content-Type": "application/json",
        "X-Shopee-Language": "en",
        "Referer": product_referer,
        "af-ac-enc-dat": "null",  # included as advised for Shopee's API checks
    }
# Load the base XLM-RoBERTa model with the correct number of labels (3 labels for classification).
# Tokenizer is pulled from the LoRA adapter repo so its vocabulary matches the fine-tuned weights.
tokenizer = XLMRobertaTokenizer.from_pretrained("letijo03/lora-adapter-32",use_fast=True, trust_remote_code=True)
base_model = XLMRobertaForSequenceClassification.from_pretrained("xlm-roberta-base", num_labels=3)
# NOTE(review): `config` is loaded but never used afterwards — confirm it can be dropped.
config = PeftConfig.from_pretrained("letijo03/lora-adapter-32")
# Attach the LoRA adapter weights on top of the base model.
model = PeftModel.from_pretrained(base_model, "letijo03/lora-adapter-32")
model.eval()  # inference mode: disables dropout
def get_ids_from_url(url):
    """Extract (shop_id, item_id) from a Shopee product URL.

    Supports both the short "...-i.<shop>.<item>" form and the
    "/product/<shop>/<item>" path form.

    Args:
        url: A Shopee product URL string.

    Returns:
        tuple[int, int]: The shop id and item id.

    Raises:
        ValueError: If neither URL pattern matches.
    """
    for pattern in (r"i\.(\d+)\.(\d+)", r"/product/(\d+)/(\d+)"):
        found = re.search(pattern, url)
        if found:
            shop_part, item_part = found.groups()
            return int(shop_part), int(item_part)
    raise ValueError("Invalid Shopee URL format. Please use a valid Shopee product URL.")
def fetch_comments(shop_id, item_id, limit=50, offset=0, retries=3, timeout=30):
    """Fetch one page of Shopee ratings for a product.

    Fixes over the previous version: `requests.get` now has a timeout (a
    hung connection could previously stall the scraper forever), and
    network-level errors (DNS failure, connection reset, timeout) are
    caught and retried instead of propagating out of the retry loop.

    Args:
        shop_id: Shopee shop identifier.
        item_id: Shopee item identifier.
        limit: Number of ratings per page (default 50).
        offset: Pagination offset (default 0).
        retries: Maximum attempts before giving up (default 3).
        timeout: Per-request timeout in seconds (default 30).

    Returns:
        dict | None: Parsed JSON response, or None if every attempt failed.
    """
    url = f"https://shopee.ph/api/v2/item/get_ratings?itemid={item_id}&shopid={shop_id}&limit={limit}&offset={offset}"
    for attempt in range(retries):
        # Rotate account headers on every attempt to spread load across cookies.
        headers = get_headers(shop_id, item_id)
        try:
            response = requests.get(url, headers=headers, timeout=timeout)
        except requests.exceptions.RequestException as req_err:
            print(f"Request error occurred: {req_err}")
            if attempt < retries - 1:
                time.sleep(2)
            continue
        if response.status_code == 418:
            # Shopee anti-bot response: back off 15-30 minutes, then retry
            # with a different (randomly chosen) account cookie.
            print("Received status code 418. Rotating account and waiting before retry.")
            time.sleep(randint(15 * 60, 30 * 60))
            continue
        try:
            response.raise_for_status()
            return response.json()
        except requests.exceptions.HTTPError as http_err:
            print(f"HTTP error occurred: {http_err}")
            if attempt < retries - 1:
                time.sleep(2)
        except Exception as err:
            # raise_for_status()/json() can fail for non-HTTP reasons (e.g. bad JSON).
            print(f"An error occurred: {err}")
            if attempt < retries - 1:
                time.sleep(2)
    return None
def extract_comments(data):
    """Flatten a Shopee ratings API payload into a list of comment records.

    Each record contains the reviewer username, star rating, a formatted
    timestamp (from the epoch-seconds `ctime` field), and the comment text
    with any tag lines ("name: value") placed before the free-text comment.

    Args:
        data: Parsed JSON payload from the get_ratings endpoint (may be None).

    Returns:
        list[dict]: One dict per rating; empty if the payload is missing
        or lacks the expected 'data' / 'ratings' keys.
    """
    results = []
    if not (data and 'data' in data and 'ratings' in data['data']):
        return results
    for entry in data['data']['ratings']:
        # Tag lines first (e.g. "Quality: good"), then the main comment text.
        parts = []
        if 'tag_info' in entry:
            parts = [
                f"{tag.get('tag_name', '')}: {tag.get('tag_value', '')}"
                for tag in entry['tag_info']
            ]
        body = entry.get('comment', '').strip()
        if body:
            parts.append(body)
        results.append({
            'Username': entry.get('author_username', ''),
            'Rating': entry.get('rating_star', 0),
            'Date and Time': pd.to_datetime(entry.get('ctime', 0), unit='s').strftime('%Y-%m-%d %H:%M'),
            'Comment': "\n".join(parts),
        })
    return results
def clean_data(df):
    """Strip non-alphanumeric characters from comments and drop empty rows.

    Mutates the 'Comment' column of *df* in place (keeps letters, digits and
    whitespace only), then returns a filtered view without blank comments.

    Args:
        df: DataFrame with a 'Comment' column.

    Returns:
        DataFrame: Rows whose cleaned comment is non-empty.
    """
    def _strip_special(text):
        # str() guards against non-string cells (NaN, numbers).
        return re.sub(r'[^a-zA-Z0-9\s]', '', str(text))

    df['Comment'] = df['Comment'].apply(_strip_special)
    return df[df['Comment'].str.strip() != '']
def classify_sentiment(text):
    """Classify a comment's sentiment with the fine-tuned XLM-RoBERTa model.

    Fix: the forward pass now runs under ``torch.no_grad()`` — inference
    previously built the autograd graph on every request, wasting memory
    and compute for gradients that were never used.

    Args:
        text: Raw comment string; tokenized with truncation at 512 tokens.

    Returns:
        int: Predicted class index (mapped elsewhere in this file as
        0=Negative, 1=Neutral, 2=Positive).
    """
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
    with torch.no_grad():
        outputs = model(**inputs)
    prediction = torch.argmax(outputs.logits, dim=-1)
    return prediction.item()
def generate_insights(df):
    """Build a per-sentiment insight string from a classified DataFrame.

    Args:
        df: DataFrame with a 'Sentiment' column (0=Negative, 1=Neutral,
            2=Positive) and a 'Comment' column.

    Returns:
        dict: {'Positive'|'Neutral'|'Negative': insight text}. Sentiments
        with no rows get a fixed "no significant comments" message; the
        rest are summarized via generate_comment_insight().
    """
    labels = {2: 'Positive', 1: 'Neutral', 0: 'Negative'}
    summary = {}
    for value, label in labels.items():
        matching = df[df['Sentiment'] == value]
        if matching.empty:
            summary[label] = f"There are no significant comments for {label.lower()} sentiment."
        else:
            summary[label] = generate_comment_insight(matching['Comment'].dropna().tolist())
    return summary
def generate_comment_insight(comments):
    """Return up to the first 20 comments joined by newlines as a raw sample.

    Fix: the original contained a syntax error — a single-quoted string
    literal split across two physical lines. The intent (joining comments
    with a newline) is restored with an explicit ``'\\n'`` escape.

    Args:
        comments: List of comment strings.

    Returns:
        str: The first 20 comments, newline-separated (empty string for
        an empty list).
    """
    return '\n'.join(comments[:20])  # adjust the sample size as needed
html_template = """
Loading...