letijo03 committed on
Commit
6cfdd4c
·
verified ·
1 Parent(s): 149d5a7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +101 -394
app.py CHANGED
@@ -1,394 +1,101 @@
1
- from flask import Flask, request, render_template_string, jsonify, send_from_directory
2
- import requests
3
- import pandas as pd
4
- import re
5
- import time
6
- from random import randint
7
- import os
8
- from transformers import AutoModelForSequenceClassification, XLMRobertaTokenizer
9
- import torch
10
-
11
- # Define the Flask app with a different variable name
12
- flask_app = Flask(__name__)
13
-
14
- # Load the pre-trained model and tokenizer from Hugging Face
15
- MODEL_NAME = "letijo03/xlm-r-shopee"
16
- tokenizer = XLMRobertaTokenizer.from_pretrained(MODEL_NAME)
17
- model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
18
- model.eval() # Set the model to evaluation mode
19
-
20
- def get_ids_from_url(url):
21
- # Try multiple patterns to extract shop_id and item_id
22
- patterns = [
23
- r"i\.(\d+)\.(\d+)", # e.g. ...-i.123456.987654
24
- r"/product/(\d+)/(\d+)" # e.g. .../product/123456/987654
25
- ]
26
- for pattern in patterns:
27
- match = re.search(pattern, url)
28
- if match:
29
- return int(match.group(1)), int(match.group(2))
30
- raise ValueError("Invalid Shopee URL format. Please use a valid Shopee product URL.")
31
-
32
- def fetch_comments(shop_id, item_id, limit=50, offset=0, retries=3):
33
- url = f"https://shopee.ph/api/v2/item/get_ratings?itemid={item_id}&shopid={shop_id}&limit={limit}&offset={offset}"
34
- headers = {
35
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
36
- }
37
- for attempt in range(retries):
38
- try:
39
- response = requests.get(url, headers=headers)
40
- response.raise_for_status()
41
- return response.json()
42
- except requests.exceptions.HTTPError as http_err:
43
- print(f"HTTP error occurred: {http_err}")
44
- if attempt < retries - 1:
45
- time.sleep(2)
46
- except Exception as err:
47
- print(f"An error occurred: {err}")
48
- if attempt < retries - 1:
49
- time.sleep(2)
50
- return None
51
-
52
- def extract_comments(data):
53
- comments = []
54
- if data and 'data' in data and 'ratings' in data['data']:
55
- for rating in data['data']['ratings']:
56
- comment_parts = []
57
- if 'tag_info' in rating:
58
- for tag in rating['tag_info']:
59
- tag_text = f"{tag.get('tag_name', '')}: {tag.get('tag_value', '')}"
60
- comment_parts.append(tag_text)
61
- main_comment = rating.get('comment', '').strip()
62
- if main_comment:
63
- comment_parts.append(main_comment)
64
- full_comment = "\n".join(comment_parts)
65
- comment = {
66
- 'Username': rating.get('author_username', ''),
67
- 'Rating': rating.get('rating_star', 0),
68
- 'Date and Time': pd.to_datetime(rating.get('ctime', 0), unit='s').strftime('%Y-%m-%d %H:%M'),
69
- 'Comment': full_comment
70
- }
71
- comments.append(comment)
72
- return comments
73
-
74
- def clean_data(df):
75
- # Convert each comment to a string before applying regex
76
- df['Comment'] = df['Comment'].apply(lambda x: re.sub(r'[^a-zA-Z0-9\s]', '', str(x)))
77
- # Drop rows with empty comments after stripping whitespace
78
- df = df[df['Comment'].str.strip() != '']
79
- return df
80
-
81
- def classify_sentiment(text):
82
- inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
83
- outputs = model(**inputs)
84
- prediction = torch.argmax(outputs.logits, dim=-1)
85
- return prediction.item()
86
-
87
- # Combined HTML and CSS template using render_template_string
88
- html_template = """
89
- <!DOCTYPE html>
90
- <html lang="en">
91
- <head>
92
- <meta charset="UTF-8">
93
- <meta name="viewport" content="width=device-width, initial-scale=1.0">
94
- <title>Shopee Product Comment Sentiment Analysis</title>
95
- <style>
96
- /* Global styles */
97
- body {
98
- font-family: Arial, sans-serif;
99
- background-color: #f5f5f5;
100
- margin: 0;
101
- padding: 0;
102
- color: #333;
103
- }
104
- header {
105
- background-color: #FF5722;
106
- color: white;
107
- padding: 20px;
108
- text-align: center;
109
- }
110
- header h1 {
111
- margin: 0;
112
- font-size: 2em;
113
- }
114
- main {
115
- padding: 20px;
116
- max-width: 900px;
117
- margin: 0 auto;
118
- background-color: white;
119
- border-radius: 8px;
120
- box-shadow: 0px 4px 6px rgba(0, 0, 0, 0.1);
121
- }
122
- form {
123
- margin: 20px auto;
124
- max-width: 400px;
125
- display: flex;
126
- flex-direction: column;
127
- gap: 15px;
128
- background-color: #f9f9f9;
129
- padding: 20px;
130
- border-radius: 8px;
131
- box-shadow: 0px 2px 4px rgba(0, 0, 0, 0.1);
132
- }
133
- input, button {
134
- padding: 12px;
135
- font-size: 1.1em;
136
- border: 1px solid #ccc;
137
- border-radius: 6px;
138
- }
139
- input {
140
- background-color: #fff;
141
- }
142
- button {
143
- background-color: #FF5722;
144
- color: white;
145
- border: none;
146
- cursor: pointer;
147
- transition: background-color 0.3s ease;
148
- }
149
- button:hover {
150
- background-color: #E64A19;
151
- }
152
- input:focus {
153
- border-color: #FF5722;
154
- outline: none;
155
- }
156
- .error-message {
157
- color: red;
158
- font-weight: bold;
159
- }
160
- .success-message {
161
- color: green;
162
- font-weight: bold;
163
- }
164
- #loadingContainer {
165
- display: flex;
166
- flex-direction: column;
167
- justify-content: center;
168
- align-items: center;
169
- font-size: 16px;
170
- color: #FF5722;
171
- height: 100vh;
172
- position: absolute;
173
- top: 0;
174
- left: 0;
175
- right: 0;
176
- bottom: 0;
177
- background-color: rgba(255, 255, 255, 0.8);
178
- z-index: 9999;
179
- text-align: center;
180
- }
181
- .spinner {
182
- border: 4px solid rgba(0, 0, 0, 0.1);
183
- border-left-color: #FF5722;
184
- border-radius: 50%;
185
- width: 50px;
186
- height: 50px;
187
- animation: spin 1s linear infinite;
188
- margin-top: 10px;
189
- }
190
- @keyframes spin {
191
- to { transform: rotate(360deg); }
192
- }
193
- #chartContainer {
194
- display: flex;
195
- justify-content: center;
196
- align-items: center;
197
- width: 100%;
198
- max-width: 800px;
199
- height: 600px;
200
- margin: 20px auto;
201
- }
202
- img {
203
- display: block;
204
- margin: 0 auto;
205
- max-width: 100%;
206
- height: auto;
207
- }
208
- footer {
209
- background-color: #333;
210
- color: white;
211
- text-align: center;
212
- padding: 10px 0;
213
- position: relative;
214
- bottom: 0;
215
- width: 100%;
216
- }
217
- .result-message {
218
- display: flex;
219
- flex-direction: column;
220
- justify-content: center;
221
- align-items: center;
222
- text-align: center;
223
- margin-top: 20px;
224
- }
225
- .error-message {
226
- color: red;
227
- font-size: 18px;
228
- font-weight: bold;
229
- }
230
- .success-message {
231
- color: green;
232
- font-size: 18px;
233
- font-weight: bold;
234
- }
235
- .download-link {
236
- margin-top: 15px;
237
- padding: 10px 20px;
238
- background-color: #FF5722;
239
- color: white;
240
- text-decoration: none;
241
- border-radius: 5px;
242
- font-size: 16px;
243
- font-weight: bold;
244
- display: inline-block;
245
- }
246
- .download-link:hover {
247
- background-color: #e64a19;
248
- }
249
- </style>
250
- <script type="text/javascript" src="https://www.gstatic.com/charts/loader.js"></script>
251
- </head>
252
- <body>
253
- <header>
254
- <h1>Shopee Product Comment Sentiment Analysis</h1>
255
- </header>
256
- <main>
257
- <form id="scrapeForm">
258
- <label for="url">Enter Shopee Product URL:</label>
259
- <input type="text" id="url" name="url" placeholder="Enter the URL here" required>
260
- <button type="submit">Generate</button>
261
- </form>
262
- <div id="loadingContainer" style="display: none;">
263
- <p id="loadingText">Scraping, please wait...</p>
264
- <div class="spinner"></div>
265
- </div>
266
- <div id="result"></div>
267
- <div id="downloadLink" style="margin-top: 20px;"></div>
268
- <div id="chartContainer" style="margin-top: 5px;"></div>
269
- </main>
270
- <footer>
271
- <p>&copy; 2025 Shopee Product Comment Sentiment Analysis. All Rights Reserved.</p>
272
- </footer>
273
- <script>
274
- google.charts.load('current', { 'packages': ['corechart'] });
275
- function drawPieChart(chartData) {
276
- const data = google.visualization.arrayToDataTable(chartData);
277
- const options = {
278
- title: 'Sentiment Distribution',
279
- is3D: true,
280
- width: '100%',
281
- height: 600,
282
- slices: {
283
- 0: { color: '#4caf50' },
284
- 1: { color: '#ffc107' },
285
- 2: { color: '#f44336' }
286
- },
287
- pieSliceText: 'percentage',
288
- tooltip: { trigger: 'focus' }
289
- };
290
- const chart = new google.visualization.PieChart(document.getElementById('chartContainer'));
291
- chart.draw(data, options);
292
- }
293
- document.getElementById("scrapeForm").onsubmit = async function(e) {
294
- e.preventDefault();
295
- const url = document.getElementById("url").value;
296
- const resultDiv = document.getElementById("result");
297
- const downloadLinkDiv = document.getElementById("downloadLink");
298
- const chartDiv = document.getElementById("chartContainer");
299
- const loadingContainer = document.getElementById("loadingContainer");
300
- loadingContainer.style.display = "flex";
301
- resultDiv.innerHTML = "";
302
- downloadLinkDiv.innerHTML = "";
303
- chartDiv.innerHTML = "";
304
- try {
305
- const response = await fetch('/scrape', {
306
- method: 'POST',
307
- headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
308
- body: new URLSearchParams({ 'url': url })
309
- });
310
- const data = await response.json();
311
- loadingContainer.style.display = "none";
312
- if (data.error) {
313
- resultDiv.innerHTML = `<div class="result-message"><p class="error-message">${data.error}</p></div>`;
314
- } else {
315
- resultDiv.innerHTML = `<div class="result-message"><p class="success-message">${data.message}</p></div>`;
316
- if (data.filename) {
317
- downloadLinkDiv.innerHTML = `<div class="result-message"><a href="/download/${data.filename}" download class="download-link">Download CSV</a></div>`;
318
- }
319
- if (data.chart_data) {
320
- google.charts.setOnLoadCallback(() => drawPieChart(data.chart_data));
321
- }
322
- }
323
- } catch (error) {
324
- loadingContainer.style.display = "none";
325
- resultDiv.innerHTML = `<p class="error-message">Error sending request: ${error.message}</p>`;
326
- console.error('Fetch error:', error);
327
- }
328
- };
329
- </script>
330
- </body>
331
- </html>
332
- """
333
-
334
- @flask_app.route('/')
335
- def index():
336
- return render_template_string(html_template)
337
-
338
- @flask_app.route('/scrape', methods=['POST'])
339
- def scrape():
340
- url = request.form.get('url')
341
- try:
342
- shop_id, item_id = get_ids_from_url(url)
343
- except ValueError as e:
344
- return jsonify({'error': str(e)})
345
-
346
- all_comments = []
347
- offset = 0
348
- limit = 50
349
-
350
- while True:
351
- data = fetch_comments(shop_id, item_id, limit=limit, offset=offset)
352
- if data is None:
353
- break
354
- comments = extract_comments(data)
355
- if not comments:
356
- break
357
- all_comments.extend(comments)
358
- if len(comments) < limit:
359
- break
360
- offset += limit
361
- time.sleep(randint(2, 5))
362
-
363
- if all_comments:
364
- df = pd.DataFrame(all_comments)
365
- df = clean_data(df)
366
- df['Sentiment'] = df['Comment'].apply(classify_sentiment)
367
- sentiment_counts = df['Sentiment'].value_counts().to_dict()
368
- chart_data = [["Sentiment", "Count"]]
369
- for sentiment, count in sentiment_counts.items():
370
- sentiment_label = "Positive" if sentiment == 2 else "Neutral" if sentiment == 1 else "Negative"
371
- chart_data.append([sentiment_label, count])
372
- csv_filename = 'shopee_comments_formatted.csv'
373
- os.makedirs('static', exist_ok=True)
374
- csv_filepath = os.path.join('static', csv_filename)
375
- df.to_csv(csv_filepath, index=False)
376
- return jsonify({
377
- 'message': 'Successfully scraped and analyzed comments.',
378
- 'filename': csv_filename,
379
- 'chart_data': chart_data
380
- })
381
- else:
382
- return jsonify({'error': 'No comments found or unable to fetch comments.'})
383
-
384
- @flask_app.route('/download/<filename>')
385
- def download_file(filename):
386
- return send_from_directory('static', filename, as_attachment=True)
387
-
388
- # Wrap the Flask app as an ASGI app so that the module-level variable 'app' is ASGI-compatible
389
- from asgiref.wsgi import WsgiToAsgi
390
- app = WsgiToAsgi(flask_app)
391
-
392
- if __name__ == '__main__':
393
- import uvicorn
394
- uvicorn.run(app, host="0.0.0.0", port=int(os.environ.get("PORT", 7860)))
 
1
+ from flask import Flask, request, render_template_string, jsonify
2
+ from transformers import AutoModelForSequenceClassification, XLMRobertaTokenizer
3
+ import torch
4
+ from asgiref.wsgi import WsgiToAsgi
5
+
6
+ # Define the Flask app
7
+ flask_app = Flask(__name__)
8
+
9
+ # Load the pre-trained model and tokenizer
10
+ MODEL_NAME = "letijo03/xlm-r-shopee"
11
+ tokenizer = XLMRobertaTokenizer.from_pretrained(MODEL_NAME)
12
+ model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
13
+ model.eval() # Set the model to evaluation mode
14
+
15
+ def classify_sentiment(text):
16
+ inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
17
+ outputs = model(**inputs)
18
+ prediction = torch.argmax(outputs.logits, dim=-1)
19
+ return prediction.item()
20
+
21
+ # HTML template for user input
22
+ html_template = """
23
+ <!DOCTYPE html>
24
+ <html lang="en">
25
+ <head>
26
+ <meta charset="UTF-8">
27
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
28
+ <title>Comment Sentiment Analysis</title>
29
+ <style>
30
+ body { font-family: Arial, sans-serif; background-color: #f5f5f5; margin: 0; padding: 0; color: #333; }
31
+ header { background-color: #FF5722; color: white; padding: 20px; text-align: center; }
32
+ main { padding: 20px; max-width: 900px; margin: 0 auto; background-color: white; border-radius: 8px; box-shadow: 0px 4px 6px rgba(0, 0, 0, 0.1); }
33
+ form { margin: 20px auto; max-width: 600px; display: flex; flex-direction: column; gap: 15px; background-color: #f9f9f9; padding: 20px; border-radius: 8px; box-shadow: 0px 2px 4px rgba(0, 0, 0, 0.1); }
34
+ textarea, button { padding: 12px; font-size: 1.1em; border: 1px solid #ccc; border-radius: 6px; }
35
+ textarea { background-color: #fff; resize: vertical; min-height: 100px; }
36
+ button { background-color: #FF5722; color: white; border: none; cursor: pointer; transition: background-color 0.3s ease; }
37
+ button:hover { background-color: #E64A19; }
38
+ .result-message { text-align: center; margin-top: 20px; font-size: 18px; font-weight: bold; }
39
+ </style>
40
+ <script>
41
+ document.addEventListener("DOMContentLoaded", function() {
42
+ document.getElementById("commentForm").onsubmit = async function(e) {
43
+ e.preventDefault();
44
+ const comment = document.getElementById("comment").value;
45
+ const resultDiv = document.getElementById("result");
46
+ resultDiv.innerHTML = "";
47
+ try {
48
+ const response = await fetch('/analyze', {
49
+ method: 'POST',
50
+ headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
51
+ body: new URLSearchParams({ 'comment': comment })
52
+ });
53
+ const data = await response.json();
54
+ if (data.error) {
55
+ resultDiv.innerHTML = `<p class="result-message" style="color:red;">${data.error}</p>`;
56
+ } else {
57
+ resultDiv.innerHTML = `<p class="result-message" style="color:green;">${data.message}</p>`;
58
+ }
59
+ } catch (error) {
60
+ resultDiv.innerHTML = `<p class="result-message" style="color:red;">Error sending request: ${error.message}</p>`;
61
+ console.error('Fetch error:', error);
62
+ }
63
+ };
64
+ });
65
+ </script>
66
+ </head>
67
+ <body>
68
+ <header>
69
+ <h1>Comment Sentiment Analysis</h1>
70
+ </header>
71
+ <main>
72
+ <form id="commentForm">
73
+ <label for="comment">Enter your comment:</label>
74
+ <textarea id="comment" name="comment" placeholder="Type your comment here..." required></textarea>
75
+ <button type="submit">Analyze Sentiment</button>
76
+ </form>
77
+ <div id="result"></div>
78
+ </main>
79
+ </body>
80
+ </html>
81
+ """
82
+
83
+ @flask_app.route('/')
84
+ def index():
85
+ return render_template_string(html_template)
86
+
87
+ @flask_app.route('/analyze', methods=['POST'])
88
+ def analyze():
89
+ comment = request.form.get('comment')
90
+ if not comment or comment.strip() == "":
91
+ return jsonify({'error': 'Please provide a valid comment.'})
92
+ sentiment = classify_sentiment(comment)
93
+ sentiment_label = "Positive" if sentiment == 2 else "Neutral" if sentiment == 1 else "Negative"
94
+ return jsonify({'message': f'Sentiment analysis complete. The sentiment is: {sentiment_label}.'})
95
+
96
+ # Wrap the Flask app as an ASGI app
97
+ app = WsgiToAsgi(flask_app)
98
+
99
+ if __name__ == '__main__':
100
+ import uvicorn
101
+ uvicorn.run(app, host="0.0.0.0", port=7860)