| from flask import Flask, request, render_template_string, jsonify, send_from_directory |
| import pandas as pd |
| import re |
| import os |
| from transformers import XLMRobertaForSequenceClassification, XLMRobertaTokenizer |
| from peft import PeftModel, PeftConfig |
| import torch |
| from collections import defaultdict |
| from werkzeug.utils import secure_filename |
|
|
| |
# Directory that receives uploaded CSV files; created at import time if missing.
UPLOAD_FOLDER = 'uploads'
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

# The Flask (WSGI) application object that the route decorators in this
# module register their handlers on.
flask_app = Flask(__name__)
flask_app.config.update(UPLOAD_FOLDER=UPLOAD_FOLDER)
|
|
| |
# Tokenizer and LoRA adapter weights are both loaded from the same adapter
# repository; the adapter is applied on top of the plain XLM-RoBERTa base.
# NOTE(review): `trust_remote_code=True` allows code shipped with the
# repository to run locally -- confirm this is actually required.
tokenizer = XLMRobertaTokenizer.from_pretrained("letijo03/lora-adapter-32", use_fast=True, trust_remote_code=True)

# num_labels=3: the class ids 0/1/2 are mapped to Negative/Neutral/Positive
# in generate_insights().
base_model = XLMRobertaForSequenceClassification.from_pretrained("xlm-roberta-base", num_labels=3)

# Kept as a module-level name; not referenced by the rest of the visible code.
config = PeftConfig.from_pretrained("letijo03/lora-adapter-32")
model = PeftModel.from_pretrained(base_model, "letijo03/lora-adapter-32")

# classify_sentiment_batch() moves its inputs to `device`; the name has to
# exist at module level and the model has to live on the same device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Inference only: switch the module to evaluation mode.
model.eval()
|
|
|
|
| |
def generate_ngrams(text, n=2):
    """Return the lower-cased word n-grams of *text*, in order of appearance.

    The text is lower-cased and split on whitespace; each n-gram is the
    space-joined run of ``n`` consecutive words.

    Args:
        text: The string to split into n-grams.
        n: Number of words per n-gram.

    Returns:
        A list of n-gram strings. The list is empty when the text has fewer
        than ``n`` words or when ``n`` is smaller than 1.
    """
    # A non-positive size has no meaningful n-grams; without this guard the
    # slicing below would yield empty or truncated strings.
    if n < 1:
        return []

    tokens = text.lower().split()
    # range() is empty when there are fewer than n tokens, so no separate
    # length check is needed.
    return [' '.join(tokens[start:start + n]) for start in range(len(tokens) - n + 1)]
|
|
def get_top_phrases(comments, top_n=5, min_occurrence=2, ngram_size=2):
    """Return the most frequent n-grams found across *comments*.

    Args:
        comments: Iterable of comment strings.
        top_n: Maximum number of phrases to return.
        min_occurrence: Phrases seen fewer times than this are discarded.
        ngram_size: Number of words per phrase, forwarded to
            ``generate_ngrams``.

    Returns:
        A list of ``(phrase, count)`` tuples ordered by descending count.
        Phrases with equal counts keep the order in which they were first
        seen, because the sort is stable.
    """
    tally = defaultdict(int)
    for text in comments:
        for gram in generate_ngrams(text, n=ngram_size):
            tally[gram] += 1

    frequent = [(gram, hits) for gram, hits in tally.items() if hits >= min_occurrence]
    frequent.sort(key=lambda pair: pair[1], reverse=True)
    return frequent[:top_n]
|
|
def clean_data(df):
    """Return a cleaned copy of *df* containing only usable comments.

    Rows whose ``Comment`` is missing are dropped, every character other than
    ASCII letters, digits and whitespace is removed from the remaining
    comments, and rows whose comment is empty or whitespace-only afterwards
    are dropped as well. The input frame is left untouched.

    Args:
        df: DataFrame with a ``Comment`` column.

    Returns:
        A new DataFrame with the same columns and the original index labels
        of the surviving rows.
    """
    # Drop missing values first: stringifying NaN would turn it into the
    # literal comment "nan", which would then be classified like real text.
    has_comment = df['Comment'].notna()
    cleaned = df[has_comment].copy()

    # NOTE(review): this pattern also strips every non-ASCII letter, which
    # may be undesirable for non-English comments -- confirm it matches the
    # preprocessing the model expects.
    cleaned['Comment'] = (
        cleaned['Comment']
        .astype(str)
        .str.replace(r'[^a-zA-Z0-9\s]', '', regex=True)
    )

    non_blank = cleaned['Comment'].str.strip() != ''
    return cleaned[non_blank]
| |
def classify_sentiment_batch(texts):
    """Predict a sentiment class id for every string in *texts*.

    Args:
        texts: List of comment strings forming one batch.

    Returns:
        A NumPy array with one integer class id per input text, taken as the
        argmax over the model's logits. An empty batch yields an empty array
        without invoking the tokenizer or the model.
    """
    if len(texts) == 0:
        return torch.empty(0, dtype=torch.long).numpy()

    # Take the device from the model itself so the inputs always end up
    # where the weights are, without depending on a separate global.
    target_device = next(model.parameters()).device

    # Pad to the longest text in the batch and cut anything over 512 tokens.
    encoded = tokenizer(texts, return_tensors="pt", padding=True, truncation=True, max_length=512)
    encoded = {name: tensor.to(target_device) for name, tensor in encoded.items()}

    # Inference only: no gradients needed.
    with torch.no_grad():
        outputs = model(**encoded)

    predictions = torch.argmax(outputs.logits, dim=-1)
    return predictions.cpu().numpy()
|
|
|
|
def generate_insights(df):
    """Build one insight sentence block per sentiment class.

    Args:
        df: DataFrame with a ``Sentiment`` column holding class ids and a
            ``Comment`` column holding the comment text.

    Returns:
        A dict keyed by ``'Positive'``, ``'Neutral'`` and ``'Negative'``.
        A class without any rows gets a fixed "no significant comments"
        message; otherwise the text comes from
        ``generate_contextual_insight``.
    """
    labels_by_class = {2: 'Positive', 1: 'Neutral', 0: 'Negative'}
    summary = {}

    for class_id, label in labels_by_class.items():
        matching = df[df['Sentiment'] == class_id]

        if len(matching) == 0:
            summary[label] = f"There are no significant comments for {label.lower()} sentiment."
            continue

        texts = matching['Comment'].dropna().tolist()
        summary[label] = generate_contextual_insight(texts, label)

    return summary
|
|
def generate_contextual_insight(comments, sentiment_label):
    """Dispatch to the insight generator that matches *sentiment_label*.

    Args:
        comments: List of comment strings belonging to one sentiment class.
        sentiment_label: ``"Positive"``, ``"Neutral"`` or ``"Negative"``.

    Returns:
        The insight text from the matching generator, or ``None`` when the
        label is not one of the three known values.
    """
    if sentiment_label == "Positive":
        return generate_positive_insight(comments)
    if sentiment_label == "Neutral":
        return generate_neutral_insight(comments)
    if sentiment_label == "Negative":
        return generate_negative_insight(comments)
    return None
|
|
def generate_positive_insight(comments):
    """Compose the summary text for positively classified comments.

    A fixed opening sentence is always present. One further sentence is
    appended for each theme (speed, packaging, price) whose keywords occur,
    case-insensitively and as plain substrings, in at least one comment.

    Args:
        comments: List of comment strings.

    Returns:
        The assembled insight string.
    """
    def any_comment_mentions(*needles):
        # True as soon as one comment contains one of the keywords.
        return any(
            any(needle in text.lower() for needle in needles)
            for text in comments
        )

    sentences = [
        "Positive comments show strong satisfaction, commonly highlighting fast delivery, good packaging, cheap prices, and quality products. ",
    ]

    if any_comment_mentions("fast", "quick"):
        sentences.append("Many buyers praised fast shipping. ")
    if any_comment_mentions("packaging", "sealed"):
        sentences.append("Well-packaged items were frequently mentioned. ")
    if any_comment_mentions("cheap", "affordable"):
        sentences.append("Affordability and value for money stood out.")

    return "".join(sentences)
|
|
def generate_neutral_insight(comments):
    """Compose the summary text for neutrally classified comments.

    A fixed opening sentence is always present. One further sentence is
    appended for each theme whose keywords occur, case-insensitively and as
    plain substrings, in at least one comment.

    Args:
        comments: List of comment strings.

    Returns:
        The assembled insight string.
    """
    def any_comment_mentions(*needles):
        # True as soon as one comment contains one of the keywords.
        return any(
            any(needle in text.lower() for needle in needles)
            for text in comments
        )

    sentences = [
        "Neutral comments are mostly factual, sharing moderate satisfaction without strong praise or complaint. ",
    ]

    if any_comment_mentions("average", "normal"):
        sentences.append("Some users found the product quality or service to be average. ")
    if any_comment_mentions("okay", "fine"):
        sentences.append("Others simply stated that the item or service was acceptable, without notable issues. ")
    if any_comment_mentions("small issue", "minor defect"):
        sentences.append("Minor imperfections like scratches or small delivery delays were sometimes noted.")

    return "".join(sentences)
|
|
def generate_negative_insight(comments):
    """Compose the summary text for negatively classified comments.

    A fixed opening sentence is always present. One further sentence is
    appended for each theme (damage, wrong item, delay, no support) whose
    keywords occur, case-insensitively and as plain substrings, in at least
    one comment.

    Args:
        comments: List of comment strings.

    Returns:
        The assembled insight string.
    """
    def any_comment_mentions(*needles):
        # True as soon as one comment contains one of the keywords.
        return any(
            any(needle in text.lower() for needle in needles)
            for text in comments
        )

    sentences = [
        "Negative comments emphasize dissatisfaction, commonly about defective products, incorrect items, late deliveries, and unresponsive customer service. ",
    ]

    if any_comment_mentions("broken", "defective", "damage"):
        sentences.append("Broken, defective, or damaged products were often mentioned. ")
    if any_comment_mentions("wrong item", "incorrect"):
        sentences.append("Receiving the wrong item was a frequent complaint. ")
    if any_comment_mentions("late", "delay"):
        sentences.append("Delivery delays frustrated several buyers. ")
    if any_comment_mentions("no response", "ignored", "no reply"):
        sentences.append("Lack of seller support or poor customer service was also criticized.")

    return "".join(sentences)
|
|
| |
# Single-page UI served by index(). The page posts the chosen CSV to
# /analyze and renders the JSON reply as a pie chart plus insight text.
#
# Notes for maintainers:
#   * The string is passed through render_template_string(), so it must not
#     contain Jinja delimiters (double curly braces or curly-brace/percent).
#   * Text that comes back from the server is inserted with textContent /
#     text nodes, never innerHTML, so it cannot be interpreted as markup.
#   * The chart gets its own container inside #result so that drawing it does
#     not share a node with the insights block.
#   * Drawing waits for the Google Charts loader callback, because the
#     visualization package loads asynchronously.
html_template = """
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Shopee Comment Sentiment Analysis</title>

    <style>
        body {
            font-family: 'Poppins', sans-serif;
            background: linear-gradient(to right, #f8f9fa, #ffe0c3);
            margin: 0;
            padding: 0;
            display: flex;
            flex-direction: column;
            min-height: 100vh;
        }

        header {
            background: linear-gradient(90deg, #ff5722, #ff7043);
            color: white;
            padding: 1.5rem;
            text-align: center;
            font-size: 2rem;
            box-shadow: 0 4px 10px rgba(0, 0, 0, 0.2);
        }

        main {
            flex: 1;
            padding: 2rem;
            display: flex;
            flex-direction: column;
            align-items: center;
        }

        form {
            background: white;
            padding: 2rem;
            border-radius: 20px;
            box-shadow: 0 8px 24px rgba(0, 0, 0, 0.2);
            display: flex;
            flex-direction: column;
            align-items: center;
            gap: 1.5rem;
            width: 100%;
            max-width: 400px;
            transform: perspective(1000px) rotateX(2deg);
        }

        input[type="file"] {
            padding: 0.8rem;
            border: 2px solid #ff7043;
            border-radius: 12px;
            width: 100%;
        }

        button {
            background: linear-gradient(90deg, #ff7043, #ff5722);
            color: white;
            border: none;
            padding: 1rem 2rem;
            border-radius: 16px;
            font-size: 1.2rem;
            cursor: pointer;
            transition: transform 0.3s, background 0.3s;
        }

        button:hover {
            background: linear-gradient(90deg, #ff5722, #e64a19);
            transform: scale(1.05);
        }

        #loadingContainer {
            display: none;
            flex-direction: column;
            align-items: center;
            margin-top: 2rem;
        }

        .spinner {
            width: 60px;
            height: 60px;
            border: 8px solid #eee;
            border-top: 8px solid #ff5722;
            border-radius: 50%;
            animation: spin 1s linear infinite;
        }

        @keyframes spin {
            0% { transform: rotate(0deg); }
            100% { transform: rotate(360deg); }
        }

        #result {
            margin-top: 2rem;
            width: 100%;
            max-width: 700px;
            text-align: center;
        }

        #chart {
            width: 100%;
            height: 400px;
        }

        .insights {
            margin-top: 2rem;
            padding: 2rem;
            background: white;
            border-radius: 16px;
            box-shadow: 0 6px 18px rgba(0, 0, 0, 0.15);
            text-align: left;
        }

        .insights h3 {
            margin-bottom: 1rem;
            color: #ff5722;
        }

        .insights p {
            font-size: 1.1rem;
            margin: 0.5rem 0;
        }

        footer {
            background: linear-gradient(90deg, #ff5722, #ff7043);
            color: white;
            text-align: center;
            padding: 1rem;
            font-size: 0.9rem;
            margin-top: auto;
        }
    </style>

    <script type="text/javascript" src="https://www.gstatic.com/charts/loader.js"></script>
    <script>
        google.charts.load('current', { 'packages': ['corechart'] });

        // Resolves once the corechart package is ready to use.
        const chartsReady = new Promise(function (resolve) {
            google.charts.setOnLoadCallback(resolve);
        });

        // Replace the container's content with a red message, as plain text.
        function showError(container, message) {
            const paragraph = document.createElement('p');
            paragraph.style.color = 'red';
            paragraph.textContent = message;
            container.innerHTML = "";
            container.appendChild(paragraph);
        }

        // Build the insights card from server text without parsing it as HTML.
        function buildInsights(insights) {
            const box = document.createElement('div');
            box.classList.add('insights');

            const heading = document.createElement('h3');
            heading.textContent = 'Insights';
            box.appendChild(heading);

            ['Positive', 'Neutral', 'Negative'].forEach(function (label) {
                const row = document.createElement('p');
                const name = document.createElement('strong');
                name.textContent = label + ':';
                row.appendChild(name);
                row.appendChild(document.createTextNode(' ' + insights[label]));
                box.appendChild(row);
            });

            return box;
        }

        function drawPieChart(chartData, container) {
            const data = google.visualization.arrayToDataTable(chartData);

            const options = {
                title: 'Sentiment Distribution',
                pieHole: 0.5,
                colors: ['#4caf50', '#ffc107', '#f44336'],
                legend: { position: 'bottom' },
                backgroundColor: 'transparent'
            };

            const chart = new google.visualization.PieChart(container);
            chart.draw(data, options);
        }

        document.addEventListener("DOMContentLoaded", function () {
            const form = document.getElementById("uploadForm");
            const loadingContainer = document.getElementById("loadingContainer");
            const resultDiv = document.getElementById("result");

            form.onsubmit = async function (e) {
                e.preventDefault();
                loadingContainer.style.display = "flex";
                resultDiv.innerHTML = "";

                const formData = new FormData(form);

                try {
                    const response = await fetch('/analyze', {
                        method: 'POST',
                        body: formData
                    });
                    const data = await response.json();

                    if (data.error) {
                        showError(resultDiv, data.error);
                    } else {
                        const chartData = [
                            ["Sentiment", "Count"],
                            ["Positive", data.chart_data.Positive || 0],
                            ["Neutral", data.chart_data.Neutral || 0],
                            ["Negative", data.chart_data.Negative || 0]
                        ];

                        // The chart needs an attached container of its own.
                        const chartDiv = document.createElement('div');
                        chartDiv.id = 'chart';
                        resultDiv.appendChild(chartDiv);

                        await chartsReady;
                        drawPieChart(chartData, chartDiv);

                        resultDiv.appendChild(buildInsights(data.insights));
                    }
                } catch (error) {
                    showError(resultDiv, "An error occurred: " + error.message);
                } finally {
                    loadingContainer.style.display = "none";
                }
            };
        });
    </script>

</head>

<body>

    <header>
        Shopee Comment Sentiment Analysis
    </header>

    <main>
        <form id="uploadForm" enctype="multipart/form-data">
            <input type="file" name="file" accept=".csv" required>
            <button type="submit">Analyze CSV</button>
        </form>

        <div id="loadingContainer">
            <div class="spinner"></div>
            <p>Analyzing, please wait...</p>
        </div>

        <div id="result"></div>
        <div id="downloadLink"></div>
    </main>

    <footer>
        © 2025 Shopee Sentiment Analyzer. All rights reserved.
    </footer>

</body>
</html>
"""
|
|
@flask_app.route('/')
def index():
    """Render and return the upload page held in ``html_template``."""
    page = render_template_string(html_template)
    return page
|
|
| |
@flask_app.route('/analyze', methods=['POST'])
def analyze():
    """Classify the comments of an uploaded CSV and summarise the result.

    Expects a multipart form upload under the field name ``file`` whose CSV
    content has a ``Comment`` column.

    Returns:
        A JSON response. On success it carries ``message``, ``chart_data``
        (counts keyed by ``Positive``/``Neutral``/``Negative``) and
        ``insights`` (text keyed the same way). Every failure is reported
        in-band as ``{"error": "..."}`` without setting an error status
        code, which is what the page script checks for.
    """
    import uuid  # local import: only needed to build a unique upload name

    if 'file' not in request.files:
        return jsonify({'error': 'No file part'})

    upload = request.files['file']
    if upload.filename == '':
        return jsonify({'error': 'No selected file'})

    # secure_filename() can return an empty string (for example when the name
    # consists only of characters it strips), so fall back to a fixed name.
    # The random prefix keeps simultaneous uploads of identically named files
    # from overwriting each other.
    safe_name = secure_filename(upload.filename) or 'upload.csv'
    filepath = os.path.join(
        flask_app.config['UPLOAD_FOLDER'],
        f"{uuid.uuid4().hex}_{safe_name}",
    )

    try:
        # Saving happens inside the try so an I/O failure is reported as a
        # JSON error like every other failure.
        upload.save(filepath)

        df = pd.read_csv(filepath)
        if 'Comment' not in df.columns:
            return jsonify({'error': "CSV must contain a 'Comment' column."})

        df = clean_data(df)

        # Classify in fixed-size batches to bound memory use per model call.
        batch_size = 32
        comments = df['Comment'].tolist()
        all_predictions = []
        for start in range(0, len(comments), batch_size):
            batch = comments[start:start + batch_size]
            all_predictions.extend(classify_sentiment_batch(batch))

        # assign() returns a new frame, so no write into a filtered slice.
        df = df.assign(Sentiment=all_predictions)

        # int() keeps the counts as plain Python ints for JSON encoding.
        chart_data_counts = {
            "Positive": int((df['Sentiment'] == 2).sum()),
            "Neutral": int((df['Sentiment'] == 1).sum()),
            "Negative": int((df['Sentiment'] == 0).sum()),
        }

        insights = generate_insights(df)

        return jsonify({
            'message': 'Analysis completed',
            'chart_data': chart_data_counts,
            'insights': insights
        })

    except Exception as e:
        # Request boundary: log the traceback, then answer in the JSON shape
        # the page script expects.
        flask_app.logger.exception("Failed to analyze uploaded file %r", upload.filename)
        return jsonify({'error': f'Error processing file: {str(e)}'})

    finally:
        # The upload is only needed while parsing; remove it so the upload
        # folder does not grow with every request.
        try:
            os.remove(filepath)
        except OSError:
            pass
|
|
| |
@flask_app.route('/download/<path:filename>')
def download(filename):
    """Send *filename* from the ``static`` directory as an attachment."""
    response = send_from_directory('static', filename, as_attachment=True)
    return response
|
|
# Wrap the WSGI Flask application so that an ASGI server can run it.
from asgiref.wsgi import WsgiToAsgi

app = WsgiToAsgi(flask_app)


if __name__ == '__main__':
    import uvicorn

    # Listen on all interfaces; the port comes from PORT, defaulting to 7860.
    listen_port = int(os.environ.get("PORT", 7860))
    uvicorn.run(app, host="0.0.0.0", port=listen_port)
|
|
|
|