| | from flask import Flask, request, jsonify, send_from_directory |
| | import pandas as pd |
| | import torch |
| | from transformers import BertTokenizer, BertForSequenceClassification |
| | from wordcloud import WordCloud |
| | import uuid |
| | import io |
| | import base64 |
| | import os |
| | from PIL import Image |
| |
|
# Flask application that serves the sentiment-analysis API.
app = Flask(__name__)

# Directory that generated word-cloud images are written to and served from.
UPLOAD_FOLDER = "uploads"
# The /uploads/<filename> route reads this key from app.config; without this
# registration every request to that route fails with KeyError.
app.config["UPLOAD_FOLDER"] = UPLOAD_FOLDER
# The folder has to exist before the first image can be saved into it.
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
| | |
| |
|
@app.route('/uploads/<filename>')
def uploaded_file(filename):
    """Serve a file from the upload folder.

    Args:
        filename: Name of the requested file, taken from the URL path.

    Returns:
        The response produced by ``send_from_directory``.
    """
    # Fall back to the module-level constant when the config key was never
    # registered, instead of raising KeyError on every request.
    upload_dir = app.config.get('UPLOAD_FOLDER', UPLOAD_FOLDER)
    # Flask resolves a relative directory against the application root path,
    # while the images are written relative to the working directory; making
    # the path absolute keeps the two locations identical.
    return send_from_directory(os.path.abspath(upload_dir), filename)
| |
|
| | |
# Class index produced by the classifier -> human-readable sentiment label.
label_mapping = {0: "negative", 1: "neutral", 2: "positive"}

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Tokenizer of the multilingual BERT checkpoint.
tokenizer = BertTokenizer.from_pretrained("bert-base-multilingual-cased")

# Fine-tuned classification weights, loaded from a local directory.
model_path = "./src/emotion_final_model"
model = BertForSequenceClassification.from_pretrained(model_path)
model = model.to(device)
# Inference only: switch the module to evaluation mode.
model.eval()
| |
|
@app.route('/predict', methods=['POST'])
def predict():
    """Classify the sentiment of a single piece of text.

    Expects a JSON object body with a ``text`` field.

    Returns:
        ``{"sentiment": <label>}`` on success, or ``{"error": ...}`` with
        status 400 when the body carries no usable text.
    """
    # silent=True yields None for a missing or malformed JSON body, so the
    # caller gets the JSON error below rather than an unhandled exception.
    data = request.get_json(silent=True)
    text = data.get('text') if isinstance(data, dict) else None

    # Only a non-blank string can be classified.
    if not isinstance(text, str) or not text.strip():
        return jsonify({"error": "No text provided"}), 400

    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
    inputs = {key: value.to(device) for key, value in inputs.items()}

    # No gradients are needed for inference.
    with torch.no_grad():
        outputs = model(**inputs)

    predicted_class_idx = torch.argmax(outputs.logits, dim=-1).item()
    sentiment = label_mapping[predicted_class_idx]

    return jsonify({"sentiment": sentiment})
| |
|
# Aspect name -> keywords whose presence in a review marks that aspect.
# Each list holds the English keywords first, followed by the Urdu ones.
aspect_keywords = {
    "Quality": [
        "quality", "material", "durable", "performance", "sturdy", "broken", "defective",
        "معیار", "ٹوٹا ہوا", "خراب",
    ],
    "Price": [
        "price", "cheap", "expensive", "value", "cost",
        "قیمت", "مہنگا", "سستا", "قیمت زیادہ",
    ],
    "Delivery": [
        "delivery", "shipping", "arrived", "late", "courier",
        "ترسیل", "شپنگ", "تاخیر", "دیر سے پہنچا",
    ],
    "Usability": [
        "easy to use", "setup", "installation", "instructions", "user-friendly",
        "آسان", "استعمال میں آسان", "سیٹ اپ", "تنصیب",
    ],
    "Design": [
        "design", "style", "appearance", "color", "looks",
        "ڈیزائن", "خوبصورتی", "رنگ", "ساخت",
    ],
    "Warranty/Support": [
        "warranty", "support", "return", "replacement", "service center",
        "وارنٹی", "واپسی", "تبادلہ", "سروس سینٹر",
    ],
}
| |
|
def detect_aspects(text):
    """Return the aspects that have at least one keyword occurring in *text*.

    The text is lower-cased and each keyword from ``aspect_keywords`` is
    tested as a plain substring. Aspects are returned in the order in which
    they appear in ``aspect_keywords``.
    """
    haystack = text.lower()
    return [
        aspect
        for aspect, keywords in aspect_keywords.items()
        if any(keyword in haystack for keyword in keywords)
    ]
| |
|
def _classify_review(text):
    """Return the sentiment label the model predicts for one review text."""
    encoded = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
    encoded = {name: tensor.to(device) for name, tensor in encoded.items()}

    # No gradients are needed for inference.
    with torch.no_grad():
        logits = model(**encoded).logits

    return label_mapping[torch.argmax(logits, dim=-1).item()]


@app.route("/analyze", methods=["POST"])
def analyze():
    """Run sentiment and aspect analysis over an uploaded CSV of reviews.

    Expects a multipart upload under the ``file`` field holding a CSV with a
    ``Review`` column. Empty cells in that column are skipped.

    Returns:
        JSON with ``total_positive``, ``total_negative``, ``total_neutral``,
        ``aspect_summary`` (per-aspect counts) and ``wordcloud_image_path``
        (``None`` when no word cloud could be drawn). When an image was
        produced, ``wordcloud_image_base64`` carries the PNG itself.
        Responds with status 400 and an ``error`` message when the upload is
        missing, unreadable, or has no ``Review`` column.
    """
    if 'file' not in request.files:
        return jsonify({"error": "No file uploaded"}), 400
    file = request.files['file']

    try:
        df = pd.read_csv(file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        return jsonify({"error": f"Could not read CSV: {exc}"}), 400

    if 'Review' not in df.columns:
        return jsonify({"error": "CSV must contain a 'Review' column"}), 400

    totals = {"positive": 0, "negative": 0, "neutral": 0}
    aspect_summary = {
        aspect: {"positive": 0, "negative": 0, "neutral": 0, "total": 0}
        for aspect in aspect_keywords
    }
    review_texts = []

    # pandas parses purely numeric cells as numbers; cast to str so that both
    # the tokenizer and the keyword matcher always receive text.
    for text in df['Review'].dropna().astype(str):
        sentiment = _classify_review(text)
        totals[sentiment] += 1
        review_texts.append(text)

        for aspect in detect_aspects(text):
            aspect_summary[aspect][sentiment] += 1
            aspect_summary[aspect]["total"] += 1

    wordcloud_path = None
    encoded_image = None
    all_text = " ".join(review_texts)
    if all_text.strip():
        try:
            wordcloud = WordCloud(
                width=800,
                height=400,
                background_color='white',
                font_path='src/urdu_font.ttf',
            ).generate(all_text)
        except ValueError:
            # WordCloud raises ValueError when no drawable word is left;
            # the analysis result is still valid without an image.
            wordcloud = None

        if wordcloud is not None:
            os.makedirs(UPLOAD_FOLDER, exist_ok=True)
            wordcloud_path = os.path.join(UPLOAD_FOLDER, f"wordcloud{uuid.uuid4()}.png")
            wordcloud.to_file(wordcloud_path)
            with open(wordcloud_path, "rb") as image_file:
                encoded_image = base64.b64encode(image_file.read()).decode('utf-8')

    app.logger.info(
        "analyze: positive=%d negative=%d neutral=%d wordcloud=%s",
        totals["positive"], totals["negative"], totals["neutral"], wordcloud_path,
    )

    payload = {
        "total_positive": totals["positive"],
        "total_negative": totals["negative"],
        "total_neutral": totals["neutral"],
        "aspect_summary": aspect_summary,
        "wordcloud_image_path": wordcloud_path,
    }
    # The dashboard in this file looks for this key before falling back to the
    # file path, so ship the encoded image instead of discarding it.
    if encoded_image is not None:
        payload["wordcloud_image_base64"] = encoded_image
    return jsonify(payload)
| |
|
import threading


def run_flask():
    """Serve the Flask API on all interfaces, port 5000 (blocks the caller)."""
    app.run(host="0.0.0.0", port=5000)


def _api_port_in_use(port=5000):
    """Return True when something already accepts connections on the local port."""
    import socket

    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
        probe.settimeout(1)
        return probe.connect_ex(("127.0.0.1", port)) == 0


# Streamlit re-executes the whole script on every interaction. Without this
# guard each rerun would start another server thread that fails to bind the
# port that the first one already owns.
if not _api_port_in_use():
    threading.Thread(target=run_flask).start()
| |
|
| | import streamlit as st |
| | import pandas as pd |
| | import plotly.express as px |
| | from io import BytesIO, StringIO |
| | from PIL import Image |
| | import random |
| | import requests |
| | import os |
| | import uuid |
| | import tempfile |
| |
|
# Endpoint of the local Flask API that performs the analysis.
API_URL = 'http://127.0.0.1:5000/analyze'

st.set_page_config(page_title="Multilingual Sentiment Analyzer", layout="wide")

# Global CSS that forces a light theme on the page.
_LIGHT_THEME_CSS = """
<style>
    /* Light theme override */
    html, body, .stApp {
        background-color: #ffffff !important;
        color: #000000 !important;
    }

    h1, h2, h3, h4, h5, h6, p, div, span, label, section, .markdown-text-container {
        color: #000000 !important;
    }

    .stFileUploader > div, .stFileUploader div div {
        background-color: #f9f9f9 !important;
        border: 1px solid #ccc !important;
        color: #000000 !important;
    }
</style>
"""
st.markdown(_LIGHT_THEME_CSS, unsafe_allow_html=True)

# Page title and introductory paragraph.
_PAGE_HEADER_HTML = """
<div style='text-align: center; padding-top: 10px;'>
  <h1 style='font-size: 40px;'>🌍 Multilingual Sentiment Analysis Dashboard</h1>
  <p style='font-size: 18px; color: #ccc; max-width: 720px; margin: auto;'>
  Upload a CSV to explore sentiment Report. With sentiment analysis, you can catch early signals, reduce risk, and validate market fit — even across global audiences.
  </p>
</div>
"""
st.markdown(_PAGE_HEADER_HTML, unsafe_allow_html=True)
| |
|
| | |
| | |
| | |
def load_dummy_data():
    """Return the built-in demo reviews as a one-column ``Review`` DataFrame."""
    demo_reviews = [
        "La livraison était très rapide et le service excellent.",
        "The product quality was terrible, I want a refund.",
        "Servicio al cliente fue amable pero no resolvieron mi problema.",
        "Das Produkt kam beschädigt an und der Support war unhöflich.",
        "Great value for the price, I'm very happy!",
        "Muy mal embalaje, pero el envío fue rápido.",
        "客服很好,但产品描述不准确。",
        "Perfect fit, just as described. Will buy again!",
    ]
    return pd.DataFrame({"Review": demo_reviews})
| |
|
| | |
| | |
| | |
# Name of the CSV column that holds the review text.
text_column = 'Review'
# When True the analysis goes through the Flask API; when False the random
# demo classifier is used instead.
enable_aspect = True

with st.expander("📁 Upload Your CSV File", expanded=True):
    uploaded_file = st.file_uploader("Choose a CSV file with reviews", type=["csv"])

    run_analysis = st.button("🚀 Run Analysis", type="primary")

# Pick the data to analyse; every failure path falls back to the demo data so
# the rest of the page always has a usable DataFrame.
if uploaded_file:
    try:
        df = pd.read_csv(uploaded_file)
    except Exception as e:
        st.error(f"Error reading CSV: {e}")
        df = load_dummy_data()
    else:
        if df.empty:
            st.error("The uploaded CSV file is empty.")
            df = load_dummy_data()
        elif text_column not in df.columns:
            # Without this column the analysis cannot run; report it here
            # rather than letting the API call fail later.
            st.error(f"The uploaded CSV has no '{text_column}' column.")
            df = load_dummy_data()
        else:
            st.success("✅ File uploaded successfully!")
else:
    st.info("Using built-in demo data. Upload a CSV to use your own.")
    df = load_dummy_data()

st.write("✅ App is running! Here's a sample:")
st.dataframe(df.head())

st.markdown("---")
st.markdown("### 🔎 Sentiment Analysis Results")
| |
|
| | |
def fake_sentiment_predict(text):
    """Return a random ``(label, confidence)`` pair; *text* is not inspected.

    The label is one of "Positive", "Negative" or "Neutral" and the confidence
    is drawn uniformly from [0.65, 0.99] and rounded to two decimals.
    """
    label = random.choice(["Positive", "Negative", "Neutral"])
    confidence = round(random.uniform(0.65, 0.99), 2)
    return label, confidence
| |
|
| | |
# Seed the session with the raw data so a report can always be generated.
if 'analyzed_df' not in st.session_state:
    st.session_state.analyzed_df = df.copy()

# Sentiment counters for this run; they stay at zero until an analysis fills them.
positive = negative = neutral = total = 0

# Demo path: used only when aspect analysis is switched off.
if run_analysis and not enable_aspect:
    predictions = [fake_sentiment_predict(review) for review in df[text_column]]
    sentiments, confidences = zip(*predictions)

    st.session_state.analyzed_df = df.copy()
    st.session_state.analyzed_df["Sentiment"] = sentiments
    st.session_state.analyzed_df["Confidence"] = confidences

    sentiment_counts = pd.Series(sentiments).value_counts()
    positive = sentiment_counts.get("Positive", 0)
    negative = sentiment_counts.get("Negative", 0)
    neutral = sentiment_counts.get("Neutral", 0)
    total = positive + negative + neutral
| |
|
def percent(part):
    """Format *part* as a whole-number percentage of the module-level ``total``.

    Returns "0%" when ``total`` is zero.
    """
    if not total:
        return "0%"
    return f"{round((part / total) * 100)}%"
| |
|
| | |
# Left column holds the summary cards, right column the sentiment pie chart.
card_col, chart_col = st.columns([1.2, 2])

with card_col:
    st.markdown("""
<style>
.card-container {
    max-width: 10px;
    margin: 0 auto;
}
.card {
    padding: 10px;
    border-radius: 12px;
    margin-bottom: 10px;
    font-size: 16px;
    font-weight: 500;
    line-height: 1.5;
    background-color: var(--secondary-background-color);
    border: 1px solid rgba(255,255,255,0.15);
    color: white;
    text-align: center;
}
.card strong {
    font-size: 20px;
    display: block;
    margin-top: 5px;
}
</style>
<div class="card-container">
""", unsafe_allow_html=True)

if total > 0:
    sentiment_names = ["Positive", "Negative", "Neutral"]
    with chart_col:
        fig = px.pie(
            names=sentiment_names,
            values=[positive, negative, neutral],
            # color_discrete_map is keyed by the values passed as ``color``;
            # without this argument the mapping below is ignored.
            color=sentiment_names,
            color_discrete_map={
                "Positive": "#66bb6a",
                "Negative": "#ef5350",
                "Neutral": "#42a5f5",
            },
        )
        fig.update_traces(
            textinfo='percent+label',
            hoverinfo='label+percent+value',
            pull=[0.03, 0.03, 0.03],
        )
        fig.update_layout(
            margin=dict(t=20, b=20, l=10, r=10),
            paper_bgcolor="rgba(0,0,0,0)",
            plot_bgcolor="rgba(0,0,0,0)",
            # The page CSS forces a white background and the chart background
            # is transparent, so white text would not be readable.
            font_color="black",
        )
        st.plotly_chart(fig, use_container_width=True)
| |
|
| | |
| | |
| | |
# Aspect report: send the reviews to the Flask API and render its answer.
if run_analysis and enable_aspect:
    st.subheader("📌 Aspect Sentiment Summary")

    with st.spinner("Fetching Aspect Report from API..."):
        try:
            # Serialize in memory: no temporary file that could be left behind
            # when the request raises, and no cleanup step to get wrong.
            unique_filename = f"temp_reviews_{uuid.uuid4()}.csv"
            csv_bytes = df.to_csv(index=False).encode("utf-8")
            response = requests.post(
                API_URL,
                files={'file': (unique_filename, csv_bytes, 'text/csv')},
                # Bound only the connection phase; the analysis itself may
                # take long, so the read phase is left without a limit.
                timeout=(10, None),
            )

            if response.status_code == 200:
                response_json = response.json()

                positive = response_json.get("total_positive", 0)
                negative = response_json.get("total_negative", 0)
                neutral = response_json.get("total_neutral", 0)
                total = positive + negative + neutral

                # Derive the fields the Excel summary expects when the API
                # did not send them.
                if "positive_percentage" not in response_json and total > 0:
                    response_json["positive_percentage"] = round((positive / total) * 100)
                    response_json["negative_percentage"] = round((negative / total) * 100)
                    response_json["neutral_percentage"] = round((neutral / total) * 100)

                if "total_reviews" not in response_json:
                    response_json["total_reviews"] = total

                # Keep the response for the download section.
                st.session_state.api_response_json = response_json

                with card_col:
                    st.markdown(f"""
<div class="card" style="border-color:#bfbfbf;">
📊 <strong>Total Reviews</strong>
{total}
</div>
<div class="card" style="border-color:#66bb6a;">
✅ <strong>{positive} Positive</strong>
{percent(positive)} of total
</div>
<div class="card" style="border-color:#ef5350;">
❗ <strong>{negative} Negative</strong>
{percent(negative)} of total
</div>
<div class="card" style="border-color:#42a5f5;">
😐 <strong>{neutral} Neutral</strong>
{percent(neutral)} of total
</div>
</div>
""", unsafe_allow_html=True)

                sentiment_names = ["Positive", "Negative", "Neutral"]
                sentiment_colors = {
                    "Positive": "#66bb6a",
                    "Negative": "#ef5350",
                    "Neutral": "#42a5f5",
                }

                with chart_col:
                    fig = px.pie(
                        names=sentiment_names,
                        values=[positive, negative, neutral],
                        # color_discrete_map is keyed by the values passed as
                        # ``color``; without it the mapping is ignored.
                        color=sentiment_names,
                        color_discrete_map=sentiment_colors,
                    )
                    fig.update_traces(
                        textinfo='percent+label',
                        hoverinfo='label+percent+value',
                        pull=[0.03, 0.03, 0.03],
                    )
                    fig.update_layout(
                        margin=dict(t=20, b=20, l=10, r=10),
                        paper_bgcolor="rgba(0,0,0,0)",
                        plot_bgcolor="rgba(0,0,0,0)",
                        # The page CSS forces a white background, so white
                        # chart text would not be readable.
                        font_color="black",
                    )
                    st.plotly_chart(fig, use_container_width=True)

                # Per-review table used for the Excel export.
                if "review_details" in response_json:
                    st.session_state.analyzed_df = pd.DataFrame(response_json["review_details"])
                else:
                    st.session_state.analyzed_df = df.copy()

                    if "sentiments" in response_json:
                        st.session_state.analyzed_df["Sentiment"] = response_json["sentiments"]

                    optional_columns = {"confidences": "Confidence", "languages": "Language"}
                    for key, column_name in optional_columns.items():
                        if key in response_json:
                            st.session_state.analyzed_df[column_name] = response_json[key]

                # One row per aspect; a missing summary is treated as empty.
                aspect_rows = [
                    {
                        "Aspect": aspect,
                        "Positive": values["positive"],
                        "Negative": values["negative"],
                        "Neutral": values["neutral"],
                        "Total": values["total"],
                    }
                    for aspect, values in response_json.get("aspect_summary", {}).items()
                ]
                aspect_df = pd.DataFrame(aspect_rows)

                if not aspect_df.empty and aspect_df["Total"].sum() > 0:
                    st.session_state.aspect_dataframe = aspect_df
                    st.dataframe(aspect_df)

                    # Long format: one row per (aspect, sentiment) pair.
                    melted = aspect_df.melt(
                        id_vars="Aspect",
                        value_vars=["Positive", "Negative", "Neutral"],
                        var_name="Sentiment",
                        value_name="Count",
                    )

                    col1, col2 = st.columns([4, 2])

                    with col1:
                        st.markdown("### 📊 Sentiment by Aspect")
                        bar_chart = px.bar(
                            melted,
                            x="Aspect",
                            y="Count",
                            color="Sentiment",
                            barmode="group",
                            title=None,
                            color_discrete_map=sentiment_colors,
                        )
                        bar_chart.update_layout(
                            paper_bgcolor="rgba(0,0,0,0)",
                            plot_bgcolor="rgba(0,0,0,0)",
                            # Dark text and grid lines so they stay visible on
                            # the white page.
                            font_color="black",
                            xaxis=dict(gridcolor="rgba(0,0,0,0.1)"),
                            yaxis=dict(gridcolor="rgba(0,0,0,0.1)"),
                        )
                        st.plotly_chart(bar_chart, use_container_width=True)

                    with col2:
                        st.markdown("### 🌀 Review Keywords")

                        if "wordcloud_image_base64" in response_json:
                            import base64
                            st.markdown("<div style='padding-top:60px'></div>", unsafe_allow_html=True)
                            st.image(
                                BytesIO(base64.b64decode(response_json["wordcloud_image_base64"])),
                                caption="Keyword Cloud",
                                use_container_width=True,
                            )
                        else:
                            try:
                                wordcloud_path = response_json.get("wordcloud_image_path")
                                if wordcloud_path and os.path.exists(wordcloud_path):
                                    image = Image.open(wordcloud_path)
                                    st.markdown("<div style='padding-top:60px'></div>", unsafe_allow_html=True)
                                    st.image(image, caption="Keywords", use_container_width=True)
                                elif os.path.exists("wordcloud.jpg"):
                                    # Fallback to an image in the working directory.
                                    image = Image.open("wordcloud.jpg")
                                    st.markdown("<div style='padding-top:60px'></div>", unsafe_allow_html=True)
                                    st.image(image, caption="Keywords", use_container_width=True)
                            except Exception as e:
                                st.warning(f"⚠ Word cloud image not found: {e}")
                else:
                    st.info("No aspects detected in the reviews.")
            else:
                st.error(f"API Error: {response.status_code} - {response.text}")
        except Exception as e:
            # Page-level boundary: show the failure instead of aborting the script.
            st.error(f"API call failed: {e}")
            import traceback
            st.code(traceback.format_exc(), language="python")
| |
|
| |
|
| | |
| | |
| | |
# Download section: offered once an analysis has been requested in this run.
if run_analysis or total > 0:
    st.subheader("📥 Download Analyzed File")

    def generate_excel_report(df, aspect_data=None, response_json=None):
        """Build the Excel report and return it as bytes.

        Args:
            df: Per-review table, written to the ``Sentiment_Report`` sheet.
            aspect_data: Optional aspect table; written to ``Aspect_Analysis``
                when it is not None and not empty.
            response_json: Optional API response; when truthy a ``Summary``
                sheet is written, plus ``Review_Details`` when the response
                has a ``review_details`` entry.

        Returns:
            The content of the generated ``.xlsx`` workbook.
        """
        buffer = BytesIO()

        with pd.ExcelWriter(buffer, engine='xlsxwriter') as writer:
            # Sheet name -> header labels, filled in as each sheet is written.
            header_columns = {}

            df.to_excel(writer, index=False, sheet_name='Sentiment_Report')
            header_columns['Sentiment_Report'] = df.columns

            if aspect_data is not None and not aspect_data.empty:
                aspect_data.to_excel(writer, index=False, sheet_name='Aspect_Analysis')
                header_columns['Aspect_Analysis'] = aspect_data.columns

            if response_json:
                summary_data = {
                    'Metric': ['Total Reviews', 'Positive', 'Negative', 'Neutral'],
                    'Count': [
                        response_json.get('total_reviews', 0),
                        response_json.get('total_positive', 0),
                        response_json.get('total_negative', 0),
                        response_json.get('total_neutral', 0),
                    ],
                    'Percentage': [
                        '100%',
                        f"{response_json.get('positive_percentage', 0)}%",
                        f"{response_json.get('negative_percentage', 0)}%",
                        f"{response_json.get('neutral_percentage', 0)}%",
                    ],
                }
                pd.DataFrame(summary_data).to_excel(writer, index=False, sheet_name='Summary')
                header_columns['Summary'] = summary_data.keys()

                if 'review_details' in response_json:
                    details_df = pd.DataFrame(response_json['review_details'])
                    details_df.to_excel(writer, index=False, sheet_name='Review_Details')
                    header_columns['Review_Details'] = details_df.columns

            header_format = writer.book.add_format({
                'bold': True,
                'text_wrap': True,
                'valign': 'top',
                'border': 1,
            })

            for sheet_name, worksheet in writer.sheets.items():
                columns = header_columns.get(sheet_name)
                if columns is None:
                    continue

                # Rewrite the header row with the custom format.
                for col_num, value in enumerate(columns):
                    worksheet.write(0, col_num, value, header_format)

                try:
                    worksheet.autofit()
                except AttributeError:
                    # No autofit() on this worksheet object: size each column
                    # from its header text instead.
                    for col_num, value in enumerate(columns):
                        worksheet.set_column(col_num, col_num, max(10, len(str(value)) + 2))

        return buffer.getvalue()

    # Make sure both keys exist before they are read below.
    for state_key in ('api_response_json', 'aspect_dataframe'):
        if state_key not in st.session_state:
            st.session_state[state_key] = None

    # Carry this run's API results into the session when they were produced.
    if run_analysis and enable_aspect and 'response_json' in locals():
        st.session_state.api_response_json = response_json
        if 'aspect_df' in locals() and not aspect_df.empty:
            st.session_state.aspect_dataframe = aspect_df

    report_bytes = generate_excel_report(
        st.session_state.analyzed_df,
        st.session_state.aspect_dataframe,
        st.session_state.api_response_json,
    )
    st.download_button(
        label="📥 Download Results as Excel",
        data=report_bytes,
        file_name="sentiment_analysis_report.xlsx",
        mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    )
| |
|
| | |
# Page footer.
_FOOTER_HTML = """
<div style="text-align: center; margin-top: 50px; padding: 20px; color: #888; font-size: 14px;">
  <p>Multilingual Sentiment Analysis Dashboard | Made with Streamlit</p>
</div>
"""
st.markdown(_FOOTER_HTML, unsafe_allow_html=True)