|
|
|
|
|
|
|
|
|
|
|
import pandas as pd |
|
|
import re |
|
|
import random |
|
|
import os |
|
|
|
|
|
# Malay and English cue words. Matching is whole-word and case-insensitive
# (the input text is lower-cased before matching).
_POSITIVE_WORDS = (
    "baik", "bagus", "hebat", "cantik", "indah", "suka", "gembira", "senang",
    "setuju", "betul", "benar", "berkesan", "berjaya", "cemerlang", "positif",
    "good", "great", "excellent", "amazing", "wonderful", "happy", "like", "love",
    "agree", "correct", "true", "effective", "successful", "positive",
)

_NEGATIVE_WORDS = (
    "buruk", "teruk", "hodoh", "benci", "marah", "sedih", "kecewa", "susah",
    "tidak setuju", "salah", "palsu", "gagal", "negatif", "masalah", "bahaya",
    "bad", "terrible", "ugly", "hate", "angry", "sad", "disappointed", "difficult",
    "disagree", "wrong", "false", "fail", "negative", "problem", "dangerous",
)


def _compile_whole_word(term):
    """Compile a pattern that matches *term* only as a whole word or phrase."""
    return re.compile(r'\b' + re.escape(term) + r'\b')


# Compiled once at import time instead of on every call.
_POSITIVE_PATTERNS = tuple(_compile_whole_word(w) for w in _POSITIVE_WORDS)
_NEGATIVE_PATTERNS = tuple(_compile_whole_word(w) for w in _NEGATIVE_WORDS)
# Multi-word negative phrases (e.g. "tidak setuju") can embed a positive word
# ("setuju"); they are blanked out before positive words are counted.
_NEGATIVE_PHRASE_PATTERNS = tuple(
    _compile_whole_word(w) for w in _NEGATIVE_WORDS if " " in w
)


def simple_sentiment_analysis(text):
    """
    A very simple rule-based sentiment analyzer for demonstration purposes.

    Counts how many distinct positive and negative cue words occur in the
    text (each word is counted at most once) and labels the text by whichever
    count is larger.

    Args:
        text: The text to classify. Non-string values (``None``, NaN, ...)
            and texts shorter than 15 characters after stripping are
            treated as neutral.

    Returns:
        A ``(sentiment, confidence)`` tuple. ``sentiment`` is one of
        ``"neutral"``, ``"positive"`` or ``"negative"``. ``confidence`` is
        0.5 for neutral, otherwise 0.5 plus 0.1 per word of margin between
        the two counts, capped at 1.0 and rounded to 4 decimals.
    """
    # Missing values read from a CSV arrive as NaN floats, which are truthy
    # and have no .strip(); treat any non-string as "no text".
    if not isinstance(text, str) or len(text.strip()) < 15:
        return "neutral", 0.5

    text = text.lower()

    negative_count = sum(1 for pattern in _NEGATIVE_PATTERNS if pattern.search(text))

    # Remove negative phrases so the positive word inside them is not also
    # counted (otherwise "tidak setuju" would cancel itself out to neutral).
    remainder = text
    for pattern in _NEGATIVE_PHRASE_PATTERNS:
        remainder = pattern.sub(" ", remainder)

    positive_count = sum(1 for pattern in _POSITIVE_PATTERNS if pattern.search(remainder))

    margin = positive_count - negative_count
    if margin == 0:
        return "neutral", 0.5

    sentiment = "positive" if margin > 0 else "negative"
    confidence = 0.5 + min(0.5, abs(margin) / 10)
    return sentiment, round(confidence, 4)
|
|
|
|
|
def run(csv_path, sentiment_output_path=None):
    """
    Runs sentiment analysis on combined comment + post text from the input CSV.

    Reads ``csv_path``, joins the ``comment_text`` and ``post_text`` columns
    into a ``combined_text`` column, classifies each row with
    ``simple_sentiment_analysis`` and writes the result to a new CSV with the
    added columns ``combined_text``, ``sentiment``, ``confidence`` and
    ``sentiment_value`` (0 = neutral, 1 = positive, 2 = negative).

    Args:
        csv_path: Path of the input CSV file.
        sentiment_output_path: Where to write the result. When omitted, the
            output goes next to the input as ``<name>_sentiment<ext>``.

    Raises:
        KeyError: If the input lacks the ``comment_text`` or ``post_text``
            column.
    """
    print(f"[📄] Reading dataset: {csv_path}")
    df = pd.read_csv(csv_path)

    missing = [col for col in ('comment_text', 'post_text') if col not in df.columns]
    if missing:
        raise KeyError(f"Input CSV is missing required column(s): {missing}")

    # fillna first so missing values become '' rather than the string "nan";
    # astype(str) keeps the concatenation working for non-text columns.
    comment_text = df['comment_text'].fillna('').astype(str)
    post_text = df['post_text'].fillna('').astype(str)
    df['combined_text'] = (comment_text + ". " + post_text).str.strip()

    print("[🔍] Running simple sentiment classification...")
    results = [simple_sentiment_analysis(text) for text in df['combined_text']]
    df['sentiment'] = [label for label, _ in results]
    df['confidence'] = [score for _, score in results]

    sentiment_map = {
        "neutral": 0,
        "positive": 1,
        "negative": 2,
    }
    df['sentiment_value'] = df['sentiment'].map(sentiment_map)

    if not sentiment_output_path:
        # Split off only the final extension. A plain str.replace(".csv", ...)
        # returns the input path unchanged when it has no lowercase ".csv"
        # (overwriting the input file) and also rewrites ".csv" occurring in
        # directory names.
        base, ext = os.path.splitext(os.fspath(csv_path))
        sentiment_output_path = f"{base}_sentiment{ext or '.csv'}"

    df.to_csv(sentiment_output_path, index=False)
    print(f"[💾] Sentiment analysis completed. Output saved to: {sentiment_output_path}")
|
|
|
|
|
|