# sentiment_analyzer.py
# Simple sentiment analyzer that doesn't require PyTorch

import os
import random
import re

import pandas as pd

# Texts shorter than this (after stripping whitespace) are always "neutral".
_MIN_TEXT_LENGTH = 15

# Sentiment lexicons (Malay and English). Entries may be multi-word phrases.
_POSITIVE_WORDS = (
    "baik", "bagus", "hebat", "cantik", "indah", "suka", "gembira", "senang",
    "setuju", "betul", "benar", "berkesan", "berjaya", "cemerlang", "positif",
    "good", "great", "excellent", "amazing", "wonderful", "happy", "like",
    "love", "agree", "correct", "true", "effective", "successful", "positive",
)
_NEGATIVE_WORDS = (
    "buruk", "teruk", "hodoh", "benci", "marah", "sedih", "kecewa", "susah",
    "tidak setuju", "salah", "palsu", "gagal", "negatif", "masalah", "bahaya",
    "bad", "terrible", "ugly", "hate", "angry", "sad", "disappointed",
    "difficult", "disagree", "wrong", "false", "fail", "negative", "problem",
    "dangerous",
)
_POSITIVE_SET = frozenset(_POSITIVE_WORDS)
_NEGATIVE_SET = frozenset(_NEGATIVE_WORDS)

# Numeric codes for the sentiment labels, written to the `sentiment_value`
# column for compatibility with the rest of the system.
_SENTIMENT_VALUES = {
    "neutral": 0,
    "positive": 1,
    "negative": 2,
}


def _term_regex(term):
    """Return a regex fragment matching *term*, allowing any whitespace run between its words."""
    return r"\s+".join(re.escape(part) for part in term.split())


# A single alternation over the whole lexicon, longest terms first. Because
# regex matches never overlap, a phrase such as "tidak setuju" consumes the
# "setuju" inside it, so a negated phrase is not also counted as positive.
_TERM_PATTERN = re.compile(
    r"\b(?:"
    + "|".join(
        _term_regex(term)
        for term in sorted(
            _POSITIVE_WORDS + _NEGATIVE_WORDS,
            key=lambda term: (-len(term), term),
        )
    )
    + r")\b"
)


def _default_output_path(csv_path):
    """Return *csv_path* with `_sentiment` inserted before its extension (`.csv` if it has none)."""
    root, ext = os.path.splitext(os.fspath(csv_path))
    return f"{root}_sentiment{ext or '.csv'}"


def simple_sentiment_analysis(text):
    """
    A very simple rule-based sentiment analyzer for demonstration purposes.

    Each lexicon term counts at most once, no matter how often it occurs.
    Terms are matched case-insensitively on word boundaries, and a word that
    is part of a matched longer phrase (e.g. "setuju" in "tidak setuju") is
    not counted separately.

    Args:
        text: The text to classify. Anything that is not a string (None,
            NaN, numbers) is treated as having no usable content.

    Returns:
        A `(label, confidence)` tuple. `label` is "neutral", "positive" or
        "negative". `confidence` is 0.5 for neutral; otherwise it is 0.5 plus
        0.1 per distinct term of margin, capped at 1.0 and rounded to
        4 decimals.
    """
    # Missing, non-string, or very short input carries too little signal.
    if not isinstance(text, str) or len(text.strip()) < _MIN_TEXT_LENGTH:
        return "neutral", 0.5

    # Collapse internal whitespace so "tidak   setuju" maps to its lexicon entry.
    found_terms = {
        " ".join(match.group().split())
        for match in _TERM_PATTERN.finditer(text.lower())
    }
    positive_count = len(found_terms & _POSITIVE_SET)
    negative_count = len(found_terms & _NEGATIVE_SET)

    if positive_count == negative_count:
        return "neutral", 0.5

    sentiment = "positive" if positive_count > negative_count else "negative"
    margin = abs(positive_count - negative_count)
    confidence = 0.5 + min(0.5, margin / 10)
    return sentiment, round(confidence, 4)


def run(csv_path, sentiment_output_path=None):
    """
    Runs sentiment analysis on combined comment + post text from the input CSV.
    Saves the result (with sentiment + confidence columns) to a new CSV.

    The input must contain `comment_text` and `post_text` columns. The output
    keeps every input column and adds `combined_text`, `sentiment`,
    `confidence` and `sentiment_value`.

    Args:
        csv_path: Path of the CSV file to read.
        sentiment_output_path: Where to write the result. When omitted, the
            input path with `_sentiment` inserted before the extension is
            used (e.g. `data.csv` -> `data_sentiment.csv`), so the input file
            is never overwritten.

    Raises:
        KeyError: If `comment_text` or `post_text` is missing from the CSV.
    """
    print(f"[📄] Reading dataset: {csv_path}")
    df = pd.read_csv(csv_path)

    # Combine comment and post text into a single field. Columns are coerced
    # to str so that ones pandas parsed as numeric still concatenate.
    comment_text = df['comment_text'].fillna('').astype(str)
    post_text = df['post_text'].fillna('').astype(str)
    df['combined_text'] = (comment_text + ". " + post_text).str.strip()

    print("[🔍] Running simple sentiment classification...")
    results = [simple_sentiment_analysis(text) for text in df['combined_text']]

    # Add results to DataFrame
    df['sentiment'] = [label for label, _ in results]
    df['confidence'] = [score for _, score in results]

    # Map sentiments to numeric values for compatibility with the rest of the system
    df['sentiment_value'] = df['sentiment'].map(_SENTIMENT_VALUES)

    # Determine the output path dynamically if not provided
    if not sentiment_output_path:
        sentiment_output_path = _default_output_path(csv_path)

    df.to_csv(sentiment_output_path, index=False)
    print(f"[💾] Sentiment analysis completed. Output saved to: {sentiment_output_path}")