testbed / ai_api /library /sentiment_analyzer.py
xspinners's picture
initial
090987a
# sentiment_analyzer.py
# Simple sentiment analyzer that doesn't require PyTorch
import pandas as pd
import re
import random
import os
# Positive and negative lexicons (Malay and English) used by
# simple_sentiment_analysis. Kept at module level so they are built once.
_POSITIVE_WORDS = frozenset([
    "baik", "bagus", "hebat", "cantik", "indah", "suka", "gembira", "senang",
    "setuju", "betul", "benar", "berkesan", "berjaya", "cemerlang", "positif",
    "good", "great", "excellent", "amazing", "wonderful", "happy", "like", "love",
    "agree", "correct", "true", "effective", "successful", "positive",
])
_NEGATIVE_WORDS = frozenset([
    "buruk", "teruk", "hodoh", "benci", "marah", "sedih", "kecewa", "susah",
    "tidak setuju", "salah", "palsu", "gagal", "negatif", "masalah", "bahaya",
    "bad", "terrible", "ugly", "hate", "angry", "sad", "disappointed", "difficult",
    "disagree", "wrong", "false", "fail", "negative", "problem", "dangerous",
])
# A single whole-word alternation over both lexicons, longest entry first.
# Because regex matches do not overlap, a multi-word phrase such as
# "tidak setuju" is consumed as one match and its inner word "setuju" is not
# additionally counted as a positive hit.
_LEXICON_PATTERN = re.compile(
    r'\b(?:'
    + '|'.join(
        re.escape(entry)
        for entry in sorted(
            _POSITIVE_WORDS | _NEGATIVE_WORDS,
            key=lambda entry: (-len(entry), entry),
        )
    )
    + r')\b'
)


def simple_sentiment_analysis(text):
    """
    Classify text with a very simple rule-based (lexicon) sentiment analyzer.

    The text is lower-cased and scanned for whole-word occurrences of the
    positive and negative lexicon entries. Each distinct entry counts at most
    once, no matter how often it appears. The label follows whichever side has
    more distinct hits.

    Args:
        text: The text to classify. Anything that is not a string (None, NaN,
            numbers, ...) or that has fewer than 15 characters after stripping
            whitespace is treated as neutral.

    Returns:
        A ``(sentiment, confidence)`` tuple. ``sentiment`` is one of
        ``"neutral"``, ``"positive"`` or ``"negative"``. ``confidence`` is
        0.5 for neutral, otherwise 0.5 plus 0.1 per unit of difference between
        the two counts, capped at 1.0 and rounded to 4 decimals.
    """
    # Non-string input (e.g. a NaN from a DataFrame) and very short text carry
    # no usable signal; report neutral instead of raising.
    if not isinstance(text, str) or len(text.strip()) < 15:
        return "neutral", 0.5

    # Distinct lexicon entries present in the text.
    matched = set(_LEXICON_PATTERN.findall(text.lower()))
    positive_count = len(matched & _POSITIVE_WORDS)
    negative_count = len(matched & _NEGATIVE_WORDS)

    margin = positive_count - negative_count
    if margin == 0:
        return "neutral", 0.5

    sentiment = "positive" if margin > 0 else "negative"
    confidence = 0.5 + min(0.5, abs(margin) / 10)
    return sentiment, round(confidence, 4)
def run(csv_path, sentiment_output_path=None):
    """
    Run sentiment analysis on combined comment + post text from the input CSV.

    Reads ``csv_path``, joins the ``comment_text`` and ``post_text`` columns
    into a ``combined_text`` column, classifies each row with
    ``simple_sentiment_analysis`` and writes the DataFrame, extended with
    ``sentiment``, ``confidence`` and ``sentiment_value`` columns, to a new CSV.

    Args:
        csv_path: Path of the input CSV. It must contain ``comment_text`` and
            ``post_text`` columns.
        sentiment_output_path: Where to write the result. When omitted or
            empty, the output is placed next to the input as
            ``<input name without extension>_sentiment.csv``.
    """
    print(f"[📄] Reading dataset: {csv_path}")
    df = pd.read_csv(csv_path)

    # Combine comment and post text into a single field; missing values
    # become empty strings so the concatenation never yields NaN.
    df['combined_text'] = (
        df['comment_text'].fillna('') + ". " + df['post_text'].fillna('')
    ).str.strip()

    print("[🔍] Running simple sentiment classification...")
    results = [simple_sentiment_analysis(text) for text in df['combined_text']]

    # Add results to DataFrame
    df['sentiment'] = [label for label, _ in results]
    df['confidence'] = [score for _, score in results]

    # Map sentiments to numeric values for compatibility with the rest of the system
    sentiment_map = {
        "neutral": 0,
        "positive": 1,
        "negative": 2
    }
    df['sentiment_value'] = df['sentiment'].map(sentiment_map)

    # Determine the output path dynamically if not provided.
    # Only the final extension is swapped: a plain str.replace(".csv", ...)
    # would also rewrite ".csv" inside directory names and, for inputs without
    # a lowercase ".csv" suffix, return the input path unchanged so the result
    # would overwrite the source file.
    if not sentiment_output_path:
        stem, _ = os.path.splitext(os.fspath(csv_path))
        sentiment_output_path = f"{stem}_sentiment.csv"

    df.to_csv(sentiment_output_path, index=False)
    print(f"[💾] Sentiment analysis completed. Output saved to: {sentiment_output_path}")