Spaces:

xspinners
/

testbed

Runtime error

App Files Files Community

testbed / ai_api /library /sentiment_analyzer.py

xspinners

initial

090987a 8 months ago

raw

history blame contribute delete

3.39 kB

	# sentiment_analyzer.py
	# Simple sentiment analyzer that doesn't require PyTorch

	import pandas as pd
	import re
	import random
	import os

	def simple_sentiment_analysis(text):
	    """Classify *text* as positive, negative or neutral by keyword matching.

	    A rule-based analyzer for demonstration purposes. It lower-cases the
	    text, counts how many distinct positive and negative keywords (Malay
	    and English) occur as whole words, and compares the two counts.

	    Args:
	        text: The text to classify. Non-string values (e.g. ``None`` or a
	            pandas ``NaN``) and texts shorter than 15 characters after
	            stripping are treated as neutral.

	    Returns:
	        A ``(label, confidence)`` tuple. ``label`` is ``"positive"``,
	        ``"negative"`` or ``"neutral"``. ``confidence`` is 0.5 for neutral
	        results; otherwise it is 0.5 plus 0.1 per net keyword, capped at
	        1.0 and rounded to 4 decimals.
	    """
	    # Missing values and very short texts carry too little signal to score.
	    if not isinstance(text, str) or len(text.strip()) < 15:
	        return "neutral", 0.5

	    # Keyword lists are lower-case, so match against lower-cased text.
	    text = text.lower()

	    # Positive and negative keyword lists (Malay and English).
	    positive_words = (
	        "baik", "bagus", "hebat", "cantik", "indah", "suka", "gembira", "senang",
	        "setuju", "betul", "benar", "berkesan", "berjaya", "cemerlang", "positif",
	        "good", "great", "excellent", "amazing", "wonderful", "happy", "like", "love",
	        "agree", "correct", "true", "effective", "successful", "positive",
	    )

	    negative_words = (
	        "buruk", "teruk", "hodoh", "benci", "marah", "sedih", "kecewa", "susah",
	        "tidak setuju", "salah", "palsu", "gagal", "negatif", "masalah", "bahaya",
	        "bad", "terrible", "ugly", "hate", "angry", "sad", "disappointed", "difficult",
	        "disagree", "wrong", "false", "fail", "negative", "problem", "dangerous",
	    )

	    def whole_word(term):
	        """Build a regex that matches *term* only on word boundaries."""
	        return r'\b' + re.escape(term) + r'\b'

	    def count_present(terms, haystack):
	        """Count the distinct *terms* found in *haystack* (not occurrences)."""
	        return sum(1 for term in terms if re.search(whole_word(term), haystack))

	    negative_count = count_present(negative_words, text)

	    # A multi-word negative phrase can embed a positive keyword
	    # ("tidak setuju" contains "setuju"). Blank such phrases out before
	    # counting positives so a negated phrase is not scored on both sides.
	    positive_scope = text
	    for phrase in negative_words:
	        if " " in phrase:
	            positive_scope = re.sub(whole_word(phrase), " ", positive_scope)
	    positive_count = count_present(positive_words, positive_scope)

	    # Determine sentiment from whichever side has more distinct keywords.
	    if positive_count > negative_count:
	        sentiment = "positive"
	        confidence = 0.5 + min(0.5, (positive_count - negative_count) / 10)
	    elif negative_count > positive_count:
	        sentiment = "negative"
	        confidence = 0.5 + min(0.5, (negative_count - positive_count) / 10)
	    else:
	        sentiment = "neutral"
	        confidence = 0.5

	    return sentiment, round(confidence, 4)

	def run(csv_path, sentiment_output_path=None):
	    """Run sentiment analysis over a CSV of comments and posts.

	    Reads ``csv_path``, joins the ``comment_text`` and ``post_text`` columns
	    into a ``combined_text`` column, classifies each row with
	    ``simple_sentiment_analysis`` and writes the result to a new CSV with
	    added ``sentiment``, ``confidence`` and ``sentiment_value`` columns.

	    Args:
	        csv_path: Path of the input CSV. It must contain ``comment_text``
	            and ``post_text`` columns.
	        sentiment_output_path: Where to write the result. When omitted or
	            empty, ``_sentiment`` is inserted before the input file's
	            extension (``data.csv`` -> ``data_sentiment.csv``).

	    Returns:
	        None. The result is written to disk and progress is printed.
	    """
	    print(f"[📄] Reading dataset: {csv_path}")
	    df = pd.read_csv(csv_path)

	    # Combine comment and post text into a single field; missing values
	    # become empty strings so the concatenation never yields NaN.
	    df['combined_text'] = (
	        df['comment_text'].fillna('') + ". " + df['post_text'].fillna('')
	    ).str.strip()

	    print("[🔍] Running simple sentiment classification...")
	    results = [simple_sentiment_analysis(text) for text in df['combined_text']]

	    # Add results to DataFrame
	    df['sentiment'] = [label for label, _ in results]
	    df['confidence'] = [score for _, score in results]

	    # Map sentiments to numeric values for compatibility with the rest of the system
	    sentiment_map = {
	        "neutral": 0,
	        "positive": 1,
	        "negative": 2,
	    }
	    df['sentiment_value'] = df['sentiment'].map(sentiment_map)

	    # Derive the output path from the input path if not provided. Splitting
	    # off the extension (instead of str.replace on ".csv") guarantees the
	    # result differs from the input path, so the source file is never
	    # overwritten, and leaves ".csv" inside directory names untouched.
	    if not sentiment_output_path:
	        root, ext = os.path.splitext(os.fspath(csv_path))
	        sentiment_output_path = f"{root}_sentiment{ext or '.csv'}"

	    df.to_csv(sentiment_output_path, index=False)
	    print(f"[💾] Sentiment analysis completed. Output saved to: {sentiment_output_path}")