| | import pandas as pd |
| | from typing import Tuple |
| | from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline |
| |
|
| | |
# Model identifiers handed to `from_pretrained` by load_models().
model_name_cls = "sdbrgo/roberta-tagalog-sentiment-multiclass-classifier"
model_name_reg = "sdbrgo/roberta-tagalog-sentiment-intensity-regressor"

# Pipeline singletons. They start out empty and are filled in by
# load_models() the first time it runs, so importing this module stays cheap.
clf_pipeline = None
reg_pipeline = None
| |
|
def load_models():
    """Build the classification and regression pipelines if they are missing.

    Each of the module-level ``clf_pipeline`` / ``reg_pipeline`` globals is
    only constructed while it is still ``None``, so calling this function
    repeatedly does not rebuild a pipeline that already exists.
    """
    global clf_pipeline, reg_pipeline

    def _load_checkpoint(checkpoint):
        # Tokenizer first, then the model, both from the same checkpoint.
        tokenizer = AutoTokenizer.from_pretrained(checkpoint)
        model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
        return tokenizer, model

    if clf_pipeline is None:
        cls_tokenizer, cls_model = _load_checkpoint(model_name_cls)
        clf_pipeline = pipeline(
            "text-classification",
            model=cls_model,
            tokenizer=cls_tokenizer,
            top_k=1,
            device=-1,
        )

    if reg_pipeline is None:
        reg_tokenizer, reg_model = _load_checkpoint(model_name_reg)
        # The regressor is wrapped in a "feature-extraction" pipeline;
        # transform_sentiments reads the first value of its output.
        reg_pipeline = pipeline(
            "feature-extraction",
            model=reg_model,
            tokenizer=reg_tokenizer,
            device=-1,
        )
| | |
def transform_sentiments(df: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Attach a sentiment label, its confidence and an intensity to each text.

    Args:
        df: Frame holding a ``text`` column. It is not modified; all work
            happens on a copy.

    Returns:
        A ``(processed_df, sorted_sentiments)`` pair:

        * ``processed_df`` - copy of ``df`` whose ``text`` column has missing
          values replaced by ``""`` and is cast to ``str``, plus the new
          ``label``, ``sentiment_confidence`` and ``intensity`` columns.
        * ``sorted_sentiments`` - the ``text``, ``label`` and ``intensity``
          columns of ``processed_df`` ordered by descending ``intensity``.

    Raises:
        ValueError: If ``df`` has no ``text`` column.
    """
    # Validate before touching the models so a malformed frame fails fast
    # instead of paying for a model load first.
    if "text" not in df.columns:
        raise ValueError("Input DataFrame must contain a 'text' column")

    processed_df = df.copy()
    processed_df["text"] = processed_df["text"].fillna("").astype(str)
    texts = processed_df["text"].tolist()

    if not texts:
        # Nothing to score: hand back correctly shaped, empty results rather
        # than invoking the pipelines on an empty batch.
        processed_df["label"] = pd.Series(
            [], index=processed_df.index, dtype="object"
        )
        processed_df["sentiment_confidence"] = pd.Series(
            [], index=processed_df.index, dtype="float64"
        )
        processed_df["intensity"] = pd.Series(
            [], index=processed_df.index, dtype="float64"
        )
        return processed_df, processed_df[["text", "label", "intensity"]].copy()

    load_models()

    # truncation=True lets the tokenizer clip over-long texts instead of
    # handing the model more tokens than it accepts.
    cls_outputs = clf_pipeline(texts, truncation=True)

    # Each per-text result may be a dict or a list of dicts (the classifier
    # is built with top_k=1); keep the first entry in the latter case.
    top_predictions = [
        out[0] if isinstance(out, list) else out for out in cls_outputs
    ]
    processed_df["label"] = [pred["label"] for pred in top_predictions]
    processed_df["sentiment_confidence"] = [
        pred["score"] for pred in top_predictions
    ]

    reg_outputs = reg_pipeline(texts, truncation=True)

    # The first value of the first row of each output is used as the score
    # (presumably the regressor's single output value - confirm against the
    # model card).
    processed_df["intensity"] = [float(out[0][0]) for out in reg_outputs]

    sorted_sentiments = processed_df.sort_values("intensity", ascending=False)
    sorted_sentiments = sorted_sentiments[["text", "label", "intensity"]]

    return processed_df, sorted_sentiments
| |
|
def compute_sentiment_metrics(
    processed_df: pd.DataFrame,
    feedback_volume: float,
    multiplier_cap: float = 0.7,
) -> dict:
    """Summarise label shares and intensity balance of scored feedback.

    Args:
        processed_df: Frame with a ``label`` column (``LABEL_0``/``LABEL_1``/
            ``LABEL_2``, or the already normalised ``neg``/``neu``/``pos``)
            and a numeric ``intensity`` column. It is not modified.
        feedback_volume: Denominator for the raw label shares (presumably
            the total number of feedback items - confirm with the caller).
        multiplier_cap: Currently unused; kept so existing callers that pass
            it keep working.

    Returns:
        When ``feedback_volume`` is ``0``: ``{"w_pos": 0.0, "w_neg": 0.0}``.
        NOTE(review): this shape differs from the regular result below and is
        preserved as-is for backward compatibility.

        Otherwise a dict with three entries:

        * ``"Raw Sentiment Ratios"`` - share of each of ``neg``/``neu``/``pos``
          relative to ``feedback_volume``, rounded to two decimals.
        * ``"Sentiment Intensity Ratios"`` - sentence giving the percentage of
          the summed ``neg`` + ``pos`` intensity that is positive / negative.
        * ``"SPA (Sentiment-Participation Assymetry)"`` - dict with the
          ``"Value"`` (positive minus negative intensity share, scaled to
          the -1..1 range when intensities are non-negative) and a
          ``"Description"`` of its sign.
    """
    if feedback_volume == 0:
        return {"w_pos": 0.0, "w_neg": 0.0}

    label_map = {
        "LABEL_0": "neg",
        "LABEL_1": "neu",
        "LABEL_2": "pos",
    }

    df = processed_df[["label", "intensity"]].copy()
    # Unknown labels are passed through unchanged, so a frame whose labels
    # are already "neg"/"neu"/"pos" is not wiped out to NaN.
    df["label"] = df["label"].map(lambda raw: label_map.get(raw, raw))

    label_counts = df["label"].value_counts()
    raw_sentiment_ratios = {
        label: float(round(label_counts.get(label, 0) / feedback_volume, 2))
        for label in ("neg", "neu", "pos")
    }

    # Only the polar classes take part in the intensity balance.
    summed_by_label = df.groupby("label")["intensity"].sum().to_dict()
    intensity_sums = {
        label: summed_by_label.get(label, 0.0) for label in ("neg", "pos")
    }
    total_intensity = sum(intensity_sums.values())

    if total_intensity == 0:
        intensity_ratios = {label: 0.0 for label in intensity_sums}
    else:
        intensity_ratios = {
            label: float(round(intensity_sums[label] / total_intensity * 100, 2))
            for label in intensity_sums
        }

    pos_share = intensity_ratios["pos"]
    neg_share = intensity_ratios["neg"]
    final_spa = round((pos_share - neg_share) / 100, 2)

    if final_spa > 0:
        comparison, dominant = "> 0", "positive"
    elif final_spa < 0:
        comparison, dominant = "< 0", "negative"
    else:
        comparison, dominant = "= 0", "balanced"

    spa_meta = {
        "Value": final_spa,
        "Description": (
            f"SPA {comparison} indicates that {dominant} sentiment dominates."
        ),
    }

    # The shares are bound to locals first: reusing double quotes inside a
    # double-quoted f-string only parses on Python 3.12+.
    return {
        "Raw Sentiment Ratios": raw_sentiment_ratios,
        "Sentiment Intensity Ratios": (
            f"{pos_share}% of expressed intensity is positive, "
            f"{neg_share}% is negative."
        ),
        "SPA (Sentiment-Participation Assymetry)": spa_meta,
    }