mysteriouskeyboard committed on
Commit
e82eb4a
·
verified ·
1 Parent(s): ea26e82

Upload 8 files

Browse files
Phases/compute.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import math
2
+
3
class ParticipationAdoptionIndex:
    """Compute the Participation Adoption Index (PAI) component dictionaries.

    Produces two groups of indicators from raw participation counts:
      * "Participation Dynamics" (PCR, SPI-A, SPI-B, PEG) -> components_1
      * "Reach and Equity" (PCI plus a group summary)     -> components_2
    """

    def __init__(self, num_participants, target_population, feedback_volume, w_pos, w_neg):
        # Raw inputs for the index. w_pos / w_neg are the weighted sentiment
        # ratios; they are stored for downstream use even though compute_pai
        # does not read them.
        self.num_participants = num_participants
        self.target_population = target_population
        self.feedback_volume = feedback_volume
        self.w_pos = w_pos
        self.w_neg = w_neg
        self.components_1 = {}
        self.components_2 = {}

    # ---------- helper functions ----------
    def _compute_pci(self, participants_by_group):
        """Herfindahl-style concentration index over group participation shares.

        Returns 0.0 for an empty/all-zero mapping; otherwise the sum of
        squared shares (1.0 = fully concentrated in one group).
        """
        total = sum(participants_by_group.values())
        if total == 0:
            return 0.0
        return sum((count / total) ** 2 for count in participants_by_group.values())

    def _normalize_components(self):
        """Scale components_1 values so their absolute values sum to 1.

        Fix: component numbers are stored under the "Value" key; the previous
        implementation looked up "value" and would have raised KeyError.
        """
        total = sum(abs(v["Value"]) for v in self.components_1.values())

        if total == 0:
            for k in self.components_1:
                if self.components_1[k]["Value"] is not None:
                    self.components_1[k]["Value"] = 0.0
        else:
            for k in self.components_1:
                self.components_1[k]["Value"] /= total

    def _sort_components(self):
        """Return components_1 ordered by descending absolute value.

        Fix: uses the "Value" key actually stored in the component dicts.
        """
        return dict(
            sorted(
                self.components_1.items(),
                key=lambda item: abs(item[1]["Value"]),
                reverse=True
            )
        )

    # ---------- public API ----------
    def compute_pai(self, participants_by_group=None):
        """Build and return (components_1, components_2).

        Parameters
        ----------
        participants_by_group : dict[str, int] | None
            Participant counts per group; None/empty yields PCI = 0.0.

        Returns
        -------
        tuple[dict, dict]
            ("Participation Dynamics" components, "Reach and Equity" components)
        """
        # Guard against zero denominators (empty rollout / no participants)
        # instead of raising ZeroDivisionError; ratios default to 0.0.
        if self.target_population > 0:
            # absolute counts
            spi_a_abs = self.target_population - self.num_participants
            spi_b_abs = self.target_population - spi_a_abs - self.feedback_volume

            # ----- 'Participation Dynamics' components -----
            pcr = self.num_participants / self.target_population
            spi_a = spi_a_abs / self.target_population
            spi_b = spi_b_abs / self.target_population
        else:
            pcr = spi_a = spi_b = 0.0

        # Expression gap is undefined without participants; treat as no gap.
        if self.num_participants > 0:
            peg = 1 - (self.feedback_volume / self.num_participants)
        else:
            peg = 0.0

        # ----- 'Reach and Equity' components -----
        pci = self._compute_pci(participants_by_group) if participants_by_group else 0.0
        # Inverse Herfindahl = "effective number of groups".
        effective_groups = 1.0 / pci if pci > 0 else 0.0
        effective_groups_int = int(math.floor(effective_groups))
        effective_groups_rounded = int(round(effective_groups))

        self.components_1 = {
            "PCR (Participation Coverage Ratio)": {"Value": pcr, "Description": "≈1 → Participation closely matches the target population. ≈0 → Participation reached very few of the intended population."},
            "SPI-A (Silent Participation Inference - Silent Non-Adoption)": {"Value": spi_a, "Description": "High → Large segments of the target population were not reached or excluded. Low → Minimal non-participation across the target population."},
            "SPI-B (Silent Participation Inference - Silent Adoption)": {"Value": spi_b, "Description": "High → Silent participation dominates. Low → Expressive participation dominates."},
            "PEG (Participation-to-Expression Gap)": {"Value": peg, "Description": "≈1 → High expression gap among participants. ≈0 → Minimal expression gap."}
        }

        self.components_2 = {
            "PCI (Participation Concentration Index)":
                {"Value": pci,
                 "Description": "≈1 → Participation is concentrated to certain groups. ≈0 → Participation is balanced across groups."
                 },
            "Group Summary":
                {"Minimum Participating Groups": f"At least {effective_groups_int} groups meaningfully participated.",
                 "Effective Participation Groups": f"Participation is roughly equivalent to {effective_groups_rounded} equally sized groups."
                 }
        }

        # Normalization is opt-in via _normalize_components(); not applied here.
        return self.components_1, self.components_2
Phases/ingestion.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import os
3
+ import warnings
4
+
5
def ingest_feedback(file):
    """Load citizen feedback from a CSV source.

    Tries UTF-8 first; on a UnicodeDecodeError falls back to latin-1
    (covers cp1252-style exports). Undecodable bytes are replaced.

    Returns
    -------
    tuple[DataFrame, int]
        The feedback rows and their count (feedback volume).
    """
    try:
        frame = pd.read_csv(file, encoding="utf-8", encoding_errors="replace")
    except UnicodeDecodeError:
        # Legacy-encoding fallback (or cp1252).
        frame = pd.read_csv(file, encoding="latin-1", encoding_errors="replace")

    return frame, len(frame)
14
+
15
def ingest_adoption(file):
    """Load adoption data and aggregate participant counts per group.

    Parameters
    ----------
    file : str | None
        Path to a CSV file that must contain a "group" column.

    Returns
    -------
    dict[str, int]
        Participant count per group; empty dict (with a warning) when the
        file is missing, unreadable, or empty.

    Raises
    ------
    ValueError
        If the CSV has no "group" column.
    """
    # --- Validation ---
    if file is None or file == "":
        warnings.warn("Adoption file is empty; participants_by_group will be empty")
        return {}

    # Fix: this branch previously reused the "file is empty" message,
    # which misreported a missing file as an empty one.
    if not os.path.exists(file):
        warnings.warn("Adoption file not found; participants_by_group will be empty")
        return {}

    try:
        adoption_df = pd.read_csv(file, encoding="utf-8", encoding_errors="replace")
    except UnicodeDecodeError:
        # Legacy-encoding fallback.
        adoption_df = pd.read_csv(file, encoding="latin-1", encoding_errors="replace")

    if adoption_df.empty:
        warnings.warn("Adoption file is empty; participants_by_group will be empty")
        return {}

    # handle missing required column
    # there must be a "group" column in the adoption dataset
    if "group" not in adoption_df.columns:
        raise ValueError("Adoption data must contain \"group\" column.")

    return adoption_df.groupby("group").size().to_dict()
Phases/interpret.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def assign_typology(components):
    """Bucket each PAI component into an alert level.

    Parameters
    ----------
    components : dict
        Mapping of component name -> {"Value": float, "Description": str},
        as produced by ParticipationAdoptionIndex.compute_pai.

    Returns
    -------
    dict
        {"High Alert": [...], "Medium Alert": [...], "Low Alert": [...]}
        where each entry carries Component, Value and Description.
    """
    # Thresholds are checked top-down; the first rule with value >= threshold
    # wins. Note PCR is inverted (high coverage = low alert).
    STATE_RULES = {
        "PCR (Participation Coverage Ratio)": [
            (0.5, "Low Alert"),
            (0.2, "Medium Alert"),
            (0.0, "High Alert")
        ],
        "SPI-A (Silent Participation Inference - Silent Non-Adoption)": [
            (0.7, "High Alert"),
            (0.3, "Medium Alert"),
            (0.0, "Low Alert")
        ],
        "SPI-B (Silent Participation Inference - Silent Adoption)": [
            (0.5, "High Alert"),
            (0.2, "Medium Alert"),
            (0.0, "Low Alert")
        ],
        "PEG (Participation-to-Expression Gap)": [
            (0.6, "High Alert"),
            (0.3, "Medium Alert"),
            (0.0, "Low Alert")
        ]
    }

    buckets = {
        "High Alert": [],
        "Medium Alert": [],
        "Low Alert": []
    }

    for component, data in components.items():
        # Fix: unknown components previously raised KeyError; skip them.
        rules = STATE_RULES.get(component)
        if rules is None:
            continue

        value = data["Value"]
        description = data.get("Description", "")

        # Fix: values below every threshold (e.g. a negative PEG when
        # feedback_volume > num_participants) previously fell through and
        # were silently dropped; default to the lowest-threshold state.
        state = rules[-1][1]
        for threshold, candidate_state in rules:
            if value >= threshold:
                state = candidate_state
                break

        buckets[state].append({
            "Component": component,
            "Value": value,
            "Description": description
        })

    return buckets
Phases/sentiment_modeling.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import pandas as pd
from typing import Tuple
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline

# --- globals ---
# Lazily-initialized Hugging Face pipelines; populated by load_models()
# on first use so importing this module stays cheap.
clf_pipeline = None  # text-classification pipeline (sentiment classifier)
reg_pipeline = None  # feature-extraction pipeline used as intensity regressor

# Hub model IDs for the Tagalog sentiment models.
model_name_cls = "sdbrgo/roberta-tagalog-sentiment-multiclass-classifier"
model_name_reg = "sdbrgo/roberta-tagalog-sentiment-intensity-regressor"
11
+
12
def load_models():
    """Lazily build the classification and regression pipelines.

    Idempotent: each global pipeline is constructed only on the first call
    and reused afterwards. Both run on CPU (device=-1).
    """
    global clf_pipeline, reg_pipeline

    if clf_pipeline is None:
        clf_pipeline = pipeline(
            "text-classification",
            model=AutoModelForSequenceClassification.from_pretrained(model_name_cls),
            tokenizer=AutoTokenizer.from_pretrained(model_name_cls),
            top_k=1,
            device=-1,
        )

    if reg_pipeline is None:
        reg_pipeline = pipeline(
            "feature-extraction",
            model=AutoModelForSequenceClassification.from_pretrained(model_name_reg),
            tokenizer=AutoTokenizer.from_pretrained(model_name_reg),
            device=-1,
        )
35
+
36
def transform_sentiments(df):
    """Attach sentiment columns to a feedback DataFrame.

    Adds columns to a copy of *df*:
      * "label"                 — raw classifier label (e.g. LABEL_0/1/2)
      * "sentiment_confidence"  — classifier score for that label
      * "intensity"             — scalar from the regression pipeline

    Raises
    ------
    ValueError
        If *df* has no "text" column.
    """
    load_models()

    processed_df = df.copy()

    # Feature Validation: ensures 'text' column exists
    if "text" not in processed_df.columns:
        raise ValueError("Input DataFrame must contain a 'text' column")

    # Force cast to string and handle NaNs so the tokenizer never sees floats.
    processed_df["text"] = processed_df["text"].fillna("").astype(str)

    texts = processed_df["text"].tolist()

    # --- sentiment classification ---
    # Fix: removed leftover debug print of the first pipeline output.
    cls_outputs = clf_pipeline(texts)

    # With top_k=1 the pipeline may return a nested list per text; unwrap it.
    cls_outputs = [o[0] if isinstance(o, list) else o for o in cls_outputs]

    processed_df["label"] = [o["label"] for o in cls_outputs]
    processed_df["sentiment_confidence"] = [o["score"] for o in cls_outputs]

    # --- sentiment intensity regression ---
    # NOTE(review): feature-extraction returns hidden states; o[0][0] is taken
    # as the scalar intensity — confirm this matches the regressor's intended
    # output head.
    reg_outputs = reg_pipeline(texts)

    processed_df["intensity"] = [float(o[0][0]) for o in reg_outputs]

    # features of resulting df: "label", "sentiment_confidence", "intensity"
    return processed_df
75
+
76
def compute_sentiment_metrics(processed_df, feedback_volume, multiplier_cap=0.7):
    """Compute intensity-weighted sentiment ratios and the SPA indicator.

    Parameters
    ----------
    processed_df : DataFrame
        Must carry "label" (LABEL_0/1/2) and "intensity" columns, as produced
        by transform_sentiments.
    feedback_volume : int
        Total feedback rows; denominator for the raw sentiment ratios.
    multiplier_cap : float
        Maximum intensity boost added to a label's weight multiplier.

    Returns
    -------
    dict
        Keys: "Weighted Positive", "Weighted Negative",
        "SPA (Sentiment-Participation Assymetry)" [sic — key kept for
        backward compatibility], "Raw Sentiment Ratios".
    """
    # Fix: the empty-input branch previously returned {"w_pos", "w_neg"},
    # which broke callers indexing "Weighted Positive"/"Weighted Negative".
    # Return the same schema as the normal path, zeroed out.
    if feedback_volume == 0:
        return {
            "Weighted Positive": 0.0,
            "Weighted Negative": 0.0,
            "SPA (Sentiment-Participation Assymetry)": {
                "Value": 0.0,
                "Description": "Positive -> Sentiments are mostly positive. Negative -> Sentiments are mostly negative."
            },
            "Raw Sentiment Ratios": {"neg": 0.0, "neu": 0.0, "pos": 0.0},
        }

    df = processed_df.copy()

    # Map raw model labels to human-readable sentiment classes; labels outside
    # the map become NaN and are dropped by the groupby below.
    LABEL_MAP = {
        "LABEL_0": "neg",
        "LABEL_1": "neu",
        "LABEL_2": "pos"
    }
    df["label"] = df["label"].map(LABEL_MAP)

    # ----- 1. get ratio of pos, neg, neu labels -----
    # Fix: removed leftover dtype debug print.
    label_counts = df["label"].value_counts()

    raw_sentiment_ratios = {
        label: label_counts.get(label, 0) / feedback_volume
        for label in ["neg", "neu", "pos"]
    }

    # ----- 2. get sum of intensity scores per label -----
    intensity_sums = df.groupby("label")["intensity"].sum().to_dict()

    # ensure all labels exist
    intensity_sums = {
        label: intensity_sums.get(label, 0.0)
        for label in ["neg", "neu", "pos"]
    }

    # ----- 3. compute multiplier per label -----
    total_intensity = sum(intensity_sums.values())

    if total_intensity == 0:
        weight_multipliers = {label: 1.0 for label in intensity_sums}
    else:
        weight_multipliers = {
            label: 1.0 + multiplier_cap * (intensity_sums[label] / total_intensity)
            for label in intensity_sums
        }

    # ----- 4. compute weighted sentiment ratio (WSR) -----
    w_pos = raw_sentiment_ratios["pos"] * weight_multipliers["pos"]
    w_neg = raw_sentiment_ratios["neg"] * weight_multipliers["neg"]

    # ----- 5. SPA (sentiment-participation asymmetry) -----
    spa = w_pos - w_neg

    spa_meta = {
        "Value": spa,
        "Description": "Positive -> Sentiments are mostly positive. Negative -> Sentiments are mostly negative."
    }

    return {
        "Weighted Positive": w_pos,
        "Weighted Negative": w_neg,
        "SPA (Sentiment-Participation Assymetry)": spa_meta,
        "Raw Sentiment Ratios": raw_sentiment_ratios,
    }
README.md ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Peopulse
3
+ emoji: 📊
4
+ colorFrom: pink
5
+ colorTo: purple
6
+ sdk: gradio
7
+ sdk_version: 6.2.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: cc-by-nc-nd-4.0
11
+ short_description: Silence-Aware Citizen Feedback Intelligence System
12
+ ---
13
+
14
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr

# Fix: the package directory uploaded in this commit is "Phases/" (capital P);
# importing "phase.*" raises ModuleNotFoundError on case-sensitive filesystems.
from Phases.ingestion import ingest_feedback, ingest_adoption
from Phases.sentiment_modeling import transform_sentiments, compute_sentiment_metrics
from Phases.compute import ParticipationAdoptionIndex
from Phases.interpret import assign_typology
7
+
8
def run_app(feedback_file, adoption_file, num_participants, target_population):
    """End-to-end pipeline: ingest -> sentiment -> PAI -> typology.

    Returns (sentiment metrics dict, alert typology dict, reach & equity dict)
    in the order expected by the Gradio output components.
    """
    # Phase 1: ingest both data sources.
    feedback_df, feedback_volume = ingest_feedback(feedback_file)
    participants_by_group = ingest_adoption(adoption_file)

    # Phase 2: sentiment modeling on the feedback text.
    scored_df = transform_sentiments(feedback_df)
    sentiment_metrics = compute_sentiment_metrics(scored_df, feedback_volume)

    # Phase 3: Participation Adoption Index (PAI).
    pai_calculator = ParticipationAdoptionIndex(
        num_participants=num_participants,
        target_population=target_population,
        feedback_volume=feedback_volume,
        w_pos=sentiment_metrics["Weighted Positive"],
        w_neg=sentiment_metrics["Weighted Negative"],
    )
    participation_components, reach_equity = pai_calculator.compute_pai(participants_by_group)

    # Phase 4: interpret the participation-dynamics components.
    typology = assign_typology(participation_components)

    return sentiment_metrics, typology, reach_equity
36
+
37
# ========== GRADIO INTERFACE ==========
with gr.Blocks() as demo:
    gr.Markdown("# 🏙️📊Peopulse: Citizen Feedback Intelligence System")

    # ----- INPUTS -----
    # File uploads side by side, then the two numeric parameters.
    with gr.Row():
        feedback_upload = gr.File(label="Upload Feedback Data (CSV)")
        adoption_upload = gr.File(label="Upload Adoption Data (CSV)")

    with gr.Row():
        participants_box = gr.Number(
            label="Number of Participants",
            value=1000,
            minimum=0,
            maximum=1e9,
            step=1,
            precision=0,
        )
        population_box = gr.Number(
            label="Target Population Size",
            value=10000,
            minimum=1,
            maximum=1e10,
            step=1,
            precision=0,
        )

    run_button = gr.Button("Run Evaluation")

    # ----- OUTPUTS -----
    # Left column: sentiment + reach/equity JSON; right column: typology JSON.
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("## 🗨️📈Public Sentiment Analytics")
            sentiment_json = gr.JSON()
            gr.Markdown("## 📃🩺Reach & Equity")
            reach_equity_json = gr.JSON()
        with gr.Column(scale=1):
            gr.Markdown("## 📃🩺Participation Dynamics")
            typology_json = gr.JSON()

    # Wire the button to the pipeline; output order matches run_app's return.
    run_button.click(
        fn=run_app,
        inputs=[feedback_upload, adoption_upload, participants_box, population_box],
        outputs=[sentiment_json, typology_json, reach_equity_json],
    )

demo.launch()
gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ pandas
2
+ transformers
3
+ gradio
4
+ torch