mysteriouskeyboard committed on
Commit
e82eb4a
·
verified ·
1 Parent(s): ea26e82

Upload 8 files

Browse files
Phases/compute.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import math
2
+
3
class ParticipationAdoptionIndex:
    """Compute the Participation Adoption Index (PAI) component dictionaries.

    Produces two groups of indicators from raw participation counts:
      * "Participation Dynamics" (PCR, SPI-A, SPI-B, PEG) -> components_1
      * "Reach and Equity" (PCI plus a group summary)     -> components_2
    """

    def __init__(self, num_participants, target_population, feedback_volume, w_pos, w_neg):
        # Raw inputs for the index. w_pos / w_neg are the weighted sentiment
        # ratios; they are stored for downstream use even though compute_pai
        # does not read them.
        self.num_participants = num_participants
        self.target_population = target_population
        self.feedback_volume = feedback_volume
        self.w_pos = w_pos
        self.w_neg = w_neg
        self.components_1 = {}
        self.components_2 = {}

    # ---------- helper functions ----------
    def _compute_pci(self, participants_by_group):
        """Herfindahl-style concentration index over group participation shares.

        Returns 0.0 for an empty/all-zero mapping; otherwise the sum of
        squared shares (1.0 = fully concentrated in one group).
        """
        total = sum(participants_by_group.values())
        if total == 0:
            return 0.0
        return sum((count / total) ** 2 for count in participants_by_group.values())

    def _normalize_components(self):
        """Scale components_1 values so their absolute values sum to 1.

        Fix: component numbers are stored under the "Value" key; the previous
        implementation looked up "value" and would have raised KeyError.
        """
        total = sum(abs(v["Value"]) for v in self.components_1.values())

        if total == 0:
            for k in self.components_1:
                if self.components_1[k]["Value"] is not None:
                    self.components_1[k]["Value"] = 0.0
        else:
            for k in self.components_1:
                self.components_1[k]["Value"] /= total

    def _sort_components(self):
        """Return components_1 ordered by descending absolute value.

        Fix: uses the "Value" key actually stored in the component dicts.
        """
        return dict(
            sorted(
                self.components_1.items(),
                key=lambda item: abs(item[1]["Value"]),
                reverse=True
            )
        )

    # ---------- public API ----------
    def compute_pai(self, participants_by_group=None):
        """Build and return (components_1, components_2).

        Parameters
        ----------
        participants_by_group : dict[str, int] | None
            Participant counts per group; None/empty yields PCI = 0.0.

        Returns
        -------
        tuple[dict, dict]
            ("Participation Dynamics" components, "Reach and Equity" components)
        """
        # Guard against zero denominators (empty rollout / no participants)
        # instead of raising ZeroDivisionError; ratios default to 0.0.
        if self.target_population > 0:
            # absolute counts
            spi_a_abs = self.target_population - self.num_participants
            spi_b_abs = self.target_population - spi_a_abs - self.feedback_volume

            # ----- 'Participation Dynamics' components -----
            pcr = self.num_participants / self.target_population
            spi_a = spi_a_abs / self.target_population
            spi_b = spi_b_abs / self.target_population
        else:
            pcr = spi_a = spi_b = 0.0

        # Expression gap is undefined without participants; treat as no gap.
        if self.num_participants > 0:
            peg = 1 - (self.feedback_volume / self.num_participants)
        else:
            peg = 0.0

        # ----- 'Reach and Equity' components -----
        pci = self._compute_pci(participants_by_group) if participants_by_group else 0.0
        # Inverse Herfindahl = "effective number of groups".
        effective_groups = 1.0 / pci if pci > 0 else 0.0
        effective_groups_int = int(math.floor(effective_groups))
        effective_groups_rounded = int(round(effective_groups))

        self.components_1 = {
            "PCR (Participation Coverage Ratio)": {"Value": pcr, "Description": "≈1 → Participation closely matches the target population. ≈0 → Participation reached very few of the intended population."},
            "SPI-A (Silent Participation Inference - Silent Non-Adoption)": {"Value": spi_a, "Description": "High → Large segments of the target population were not reached or excluded. Low → Minimal non-participation across the target population."},
            "SPI-B (Silent Participation Inference - Silent Adoption)": {"Value": spi_b, "Description": "High → Silent participation dominates. Low → Expressive participation dominates."},
            "PEG (Participation-to-Expression Gap)": {"Value": peg, "Description": "≈1 → High expression gap among participants. ≈0 → Minimal expression gap."}
        }

        self.components_2 = {
            "PCI (Participation Concentration Index)":
                {"Value": pci,
                 "Description": "≈1 → Participation is concentrated to certain groups. ≈0 → Participation is balanced across groups."
                 },
            "Group Summary":
                {"Minimum Participating Groups": f"At least {effective_groups_int} groups meaningfully participated.",
                 "Effective Participation Groups": f"Participation is roughly equivalent to {effective_groups_rounded} equally sized groups."
                 }
        }

        # Normalization is opt-in via _normalize_components(); not applied here.
        return self.components_1, self.components_2
Phases/ingestion.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import os
3
+ import warnings
4
+
5
def ingest_feedback(file):
    """Load citizen feedback from a CSV source.

    Tries UTF-8 first; on a UnicodeDecodeError falls back to latin-1
    (covers cp1252-style exports). Undecodable bytes are replaced.

    Returns
    -------
    tuple[DataFrame, int]
        The feedback rows and their count (feedback volume).
    """
    try:
        frame = pd.read_csv(file, encoding="utf-8", encoding_errors="replace")
    except UnicodeDecodeError:
        # Legacy-encoding fallback (or cp1252).
        frame = pd.read_csv(file, encoding="latin-1", encoding_errors="replace")

    return frame, len(frame)
14
+
15
def ingest_adoption(file):
    """Load adoption data and aggregate participant counts per group.

    Parameters
    ----------
    file : str | None
        Path to a CSV file that must contain a "group" column.

    Returns
    -------
    dict[str, int]
        Participant count per group; empty dict (with a warning) when the
        file is missing, unreadable, or empty.

    Raises
    ------
    ValueError
        If the CSV has no "group" column.
    """
    # --- Validation ---
    if file is None or file == "":
        warnings.warn("Adoption file is empty; participants_by_group will be empty")
        return {}

    # Fix: this branch previously reused the "file is empty" message,
    # which misreported a missing file as an empty one.
    if not os.path.exists(file):
        warnings.warn("Adoption file not found; participants_by_group will be empty")
        return {}

    try:
        adoption_df = pd.read_csv(file, encoding="utf-8", encoding_errors="replace")
    except UnicodeDecodeError:
        # Legacy-encoding fallback.
        adoption_df = pd.read_csv(file, encoding="latin-1", encoding_errors="replace")

    if adoption_df.empty:
        warnings.warn("Adoption file is empty; participants_by_group will be empty")
        return {}

    # handle missing required column
    # there must be a "group" column in the adoption dataset
    if "group" not in adoption_df.columns:
        raise ValueError("Adoption data must contain \"group\" column.")

    return adoption_df.groupby("group").size().to_dict()
Phases/interpret.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def assign_typology(components):
    """Bucket each PAI component into an alert level.

    Parameters
    ----------
    components : dict
        Mapping of component name -> {"Value": float, "Description": str},
        as produced by ParticipationAdoptionIndex.compute_pai.

    Returns
    -------
    dict
        {"High Alert": [...], "Medium Alert": [...], "Low Alert": [...]}
        where each entry carries Component, Value and Description.
    """
    # Thresholds are checked top-down; the first rule with value >= threshold
    # wins. Note PCR is inverted (high coverage = low alert).
    STATE_RULES = {
        "PCR (Participation Coverage Ratio)": [
            (0.5, "Low Alert"),
            (0.2, "Medium Alert"),
            (0.0, "High Alert")
        ],
        "SPI-A (Silent Participation Inference - Silent Non-Adoption)": [
            (0.7, "High Alert"),
            (0.3, "Medium Alert"),
            (0.0, "Low Alert")
        ],
        "SPI-B (Silent Participation Inference - Silent Adoption)": [
            (0.5, "High Alert"),
            (0.2, "Medium Alert"),
            (0.0, "Low Alert")
        ],
        "PEG (Participation-to-Expression Gap)": [
            (0.6, "High Alert"),
            (0.3, "Medium Alert"),
            (0.0, "Low Alert")
        ]
    }

    buckets = {
        "High Alert": [],
        "Medium Alert": [],
        "Low Alert": []
    }

    for component, data in components.items():
        # Fix: unknown components previously raised KeyError; skip them.
        rules = STATE_RULES.get(component)
        if rules is None:
            continue

        value = data["Value"]
        description = data.get("Description", "")

        # Fix: values below every threshold (e.g. a negative PEG when
        # feedback_volume > num_participants) previously fell through and
        # were silently dropped; default to the lowest-threshold state.
        state = rules[-1][1]
        for threshold, candidate_state in rules:
            if value >= threshold:
                state = candidate_state
                break

        buckets[state].append({
            "Component": component,
            "Value": value,
            "Description": description
        })

    return buckets
Phases/sentiment_modeling.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import pandas as pd
from typing import Tuple
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline

# --- globals ---
# Lazily-initialized Hugging Face pipelines; populated by load_models()
# on first use so importing this module stays cheap.
clf_pipeline = None  # text-classification pipeline (sentiment classifier)
reg_pipeline = None  # feature-extraction pipeline used as intensity regressor

# Hub model IDs for the Tagalog sentiment models.
model_name_cls = "sdbrgo/roberta-tagalog-sentiment-multiclass-classifier"
model_name_reg = "sdbrgo/roberta-tagalog-sentiment-intensity-regressor"
11
+
12
def load_models():
    """Lazily build the classification and regression pipelines.

    Idempotent: each global pipeline is constructed only on the first call
    and reused afterwards. Both run on CPU (device=-1).
    """
    global clf_pipeline, reg_pipeline

    if clf_pipeline is None:
        clf_pipeline = pipeline(
            "text-classification",
            model=AutoModelForSequenceClassification.from_pretrained(model_name_cls),
            tokenizer=AutoTokenizer.from_pretrained(model_name_cls),
            top_k=1,
            device=-1,
        )

    if reg_pipeline is None:
        reg_pipeline = pipeline(
            "feature-extraction",
            model=AutoModelForSequenceClassification.from_pretrained(model_name_reg),
            tokenizer=AutoTokenizer.from_pretrained(model_name_reg),
            device=-1,
        )
35
+
36
def transform_sentiments(df):
    """Attach sentiment columns to a feedback DataFrame.

    Adds columns to a copy of *df*:
      * "label"                 — raw classifier label (e.g. LABEL_0/1/2)
      * "sentiment_confidence"  — classifier score for that label
      * "intensity"             — scalar from the regression pipeline

    Raises
    ------
    ValueError
        If *df* has no "text" column.
    """
    load_models()

    processed_df = df.copy()

    # Feature Validation: ensures 'text' column exists
    if "text" not in processed_df.columns:
        raise ValueError("Input DataFrame must contain a 'text' column")

    # Force cast to string and handle NaNs so the tokenizer never sees floats.
    processed_df["text"] = processed_df["text"].fillna("").astype(str)

    texts = processed_df["text"].tolist()

    # --- sentiment classification ---
    # Fix: removed leftover debug print of the first pipeline output.
    cls_outputs = clf_pipeline(texts)

    # With top_k=1 the pipeline may return a nested list per text; unwrap it.
    cls_outputs = [o[0] if isinstance(o, list) else o for o in cls_outputs]

    processed_df["label"] = [o["label"] for o in cls_outputs]
    processed_df["sentiment_confidence"] = [o["score"] for o in cls_outputs]

    # --- sentiment intensity regression ---
    # NOTE(review): feature-extraction returns hidden states; o[0][0] is taken
    # as the scalar intensity — confirm this matches the regressor's intended
    # output head.
    reg_outputs = reg_pipeline(texts)

    processed_df["intensity"] = [float(o[0][0]) for o in reg_outputs]

    # features of resulting df: "label", "sentiment_confidence", "intensity"
    return processed_df
75
+
76
def compute_sentiment_metrics(processed_df, feedback_volume, multiplier_cap=0.7):
    """Compute intensity-weighted sentiment ratios and the SPA indicator.

    Parameters
    ----------
    processed_df : DataFrame
        Must carry "label" (LABEL_0/1/2) and "intensity" columns, as produced
        by transform_sentiments.
    feedback_volume : int
        Total feedback rows; denominator for the raw sentiment ratios.
    multiplier_cap : float
        Maximum intensity boost added to a label's weight multiplier.

    Returns
    -------
    dict
        Keys: "Weighted Positive", "Weighted Negative",
        "SPA (Sentiment-Participation Assymetry)" [sic — key kept for
        backward compatibility], "Raw Sentiment Ratios".
    """
    # Fix: the empty-input branch previously returned {"w_pos", "w_neg"},
    # which broke callers indexing "Weighted Positive"/"Weighted Negative".
    # Return the same schema as the normal path, zeroed out.
    if feedback_volume == 0:
        return {
            "Weighted Positive": 0.0,
            "Weighted Negative": 0.0,
            "SPA (Sentiment-Participation Assymetry)": {
                "Value": 0.0,
                "Description": "Positive -> Sentiments are mostly positive. Negative -> Sentiments are mostly negative."
            },
            "Raw Sentiment Ratios": {"neg": 0.0, "neu": 0.0, "pos": 0.0},
        }

    df = processed_df.copy()

    # Map raw model labels to human-readable sentiment classes; labels outside
    # the map become NaN and are dropped by the groupby below.
    LABEL_MAP = {
        "LABEL_0": "neg",
        "LABEL_1": "neu",
        "LABEL_2": "pos"
    }
    df["label"] = df["label"].map(LABEL_MAP)

    # ----- 1. get ratio of pos, neg, neu labels -----
    # Fix: removed leftover dtype debug print.
    label_counts = df["label"].value_counts()

    raw_sentiment_ratios = {
        label: label_counts.get(label, 0) / feedback_volume
        for label in ["neg", "neu", "pos"]
    }

    # ----- 2. get sum of intensity scores per label -----
    intensity_sums = df.groupby("label")["intensity"].sum().to_dict()

    # ensure all labels exist
    intensity_sums = {
        label: intensity_sums.get(label, 0.0)
        for label in ["neg", "neu", "pos"]
    }

    # ----- 3. compute multiplier per label -----
    total_intensity = sum(intensity_sums.values())

    if total_intensity == 0:
        weight_multipliers = {label: 1.0 for label in intensity_sums}
    else:
        weight_multipliers = {
            label: 1.0 + multiplier_cap * (intensity_sums[label] / total_intensity)
            for label in intensity_sums
        }

    # ----- 4. compute weighted sentiment ratio (WSR) -----
    w_pos = raw_sentiment_ratios["pos"] * weight_multipliers["pos"]
    w_neg = raw_sentiment_ratios["neg"] * weight_multipliers["neg"]

    # ----- 5. SPA (sentiment-participation asymmetry) -----
    spa = w_pos - w_neg

    spa_meta = {
        "Value": spa,
        "Description": "Positive -> Sentiments are mostly positive. Negative -> Sentiments are mostly negative."
    }

    return {
        "Weighted Positive": w_pos,
        "Weighted Negative": w_neg,
        "SPA (Sentiment-Participation Assymetry)": spa_meta,
        "Raw Sentiment Ratios": raw_sentiment_ratios,
    }
README.md ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Peopulse
3
+ emoji: 📊
4
+ colorFrom: pink
5
+ colorTo: purple
6
+ sdk: gradio
7
+ sdk_version: 6.2.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: cc-by-nc-nd-4.0
11
+ short_description: Silence-Aware Citizen Feedback Intelligence System
12
+ ---
13
+
14
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr

# Fix: the package directory uploaded in this commit is "Phases/" (capital P);
# importing "phase.*" raises ModuleNotFoundError on case-sensitive filesystems.
from Phases.ingestion import ingest_feedback, ingest_adoption
from Phases.sentiment_modeling import transform_sentiments, compute_sentiment_metrics
from Phases.compute import ParticipationAdoptionIndex
from Phases.interpret import assign_typology
7
+
8
def run_app(feedback_file, adoption_file, num_participants, target_population):
    """End-to-end pipeline: ingest -> sentiment -> PAI -> typology.

    Returns (sentiment metrics dict, alert typology dict, reach & equity dict)
    in the order expected by the Gradio output components.
    """
    # Phase 1: ingest both data sources.
    feedback_df, feedback_volume = ingest_feedback(feedback_file)
    participants_by_group = ingest_adoption(adoption_file)

    # Phase 2: sentiment modeling on the feedback text.
    scored_df = transform_sentiments(feedback_df)
    sentiment_metrics = compute_sentiment_metrics(scored_df, feedback_volume)

    # Phase 3: Participation Adoption Index (PAI).
    pai_calculator = ParticipationAdoptionIndex(
        num_participants=num_participants,
        target_population=target_population,
        feedback_volume=feedback_volume,
        w_pos=sentiment_metrics["Weighted Positive"],
        w_neg=sentiment_metrics["Weighted Negative"],
    )
    participation_components, reach_equity = pai_calculator.compute_pai(participants_by_group)

    # Phase 4: interpret the participation-dynamics components.
    typology = assign_typology(participation_components)

    return sentiment_metrics, typology, reach_equity
36
+
37
# ========== GRADIO INTERFACE ==========
with gr.Blocks() as demo:
    gr.Markdown("# 🏙️📊Peopulse: Citizen Feedback Intelligence System")

    # ----- INPUTS -----
    # File uploads side by side, then the two numeric parameters.
    with gr.Row():
        feedback_upload = gr.File(label="Upload Feedback Data (CSV)")
        adoption_upload = gr.File(label="Upload Adoption Data (CSV)")

    with gr.Row():
        participants_box = gr.Number(
            label="Number of Participants",
            value=1000,
            minimum=0,
            maximum=1e9,
            step=1,
            precision=0,
        )
        population_box = gr.Number(
            label="Target Population Size",
            value=10000,
            minimum=1,
            maximum=1e10,
            step=1,
            precision=0,
        )

    run_button = gr.Button("Run Evaluation")

    # ----- OUTPUTS -----
    # Left column: sentiment + reach/equity JSON; right column: typology JSON.
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("## 🗨️📈Public Sentiment Analytics")
            sentiment_json = gr.JSON()
            gr.Markdown("## 📃🩺Reach & Equity")
            reach_equity_json = gr.JSON()
        with gr.Column(scale=1):
            gr.Markdown("## 📃🩺Participation Dynamics")
            typology_json = gr.JSON()

    # Wire the button to the pipeline; output order matches run_app's return.
    run_button.click(
        fn=run_app,
        inputs=[feedback_upload, adoption_upload, participants_box, population_box],
        outputs=[sentiment_json, typology_json, reach_equity_json],
    )

demo.launch()
gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ pandas
2
+ transformers
3
+ gradio
4
+ torch