Spaces:

seedflora
/

ev-sentiment-dashboard

Running

App Files Files Community

seedflora committed about 17 hours ago

Commit

7f0ea09

verified ·

1 Parent(s): 49069ef

Initial space deploy

Browse files

Files changed (6) hide show

.gitattributes +1 -0
README.md +8 -6
app.py +293 -0
data.xlsx +3 -0
requirements.txt +7 -0
results.csv +6 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+data.xlsx filter=lfs diff=lfs merge=lfs -text

README.md CHANGED Viewed

@@ -1,12 +1,14 @@
----
-title: Ev Sentiment Dashboard
-emoji: 🌍
-colorFrom: purple
-colorTo: indigo
 sdk: gradio
 sdk_version: 6.3.0
 app_file: app.py
 pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

+---
+title: EV Sentiment Dashboard
+emoji: 🚗
+colorFrom: green
+colorTo: blue
 sdk: gradio
 sdk_version: 6.3.0
 app_file: app.py
 pinned: false
 ---
+# EV Sentiment Dashboard
+Klasifikasi sentimen + dashboard analitik (word cloud, distribusi label, dan perbandingan model).

app.py ADDED Viewed

	@@ -0,0 +1,293 @@

+import json
+import os
+import re
+from collections import Counter
+from pathlib import Path
+import gradio as gr
+import matplotlib
+matplotlib.use("Agg")
+import matplotlib.pyplot as plt
+import pandas as pd
+import torch
+from transformers import AutoModelForSequenceClassification, AutoTokenizer
+from wordcloud import WordCloud
+# Hub repository id handed to from_pretrained() for both tokenizer and model.
+MODEL_ID = "seedflora/ev-sentiment"
+# Excel workbook read by load_dataset() for the analytics tab.
+DATA_PATH = "data.xlsx"
+# Columns load_dataset() requires in the workbook: the text and its label.
+TEXT_COL = "clean_text_formal"
+LABEL_COL = "label"
+# CSV of per-model metrics read by load_results().
+RESULTS_PATH = "results.csv"
+def load_label_map(model_dir: Path):
+    label_map_path = model_dir / "label_map.json"
+    if label_map_path.exists():
+        with label_map_path.open("r", encoding="utf-8") as f:
+            return json.load(f)
+    return None
+# Load the tokenizer and classifier once at import time; predict() reuses them.
+TOKENIZER = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=True)
+MODEL = AutoModelForSequenceClassification.from_pretrained(MODEL_ID)
+# Put the module in inference mode before serving predictions.
+MODEL.eval()
+# Class-index -> label-name mapping taken from the model config.
+ID2LABEL = MODEL.config.id2label
+# Words dropped by _tokenize() before counting word frequencies.
+# The first group is Indonesian function words, the trailing group English ones.
+STOPWORDS = {
+    "yang",
+    "dan",
+    "di",
+    "ke",
+    "dari",
+    "untuk",
+    "pada",
+    "ini",
+    "itu",
+    "atau",
+    "juga",
+    "dengan",
+    "karena",
+    "bahwa",
+    "sudah",
+    "belum",
+    "tidak",
+    "bukan",
+    "jadi",
+    "agar",
+    "sebagai",
+    "lebih",
+    "paling",
+    "seperti",
+    "saja",
+    "masih",
+    "bisa",
+    "dapat",
+    "akan",
+    "kami",
+    "kita",
+    "saya",
+    "anda",
+    "mereka",
+    "aku",
+    "dia",
+    "kamu",
+    "nya",
+    "the",
+    "a",
+    "an",
+    "is",
+    "are",
+    "of",
+    "to",
+    "in",
+    "for",
+    "on",
+    "it",
+}
+def load_dataset():
+    path = Path(DATA_PATH)
+    if not path.exists():
+        return None, {}
+    df = pd.read_excel(path)
+    if TEXT_COL not in df.columns or LABEL_COL not in df.columns:
+        return None, {}
+    df = df[[TEXT_COL, LABEL_COL]].dropna()
+    df[TEXT_COL] = df[TEXT_COL].astype(str)
+    labels = sorted(df[LABEL_COL].unique().tolist())
+    if set(labels) == {0, 2}:
+        label_name = {0: "Negatif", 2: "Positif"}
+    elif set(labels) == {0, 1}:
+        label_name = {0: "Negatif", 1: "Positif"}
+    else:
+        label_name = {val: f"Label {val}" for val in labels}
+    return df, label_name
+def load_results():
+    path = Path(RESULTS_PATH)
+    if not path.exists():
+        return None
+    try:
+        return pd.read_csv(path)
+    except Exception:
+        return None
+# Load the dataset and the metrics table once at import time; the plotting
+# helpers read these module-level values on every callback.
+DATA_DF, LABEL_NAME = load_dataset()
+RESULTS_DF = load_results()
+def predict(text):
+    if not text or not text.strip():
+        return {}
+    inputs = TOKENIZER(text, return_tensors="pt", truncation=True)
+    with torch.no_grad():
+        logits = MODEL(**inputs).logits
+        probs = torch.softmax(logits, dim=-1).squeeze().tolist()
+    scores = {ID2LABEL[i]: float(probs[i]) for i in range(len(probs))}
+    return scores
+def _tokenize(text: str):
+    text = text.lower()
+    text = re.sub(r"[^a-z0-9\s]", " ", text)
+    tokens = [t for t in text.split() if t and t not in STOPWORDS and len(t) > 2]
+    return tokens
+def _filter_df(label_choice: str):
+    if DATA_DF is None:
+        return None
+    if label_choice == "Semua":
+        return DATA_DF
+    label_val = None
+    for val, name in LABEL_NAME.items():
+        if name == label_choice:
+            label_val = val
+            break
+    if label_val is None:
+        return DATA_DF
+    return DATA_DF[DATA_DF[LABEL_COL] == label_val]
+def build_distribution_plot():
+    if DATA_DF is None:
+        fig = plt.figure()
+        plt.text(0.5, 0.5, "Dataset tidak ditemukan", ha="center", va="center")
+        return fig
+    counts = DATA_DF[LABEL_COL].value_counts().sort_index()
+    labels = [LABEL_NAME.get(val, str(val)) for val in counts.index.tolist()]
+    fig, ax = plt.subplots(figsize=(6, 4))
+    ax.bar(labels, counts.values, color=["#ef4444", "#22c55e"])
+    ax.set_title("Distribusi Label")
+    ax.set_ylabel("Jumlah")
+    ax.grid(axis="y", linestyle="--", alpha=0.4)
+    return fig
+def build_top_words_plot(label_choice: str, top_n: int = 20):
+    df = _filter_df(label_choice)
+    fig, ax = plt.subplots(figsize=(6, 5))
+    if df is None or df.empty:
+        ax.text(0.5, 0.5, "Data kosong", ha="center", va="center")
+        return fig
+    tokens = []
+    for text in df[TEXT_COL].tolist():
+        tokens.extend(_tokenize(text))
+    if not tokens:
+        ax.text(0.5, 0.5, "Token kosong", ha="center", va="center")
+        return fig
+    common = Counter(tokens).most_common(top_n)
+    words = [w for w, _ in common][::-1]
+    freqs = [c for _, c in common][::-1]
+    ax.barh(words, freqs, color="#3b82f6")
+    ax.set_title(f"Top {top_n} Kata - {label_choice}")
+    return fig
+def build_wordcloud(label_choice: str):
+    df = _filter_df(label_choice)
+    fig, ax = plt.subplots(figsize=(7, 4.5))
+    if df is None or df.empty:
+        ax.text(0.5, 0.5, "Data kosong", ha="center", va="center")
+        ax.axis("off")
+        return fig
+    tokens = []
+    for text in df[TEXT_COL].tolist():
+        tokens.extend(_tokenize(text))
+    if not tokens:
+        ax.text(0.5, 0.5, "Token kosong", ha="center", va="center")
+        ax.axis("off")
+        return fig
+    wc = WordCloud(width=900, height=500, background_color="white", collocations=False)
+    wc.generate(" ".join(tokens))
+    ax.imshow(wc, interpolation="bilinear")
+    ax.axis("off")
+    ax.set_title(f"Word Cloud - {label_choice}")
+    return fig
+def build_model_comparison_plot():
+    fig, ax = plt.subplots(figsize=(6, 4))
+    if RESULTS_DF is None or RESULTS_DF.empty:
+        ax.text(0.5, 0.5, "results.csv tidak ditemukan", ha="center", va="center")
+        return fig
+    data = RESULTS_DF.copy()
+    data = data.sort_values("val_f1", ascending=False)
+    models = data["model"].tolist()
+    val = data["val_f1"].tolist()
+    test = data["test_f1"].tolist()
+    x = range(len(models))
+    ax.bar(x, val, width=0.4, label="Val F1", color="#22c55e")
+    ax.bar([i + 0.4 for i in x], test, width=0.4, label="Test F1", color="#3b82f6")
+    ax.set_xticks([i + 0.2 for i in x])
+    ax.set_xticklabels(models, rotation=45, ha="right")
+    ax.set_ylim(0, 1.0)
+    ax.set_title("Perbandingan Model (F1)")
+    ax.legend()
+    fig.tight_layout()
+    return fig
+def analytics(label_choice):
+    dist_fig = build_distribution_plot()
+    top_fig = build_top_words_plot(label_choice)
+    wc_fig = build_wordcloud(label_choice)
+    model_fig = build_model_comparison_plot()
+    if DATA_DF is None:
+        summary = pd.DataFrame([{"metric": "rows", "value": 0}])
+    else:
+        summary = pd.DataFrame(
+            [{"metric": "rows", "value": len(DATA_DF)}]
+            + [
+                {"metric": f"label_{LABEL_NAME.get(k, k)}", "value": v}
+                for k, v in DATA_DF[LABEL_COL].value_counts().to_dict().items()
+            ]
+        )
+    return dist_fig, top_fig, wc_fig, model_fig, summary
+with gr.Blocks(title="Klasifikasi Sentimen EV") as app:
+    gr.Markdown("# Klasifikasi Sentimen EV")
+    gr.Markdown("Prediksi sentimen + dashboard analitik (word cloud & distribusi label).")
+    with gr.Tab("Prediksi"):
+        inp = gr.Textbox(lines=4, label="Teks")
+        out = gr.Label(num_top_classes=2, label="Prediksi")
+        btn = gr.Button("Prediksi")
+        btn.click(predict, inputs=inp, outputs=out)
+    with gr.Tab("Analitik"):
+        label_options = ["Semua"] + list(LABEL_NAME.values()) if LABEL_NAME else ["Semua"]
+        label_choice = gr.Dropdown(label_options, value="Semua", label="Filter Label")
+        dist_plot = gr.Plot(label="Distribusi Label")
+        top_plot = gr.Plot(label="Top Kata")
+        wc_plot = gr.Plot(label="Word Cloud")
+        model_plot = gr.Plot(label="Perbandingan Model")
+        summary_tbl = gr.Dataframe(label="Ringkasan Dataset", interactive=False)
+        run_btn = gr.Button("Generate")
+        run_btn.click(
+            analytics,
+            inputs=label_choice,
+            outputs=[dist_plot, top_plot, wc_plot, model_plot, summary_tbl],
+        )
+        label_choice.change(
+            analytics,
+            inputs=label_choice,
+            outputs=[dist_plot, top_plot, wc_plot, model_plot, summary_tbl],
+        )
+    app.load(
+        analytics,
+        inputs=label_choice,
+        outputs=[dist_plot, top_plot, wc_plot, model_plot, summary_tbl],
+    )
+if __name__ == "__main__":
+    app.launch()

data.xlsx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:038011314726d0073886358f9e9890f5d9e7595bb7fa46a82f4b0e6c1f15af61
+size 124200

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+transformers==4.56.2
+torch
+pandas
+openpyxl
+gradio
+matplotlib
+wordcloud

results.csv ADDED Viewed

	@@ -0,0 +1,6 @@

+model,run_dir,val_accuracy,val_precision,val_recall,val_f1,test_accuracy,test_precision,test_recall,test_f1
+indobenchmark/indobert-base-p1,outputs\indobenchmark_indobert-base-p1,0.9779411764705882,0.9850746268656716,0.9705882352941176,0.9777777777777777,0.9191176470588235,0.9014084507042254,0.9411764705882353,0.920863309352518
+cahya/bert-base-indonesian-1.5G,outputs\cahya_bert-base-indonesian-1.5G,0.9705882352941176,0.9571428571428572,0.9852941176470589,0.9710144927536232,0.9264705882352942,0.9142857142857143,0.9411764705882353,0.927536231884058
+cahya/roberta-base-indonesian-1.5G,outputs\cahya_roberta-base-indonesian-1.5G,0.9852941176470589,0.9852941176470589,0.9852941176470589,0.9852941176470589,0.9338235294117647,0.9154929577464789,0.9558823529411765,0.935251798561151
+xlm-roberta-base,outputs\xlm-roberta-base,0.9411764705882353,0.9166666666666666,0.9705882352941176,0.9428571428571428,0.9338235294117647,0.9154929577464789,0.9558823529411765,0.935251798561151
+bert-base-multilingual-cased,outputs\bert-base-multilingual-cased,0.9485294117647058,0.9178082191780822,0.9852941176470589,0.950354609929078,0.8970588235294118,0.875,0.9264705882352942,0.9