Spaces:

Dusit-P
/

Thai-Sentiment-GUI

Sleeping

App Files Files Community

Dusit-P committed on Oct 4, 2025

Commit

f6f7109

verified ·

1 Parent(s): cc03334

Update app.py

Browse files

Files changed (1) hide show

app.py +139 -282

app.py CHANGED Viewed

@@ -1,8 +1,9 @@
 # app.py — Thai Sentiment (WangchanBERTa Variants)
-# - No Single tab
-# - No aspect analysis (focus on POS/NEG)
-# - CSV tab: date pickers appear ONLY if a date column exists (use DatePicker)
-# - Predict buttons right below inputs
 import os, json, importlib.util, traceback, re, math, tempfile, datetime
 import gradio as gr
 import torch, pandas as pd
@@ -21,12 +22,31 @@ AVAILABLE_CHOICES = ["WCB", "WCB_BiLSTM", "WCB_CNN_BiLSTM", "WCB_4Layer_BiLSTM"]
 if DEFAULT_MODEL not in AVAILABLE_CHOICES:
     DEFAULT_MODEL = "WCB"
-NEG_COLOR = "#F87171"   # red
-POS_COLOR = "#34D399"   # green
 TEMPLATE  = "plotly_white"
 CACHE = {}
 # ================= Loader =================
 def _import_models():
     if "models_module" in CACHE:
@@ -42,84 +62,56 @@ def load_model(model_name: str):
     key = f"model:{model_name}"
     if key in CACHE:
         return CACHE[key]
     cfg_path = hf_hub_download(REPO_ID, filename=f"{model_name}/config.json", token=HF_TOKEN)
     w_path   = hf_hub_download(REPO_ID, filename=f"{model_name}/model.safetensors", token=HF_TOKEN)
     with open(cfg_path, "r", encoding="utf-8") as f:
         cfg = json.load(f)
     base_model = cfg.get("base_model", "airesearch/wangchanberta-base-att-spm-uncased")
     arch_name  = cfg.get("architecture", model_name)
     tok = AutoTokenizer.from_pretrained(base_model)
     models = _import_models()
     model = models._build(arch_name, base_model, int(cfg.get("num_labels",2)),
                           cfg.get("pooling_after_lstm", "masked_mean"))
     state = load_file(w_path)
     model.load_state_dict(state, strict=False)
     model.eval()
     CACHE[key] = (model, tok, cfg)
     return CACHE[key]
 # ================= Utils =================
-_INVALID_STRINGS = {"-", "--", "—", "n/a", "na", "null", "none", "nan", ".", "…", ""}
 _RE_HAS_LETTER = re.compile(r"[ก-๙A-Za-z]")
-def _norm_text(v) -> str:
     if v is None: return ""
     if isinstance(v, float) and math.isnan(v): return ""
     return str(v).strip().strip('"').strip("'").strip(",")
-def _is_substantive_text(s: str, min_chars: int = 2) -> bool:
     if not s: return False
     if s.lower() in _INVALID_STRINGS: return False
     if not _RE_HAS_LETTER.search(s): return False
-    if len(s.replace(" ", "")) < min_chars: return False
     return True
-def _format_pct(x: float) -> str:
-    return f"{x*100:.2f}%"
-def _to_datetime_safe(s):
-    return pd.to_datetime(s, errors="coerce", infer_datetime_format=True, utc=False)
-def _normalize_datepicker_value(v):
-    """รับค่าจาก gr.DatePicker (datetime.date หรือ str หรือ None) → pandas.Timestamp หรือ None"""
-    if v is None or (isinstance(v, float) and math.isnan(v)):
-        return None
-    if isinstance(v, datetime.date):
-        return pd.Timestamp(v)
-    # เผื่อบางเวอร์ชันส่ง str 'YYYY-MM-DD'
-    try:
-        ts = pd.to_datetime(v, errors="coerce")
-        return ts if pd.notna(ts) else None
-    except Exception:
-        return None
 LIKELY_TEXT_COLS = ["text","review","message","comment","content","sentence","body","ข้อความ","รีวิว"]
 LIKELY_DATE_COLS = ["date","created_at","time","timestamp","datetime","วันที่","วันเวลา","เวลา"]
-def detect_text_and_date_cols(df: pd.DataFrame):
     cols = list(df.columns)
-    # text col
     low = {c.lower(): c for c in cols}
     text_col = None
     for k in LIKELY_TEXT_COLS:
-        if k in low:
-            text_col = low[k]; break
     if text_col is None:
         cand = [c for c in cols if df[c].dtype == object]
         text_col = cand[0] if cand else cols[0]
-    # date candidates
     date_candidates = []
     for c in cols:
-        if c.lower() in LIKELY_DATE_COLS:
-            date_candidates.append(c)
-            continue
         sample = df[c].head(50)
         if _to_datetime_safe(sample).notna().sum() >= max(3, int(len(sample)*0.2)):
             date_candidates.append(c)
@@ -128,277 +120,142 @@ def detect_text_and_date_cols(df: pd.DataFrame):
     return text_col, date_candidates, date_col
 # ================= Charts =================
-def make_basic_charts(df: pd.DataFrame):
     total = len(df)
-    neg_df = df[df["label"] == "negative"].copy()
-    pos_df = df[df["label"] == "positive"].copy()
-    # bar counts
     fig_bar = go.Figure()
     fig_bar.add_bar(name="negative", x=["negative"], y=[len(neg_df)], marker_color=NEG_COLOR)
     fig_bar.add_bar(name="positive", x=["positive"], y=[len(pos_df)], marker_color=POS_COLOR)
     fig_bar.update_layout(barmode="group", title="Label counts", template=TEMPLATE)
-    # pie pos/neg
-    labels = ["negative", "positive"]
-    values = [len(neg_df), len(pos_df)]
-    fig_pie = go.Figure(go.Pie(labels=labels, values=values, hole=0.35, sort=False,
                                marker=dict(colors=[NEG_COLOR, POS_COLOR])))
     fig_pie.update_layout(title="Positive vs Negative", template=TEMPLATE)
     neg_avg = pd.to_numeric(df["negative(%)"].str.rstrip("%"), errors="coerce").mean()
     pos_avg = pd.to_numeric(df["positive(%)"].str.rstrip("%"), errors="coerce").mean()
-    info = (
-        f"**Summary**  \n"
-        f"- Total: {total}  \n"
-        f"- Negative: {len(neg_df)}  \n"
-        f"- Positive: {len(pos_df)}  \n"
-        f"- Avg negative: {neg_avg:.2f}%  \n"
-        f"- Avg positive: {pos_avg:.2f}%"
-    )
     return fig_bar, fig_pie, info
 def _resample_counts(df, date_col, freq):
-    g = df.groupby([pd.Grouper(key=date_col, freq=freq), "label"]).size().unstack(fill_value=0)
-    for col in ["negative","positive"]:
-        if col not in g.columns:
-            g[col] = 0
     return g[["negative","positive"]].sort_index()
-def _rolling_window(freq):
-    return 7 if freq == "D" else (4 if freq == "W" else 3)
-def make_time_chart(df: pd.DataFrame, date_col: str, freq: str, use_ma: bool):
-    ts = _resample_counts(df, date_col, freq)
-    if use_ma:
-        win = _rolling_window(freq)
-        ts = ts.rolling(win, min_periods=1).mean()
-    fig_line = go.Figure()
-    fig_line.add_scatter(x=ts.index, y=ts["negative"], mode="lines",
-                         name="negative", line=dict(color=NEG_COLOR))
-    fig_line.add_scatter(x=ts.index, y=ts["positive"], mode="lines",
-                         name="positive", line=dict(color=POS_COLOR))
-    fig_line.update_layout(title="Reviews over time (POS/NEG)", template=TEMPLATE,
-                           xaxis_title="Date", yaxis_title="Count")
-    return fig_line
 # ================= Core Predict =================
 def _predict_batch(texts, model_name, batch_size=32):
-    model, tok, cfg = load_model(model_name)
-    results = []
-    for i in range(0, len(texts), batch_size):
-        chunk = texts[i:i+batch_size]
-        enc = tok(chunk, padding=True, truncation=True,
-                  max_length=cfg.get("max_length",128), return_tensors="pt")
         with torch.no_grad():
-            logits = model(enc["input_ids"], enc["attention_mask"])
-            probs = F.softmax(logits, dim=1).cpu().numpy()
-        for txt, p in zip(chunk, probs):
-            neg, pos = float(p[0]), float(p[1])
-            label = "positive" if pos >= neg else "negative"
-            results.append({
-                "review": txt,
-                "negative(%)": _format_pct(neg),
-                "positive(%)": _format_pct(pos),
-                "label": label,
-            })
     return results
-# ================= Batch (Textarea) =================
-def predict_many(text_block: str, model_choice: str):
     try:
-        raw_lines = (text_block or "").splitlines()
-        all_norm = [_norm_text(t) for t in raw_lines]
-        cleaned = [t for t in all_norm if _is_substantive_text(t)]
-        skipped = len(all_norm) - len(cleaned)
-        if len(cleaned) == 0:
-            empty = pd.DataFrame(columns=["review","negative(%)","positive(%)","label"])
-            return empty, go.Figure(), go.Figure(), "No valid text"
-        results = _predict_batch(cleaned, model_choice)
-        df = pd.DataFrame(results)
-        fig_bar, fig_pie, info_md = make_basic_charts(df)
-        info_md = f"{info_md}  \n- Skipped: {skipped}"
-        return df, fig_bar, fig_pie, info_md
-    except Exception:
-        tb = traceback.format_exc()
-        empty = pd.DataFrame(columns=["review","negative(%)","positive(%)","label"])
-        return empty, go.Figure(), go.Figure(), f"**Error**\n```\n{tb}\n```"
-# ================= CSV Inspect (auto-detect & toggle UI) =================
 def on_file_change(file_obj):
-    """
-    เมื่ออัปโหลดไฟล์:
-    - คืน options ของ text/date dropdown
-    - ชื่อ default ที่เลือก
-    - toggle visibility ของ date controls + line chart placeholder
-    """
     if file_obj is None:
-        return (
-            gr.update(choices=[], value=None),   # text_dd
-            gr.update(choices=[], value=None),   # date_dd
-            gr.update(visible=False),            # date_from
-            gr.update(visible=False),            # date_to
-            gr.update(visible=False),            # freq
-            gr.update(visible=False),            # use_ma
-            gr.update(visible=False),            # line chart
-            "Please upload a CSV file."
-        )
     try:
-        df_raw = pd.read_csv(file_obj.name)
-        cols = list(df_raw.columns)
-        text_col, date_candidates, date_col = detect_text_and_date_cols(df_raw)
-        has_date = date_col is not None
-        note = "Detected text column: **{}**".format(text_col)
-        if has_date:
-            note += "; detected date column: **{}**".format(date_col)
-        else:
-            note += "; _no date/timestamp column detected_"
-        return (
-            gr.update(choices=cols, value=text_col),
-            gr.update(choices=date_candidates, value=date_col),
-            gr.update(visible=has_date),
-            gr.update(visible=has_date),
-            gr.update(visible=has_date),
-            gr.update(visible=has_date),
-            gr.update(visible=has_date),
-            note
-        )
-    except Exception:
-        tb = traceback.format_exc()
-        return (
-            gr.update(choices=[], value=None),
-            gr.update(choices=[], value=None),
-            gr.update(visible=False),
-            gr.update(visible=False),
-            gr.update(visible=False),
-            gr.update(visible=False),
-            gr.update(visible=False),
-            f"**Error reading CSV**\n```\n{tb}\n```"
-        )
 # ================= CSV Predict =================
-def predict_csv(file_obj, model_choice: str, text_col_name: str,
-                date_col_name: str, date_from, date_to,
-                freq_choice: str, use_ma: bool):
     try:
-        if file_obj is None:
-            return pd.DataFrame(), go.Figure(), go.Figure(), gr.update(visible=False, value=go.Figure()), "Please upload a CSV.", None
-        df_raw = pd.read_csv(file_obj.name)
-        cols = list(df_raw.columns)
-        col_text = text_col_name if text_col_name in cols else detect_text_and_date_cols(df_raw)[0]
-        texts = [_norm_text(v) for v in df_raw[col_text].tolist()]
-        texts = [t for t in texts if _is_substantive_text(t)]
-        if len(texts) == 0:
-            return pd.DataFrame(), go.Figure(), go.Figure(), gr.update(visible=False, value=go.Figure()), "No valid texts in selected column.", None
-        # predict
-        results = _predict_batch(texts, model_choice)
-        out_df = pd.DataFrame(results)
-        # basic charts
-        fig_bar, fig_pie, info_basic = make_basic_charts(out_df)
-        # time charts (optional)
-        show_time = False
-        fig_line = go.Figure()
-        if date_col_name and (date_col_name in cols):
-            dts = _to_datetime_safe(df_raw[date_col_name])
             if dts.notna().any():
-                df_time = out_df.copy()
-                df_time["__dt__"] = dts
-                df_time = df_time.dropna(subset=["__dt__"])
-                # normalize datepicker values
-                start_ts = _normalize_datepicker_value(date_from)
-                end_ts   = _normalize_datepicker_value(date_to)
-                if start_ts is not None:
-                    df_time = df_time[df_time["__dt__"] >= start_ts]
-                if end_ts is not None:
-                    df_time = df_time[df_time["__dt__"] <= end_ts]
-                if len(df_time) > 0:
-                    fig_line = make_time_chart(df_time, "__dt__", freq_choice, use_ma)
-                    show_time = True
-        # downloadable CSV
-        fd, out_path = tempfile.mkstemp(prefix="pred_", suffix=".csv")
-        os.close(fd)
-        out_df.to_csv(out_path, index=False, encoding="utf-8-sig")
-        info_time = ""
-        if date_col_name:
-            if show_time:
-                info_time = f"\n\nTime chart based on date column: **{date_col_name}**, Freq: **{freq_choice}**, MA: **{use_ma}**"
-            else:
-                info_time = "\n\n_Selected date range has no data OR unable to parse dates._"
-        else:
-            info_time = "\n\n_No date/timestamp column selected — time chart hidden._"
-        info_md = info_basic + info_time
-        return out_df, fig_bar, fig_pie, gr.update(visible=show_time, value=fig_line), info_md, out_path
-    except Exception:
-        tb = traceback.format_exc()
-        return pd.DataFrame(), go.Figure(), go.Figure(), gr.update(visible=False, value=go.Figure()), f"**Error**\n```\n{tb}\n```", None
 # ================= Gradio UI =================
-with gr.Blocks(title="Thai Sentiment (WangchanBERTa Variants)") as demo:
-    gr.Markdown("### Thai Sentiment (WangchanBERTa Variants) — Focus on POS/NEG")
-    model_radio = gr.Radio(choices=AVAILABLE_CHOICES, value=DEFAULT_MODEL, label="เลือกโมเดล")
-    # ---- Batch (Textarea) ----
-    with gr.Tab("Batch (หลายข้อความ)"):
-        t2 = gr.Textbox(lines=8, label="พิมพ์หลายรีวิว (บรรทัดละ 1 รีวิว)")
-        btn_batch = gr.Button("Predict", variant="primary")
-        df2  = gr.Dataframe(label="ผลลัพธ์", interactive=False)
-        bar2 = gr.Plot(label="Label counts (bar)")
-        pie2 = gr.Plot(label="Positive vs Negative (pie)")
-        sum2 = gr.Markdown()
-        btn_batch.click(predict_many, [t2, model_radio], [df2, bar2, pie2, sum2])
-    # ---- CSV Upload ----
     with gr.Tab("CSV Upload"):
         with gr.Row():
-            file_in = gr.File(label="อัปโหลดไฟล์ .csv", file_types=[".csv"])
-            text_dd = gr.Dropdown(label="คอลัมน์ข้อความ", choices=[], value=None)
-            date_dd = gr.Dropdown(label="คอลัมน์วันเวลา (ถ้ามี)", choices=[], value=None)
         with gr.Row():
-            # ใช้ DatePicker แทน Date (รองรับ gradio เวอร์ชันที่ไม่เคยมี gr.Date)
-            date_from = gr.DatePicker(label="เริ่มวันที่", visible=False)
-            date_to   = gr.DatePicker(label="ถึงวันที่", visible=False)
-            freq      = gr.Radio(choices=["D","W","M"], value="D", label="ความถี่ (Day/Week/Month)", visible=False)
-            use_ma    = gr.Checkbox(value=True, label="Moving average (7/4/3)", visible=False)
-        btn_csv = gr.Button("Predict CSV", variant="primary")
-        note_detect = gr.Markdown()
-        df3  = gr.Dataframe(label="ผลลัพธ์", interactive=False)
-        bar3 = gr.Plot(label="Label counts (bar)")
-        pie3 = gr.Plot(label="Positive vs Negative (pie)")
-        line = gr.Plot(label="Reviews over time (POS/NEG)", visible=False)
-        sum3 = gr.Markdown()
-        dl3  = gr.File(label="ดาวน์โหลดผลเป็น CSV", interactive=False)
-        file_in.change(
-            on_file_change, [file_in],
-            [text_dd, date_dd, date_from, date_to, freq, use_ma, line, note_detect]
-        )
-        btn_csv.click(
-            predict_csv,
-            [file_in, model_radio, text_dd, date_dd, date_from, date_to, freq, use_ma],
-            [df3, bar3, pie3, line, sum3, dl3]
-        )
-if __name__ == "__main__":
-    demo.launch()

 # app.py — Thai Sentiment (WangchanBERTa Variants)
+# - Focus on POS/NEG only
+# - Batch + CSV tabs
+# - CSV: auto-detect text/date cols, hide date widgets if no date col
+# - DatePicker fallback to Textbox if component missing
 import os, json, importlib.util, traceback, re, math, tempfile, datetime
 import gradio as gr
 import torch, pandas as pd
 if DEFAULT_MODEL not in AVAILABLE_CHOICES:
     DEFAULT_MODEL = "WCB"
+NEG_COLOR = "#F87171"
+POS_COLOR = "#34D399"
 TEMPLATE  = "plotly_white"
 CACHE = {}
# ================= Date Component Fallback =================
# Pick whichever date widget this Gradio build exposes. ``getattr`` with a
# default already absorbs a missing attribute, so no try/except is needed.
# NOTE(review): newer Gradio releases appear to ship ``gr.DateTime`` instead of
# ``Date``/``DatePicker``; it is not probed here because the value it emits
# would first have to be checked against ``_normalize_date_input`` — confirm.
DateInput = getattr(gr, "Date", None) or getattr(gr, "DatePicker", None)

# True when no date widget exists and a free-text box stands in for it; the UI
# uses this flag to append a "(YYYY-MM-DD)" hint to the field labels.
DATE_FALLBACK_TO_TEXT = DateInput is None
if DATE_FALLBACK_TO_TEXT:
    DateInput = gr.Textbox
+def _normalize_date_input(v):
+    if v is None: return None
+    if isinstance(v, float) and math.isnan(v): return None
+    if isinstance(v, datetime.date): return pd.Timestamp(v)
+    try:
+        ts = pd.to_datetime(v, errors="coerce")
+        return ts if pd.notna(ts) else None
+    except Exception:
+        return None
 # ================= Loader =================
 def _import_models():
     if "models_module" in CACHE:
     key = f"model:{model_name}"
     if key in CACHE:
         return CACHE[key]
     cfg_path = hf_hub_download(REPO_ID, filename=f"{model_name}/config.json", token=HF_TOKEN)
     w_path   = hf_hub_download(REPO_ID, filename=f"{model_name}/model.safetensors", token=HF_TOKEN)
     with open(cfg_path, "r", encoding="utf-8") as f:
         cfg = json.load(f)
     base_model = cfg.get("base_model", "airesearch/wangchanberta-base-att-spm-uncased")
     arch_name  = cfg.get("architecture", model_name)
     tok = AutoTokenizer.from_pretrained(base_model)
     models = _import_models()
     model = models._build(arch_name, base_model, int(cfg.get("num_labels",2)),
                           cfg.get("pooling_after_lstm", "masked_mean"))
     state = load_file(w_path)
     model.load_state_dict(state, strict=False)
     model.eval()
     CACHE[key] = (model, tok, cfg)
     return CACHE[key]
 # ================= Utils =================
+_INVALID_STRINGS = {"-", "--","—","n/a","na","null","none","nan",".","…",""}
 _RE_HAS_LETTER = re.compile(r"[ก-๙A-Za-z]")
+def _norm_text(v):
     if v is None: return ""
     if isinstance(v, float) and math.isnan(v): return ""
     return str(v).strip().strip('"').strip("'").strip(",")
def _is_substantive_text(s, min_chars=2):
    """Tell whether *s* is worth sending to the classifier.

    A string qualifies when it is non-empty, is not one of the placeholder
    values in ``_INVALID_STRINGS`` (case-insensitive), contains at least one
    character matched by ``_RE_HAS_LETTER``, and has at least ``min_chars``
    characters once ASCII spaces are removed.
    """
    if not s or s.lower() in _INVALID_STRINGS:
        return False
    has_letter = _RE_HAS_LETTER.search(s) is not None
    compact_length = len(s.replace(" ", ""))
    return has_letter and compact_length >= min_chars
+def _format_pct(x): return f"{x*100:.2f}%"
+def _to_datetime_safe(s): return pd.to_datetime(s, errors="coerce", infer_datetime_format=True, utc=False)
 # Column names tried first, in this order, when looking for the text column.
 LIKELY_TEXT_COLS = [
     "text",
     "review",
     "message",
     "comment",
     "content",
     "sentence",
     "body",
     "ข้อความ",
     "รีวิว",
 ]
 # Lower-cased column names accepted as date columns without sampling values.
 LIKELY_DATE_COLS = [
     "date",
     "created_at",
     "time",
     "timestamp",
     "datetime",
     "วันที่",
     "วันเวลา",
     "เวลา",
 ]
+def detect_text_and_date_cols(df):
     cols = list(df.columns)
     low = {c.lower(): c for c in cols}
     text_col = None
     for k in LIKELY_TEXT_COLS:
+        if k in low: text_col = low[k]; break
     if text_col is None:
         cand = [c for c in cols if df[c].dtype == object]
         text_col = cand[0] if cand else cols[0]
     date_candidates = []
     for c in cols:
+        if c.lower() in LIKELY_DATE_COLS: date_candidates.append(c); continue
         sample = df[c].head(50)
         if _to_datetime_safe(sample).notna().sum() >= max(3, int(len(sample)*0.2)):
             date_candidates.append(c)
     return text_col, date_candidates, date_col
 # ================= Charts =================
def make_basic_charts(df):
    """Build the bar chart, donut chart and Markdown summary for predictions.

    Args:
        df: Prediction table with a ``label`` column plus the string columns
            ``negative(%)`` and ``positive(%)``; a trailing ``%`` is removed
            before the values are averaged.

    Returns:
        ``(fig_bar, fig_pie, info)`` — two Plotly figures and a Markdown string.
    """
    total = len(df)
    neg_df = df[df["label"] == "negative"]
    pos_df = df[df["label"] == "positive"]
    series = (
        ("negative", len(neg_df), NEG_COLOR),
        ("positive", len(pos_df), POS_COLOR),
    )

    fig_bar = go.Figure()
    for name, count, color in series:
        fig_bar.add_bar(name=name, x=[name], y=[count], marker_color=color)
    fig_bar.update_layout(barmode="group", title="Label counts", template=TEMPLATE)

    fig_pie = go.Figure(
        go.Pie(
            labels=[name for name, _, _ in series],
            values=[count for _, count, _ in series],
            hole=0.35,
            marker=dict(colors=[color for _, _, color in series]),
        )
    )
    fig_pie.update_layout(title="Positive vs Negative", template=TEMPLATE)

    def _mean_pct(column):
        # "12.34%" -> 12.34; anything non-numeric becomes NaN and is ignored by mean().
        return pd.to_numeric(df[column].str.rstrip("%"), errors="coerce").mean()

    neg_avg = _mean_pct("negative(%)")
    pos_avg = _mean_pct("positive(%)")

    info = "\n".join(
        [
            "**Summary**",
            f"- Total: {total}",
            f"- Negative: {len(neg_df)}",
            f"- Positive: {len(pos_df)}",
            f"- Avg negative: {neg_avg:.2f}%",
            f"- Avg positive: {pos_avg:.2f}%",
        ]
    )
    return fig_bar, fig_pie, info
 def _resample_counts(df, date_col, freq):
+    g = df.groupby([pd.Grouper(key=date_col, freq=freq),"label"]).size().unstack(fill_value=0)
+    for c in ["negative","positive"]:
+        if c not in g.columns: g[c]=0
     return g[["negative","positive"]].sort_index()
+def _rolling_window(freq): return 7 if freq=="D" else (4 if freq=="W" else 3)
def make_time_chart(df, date_col, freq, use_ma):
    """Plot negative and positive counts over time as two lines.

    Args:
        df: Prediction table holding a datetime column and a ``label`` column.
        date_col: Name of the datetime column.
        freq: Bucket size passed to ``_resample_counts``.
        use_ma: When truthy, smooth the counts with a rolling mean whose
            window comes from ``_rolling_window(freq)``.

    Returns:
        A Plotly figure with one line per label.
    """
    counts = _resample_counts(df, date_col, freq)
    if use_ma:
        window = _rolling_window(freq)
        # min_periods=1 keeps the first buckets instead of turning them into NaN.
        counts = counts.rolling(window, min_periods=1).mean()

    fig = go.Figure()
    for label, color in (("negative", NEG_COLOR), ("positive", POS_COLOR)):
        fig.add_scatter(
            x=counts.index,
            y=counts[label],
            mode="lines",
            name=label,
            line=dict(color=color),
        )
    fig.update_layout(
        title="Reviews over time (POS/NEG)",
        template=TEMPLATE,
        xaxis_title="Date",
        yaxis_title="Count",
    )
    return fig
 # ================= Core Predict =================
 def _predict_batch(texts, model_name, batch_size=32):
     """Classify *texts* in mini-batches and return one result dict per text.

     Args:
         texts: List of strings to classify.
         model_name: Key passed to ``load_model``.
         batch_size: Number of texts tokenized and scored per forward pass.

     Returns:
         List of dicts with the keys ``review``, ``negative(%)``,
         ``positive(%)`` and ``label``. Probability index 0 is reported as
         negative and index 1 as positive; ties are labelled positive.
     """
     model, tok, cfg = load_model(model_name)
     max_len = cfg.get("max_length", 128)
     rows = []
     for start in range(0, len(texts), batch_size):
         batch = texts[start:start + batch_size]
         encoded = tok(
             batch,
             padding=True,
             truncation=True,
             max_length=max_len,
             return_tensors="pt",
         )
         with torch.no_grad():
             logits = model(encoded["input_ids"], encoded["attention_mask"])
             probs = F.softmax(logits, dim=1).cpu().numpy()
         for text, row in zip(batch, probs):
             neg = float(row[0])
             pos = float(row[1])
             rows.append(
                 {
                     "review": text,
                     "negative(%)": _format_pct(neg),
                     "positive(%)": _format_pct(pos),
                     "label": "positive" if pos >= neg else "negative",
                 }
             )
     return rows
+# ================= Batch =================
def predict_many(text_block, model_choice):
    """Classify every non-trivial line of a multi-line text box.

    Args:
        text_block: Raw text-box content, one review per line (may be ``None``).
        model_choice: Model name forwarded to ``_predict_batch``.

    Returns:
        ``(dataframe, bar_figure, pie_figure, markdown)``. When no usable line
        remains, or when prediction fails, the table and figures are empty and
        the Markdown carries the message or the traceback.
    """
    try:
        lines = (text_block or "").splitlines()
        clean = [t for t in map(_norm_text, lines) if _is_substantive_text(t)]
        if not clean:
            return pd.DataFrame(), go.Figure(), go.Figure(), "No valid text"
        df = pd.DataFrame(_predict_batch(clean, model_choice))
        bar, pie, info = make_basic_charts(df)
        return df, bar, pie, info
    except Exception:
        # UI boundary: report the failure in the page instead of crashing the
        # handler, but let KeyboardInterrupt/SystemExit propagate. The fence
        # stops Markdown from mangling underscores and asterisks in the trace.
        tb = traceback.format_exc()
        return pd.DataFrame(), go.Figure(), go.Figure(), f"**Error**\n```\n{tb}\n```"
# ================= CSV Inspect =================
def _hidden_csv_controls(message):
    """Return the eight updates that empty both dropdowns, hide the five date widgets and show *message*."""
    return (
        gr.update(choices=[], value=None),
        gr.update(choices=[], value=None),
        *(gr.update(visible=False) for _ in range(5)),
        message,
    )


def on_file_change(file_obj):
    """Refresh the CSV tab after a file is selected or cleared.

    Args:
        file_obj: Value of the file input — ``None`` when cleared, otherwise
            an object with a ``.name`` path or the path itself.

    Returns:
        Eight values, in the order the UI wires them: text-column dropdown,
        date-column dropdown, start date, end date, frequency, moving-average
        checkbox, line plot, and a Markdown note. The five date-related
        controls are visible only when a date column was detected.
    """
    if file_obj is None:
        return _hidden_csv_controls("Please upload a CSV")
    try:
        # Accept both a file-like object and a plain path string.
        df = pd.read_csv(getattr(file_obj, "name", file_obj))
        text_col, date_candidates, date_col = detect_text_and_date_cols(df)
    except Exception as exc:
        # Tell the user why the file was rejected rather than a bare label.
        return _hidden_csv_controls(f"Error reading CSV: {exc}")

    has_date = date_col is not None
    date_note = "date col: **{}**".format(date_col) if has_date else "_no date col_"
    note = f"Detected text col: **{text_col}**; " + date_note
    return (
        gr.update(choices=list(df.columns), value=text_col),
        gr.update(choices=date_candidates, value=date_col),
        *(gr.update(visible=has_date) for _ in range(5)),
        note,
    )
 # ================= CSV Predict =================
def predict_csv(file_obj, model_choice, text_col, date_col, date_from, date_to, freq, use_ma):
    """Classify one column of an uploaded CSV and build the result views.

    Args:
        file_obj: Uploaded file (object with ``.name`` or a path); ``None`` if absent.
        model_choice: Model name forwarded to ``_predict_batch``.
        text_col: Column holding the texts; auto-detected when not in the file.
        date_col: Optional column holding dates for the time chart.
        date_from: Optional inclusive lower bound for the time chart.
        date_to: Optional inclusive upper bound; a date without a time of day
            covers that whole day.
        freq: Bucket size for the time chart.
        use_ma: Whether the time chart is smoothed with a moving average.

    Returns:
        ``(table, bar_figure, pie_figure, line_plot_update, markdown, csv_path)``.
        On any failure the table and figures are empty, the line plot is
        hidden, the Markdown explains the problem and the path is ``None``.
    """

    def _fail(message):
        # Same six-slot shape as the success path, with everything blanked.
        return pd.DataFrame(), go.Figure(), go.Figure(), gr.update(visible=False), message, None

    if file_obj is None:
        return _fail("No file")
    try:
        df_raw = pd.read_csv(getattr(file_obj, "name", file_obj))
        cols = list(df_raw.columns)
        if text_col not in cols:
            text_col = detect_text_and_date_cols(df_raw)[0]

        normalized = [_norm_text(v) for v in df_raw[text_col].tolist()]
        # Remember which source rows survive so dates can be matched to them.
        keep = [_is_substantive_text(t) for t in normalized]
        texts = [t for t, ok in zip(normalized, keep) if ok]
        if not texts:
            return _fail("No valid texts")

        out = pd.DataFrame(_predict_batch(texts, model_choice))
        bar, pie, info = make_basic_charts(out)

        fig_line = go.Figure()
        show_time = False
        if date_col and date_col in cols:
            dts = _to_datetime_safe(df_raw[date_col])
            if dts.notna().any():
                df_time = out.copy()
                # Assign positionally from the kept rows only. Assigning the
                # full Series would align on index and pair each prediction
                # with the date of a different source row once any row was
                # filtered out above.
                df_time["__dt__"] = dts[keep].to_numpy()
                df_time = df_time.dropna(subset=["__dt__"])

                start_ts = _normalize_date_input(date_from)
                end_ts = _normalize_date_input(date_to)
                if start_ts is not None:
                    df_time = df_time[df_time["__dt__"] >= start_ts]
                if end_ts is not None:
                    if end_ts == end_ts.normalize():
                        # Date-only bound: include every timestamp on that day.
                        df_time = df_time[df_time["__dt__"] < end_ts + pd.Timedelta(days=1)]
                    else:
                        df_time = df_time[df_time["__dt__"] <= end_ts]
                if len(df_time) > 0:
                    fig_line = make_time_chart(df_time, "__dt__", freq, use_ma)
                    show_time = True

        # The file must outlive this call so the download component can serve it.
        fd, path = tempfile.mkstemp(suffix=".csv")
        os.close(fd)
        out.to_csv(path, index=False, encoding="utf-8-sig")
        return out, bar, pie, gr.update(visible=show_time, value=fig_line), info, path
    except Exception:
        # UI boundary: show the traceback in the page; do not trap
        # KeyboardInterrupt/SystemExit the way a bare ``except:`` would.
        tb = traceback.format_exc()
        return _fail(f"**Error**\n```\n{tb}\n```")
 # ================= Gradio UI =================
# Page layout: a shared model selector, a free-text batch tab and a CSV tab.
# Components are created in display order, so statement order is significant.
with gr.Blocks(title="Thai Sentiment") as demo:
    gr.Markdown("### Thai Sentiment — WangchanBERTa Variants")
    model_radio = gr.Radio(
        choices=AVAILABLE_CHOICES,
        value=DEFAULT_MODEL,
        label="เลือกโมเดล",
    )

    with gr.Tab("Batch"):
        t2 = gr.Textbox(lines=8, label="รีวิว (บรรทัดละ 1)")
        btn2 = gr.Button("Predict", variant="primary")
        df2 = gr.Dataframe()
        bar2 = gr.Plot()
        pie2 = gr.Plot()
        sum2 = gr.Markdown()
        btn2.click(
            predict_many,
            [t2, model_radio],
            [df2, bar2, pie2, sum2],
        )

    with gr.Tab("CSV Upload"):
        with gr.Row():
            file_in = gr.File(file_types=[".csv"])
            text_dd = gr.Dropdown(label="Text col")
            date_dd = gr.Dropdown(label="Date col (opt)")
        with gr.Row():
            # These four start hidden; on_file_change reveals them when the
            # uploaded file has a date column. The format hint is appended
            # only when a plain text box replaces the date widget.
            date_from = DateInput(
                label="เริ่มวันที่" + (" (YYYY-MM-DD)" if DATE_FALLBACK_TO_TEXT else ""),
                visible=False,
            )
            date_to = DateInput(
                label="ถึงวันที่" + (" (YYYY-MM-DD)" if DATE_FALLBACK_TO_TEXT else ""),
                visible=False,
            )
            freq = gr.Radio(choices=["D", "W", "M"], value="D", label="Freq", visible=False)
            use_ma = gr.Checkbox(value=True, label="MA", visible=False)
        btn3 = gr.Button("Predict CSV", variant="primary")
        note = gr.Markdown()
        df3 = gr.Dataframe()
        bar3 = gr.Plot()
        pie3 = gr.Plot()
        line = gr.Plot(visible=False)
        sum3 = gr.Markdown()
        dl = gr.File()

        file_in.change(
            on_file_change,
            [file_in],
            [text_dd, date_dd, date_from, date_to, freq, use_ma, line, note],
        )
        btn3.click(
            predict_csv,
            [file_in, model_radio, text_dd, date_dd, date_from, date_to, freq, use_ma],
            [df3, bar3, pie3, line, sum3, dl],
        )

if __name__ == "__main__":
    demo.launch()