Spaces:

Aswin92
/

disaster-tweet-classifier

Sleeping

App Files Files Community

Aswin92 committed on Nov 17, 2025

Commit

2d905bf

verified ·

1 Parent(s): 4fb6e5b

Update app.py

Browse files

Files changed (1) hide show

app.py +101 -117

app.py CHANGED Viewed

@@ -47,7 +47,16 @@ class LSTMClassifier(nn.Module):
         return logits
-# ------------------- Cached model loaders -------------------
 @st.cache_resource
 def load_deberta():
     model_name = "Aswin92/deberta-v3-disaster-tweets"
@@ -70,11 +79,8 @@ def load_distilbert():
 @st.cache_resource
 def load_bilstm():
-    import zipfile
-    # Reuse the DistilBERT tokenizer instead of local tokenizer.json
-    distil_tokenizer, _ = load_distilbert()
-    tokenizer = distil_tokenizer
     model = LSTMClassifier(
         vocab_size=tokenizer.vocab_size,
@@ -86,60 +92,20 @@ def load_bilstm():
         num_classes=2,
     )
-    # Extract BiLSTM weights from zip file
-    zip_filename = "saved_models.zip"
-    state_filename = "bilstm_state_dict.pt"
-    # Debug: Check current directory and files
-    current_dir = os.getcwd()
-    print(f"Current directory: {current_dir}")
-    print(f"Files in current directory: {os.listdir(current_dir)}")
-    # Try different possible paths
-    possible_paths = [
-        zip_filename,
-        f"/app/{zip_filename}",
-        os.path.join(current_dir, zip_filename),
-    ]
-    zip_path_found = None
-    for path in possible_paths:
-        print(f"Checking path: {path} - Exists: {os.path.exists(path)}")
-        if os.path.exists(path):
-            zip_path_found = path
-            break
-    if zip_path_found is None:
         files_in_dir = os.listdir(current_dir)
         raise FileNotFoundError(
-            f"Zip file '{zip_filename}' not found in any expected location.\n"
             f"Current directory: {current_dir}\n"
-            f"Files in directory: {files_in_dir}\n"
-            f"Paths checked: {possible_paths}"
         )
-    print(f"Found zip at: {zip_path_found}")
-    with zipfile.ZipFile(zip_path_found, 'r') as zip_ref:
-        # Check if file exists in zip
-        zip_contents = zip_ref.namelist()
-        print(f"Contents of zip: {zip_contents}")
-        if state_filename not in zip_contents:
-            raise FileNotFoundError(
-                f"'{state_filename}' not found in {zip_path_found}. "
-                f"Available files: {zip_contents}"
-            )
-        # Extract to temporary location and load
-        zip_ref.extract(state_filename, path="/tmp")
-        temp_state_path = f"/tmp/{state_filename}"
-        state_dict = torch.load(temp_state_path, map_location=DEVICE, weights_only=True)
-        # Clean up temporary file
-        os.remove(temp_state_path)
     model.load_state_dict(state_dict)
     model.to(DEVICE)
     model.eval()
@@ -214,13 +180,21 @@ st.set_page_config(page_title="Disaster Tweet Classifier", layout="centered")
 st.title("🌪️ Disaster Tweet Classifier")
 st.write(
     "NLP project on the Kaggle **Disaster Tweets** dataset.\n\n"
-    "Type a tweet once and compare how **DeBERTa-v3**, **DistilBERT**, and a custom **BiLSTM (RNN)** "
-    "decide whether it describes a real disaster."
 )
 # -------- Sidebar controls --------
 with st.sidebar:
-    st.header("⚙️ Thresholds per model")
     thr_deberta = st.slider(
         "DeBERTa-v3 threshold",
@@ -228,6 +202,7 @@ with st.sidebar:
         max_value=0.95,
         value=0.60,
         step=0.05,
     )
     thr_distil = st.slider(
         "DistilBERT threshold",
@@ -235,6 +210,7 @@ with st.sidebar:
         max_value=0.95,
         value=0.80,
         step=0.05,
     )
     thr_bilstm = st.slider(
         "BiLSTM (RNN) threshold",
@@ -242,11 +218,12 @@ with st.sidebar:
         max_value=0.95,
         value=0.35,
         step=0.05,
     )
     st.caption(
         "Each model predicts `P(disaster)`. If that probability is "
-        "≥ its threshold, we classify it as **disaster (1)**."
     )
 # -------- Main input area --------
@@ -262,70 +239,77 @@ tweet_text = st.text_area(
     height=120,
 )
-if st.button("Classify with all models"):
     text = tweet_text.strip()
     if not text:
         st.warning("Please type a tweet first.")
     else:
-        try:
-            with st.spinner("Running all three models..."):
-                configs = [
-                    ("DeBERTa-v3", thr_deberta),
-                    ("DistilBERT", thr_distil),
-                    ("BiLSTM (RNN)", thr_bilstm),
-                ]
-                rows = []
-                for name, thr in configs:
-                    pred_label, prob_not, prob_dis = predict_text(text, name, thr)
-                    rows.append(
-                        {
-                            "Model": name,
-                            "Threshold": thr,
-                            "P_not_disaster": prob_not,
-                            "P_disaster": prob_dis,
-                            "Predicted_label": pred_label,
-                        }
                     )
-            # ---- Table view ----
-            st.subheader("📋 Model outputs")
-            df = pd.DataFrame(rows)
-            # Nice formatting for display
-            df_display = df.copy()
-            df_display["P_not_disaster"] = df_display["P_not_disaster"].map(lambda x: f"{x:.3f}")
-            df_display["P_disaster"] = df_display["P_disaster"].map(lambda x: f"{x:.3f}")
-            df_display["Threshold"] = df_display["Threshold"].map(lambda x: f"{x:.2f}")
-            st.dataframe(df_display, use_container_width=True)
-            # ---- Interactive bar chart comparing P(disaster) ----
-            st.subheader("📊 P(disaster) comparison")
-            chart_df = df[["Model", "P_disaster"]].set_index("Model")
-            st.bar_chart(chart_df)
-            # ---- Per-model summary text ----
-            st.subheader("🔎 Per-model decisions")
-            for row in rows:
-                name = row["Model"]
-                thr = row["Threshold"]
-                p_dis = row["P_disaster"]
-                p_not = row["P_not_disaster"]
-                label = row["Predicted_label"]
-                st.markdown(f"**{name}**")
-                st.write(
-                    f"- P(disaster = 1): `{p_dis:.3f}`\n"
-                    f"- P(not disaster = 0): `{p_not:.3f}`\n"
-                    f"- Threshold: `{thr:.2f}` → prediction = `{label}`"
-                )
-                st.markdown("---")
-        except FileNotFoundError as e:
-            st.error(f"❌ {str(e)}")
-            st.info("Please upload `bilstm_state_dict.pt` to the root of your Space repository.")
-        except Exception as e:
-            st.error(f"❌ An error occurred: {str(e)}")
-            st.exception(e)
 st.markdown("---")
 st.caption(

         return logits
+# ------------------- Shared tokenizer for BiLSTM -------------------
+@st.cache_resource
+def load_shared_tokenizer():
+    """Load tokenizer once for BiLSTM (uses DistilBERT tokenizer)"""
+    model_name = "Aswin92/distilbert-disaster-tweets"
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    return tokenizer
+# ------------------- Individual model loaders -------------------
 @st.cache_resource
 def load_deberta():
     model_name = "Aswin92/deberta-v3-disaster-tweets"
 @st.cache_resource
 def load_bilstm():
+    # Use shared tokenizer instead of loading DistilBERT model
+    tokenizer = load_shared_tokenizer()
     model = LSTMClassifier(
         vocab_size=tokenizer.vocab_size,
         num_classes=2,
     )
+    # Load BiLSTM weights directly
+    state_path = "bilstm_state_dict.pt"
+    if not os.path.exists(state_path):
+        current_dir = os.getcwd()
         files_in_dir = os.listdir(current_dir)
         raise FileNotFoundError(
+            f"BiLSTM weights file '{state_path}' not found.\n"
             f"Current directory: {current_dir}\n"
+            f"Files available: {files_in_dir}\n"
+            f"Please upload 'bilstm_state_dict.pt' directly to your Space root (not in a zip)."
         )
+    state_dict = torch.load(state_path, map_location=DEVICE, weights_only=True)
     model.load_state_dict(state_dict)
     model.to(DEVICE)
     model.eval()
 st.title("🌪️ Disaster Tweet Classifier")
 st.write(
     "NLP project on the Kaggle **Disaster Tweets** dataset.\n\n"
+    "Compare **DeBERTa-v3**, **DistilBERT**, and a custom **BiLSTM (RNN)** "
+    "to decide whether a tweet describes a real disaster."
 )
 # -------- Sidebar controls --------
 with st.sidebar:
+    st.header("⚙️ Model Selection")
+    # Let user select which models to run
+    run_deberta = st.checkbox("Run DeBERTa-v3", value=True)
+    run_distilbert = st.checkbox("Run DistilBERT", value=True)
+    run_bilstm = st.checkbox("Run BiLSTM (RNN)", value=True)
+    st.markdown("---")
+    st.header("⚙️ Thresholds")
     thr_deberta = st.slider(
         "DeBERTa-v3 threshold",
         max_value=0.95,
         value=0.60,
         step=0.05,
+        disabled=not run_deberta,
     )
     thr_distil = st.slider(
         "DistilBERT threshold",
         max_value=0.95,
         value=0.80,
         step=0.05,
+        disabled=not run_distilbert,
     )
     thr_bilstm = st.slider(
         "BiLSTM (RNN) threshold",
         max_value=0.95,
         value=0.35,
         step=0.05,
+        disabled=not run_bilstm,
     )
     st.caption(
         "Each model predicts `P(disaster)`. If that probability is "
+        "≥ its threshold, we classify it as **disaster**."
     )
 # -------- Main input area --------
     height=120,
 )
+if st.button("Classify Tweet"):
     text = tweet_text.strip()
     if not text:
         st.warning("Please type a tweet first.")
     else:
+        # Build list of models to run based on checkboxes
+        configs = []
+        if run_deberta:
+            configs.append(("DeBERTa-v3", thr_deberta))
+        if run_distilbert:
+            configs.append(("DistilBERT", thr_distil))
+        if run_bilstm:
+            configs.append(("BiLSTM (RNN)", thr_bilstm))
+        if not configs:
+            st.warning("Please select at least one model to run.")
+        else:
+            try:
+                with st.spinner(f"Running {len(configs)} model(s)..."):
+                    rows = []
+                    for name, thr in configs:
+                        pred_label, prob_not, prob_dis = predict_text(text, name, thr)
+                        rows.append(
+                            {
+                                "Model": name,
+                                "Threshold": thr,
+                                "P_not_disaster": prob_not,
+                                "P_disaster": prob_dis,
+                                "Predicted_label": pred_label,
+                            }
+                        )
+                # ---- Table view ----
+                st.subheader("📋 Model outputs")
+                df = pd.DataFrame(rows)
+                # Nice formatting for display
+                df_display = df.copy()
+                df_display["P_not_disaster"] = df_display["P_not_disaster"].map(lambda x: f"{x:.3f}")
+                df_display["P_disaster"] = df_display["P_disaster"].map(lambda x: f"{x:.3f}")
+                df_display["Threshold"] = df_display["Threshold"].map(lambda x: f"{x:.2f}")
+                st.dataframe(df_display, use_container_width=True)
+                # ---- Interactive bar chart comparing P(disaster) ----
+                if len(rows) > 1:
+                    st.subheader("📊 P(disaster) comparison")
+                    chart_df = df[["Model", "P_disaster"]].set_index("Model")
+                    st.bar_chart(chart_df)
+                # ---- Per-model summary text ----
+                st.subheader("🔎 Per-model decisions")
+                for row in rows:
+                    name = row["Model"]
+                    thr = row["Threshold"]
+                    p_dis = row["P_disaster"]
+                    p_not = row["P_not_disaster"]
+                    label = row["Predicted_label"]
+                    st.markdown(f"**{name}**")
+                    st.write(
+                        f"- P(disaster = 1): `{p_dis:.3f}`\n"
+                        f"- P(not disaster = 0): `{p_not:.3f}`\n"
+                        f"- Threshold: `{thr:.2f}` → prediction = `{label}`"
                     )
+                    st.markdown("---")
+            except FileNotFoundError as e:
+                st.error(f"❌ {str(e)}")
+                st.info("Please upload `bilstm_state_dict.pt` to the root of your Space repository.")
+            except Exception as e:
+                st.error(f"❌ An error occurred: {str(e)}")
+                st.exception(e)
 st.markdown("---")
 st.caption(