Spaces:

MrUtakata
/

ddgnn

Build error

App Files Files Community

MrUtakata committed on Apr 18, 2025

Commit

db2d2c1

verified ·

1 Parent(s): da55453

Update app.py

Browse files

Files changed (1) hide show

app.py +26 -26

app.py CHANGED Viewed

@@ -7,59 +7,59 @@ import numpy as np
 from nltk.corpus import stopwords
 from nltk.tokenize import RegexpTokenizer
-from sklearn.neighbors import NearestNeighbors
 from sklearn.feature_extraction.text import TfidfVectorizer
-# ——— 1) Download NLTK data & set up tokenizer/stopwords ———
 nltk.download('stopwords')
-STOP_WORDS = set(stopwords.words('english'))
-TOKENIZER = RegexpTokenizer(r'\w+')
 def preprocess_text(text: str) -> str:
-    tokens = TOKENIZER.tokenize(text.lower())
-    return " ".join([t for t in tokens if t not in STOP_WORDS])
-# ——— 2) Load saved artifacts once ———
-@st.cache(allow_output_mutation=True)
-def load_artifacts():
     tfidf: TfidfVectorizer = joblib.load("tfidf_vectorizer.pkl")
-    knn: NearestNeighbors = joblib.load("knn_model.pkl")
     sage_model: torch.nn.Module = joblib.load("sage_model.pkl")
     sage_model.eval()
-    return tfidf, knn, sage_model
-tfidf, knn, sage_model = load_artifacts()
 # ——— 3) Streamlit UI ———
 st.title("Disinformation Detection")
 st.write(
-    """Enter a snippet of text below and click **Predict** to see
-    whether it is more likely **True Information** or **Disinformation**."""
 )
-user_input = st.text_area("Article text", height=200)
 if st.button("Predict"):
     if not user_input.strip():
         st.warning("Please enter some text first.")
     else:
         # Preprocess & vectorize
-        clean = preprocess_text(user_input)
-        vec = tfidf.transform([clean]).toarray()
-        x = torch.from_numpy(vec).float()       # shape [1, D]
-        # Build an “empty” graph so SAGEConv still runs (no neighbor messages)
         edge_index = torch.empty((2, 0), dtype=torch.long)
         # Inference
         with torch.no_grad():
-            out = sage_model(x, edge_index)    # [1, 2]
-            probs = torch.exp(out).numpy()[0]  # turn log‑softmax → probs
-        lst = [f"🔵 True information:  {probs[1]:.2%}",
-               f"🔴 Disinformation: {probs[0]:.2%}"]
         st.markdown("### Prediction probabilities")
-        st.write("\n\n".join(lst))
-        pred = "✅ Likely TRUE" if probs[1] > probs[0] else "❌ Likely DISINFORMATION"
-        st.markdown(f"## **{pred}**")

 from nltk.corpus import stopwords
 from nltk.tokenize import RegexpTokenizer
 from sklearn.feature_extraction.text import TfidfVectorizer
+# ——— 1) NLTK setup ———
 nltk.download('stopwords')
+_STOP_WORDS = set(stopwords.words('english'))
+_TOKENIZER = RegexpTokenizer(r'\w+')
 def preprocess_text(text: str) -> str:
+    tokens = _TOKENIZER.tokenize(text.lower())
+    return " ".join([t for t in tokens if t not in _STOP_WORDS])
+# ——— 2) Load artifacts once ———
+@st.cache_resource
+def load_resources():
     tfidf: TfidfVectorizer = joblib.load("tfidf_vectorizer.pkl")
     sage_model: torch.nn.Module = joblib.load("sage_model.pkl")
     sage_model.eval()
+    return tfidf, sage_model
+tfidf, sage_model = load_resources()
 # ——— 3) Streamlit UI ———
 st.title("Disinformation Detection")
 st.write(
+    """
+    Paste or type a snippet of text below and click **Predict**.
+    The model will output the probability it’s **True Information** vs. **Disinformation**.
+    """
 )
+user_input = st.text_area("Your text here", height=200)
 if st.button("Predict"):
     if not user_input.strip():
         st.warning("Please enter some text first.")
     else:
         # Preprocess & vectorize
+        cleaned = preprocess_text(user_input)
+        vec = tfidf.transform([cleaned]).toarray()
+        x = torch.from_numpy(vec).float()   # shape [1, D]
+        # Build an “empty” graph so the SAGEConv layers run (no neighbor messages)
         edge_index = torch.empty((2, 0), dtype=torch.long)
         # Inference
         with torch.no_grad():
+            logits = sage_model(x, edge_index)        # [1, 2]
+            probs = torch.exp(logits).numpy()[0]      # turn log‑softmax → probs
+        # Display
         st.markdown("### Prediction probabilities")
+        st.write(f"• 🔵 True information:  {probs[1]:.2%}")
+        st.write(f"• 🔴 Disinformation:    {probs[0]:.2%}")
+        label = "✅ Likely TRUE" if probs[1] > probs[0] else "❌ Likely DISINFORMATION"
+        st.markdown(f"## **{label}**")