MrUtakata committed on
Commit
da55453
·
verified ·
1 Parent(s): 19518b7

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +65 -0
app.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import joblib
3
+ import nltk
4
+ import torch
5
+ import torch.nn.functional as F
6
+ import numpy as np
7
+
8
+ from nltk.corpus import stopwords
9
+ from nltk.tokenize import RegexpTokenizer
10
+ from sklearn.neighbors import NearestNeighbors
11
+ from sklearn.feature_extraction.text import TfidfVectorizer
12
+
13
+ # ——— 1) Download NLTK data & set up tokenizer/stopwords ———
14
# Streamlit re-executes this script from top to bottom on every user
# interaction, so only contact the NLTK download server when the stop-word
# corpus is not already installed locally.
try:
    nltk.data.find("corpora/stopwords")
except LookupError:
    nltk.download("stopwords", quiet=True)

# English stop words as a set for O(1) membership tests in preprocess_text.
STOP_WORDS = set(stopwords.words("english"))
# Splits text into runs of word characters, discarding punctuation/whitespace.
TOKENIZER = RegexpTokenizer(r"\w+")
17
+
18
def preprocess_text(text: str) -> str:
    """Normalize raw text before it is handed to the TF-IDF vectorizer.

    The text is lower-cased, split with the module-level ``TOKENIZER`` and
    every token found in ``STOP_WORDS`` is discarded.

    Args:
        text: Raw input text.

    Returns:
        The remaining tokens joined by single spaces (an empty string when
        no token survives).
    """
    kept = []
    for word in TOKENIZER.tokenize(text.lower()):
        if word in STOP_WORDS:
            continue
        kept.append(word)
    return " ".join(kept)
21
+
22
+ # ——— 2) Load saved artifacts once ———
23
# st.cache is deprecated in favour of st.cache_resource for unserializable
# global resources such as ML models; fall back to the legacy decorator only
# on Streamlit versions that predate cache_resource.
if hasattr(st, "cache_resource"):
    _cache_artifacts = st.cache_resource
else:
    _cache_artifacts = st.cache(allow_output_mutation=True)


@_cache_artifacts
def load_artifacts():
    """Load the persisted model artifacts, cached across Streamlit reruns.

    Reads three files from the current working directory:
    ``tfidf_vectorizer.pkl``, ``knn_model.pkl`` and ``sage_model.pkl``.
    The loaded SAGE model is switched to evaluation mode before returning.

    Returns:
        A ``(tfidf, knn, sage_model)`` tuple.

    Raises:
        FileNotFoundError: If any of the artifact files is missing.
    """
    # SECURITY NOTE: joblib.load is pickle-based and can execute arbitrary
    # code while deserializing; only ship artifact files from a trusted source.
    tfidf: TfidfVectorizer = joblib.load("tfidf_vectorizer.pkl")
    knn: NearestNeighbors = joblib.load("knn_model.pkl")
    sage_model: torch.nn.Module = joblib.load("sage_model.pkl")
    # Inference only: put the module in evaluation mode.
    sage_model.eval()
    return tfidf, knn, sage_model
30
+
31
# Artifacts are loaded through the cached loader, so Streamlit reruns reuse them.
tfidf, knn, sage_model = load_artifacts()

# ——— 3) Streamlit UI ———
st.title("Disinformation Detection")
st.write(
    """Enter a snippet of text below and click **Predict** to see
whether it is more likely **True Information** or **Disinformation**."""
)

user_input = st.text_area("Article text", height=200)

if st.button("Predict"):
    if not user_input.strip():
        # Nothing but whitespace was submitted.
        st.warning("Please enter some text first.")
    else:
        # Preprocess & vectorize: one document -> one dense row vector.
        clean = preprocess_text(user_input)
        vec = tfidf.transform([clean]).toarray()
        x = torch.from_numpy(vec).float()  # expected shape [1, D]

        # Build an "empty" graph (zero edges) so the model still runs on the
        # single node, with no neighbor messages.
        edge_index = torch.empty((2, 0), dtype=torch.long)

        # Inference without gradient tracking.
        with torch.no_grad():
            out = sage_model(x, edge_index)  # expected shape [1, 2]
            # Assumes the model emits log-softmax scores, so exp() yields
            # probabilities. TODO confirm against the training code.
            probs = torch.exp(out).numpy()[0]

        # Index 1 is reported as "true information", index 0 as "disinformation".
        lst = [
            f"🔵 True information: {probs[1]:.2%}",
            f"🔴 Disinformation: {probs[0]:.2%}",
        ]
        st.markdown("### Prediction probabilities")
        st.write("\n\n".join(lst))

        pred = "✅ Likely TRUE" if probs[1] > probs[0] else "❌ Likely DISINFORMATION"
        st.markdown(f"## **{pred}**")