Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -7,6 +7,7 @@ from keybert import KeyBERT
|
|
| 7 |
from textblob import TextBlob
|
| 8 |
from transformers import pipeline
|
| 9 |
import textstat
|
|
|
|
| 10 |
|
| 11 |
# constants
|
| 12 |
MAX_TEXT_LENGTH = 1500
|
|
@@ -36,6 +37,18 @@ def _load_nlp_models() -> typing.Dict[str, typing.Any]:
|
|
| 36 |
aggregation_strategy="simple"
|
| 37 |
)
|
| 38 |
keyword_extractor = KeyBERT(model="all-mpnet-base-v2")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
|
| 40 |
return {
|
| 41 |
"sentiment": sentiment_analyzer,
|
|
@@ -165,6 +178,27 @@ def _highlight_keywords(text: str, keywords: typing.List[str]) -> str:
|
|
| 165 |
return highlighted_text
|
| 166 |
|
| 167 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 168 |
# STATE MANAGEMENT
|
| 169 |
if "results_a" not in st.session_state:
|
| 170 |
st.session_state.results_a = None
|
|
@@ -181,13 +215,23 @@ st.divider()
|
|
| 181 |
with st.spinner("Starting NLP models."):
|
| 182 |
_load_nlp_models()
|
| 183 |
|
|
|
|
|
|
|
| 184 |
col1, col2 = st.columns(2)
|
| 185 |
|
| 186 |
with col1:
|
| 187 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 188 |
|
| 189 |
with col2:
|
| 190 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 191 |
|
| 192 |
st.write("")
|
| 193 |
|
|
@@ -202,7 +246,15 @@ if st.session_state.results_a and st.session_state.results_b:
|
|
| 202 |
st.divider()
|
| 203 |
st.markdown("### Framing Analytics & Comparison")
|
| 204 |
|
| 205 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 206 |
st.plotly_chart(_create_comparison_radar_chart(st.session_state.results_a, st.session_state.results_b), use_container_width=True)
|
| 207 |
|
| 208 |
res_col1, res_col2 = st.columns(2)
|
|
|
|
| 7 |
from textblob import TextBlob
|
| 8 |
from transformers import pipeline
|
| 9 |
import textstat
|
| 10 |
+
import trafilatura
|
| 11 |
|
| 12 |
# constants
|
| 13 |
MAX_TEXT_LENGTH = 1500
|
|
|
|
| 37 |
aggregation_strategy="simple"
|
| 38 |
)
|
| 39 |
keyword_extractor = KeyBERT(model="all-mpnet-base-v2")
|
| 40 |
+
nli_classifier = pipeline(
|
| 41 |
+
"text-classification",
|
| 42 |
+
model="roberta-large-mnli"
|
| 43 |
+
)
|
| 44 |
+
|
| 45 |
+
return {
|
| 46 |
+
"sentiment": sentiment_analyzer,
|
| 47 |
+
"emotion": emotion_classifier,
|
| 48 |
+
"ner": ner_extractor,
|
| 49 |
+
"keyword": keyword_extractor,
|
| 50 |
+
"nli": nli_classifier,
|
| 51 |
+
}
|
| 52 |
|
| 53 |
return {
|
| 54 |
"sentiment": sentiment_analyzer,
|
|
|
|
| 178 |
return highlighted_text
|
| 179 |
|
| 180 |
|
| 181 |
+
def fetch_article_text(url: str) -> str:
    """Scrape clean article text from a given URL.

    Args:
        url: Address of the article page. Surrounding whitespace is ignored.
            URLs that spell out a scheme other than http/https are rejected
            without any network access.

    Returns:
        The extracted article text on success. On failure, one of the
        existing ``"Error: ..."`` strings is returned instead of raising, so
        callers that treat the result as plain text keep working.
    """
    fetch_error = "Error: Could not fetch URL. It might be protected."

    # The URL is typed by the user, so normalise it before using it.
    cleaned_url = url.strip() if url else ""
    if not cleaned_url:
        return fetch_error

    # Only web pages make sense for article scraping; refuse any explicitly
    # spelled-out scheme that is not http(s) (e.g. file://, ftp://).
    scheme, separator, _ = cleaned_url.partition("://")
    if separator and scheme.lower() not in ("http", "https"):
        return fetch_error

    downloaded = trafilatura.fetch_url(cleaned_url)
    if not downloaded:
        return fetch_error

    text = trafilatura.extract(downloaded)
    return text if text else "Error: Could not extract text."
|
| 188 |
+
|
| 189 |
+
|
| 190 |
+
def check_contradiction(text_a: str, text_b: str) -> dict:
    """Use NLI to see whether the arguments of two articles contradict.

    Only the first 400 characters of each text are compared; ``text_a`` is
    used as the premise and ``text_b`` as the hypothesis.

    Args:
        text_a: Text of the first article.
        text_b: Text of the second article.

    Returns:
        A dict with two keys: ``"relationship"`` (the label reported by the
        ``"nli"`` classifier) and ``"confidence"`` (its score). When either
        text is blank the classifier is not run and
        ``{"relationship": "NEUTRAL", "confidence": 0.0}`` is returned.
    """
    premise = (text_a or "")[:400].strip()
    hypothesis = (text_b or "")[:400].strip()

    # Nothing to compare: avoid running the model on empty input, which
    # would yield an arbitrary label.
    if not premise or not hypothesis:
        return {"relationship": "NEUTRAL", "confidence": 0.0}

    models = _load_nlp_models()

    # Hand the pair to the pipeline as text/text_pair so the tokenizer adds
    # the separator tokens itself, instead of splicing "</s></s>" into the
    # raw string.
    output = models["nli"]({"text": premise, "text_pair": hypothesis})

    # Depending on the input form the pipeline may return a single dict or a
    # one-element list; accept both.
    result = output[0] if isinstance(output, list) else output

    return {"relationship": result["label"], "confidence": result["score"]}
|
| 200 |
+
|
| 201 |
+
|
| 202 |
# STATE MANAGEMENT
|
| 203 |
if "results_a" not in st.session_state:
|
| 204 |
st.session_state.results_a = None
|
|
|
|
| 215 |
# Warm up the NLP models behind a spinner before any input widgets render.
with st.spinner("Starting NLP models."):
    _load_nlp_models()

# The user picks between pasting raw text and supplying article URLs.
input_method = st.radio(
    "Input Method",
    ["Paste Text", "Paste URL"],
    horizontal=True,
)

col1, col2 = st.columns(2)

# NOTE(review): on failure fetch_article_text returns an "Error: ..." string,
# which is stored below as if it were article text - confirm that downstream
# analysis is meant to receive it.
with col1:
    if input_method != "Paste Text":
        url_a = st.text_input("Source A URL")
        if url_a:
            user_article_a = fetch_article_text(url_a)
        else:
            user_article_a = ""
    else:
        user_article_a = st.text_area(
            "Data Source A",
            value=ARTICLE_A.strip(),
            height=220,
        )

with col2:
    if input_method != "Paste Text":
        url_b = st.text_input("Source B URL")
        if url_b:
            user_article_b = fetch_article_text(url_b)
        else:
            user_article_b = ""
    else:
        user_article_b = st.text_area(
            "Data Source B",
            value=ARTICLE_B.strip(),
            height=220,
        )

st.write("")
|
| 235 |
|
| 236 |
st.write("")
|
| 237 |
|
|
|
|
| 246 |
st.divider()
|
| 247 |
st.markdown("### Framing Analytics & Comparison")
|
| 248 |
|
| 249 |
+
# Display Contradictions
|
| 250 |
+
nli_result = check_contradiction(user_article_a, user_article_b)
|
| 251 |
+
if nli_result["relationship"] == "CONTRADICTION":
|
| 252 |
+
st.error(f"**NARRATIVE CONTRADICTION** (Confidence: {nli_result['confidence']:.2f}) - These sources are actively disputing each other's foundational facts.")
|
| 253 |
+
elif nli_result["relationship"] == "ENTAILMENT":
|
| 254 |
+
st.success(f"**NARRATIVE ALIGNMENT** (Confidence: {nli_result['confidence']:.2f}) - These sources agree on the core premise.")
|
| 255 |
+
else:
|
| 256 |
+
st.info(f"**NEUTRAL RELATIONSHIP** - These sources are discussing the topic without direct contradiction or alignment.")
|
| 257 |
+
|
| 258 |
st.plotly_chart(_create_comparison_radar_chart(st.session_state.results_a, st.session_state.results_b), use_container_width=True)
|
| 259 |
|
| 260 |
res_col1, res_col2 = st.columns(2)
|