NKessler committed on
Commit
4ef11a1
·
verified ·
1 Parent(s): 7da623a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -3
app.py CHANGED
@@ -7,6 +7,7 @@ from keybert import KeyBERT
7
  from textblob import TextBlob
8
  from transformers import pipeline
9
  import textstat
 
10
 
11
  # constants
12
  MAX_TEXT_LENGTH = 1500
@@ -36,6 +37,18 @@ def _load_nlp_models() -> typing.Dict[str, typing.Any]:
36
  aggregation_strategy="simple"
37
  )
38
  keyword_extractor = KeyBERT(model="all-mpnet-base-v2")
 
 
 
 
 
 
 
 
 
 
 
 
39
 
40
  return {
41
  "sentiment": sentiment_analyzer,
@@ -165,6 +178,27 @@ def _highlight_keywords(text: str, keywords: typing.List[str]) -> str:
165
  return highlighted_text
166
 
167
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
  # STATE MANAGEMENT
169
  if "results_a" not in st.session_state:
170
  st.session_state.results_a = None
@@ -181,13 +215,23 @@ st.divider()
181
  with st.spinner("Starting NLP models."):
182
  _load_nlp_models()
183
 
 
 
184
  col1, col2 = st.columns(2)
185
 
186
  with col1:
187
- user_article_a = st.text_area("Data Source A", value=ARTICLE_A.strip(), height=220)
 
 
 
 
188
 
189
  with col2:
190
- user_article_b = st.text_area("Data Source B", value=ARTICLE_B.strip(), height=220)
 
 
 
 
191
 
192
  st.write("")
193
 
@@ -202,7 +246,15 @@ if st.session_state.results_a and st.session_state.results_b:
202
  st.divider()
203
  st.markdown("### Framing Analytics & Comparison")
204
 
205
- # Radar Chart spans the top
 
 
 
 
 
 
 
 
206
  st.plotly_chart(_create_comparison_radar_chart(st.session_state.results_a, st.session_state.results_b), use_container_width=True)
207
 
208
  res_col1, res_col2 = st.columns(2)
 
7
  from textblob import TextBlob
8
  from transformers import pipeline
9
  import textstat
10
+ import trafilatura
11
 
12
  # constants
13
  MAX_TEXT_LENGTH = 1500
 
37
  aggregation_strategy="simple"
38
  )
39
  keyword_extractor = KeyBERT(model="all-mpnet-base-v2")
40
+ nli_classifier = pipeline(
41
+ "text-classification",
42
+ model="roberta-large-mnli"
43
+ )
44
+
45
+ return {
46
+ "sentiment": sentiment_analyzer,
47
+ "emotion": emotion_classifier,
48
+ "ner": ner_extractor,
49
+ "keyword": keyword_extractor,
50
+ "nli": nli_classifier,
51
+ }
52
 
53
  return {
54
  "sentiment": sentiment_analyzer,
 
178
  return highlighted_text
179
 
180
 
181
def fetch_article_text(url: str) -> str:
    """Scrape clean article text from a given URL.

    The URL comes from a user-facing text input, so it is validated before
    any network access: only absolute ``http``/``https`` URLs with a host
    are handed to the fetcher. Other schemes (``file://``, ``ftp://``, ...),
    blank input, and malformed URLs are rejected up front.

    Args:
        url: Address of the article to download.

    Returns:
        The extracted article text on success. On failure, a human-readable
        message starting with ``"Error:"`` (the function never raises for a
        bad URL, matching how callers use the result as plain text).
    """
    # Local import keeps this block self-contained; urllib is stdlib.
    from urllib.parse import urlparse

    candidate = (url or "").strip()
    try:
        parsed = urlparse(candidate)
    except ValueError:
        # urlparse raises on some malformed inputs (e.g. broken IPv6 hosts).
        parsed = None

    if parsed is None or parsed.scheme not in ("http", "https") or not parsed.netloc:
        return "Error: Invalid URL. Only http(s) links are supported."

    downloaded = trafilatura.fetch_url(candidate)
    if not downloaded:
        return "Error: Could not fetch URL. It might be protected."

    text = trafilatura.extract(downloaded)
    return text if text else "Error: Could not extract text."
188
+
189
+
190
def check_contradiction(text_a: str, text_b: str, max_chars: int = 400) -> dict:
    """Use the NLI model to judge how two articles relate to each other.

    The opening ``max_chars`` characters of each article are used as the
    premise (``text_a``) and hypothesis (``text_b``) and classified by the
    ``"nli"`` pipeline returned from ``_load_nlp_models()``.

    Args:
        text_a: Text of the first article (premise).
        text_b: Text of the second article (hypothesis).
        max_chars: Number of leading characters taken from each article.
            Defaults to 400; larger values may exceed the model's input
            limit, since no explicit token truncation is requested here.

    Returns:
        A dict with ``"relationship"`` (the label reported by the model;
        callers compare it against ``"CONTRADICTION"`` and ``"ENTAILMENT"``)
        and ``"confidence"`` (the model's score for that label). If either
        text is empty, ``{"relationship": "NEUTRAL", "confidence": 0.0}`` is
        returned without invoking the model.
    """
    premise = (text_a or "").strip()[:max_chars]
    hypothesis = (text_b or "").strip()[:max_chars]

    # Nothing meaningful to compare: skip inference rather than score noise.
    if not premise or not hypothesis:
        return {"relationship": "NEUTRAL", "confidence": 0.0}

    models = _load_nlp_models()

    # Pass the pair through the pipeline's text/text_pair input so the
    # tokenizer inserts the model's own separator tokens, instead of
    # hand-concatenating "</s></s>" into a single string.
    output = models["nli"]({"text": premise, "text_pair": hypothesis})

    # Depending on input form/version the pipeline yields a dict or a
    # single-element list; accept both.
    result = output[0] if isinstance(output, list) else output

    return {"relationship": result["label"], "confidence": result["score"]}
200
+
201
+
202
  # STATE MANAGEMENT
203
  if "results_a" not in st.session_state:
204
  st.session_state.results_a = None
 
215
  with st.spinner("Starting NLP models."):
216
  _load_nlp_models()
217
 
218
+ input_method = st.radio("Input Method", ["Paste Text", "Paste URL"], horizontal=True)
219
+
220
  col1, col2 = st.columns(2)
221
 
222
  with col1:
223
+ if input_method == "Paste Text":
224
+ user_article_a = st.text_area("Data Source A", value=ARTICLE_A.strip(), height=220)
225
+ else:
226
+ url_a = st.text_input("Source A URL")
227
+ user_article_a = fetch_article_text(url_a) if url_a else ""
228
 
229
  with col2:
230
+ if input_method == "Paste Text":
231
+ user_article_b = st.text_area("Data Source B", value=ARTICLE_B.strip(), height=220)
232
+ else:
233
+ url_b = st.text_input("Source B URL")
234
+ user_article_b = fetch_article_text(url_b) if url_b else ""
235
 
236
  st.write("")
237
 
 
246
  st.divider()
247
  st.markdown("### Framing Analytics & Comparison")
248
 
249
+ # Display Contradictions
250
+ nli_result = check_contradiction(user_article_a, user_article_b)
251
+ if nli_result["relationship"] == "CONTRADICTION":
252
+ st.error(f"**NARRATIVE CONTRADICTION** (Confidence: {nli_result['confidence']:.2f}) - These sources are actively disputing each other's foundational facts.")
253
+ elif nli_result["relationship"] == "ENTAILMENT":
254
+ st.success(f"**NARRATIVE ALIGNMENT** (Confidence: {nli_result['confidence']:.2f}) - These sources agree on the core premise.")
255
+ else:
256
+ st.info(f"**NEUTRAL RELATIONSHIP** - These sources are discussing the topic without direct contradiction or alignment.")
257
+
258
  st.plotly_chart(_create_comparison_radar_chart(st.session_state.results_a, st.session_state.results_b), use_container_width=True)
259
 
260
  res_col1, res_col2 = st.columns(2)