rahideer committed on
Commit
b0efa4e
·
verified ·
1 Parent(s): 5b325c2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -10
app.py CHANGED
@@ -1,17 +1,28 @@
1
- import streamlit as st
 
2
  import pandas as pd
3
  import torch
4
  from sentence_transformers import SentenceTransformer, util
5
  from transformers import pipeline
 
6
 
7
  st.set_page_config(page_title="News Fact Checker", page_icon="πŸ“°")
8
 
 
9
  @st.cache_data
10
- def load_data():
11
- df = pd.read_csv("climate/ag_news_csv/train.csv", header=None, names=["label", "title", "description"])
 
 
 
 
 
 
 
12
  df["text"] = df["title"] + ". " + df["description"]
13
- return df.head(1000) # limit for faster app
14
 
 
15
  @st.cache_resource
16
  def load_models():
17
  embedder = SentenceTransformer('all-MiniLM-L6-v2')
@@ -19,22 +30,24 @@ def load_models():
19
  return embedder, summarizer
20
 
21
  st.title("πŸ“° News Fact Checker")
22
- st.markdown("Enter a news-related **claim**. We'll retrieve climate news and give you a summary to verify or refute it.")
23
 
24
- claim = st.text_input("πŸ” Enter your claim:")
25
- data = load_data()
 
26
  embedder, summarizer = load_models()
27
 
 
28
  if claim:
29
- with st.spinner("Retrieving relevant news..."):
30
  corpus = data["text"].tolist()
31
  corpus_embeddings = embedder.encode(corpus, convert_to_tensor=True)
32
  query_embedding = embedder.encode(claim, convert_to_tensor=True)
33
- hits = util.semantic_search(query_embedding, corpus_embeddings, top_k=3)[0]
34
 
 
35
  top_passages = [corpus[hit['corpus_id']] for hit in hits]
36
- combined = " ".join(top_passages)
37
 
 
38
  if len(combined) > 1024:
39
  combined = combined[:1024]
40
 
@@ -42,3 +55,7 @@ if claim:
42
 
43
  st.markdown("### βœ… Fact-Checked Summary")
44
  st.success(summary)
 
 
 
 
 
1
+ import zipfile
2
+ import os
3
  import pandas as pd
4
  import torch
5
  from sentence_transformers import SentenceTransformer, util
6
  from transformers import pipeline
7
+ import streamlit as st
8
 
9
  st.set_page_config(page_title="News Fact Checker", page_icon="πŸ“°")
10
 
11
# Step 1: Unzip dataset (only once)
@st.cache_data
def extract_dataset():
    """Ensure the AG News CSV is on disk (extracting climate.zip if needed) and load it.

    Returns a DataFrame limited to the first 1000 rows, with the raw
    label/title/description columns plus a combined "text" column.
    Cached via Streamlit so extraction and parsing run once per session.
    """
    archive = "climate.zip"
    target_dir = "climate"
    csv_path = os.path.join(target_dir, "ag_news_csv", "train.csv")

    # Only unpack the archive when the CSV has not been extracted yet.
    if not os.path.exists(csv_path):
        with zipfile.ZipFile(archive, 'r') as zf:
            zf.extractall(target_dir)

    frame = pd.read_csv(csv_path, header=None, names=["label", "title", "description"])
    frame["text"] = frame["title"] + ". " + frame["description"]
    return frame.head(1000)  # Sample only top 1000 rows
24
 
25
+ # Step 2: Load embedding model + summarizer
26
  @st.cache_resource
27
  def load_models():
28
  embedder = SentenceTransformer('all-MiniLM-L6-v2')
 
30
  return embedder, summarizer
31
 
32
  st.title("πŸ“° News Fact Checker")
33
+ st.markdown("Enter a news-related **claim** and get back a summary based on real climate news articles to help verify it.")
34
 
35
+ # Step 3: UI
36
+ claim = st.text_input("πŸ” Enter your claim here:")
37
+ data = extract_dataset()
38
  embedder, summarizer = load_models()
39
 
40
+ # Step 4: Process and return result
41
  if claim:
42
+ with st.spinner("πŸ” Searching relevant news..."):
43
  corpus = data["text"].tolist()
44
  corpus_embeddings = embedder.encode(corpus, convert_to_tensor=True)
45
  query_embedding = embedder.encode(claim, convert_to_tensor=True)
 
46
 
47
+ hits = util.semantic_search(query_embedding, corpus_embeddings, top_k=3)[0]
48
  top_passages = [corpus[hit['corpus_id']] for hit in hits]
 
49
 
50
+ combined = " ".join(top_passages)
51
  if len(combined) > 1024:
52
  combined = combined[:1024]
53
 
 
55
 
56
  st.markdown("### βœ… Fact-Checked Summary")
57
  st.success(summary)
58
+
59
+ with st.expander("πŸ”Ž Top Relevant News Passages"):
60
+ for i, passage in enumerate(top_passages, 1):
61
+ st.markdown(f"**Snippet {i}:** {passage}")