Mohammedmarzuk17 committed
Commit 502bdcb · verified · 1 Parent(s): 1c585a6

Update app.py

Files changed (1)
  1. app.py +64 -75
app.py CHANGED
@@ -1,6 +1,6 @@
  import gradio as gr
  from transformers import pipeline
- import feedparser, requests, re, datetime, os, json
+ import feedparser, requests, re, time, datetime

  # ---------------------------
  # Load Models
@@ -31,46 +31,25 @@ RSS_FEEDS = {

  GOOGLE_API_KEY = "AIzaSyAC56onKwR17zd_djUPEfGXQACy9qRjDxw"
  GOOGLE_CX = "87391aed073954cae"
- GOOGLE_DAILY_LIMIT = 100
- QUOTA_FILE = "google_quota.json"
-
- # ---------------------------
- # Quota Management (UTC Reset)
- # ---------------------------
- def load_quota():
-     if os.path.exists(QUOTA_FILE):
-         with open(QUOTA_FILE, "r") as f:
-             data = json.load(f)
-         reset_time = datetime.datetime.fromisoformat(data["reset"])
-         if datetime.datetime.utcnow() > reset_time:
-             return {"count": 0, "reset": (datetime.datetime.utcnow() + datetime.timedelta(days=1)).isoformat()}
-         return data
-     return {"count": 0, "reset": (datetime.datetime.utcnow() + datetime.timedelta(days=1)).isoformat()}
-
- def save_quota(data):
-     with open(QUOTA_FILE, "w") as f:
-         json.dump(data, f)
-
- def can_use_google():
-     quota = load_quota()
-     return quota["count"] < GOOGLE_DAILY_LIMIT
-
- def increment_google_quota():
-     quota = load_quota()
-     quota["count"] += 1
-     save_quota(quota)
+
+ # Google search quota tracking
+ google_quota = {"count": 0, "date": datetime.date.today()}
+ GOOGLE_DAILY_LIMIT = 100

  # ---------------------------
  # Claim Extraction
  # ---------------------------
  def extract_claims(page_text):
-     """Extract top 10 factual claims from page text."""
-     sentences = [s.strip() for s in page_text.split(".") if len(s.strip()) > 5]
+     """Extract top 10 factual claims from page text (split on ., , and ;)"""
+     chunks = re.split(r'[.,;]', page_text)
+     sentences = [s.strip() for s in chunks if len(s.strip()) > 5]
+
      results = []
      for s in sentences:
          out = claim_classifier(s, claim_labels)
          if out["labels"][0] == "factual claim":
              results.append(s)
+
      return results[:10]

  # ---------------------------
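A note on the extract_claims change above: the old version split only on periods, so comma- and semicolon-joined statements reached the classifier as one long fragment; the new re.split(r'[.,;]', ...) breaks them apart, at the cost of sometimes cutting a single claim in two at a comma. A minimal standalone sketch of just the new splitting step, with the zero-shot claim_classifier filter left out since it needs the loaded model:

    import re

    def split_candidates(page_text):
        # New behaviour: split on '.', ',' or ';' and keep fragments longer than 5 chars
        chunks = re.split(r'[.,;]', page_text)
        return [s.strip() for s in chunks if len(s.strip()) > 5]

    print(split_candidates("The tower is 330 m tall, it opened in 1889; see it."))
    # -> ['The tower is 330 m tall', 'it opened in 1889', 'see it']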
@@ -83,29 +62,36 @@ def detect_ai(texts):
      results = []
      for t in texts:
          out = ai_detector(t)
-         results.append({"text": t, "label": out[0]["label"], "score": round(out[0]["score"], 3)})
+         raw_label = out[0]["label"]
+
+         # Map labels to friendlier ones
+         if raw_label.lower() in ["fake", "ai-generated"]:
+             label = "AI-generated"
+         else:
+             label = "Human"
+
+         results.append({
+             "text": t,
+             "label": label,
+             "score": round(out[0]["score"], 3)
+         })
      return results

  # ---------------------------
  # Evidence Gathering
  # ---------------------------
- def fetch_rss(claim):
-     """Search RSS feeds for claim (max 2 results)."""
-     results = []
+ def fetch_rss_articles():
+     articles = []
      for source, url in RSS_FEEDS.items():
          try:
              feed = feedparser.parse(url)
-             for entry in feed.entries[:15]:
-                 if re.search(claim, entry.title, re.IGNORECASE) or re.search(claim, entry.summary, re.IGNORECASE):
-                     results.append(f"[{source}] {entry.title}: {entry.summary}")
-                     if len(results) >= 2:
-                         return results
+             for entry in feed.entries[:5]:
+                 articles.append(f"[{source}] {entry.title}: {entry.summary}")
          except Exception:
              continue
-     return results
+     return articles

  def fetch_wikipedia(claim):
-     """Fetch Wikipedia summary (max 2 results)."""
      try:
          url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{requests.utils.quote(claim)}"
          r = requests.get(url).json()
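The label mapping added to detect_ai normalizes whatever string the detector checkpoint emits into "AI-generated" or "Human". A tiny isolated sketch of the same mapping; the literal labels "Fake" and "Real" below are illustrative, since the actual strings depend on the checkpoint behind ai_detector:

    def normalize_label(raw_label):
        # Known "AI" labels collapse to one friendly name;
        # anything else is treated as human-written.
        if raw_label.lower() in ["fake", "ai-generated"]:
            return "AI-generated"
        return "Human"

    print(normalize_label("Fake"))          # -> AI-generated
    print(normalize_label("Real"))          # -> Human
    print(normalize_label("ai-generated"))  # -> AI-generated

Note also that fetch_rss_articles no longer searches the feeds for the claim: it returns the first five entries of every feed unconditionally, so claim relevance is now enforced only later, by the NLI threshold in fact_check.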
@@ -115,16 +101,22 @@ def fetch_wikipedia(claim):
          return []
      return []

- def fetch_google(claim):
-     """Google Custom Search (max 2 results, quota-limited)."""
-     if not can_use_google():
+ def fetch_google_search(claim):
+     global google_quota
+     today = datetime.date.today()
+     # reset quota daily
+     if google_quota["date"] != today:
+         google_quota = {"count": 0, "date": today}
+
+     if google_quota["count"] >= GOOGLE_DAILY_LIMIT:
          return ["[Google] Daily quota reached (100 queries)."]
+
      try:
          url = f"https://www.googleapis.com/customsearch/v1?q={requests.utils.quote(claim)}&key={GOOGLE_API_KEY}&cx={GOOGLE_CX}"
          r = requests.get(url).json()
-         increment_google_quota()
+         google_quota["count"] += 1
          items = r.get("items", [])
-         return [f"[Google] {item['title']}: {item['snippet']}" for item in items[:2]]
+         return [f"[Google] {item['title']}: {item['snippet']}" for item in items[:3]]
      except Exception:
          return []
 
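The quota bookkeeping moves from a JSON file with a UTC reset timestamp to an in-memory dict keyed by local date. Here is the counting scheme in isolation (the helper name try_consume_google_quota is hypothetical, for illustration):

    import datetime

    google_quota = {"count": 0, "date": datetime.date.today()}
    GOOGLE_DAILY_LIMIT = 100

    def try_consume_google_quota():
        # Same scheme as fetch_google_search: reset when the local date
        # rolls over, then allow at most GOOGLE_DAILY_LIMIT calls per day.
        global google_quota
        today = datetime.date.today()
        if google_quota["date"] != today:
            google_quota = {"count": 0, "date": today}
        if google_quota["count"] >= GOOGLE_DAILY_LIMIT:
            return False
        google_quota["count"] += 1
        return True

One trade-off worth knowing: the in-memory counter resets whenever the process restarts, so it can undercount against Google's actual daily quota, whereas the removed file-based version persisted across restarts.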
@@ -135,19 +127,16 @@ def fact_check(claims, evidence_texts, threshold=0.7):
      results = []
      for c in claims:
          for ev in evidence_texts:
-             try:
-                 out = nli_pipeline(hypothesis=c, premise=ev)
-                 label = out[0]["label"]
-                 score = round(out[0]["score"], 3)
-                 if score >= threshold:
-                     results.append({
-                         "claim": c,
-                         "evidence": ev,
-                         "label": label,
-                         "score": score
-                     })
-             except Exception:
-                 continue
+             out = nli_pipeline(hypothesis=c, premise=ev)
+             label = out[0]["label"]
+             score = round(out[0]["score"], 3)
+             if score >= threshold:
+                 results.append({
+                     "claim": c,
+                     "evidence": ev,
+                     "label": label,
+                     "score": score
+                 })
      return results

  # ---------------------------
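The threshold=0.7 filter is what the updated docstring in predict means by "only strong matches kept": any claim/evidence pair whose top NLI score falls below 0.7 is dropped. Illustrated with a stub in place of nli_pipeline (fake_nli and its scores are invented for the example):

    # Stub standing in for nli_pipeline, for illustration only
    def fake_nli(hypothesis, premise):
        return [{"label": "ENTAILMENT", "score": 0.91 if "1889" in premise else 0.42}]

    pairs = [
        ("The tower opened in 1889", "Construction finished in 1889."),
        ("The tower opened in 1889", "It is repainted every seven years."),
    ]
    kept = [p for p in pairs if fake_nli(hypothesis=p[0], premise=p[1])[0]["score"] >= 0.7]
    print(kept)  # only the 1889 pair survives the 0.7 threshold

Dropping the try/except also means a single failing nli_pipeline call now propagates out of fact_check instead of being skipped.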
@@ -157,32 +146,32 @@ def predict(page_text=""):
      """
      1. Extract claims from page_text
      2. Run AI Detection
-     3. Gather evidence (Google + Wikipedia + RSS, all tied to claims, quota aware)
-     4. Fact-check claims against evidence
+     3. Gather evidence (RSS + Wikipedia + Google, with quota)
+     4. Fact-check claims against evidence (only strong matches kept)
      """
+     # Step 1: Extract claims
      claims = extract_claims(page_text) if page_text else []
+
+     # Step 2: AI detection
      ai_results = detect_ai(claims) if claims else []

-     evidence_map = {}
+     # Step 3: Evidence gathering
+     evidence_texts = []
      for c in claims:
-         evidence_map[c] = []
-         evidence_map[c].extend(fetch_google(c))
-         evidence_map[c].extend(fetch_wikipedia(c))
-         evidence_map[c].extend(fetch_rss(c))
-
-     evidence_texts = [ev for evs in evidence_map.values() for ev in evs]
-
-     fc_results = fact_check(claims, evidence_texts[:20]) if claims and evidence_texts else []
+         evidence_texts.extend(fetch_wikipedia(c))
+         evidence_texts.extend(fetch_google_search(c))
+         evidence_texts.extend(fetch_rss_articles())

-     quota = load_quota()
+     # Step 4: Fact-checking
+     fc_results = fact_check(claims, evidence_texts[:15]) if claims and evidence_texts else []

      return {
          "claims": claims,
          "ai_detection": ai_results,
-         "google_quota_used": quota["count"],
-         "google_quota_reset": quota["reset"],
-         "evidence_samples": {c: evidence_map[c][:3] for c in claims},
-         "fact_checking": fc_results[:15]
+         "google_quota_used": google_quota["count"],
+         "google_quota_reset": str(datetime.datetime.combine(google_quota["date"] + datetime.timedelta(days=1), datetime.time.min)),
+         "evidence_samples": {c: evidence_texts[:2] for c in claims[:2]},  # sample evidence
+         "fact_checking": fc_results[:10]
      }

  # ---------------------------
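End to end, the reworked predict can be exercised without the Gradio UI; the sample text below is made up, and a real run needs the models downloaded plus a valid Google key:

    out = predict("The Eiffel Tower opened in 1889. It is 330 m tall.")
    print(out["claims"])             # up to 10 extracted claims
    print(out["google_quota_used"])  # in-memory counter, resets on restart
    print(out["google_quota_reset"]) # midnight after the counter's date
    for item in out["fact_checking"]:
        print(item["claim"], "->", item["label"], item["score"])

Since fetch_rss_articles() takes no arguments and does not depend on the claim, calling it inside the per-claim loop appends the same feed entries once per claim; hoisting it out of the loop would gather the same evidence without duplicates.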
 