Mohammedmarzuk17 commited on
Commit
b8674d2
·
verified ·
1 Parent(s): f3e44e3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -21
app.py CHANGED
@@ -1,19 +1,27 @@
 
1
  import gradio as gr
 
2
  from transformers import pipeline
3
  from sentence_transformers import SentenceTransformer, util
4
- import requests, re, datetime
5
  from concurrent.futures import ThreadPoolExecutor
6
 
7
  # ---------------------------
8
- # Load Models
9
  # ---------------------------
 
10
 
11
- # Claim Extraction → Zero-Shot Classifier
 
 
 
 
12
  claim_model_name = "MoritzLaurer/DeBERTa-v3-base-mnli"
13
  claim_classifier = pipeline(
14
  "zero-shot-classification",
15
  model=claim_model_name,
16
- device=-1
 
 
17
  )
18
  claim_labels = ["factual claim", "opinion", "personal anecdote", "other"]
19
 
@@ -25,24 +33,30 @@ ai_detector = pipeline(
25
  device=-1
26
  )
27
 
28
- # Semantic Model (CORRECT way for EmbeddingGemma)
29
  SEM_MODEL_NAME = "google/embeddinggemma-300m"
30
  sem_model = SentenceTransformer(SEM_MODEL_NAME)
31
 
32
  # ---------------------------
33
  # Google Search Config
34
  # ---------------------------
35
- GOOGLE_API_KEY = "[REDACTED — API key was committed in plaintext; rotate this credential]"
36
- GOOGLE_CX = "[REDACTED — search-engine ID removed from history view]"
37
 
38
  google_quota = {"count": 0, "date": datetime.date.today()}
39
  GOOGLE_DAILY_LIMIT = 100
40
 
 
 
 
 
 
 
41
  # ---------------------------
42
- # Helpers
43
  # ---------------------------
44
  def safe_split_text(text):
45
- pattern = r'(?<!\d)[.](?!\d)'
46
  return [s.strip() for s in re.split(pattern, text) if len(s.strip()) > 10]
47
 
48
  # ---------------------------
@@ -53,11 +67,13 @@ def extract_claims(text, max_claims=20):
53
 
54
  def classify(s):
55
  out = claim_classifier(s, claim_labels)
56
- lbl = out["labels"][0]
57
- score = round(out["scores"][0], 3)
58
- return {"text": s, "label": lbl, "score": score}
 
 
59
 
60
- with ThreadPoolExecutor() as ex:
61
  results = list(ex.map(classify, sentences))
62
 
63
  return results[:max_claims]
@@ -68,18 +84,24 @@ def extract_claims(text, max_claims=20):
68
  def detect_ai(texts):
69
  if isinstance(texts, str):
70
  texts = [texts]
71
- out = []
72
  for t in texts:
73
  r = ai_detector(t)[0]
74
  label = "AI-generated" if r["label"].lower() in ["fake", "ai-generated"] else "Human"
75
- out.append({"text": t, "label": label, "score": round(r["score"], 3)})
76
- return out
 
 
 
 
77
 
78
  # ---------------------------
79
- # Google + Semantic Fact Check
80
  # ---------------------------
81
  def fetch_google_search_semantic(claim, k=3):
 
82
  global google_quota
 
83
  if google_quota["count"] >= GOOGLE_DAILY_LIMIT:
84
  return {"keyword": [], "semantic": []}
85
 
@@ -91,11 +113,11 @@ def fetch_google_search_semantic(claim, k=3):
91
 
92
  r = requests.get(url).json()
93
  google_quota["count"] += 1
94
- items = r.get("items", [])
95
 
 
96
  snippets = [f"{i['title']}: {i['snippet']}" for i in items]
97
- keyword_results = snippets[:k]
98
 
 
99
  if not snippets:
100
  return {"keyword": keyword_results, "semantic": []}
101
 
@@ -116,7 +138,7 @@ def fetch_google_search_semantic(claim, k=3):
116
  # ---------------------------
117
  def predict(text=""):
118
  if not text.strip():
119
- return {"error": "No input"}
120
 
121
  full_ai = detect_ai(text)
122
  sentences = safe_split_text(text)
@@ -134,7 +156,8 @@ def predict(text=""):
134
  },
135
  "claims": claims,
136
  "claims_ai_detection": claim_ai,
137
- "claims_fact_checking": claim_fc
 
138
  }
139
 
140
  # ---------------------------
 
1
+ import os
2
  import gradio as gr
3
+ import datetime, re, requests
4
  from transformers import pipeline
5
  from sentence_transformers import SentenceTransformer, util
 
6
  from concurrent.futures import ThreadPoolExecutor
7
 
8
  # ---------------------------
9
+ # Environment-safe settings
10
  # ---------------------------
11
+ os.environ["TOKENIZERS_PARALLELISM"] = "false"
12
 
13
+ # ---------------------------
14
+ # Load Models (SAFE MODE)
15
+ # ---------------------------
16
+
17
+ # Claim Extraction (FORCE slow tokenizer)
18
  claim_model_name = "MoritzLaurer/DeBERTa-v3-base-mnli"
19
  claim_classifier = pipeline(
20
  "zero-shot-classification",
21
  model=claim_model_name,
22
+ tokenizer=claim_model_name,
23
+ device=-1,
24
+ use_fast=False # 🔥 CRITICAL FIX
25
  )
26
  claim_labels = ["factual claim", "opinion", "personal anecdote", "other"]
27
 
 
33
  device=-1
34
  )
35
 
36
+ # Semantic Model (EmbeddingGemma)
37
  SEM_MODEL_NAME = "google/embeddinggemma-300m"
38
  sem_model = SentenceTransformer(SEM_MODEL_NAME)
39
 
40
# ---------------------------
# Google Search Config
# ---------------------------
# Credentials come from the environment; never hard-code them.
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
GOOGLE_CX = os.getenv("GOOGLE_CX")

# Tracks how many Custom Search requests were issued on the current date.
google_quota = {"count": 0, "date": datetime.date.today()}
GOOGLE_DAILY_LIMIT = 100

def check_google_quota():
    """Reset the daily request counter when the calendar date rolls over.

    Mutates the module-level ``google_quota`` dict in place by replacing it
    with a fresh zeroed tracker whenever its stored date is stale.
    """
    global google_quota
    current_day = datetime.date.today()
    if google_quota["date"] != current_day:
        google_quota = {"count": 0, "date": current_day}
55
  # ---------------------------
56
+ # Text Split Helper
57
  # ---------------------------
58
def safe_split_text(text):
    """Split *text* into sentence-like chunks.

    Splits on periods that are not part of a decimal number (e.g. "3.14"
    stays intact) and on semicolons; fragments of 10 characters or fewer
    after stripping are discarded.
    """
    sentence_break = re.compile(r'(?<!\d)[.](?!\d)|;')
    chunks = []
    for fragment in sentence_break.split(text):
        fragment = fragment.strip()
        if len(fragment) > 10:
            chunks.append(fragment)
    return chunks
61
 
62
  # ---------------------------
 
67
 
68
  def classify(s):
69
  out = claim_classifier(s, claim_labels)
70
+ return {
71
+ "text": s,
72
+ "label": out["labels"][0],
73
+ "score": round(out["scores"][0], 3)
74
+ }
75
 
76
+ with ThreadPoolExecutor(max_workers=4) as ex:
77
  results = list(ex.map(classify, sentences))
78
 
79
  return results[:max_claims]
 
84
def detect_ai(texts):
    """Run the AI-text detector over one string or a list of strings.

    Returns a list of dicts, one per input, each holding the original text,
    a normalized "AI-generated"/"Human" label, and the rounded confidence
    score from the detector pipeline.
    """
    if isinstance(texts, str):
        texts = [texts]
    results = []
    for snippet in texts:
        prediction = ai_detector(snippet)[0]
        # The detector's raw labels vary by model; normalize to two values.
        is_ai = prediction["label"].lower() in ("fake", "ai-generated")
        results.append({
            "text": snippet,
            "label": "AI-generated" if is_ai else "Human",
            "score": round(prediction["score"], 3),
        })
    return results
97
 
98
  # ---------------------------
99
+ # Keyword + Semantic Fact Check
100
  # ---------------------------
101
  def fetch_google_search_semantic(claim, k=3):
102
+ check_google_quota()
103
  global google_quota
104
+
105
  if google_quota["count"] >= GOOGLE_DAILY_LIMIT:
106
  return {"keyword": [], "semantic": []}
107
 
 
113
 
114
  r = requests.get(url).json()
115
  google_quota["count"] += 1
 
116
 
117
+ items = r.get("items", [])
118
  snippets = [f"{i['title']}: {i['snippet']}" for i in items]
 
119
 
120
+ keyword_results = snippets[:k]
121
  if not snippets:
122
  return {"keyword": keyword_results, "semantic": []}
123
 
 
138
  # ---------------------------
139
  def predict(text=""):
140
  if not text.strip():
141
+ return {"error": "No input provided"}
142
 
143
  full_ai = detect_ai(text)
144
  sentences = safe_split_text(text)
 
156
  },
157
  "claims": claims,
158
  "claims_ai_detection": claim_ai,
159
+ "claims_fact_checking": claim_fc,
160
+ "google_quota_used": google_quota["count"]
161
  }
162
 
163
  # ---------------------------