Update app.py
app.py
CHANGED
@@ -1,19 +1,27 @@
+import os
 import gradio as gr
+import datetime, re, requests
 from transformers import pipeline
 from sentence_transformers import SentenceTransformer, util
-import requests, re, datetime
 from concurrent.futures import ThreadPoolExecutor

 # ---------------------------
-#
+# Environment-safe settings
 # ---------------------------
+os.environ["TOKENIZERS_PARALLELISM"] = "false"

-#
+# ---------------------------
+# Load Models (SAFE MODE)
+# ---------------------------
+
+# Claim Extraction (FORCE slow tokenizer)
 claim_model_name = "MoritzLaurer/DeBERTa-v3-base-mnli"
 claim_classifier = pipeline(
     "zero-shot-classification",
     model=claim_model_name,
-
+    tokenizer=claim_model_name,
+    device=-1,
+    use_fast=False  # 🔥 CRITICAL FIX
 )
 claim_labels = ["factual claim", "opinion", "personal anecdote", "other"]

@@ -25,24 +33,30 @@ ai_detector = pipeline(
     device=-1
 )

-#
+# Semantic Model (EmbeddingGemma)
 SEM_MODEL_NAME = "google/embeddinggemma-300m"
 sem_model = SentenceTransformer(SEM_MODEL_NAME)

 # ---------------------------
 # Google Search Config
 # ---------------------------
-GOOGLE_API_KEY = "
-GOOGLE_CX = "
+GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
+GOOGLE_CX = os.getenv("GOOGLE_CX")

 google_quota = {"count": 0, "date": datetime.date.today()}
 GOOGLE_DAILY_LIMIT = 100

+def check_google_quota():
+    global google_quota
+    today = datetime.date.today()
+    if google_quota["date"] != today:
+        google_quota = {"count": 0, "date": today}
+
 # ---------------------------
-#
+# Text Split Helper
 # ---------------------------
 def safe_split_text(text):
-    pattern = r'(?<!\d)[.](?!\d)'
+    pattern = r'(?<!\d)[.](?!\d)|;'
     return [s.strip() for s in re.split(pattern, text) if len(s.strip()) > 10]

 # ---------------------------
@@ -53,11 +67,13 @@ def extract_claims(text, max_claims=20):

     def classify(s):
         out = claim_classifier(s, claim_labels)
-
-
-
+        return {
+            "text": s,
+            "label": out["labels"][0],
+            "score": round(out["scores"][0], 3)
+        }

-    with ThreadPoolExecutor() as ex:
+    with ThreadPoolExecutor(max_workers=4) as ex:
         results = list(ex.map(classify, sentences))

     return results[:max_claims]
@@ -68,18 +84,24 @@ def extract_claims(text, max_claims=20):
 def detect_ai(texts):
     if isinstance(texts, str):
         texts = [texts]
-
+    results = []
     for t in texts:
         r = ai_detector(t)[0]
         label = "AI-generated" if r["label"].lower() in ["fake", "ai-generated"] else "Human"
-
-
+        results.append({
+            "text": t,
+            "label": label,
+            "score": round(r["score"], 3)
+        })
+    return results

 # ---------------------------
-#
+# Keyword + Semantic Fact Check
 # ---------------------------
 def fetch_google_search_semantic(claim, k=3):
+    check_google_quota()
     global google_quota
+
     if google_quota["count"] >= GOOGLE_DAILY_LIMIT:
         return {"keyword": [], "semantic": []}

@@ -91,11 +113,11 @@ def fetch_google_search_semantic(claim, k=3):

     r = requests.get(url).json()
     google_quota["count"] += 1
-    items = r.get("items", [])

+    items = r.get("items", [])
     snippets = [f"{i['title']}: {i['snippet']}" for i in items]
-    keyword_results = snippets[:k]

+    keyword_results = snippets[:k]
     if not snippets:
         return {"keyword": keyword_results, "semantic": []}

@@ -116,7 +138,7 @@ def fetch_google_search_semantic(claim, k=3):
 # ---------------------------
 def predict(text=""):
     if not text.strip():
-        return {"error": "No input"}
+        return {"error": "No input provided"}

     full_ai = detect_ai(text)
     sentences = safe_split_text(text)
@@ -134,7 +156,8 @@ def predict(text=""):
         },
         "claims": claims,
         "claims_ai_detection": claim_ai,
-        "claims_fact_checking": claim_fc
+        "claims_fact_checking": claim_fc,
+        "google_quota_used": google_quota["count"]
     }

 # ---------------------------
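
For reference, a minimal local smoke test sketching how the updated configuration is meant to be used: the Google credentials are no longer hardcoded and must be present in the environment (for example as Space secrets) before app.py is imported, because os.getenv runs at import time. The module name app, the placeholder credential values, and the sample inputs below are illustrative assumptions, not part of this commit.

import os

# Assumption: credentials are supplied by the environment rather than hardcoded;
# the placeholder values below are illustrative only.
os.environ.setdefault("GOOGLE_API_KEY", "<your-google-api-key>")
os.environ.setdefault("GOOGLE_CX", "<your-custom-search-engine-id>")

# Importing the module loads all three models, so treat this as a local smoke test.
from app import safe_split_text, predict

# The new split pattern also breaks on semicolons while leaving decimals such as 3.14 intact.
print(safe_split_text("Water boils at 100 C at sea level; the constant 3.14 is unchanged."))

# predict() returns a dict with AI-detection results, extracted claims,
# fact-checking snippets and the Google quota counter.
print(predict("The Eiffel Tower is located in Paris."))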