Spaces:

Mohammedmarzuk17
/

Edushield-AI-Backend

Sleeping

App Files Files Community

Mohammedmarzuk17 committed on Sep 18, 2025

Commit

281b438

verified ·

1 Parent(s): fd336f7

Update app.py

Browse files

Files changed (1) hide show

app.py +71 -154

app.py CHANGED Viewed

@@ -1,174 +1,91 @@
 import gradio as gr
-import requests, feedparser, time, threading, re, json, os
-from concurrent.futures import ThreadPoolExecutor
-from sentence_transformers import SentenceTransformer, util
-import nltk
-from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
 # ---------------------------
-# NLTK setup
 # ---------------------------
-nltk.download('punkt')
-# ---------------------------
-# Models
-# ---------------------------
-# Sentence embeddings for semantic similarity
-embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
-# Claim classifier
-claim_model_name = "MoritzLaurer/DeBERTa-v3-base-mnli"
-tokenizer = AutoTokenizer.from_pretrained(claim_model_name, use_fast=False)
-model = AutoModelForSequenceClassification.from_pretrained(claim_model_name)
-claim_classifier = pipeline("zero-shot-classification", model=model, tokenizer=tokenizer)
 claim_labels = ["factual claim", "opinion", "personal anecdote", "other"]
-# AI detector
 ai_detect_model_name = "roberta-base-openai-detector"
-ai_detector = pipeline("text-classification", model=ai_detect_model_name)
-# NLI pipeline
 nli_model_name = "valhalla/distilbart-mnli-12-3"
-nli_pipeline = pipeline("text-classification", model=nli_model_name, tokenizer=nli_model_name)
-# ---------------------------
-# Evidence sources
-# ---------------------------
-RSS_FEEDS = [
-    "https://www.snopes.com/feed/",
-    "https://www.politifact.com/rss/factchecks/",
-    "https://www.factcheck.org/feed/",
-]
-RSS_CACHE = []
-CACHE_TTL = 60 * 60 * 3  # 3 hours
-RSS_LAST_FETCH = 0
-# ---------------------------
-# Helpers
-# ---------------------------
-def clean_text(text):
-    text = re.sub(r'<.*?>', '', text)
-    text = re.sub(r'\s+', ' ', text)
-    return text.strip()
-def fetch_rss_articles():
-    articles = []
-    for url in RSS_FEEDS:
-        try:
-            feed = feedparser.parse(url)
-            for entry in feed.entries[:10]:
-                title = clean_text(entry.get("title", ""))
-                summary = clean_text(entry.get("summary", ""))
-                articles.append({"title": title, "summary": summary})
-        except Exception:
-            continue
-    return articles
-def refresh_rss_cache(force=False):
-    global RSS_CACHE, RSS_LAST_FETCH
-    now = time.time()
-    if force or (now - RSS_LAST_FETCH > CACHE_TTL) or not RSS_CACHE:
-        RSS_CACHE = fetch_rss_articles()
-        RSS_LAST_FETCH = now
-def start_rss_refresher():
-    def loop():
-        while True:
-            refresh_rss_cache(force=True)
-            time.sleep(CACHE_TTL)
-    t = threading.Thread(target=loop, daemon=True)
-    t.start()
 # ---------------------------
-# Claim extraction
 # ---------------------------
-def extract_claims(text):
-    sentences = re.split(r'(?<=[.!?;\n])\s+', text)
-    claims = []
     for s in sentences:
-        s = s.strip()
-        if len(s) < 15:
-            continue
         out = claim_classifier(s, claim_labels)
-        if "factual claim" in out["labels"] and out["scores"][out["labels"].index("factual claim")] > 0.25:
-            claims.append(s)
-    return claims[:10]
-# ---------------------------
-# Semantic RSS matching
-# ---------------------------
-def match_rss_semantic(claim, top_k=2):
-    if not RSS_CACHE:
-        return []
-    claim_emb = embedding_model.encode(claim, convert_to_tensor=True)
-    summaries = [a["summary"] for a in RSS_CACHE]
-    text_embs = embedding_model.encode(summaries, convert_to_tensor=True)
-    scores = util.pytorch_cos_sim(claim_emb, text_embs).cpu().numpy()[0]
-    top_idx = scores.argsort()[::-1][:top_k]
-    matched = [summaries[i] for i in top_idx if scores[i] > 0.3]
-    return matched
-# ---------------------------
-# NLI & AI detection
-# ---------------------------
-def process_evidence_pair(claim, evidence):
-    out = nli_pipeline(f"{claim} </s></s> {evidence}")[0]
-    label = out['label']
-    score = out['score']
-    simplified_label = "Uncertain"
-    if score > 0.6:
-        simplified_label = "True" if label == "ENTAILMENT" else "False" if label == "CONTRADICTION" else "Uncertain"
-    ai_out = ai_detector(claim)[0]
-    ai_score = 1 - ai_out['score'] if ai_out['label'] != "Fake" else ai_out['score']
-    trustworthiness = round((score * 0.7 + ai_score * 0.3) * 100, 1)
-    return {
-        "text": evidence[:300]+"..." if len(evidence)>300 else evidence,
-        "label": simplified_label,
-        "score": round(score,3),
-        "trustworthiness": trustworthiness
-    }
-# ---------------------------
-# Fact-checking
-# ---------------------------
-def fact_check(claims):
     results = []
-    refresh_rss_cache()
-    with ThreadPoolExecutor(max_workers=5) as executor:
-        for c in claims:
-            evidence = match_rss_semantic(c)
-            if not evidence:
-                results.append({"claim": c, "evidence": [], "trustworthiness": 0.0})
-                continue
-            futures = [executor.submit(process_evidence_pair, c, e) for e in evidence]
-            top_evidence = [f.result() for f in futures]
-            results.append({"claim": c, "evidence": top_evidence})
     return results
-# ---------------------------
-# Predict function
-# ---------------------------
-def predict(page_text=""):
-    claims = extract_claims(page_text)
-    fc_results = fact_check(claims) if claims else []
-    return {"claims": claims, "fact_checking": fc_results}
-# ---------------------------
-# Gradio UI
-# ---------------------------
-with gr.Blocks() as demo:
-    gr.Markdown("## EduShield AI - Fact-Checking with AI Models")
-    page_input = gr.Textbox(label="Paste page text", lines=10)
-    predict_btn = gr.Button("Run Predict")
-    output_json = gr.JSON(label="Results")
-    predict_btn.click(fn=predict, inputs=[page_input], outputs=output_json)
 # ---------------------------
-# Launch
-# ---------------------------
-if __name__ == "__main__":
-    refresh_rss_cache(force=True)
-    start_rss_refresher()
-    demo.launch(server_name="0.0.0.0")

 import gradio as gr
+import torch
+from transformers import pipeline
 # ---------------------------
+# Load Models
 # ---------------------------
+# Run on the first CUDA GPU when one is visible; otherwise pass -1, which the
+# transformers pipeline API treats as "CPU". A hard-coded device=0 makes the
+# app fail at start-up on CPU-only hosts.
+_PIPELINE_DEVICE = 0 if torch.cuda.is_available() else -1
+
+# Zero-shot classifier used to decide whether a sentence is a factual claim.
+# NOTE(review): confirm this model id actually exists on the Hugging Face Hub.
+claim_model_name = "microsoft/deberta-v3-base-zeroshot-v1.1"
+claim_classifier = pipeline("zero-shot-classification", model=claim_model_name, device=_PIPELINE_DEVICE)
 claim_labels = ["factual claim", "opinion", "personal anecdote", "other"]
+# Text classifier used by detect_ai() to label text.
 ai_detect_model_name = "roberta-base-openai-detector"
+ai_detector = pipeline("text-classification", model=ai_detect_model_name, device=_PIPELINE_DEVICE)
+# NLI model used by fact_check() to compare a claim against evidence text.
 nli_model_name = "valhalla/distilbart-mnli-12-3"
+nli_pipeline = pipeline("text-classification", model=nli_model_name, tokenizer=nli_model_name, device=_PIPELINE_DEVICE)
 # ---------------------------
+# Functions
 # ---------------------------
+def extract_claims(page_text, max_claims=5):
+    """Return the first ``max_claims`` sentences classified as factual claims.
+
+    ``page_text`` is split on ``"."``; fragments of five characters or fewer
+    (after stripping whitespace) are ignored. Each remaining fragment is run
+    through ``claim_classifier`` with ``claim_labels`` and kept when the
+    first label in the classifier output is ``"factual claim"``.
+
+    Args:
+        page_text: Raw text to scan. Empty or ``None`` yields ``[]``.
+        max_claims: Upper bound on the number of claims returned (default 5).
+
+    Returns:
+        A list of claim strings, in the order they appear in ``page_text``.
+    """
+    results = []
+    if not page_text or max_claims <= 0:
+        return results
+    # Lazy generator: fragments are only produced (and classified) on demand.
+    sentences = (s.strip() for s in page_text.split("."))
     for s in sentences:
+        if len(s) <= 5:
+            continue
         out = claim_classifier(s, claim_labels)
+        if out["labels"][0] == "factual claim":
+            results.append(s)
+            # Stop as soon as the cap is reached so no further model calls
+            # are spent on sentences that would be discarded anyway.
+            if len(results) >= max_claims:
+                break
+    return results
+def detect_ai(texts):
+    """Run ``ai_detector`` over one or more texts.
+
+    Args:
+        texts: A single string or an iterable of strings.
+
+    Returns:
+        A list with one dict per input text, holding the original ``"text"``,
+        the detector's ``"label"`` and its ``"score"`` rounded to 3 decimals.
+    """
+    if isinstance(texts, str):
+        texts = [texts]
     results = []
+    for t in texts:
+        # truncation=True is forwarded to the tokenizer so that text longer
+        # than the model's maximum input length is cut instead of raising.
+        out = ai_detector(t, truncation=True)
+        results.append({"text": t, "label": out[0]["label"], "score": round(out[0]["score"], 3)})
     return results
+def fact_check(claims, evidence_text):
+    """Classify each claim against ``evidence_text`` with ``nli_pipeline``.
+
+    Args:
+        claims: A single claim string or an iterable of claim strings.
+        evidence_text: Text each claim is compared against.
+
+    Returns:
+        A list with one dict per claim, holding the ``"claim"``, the
+        pipeline's ``"label"`` and its ``"score"`` rounded to 3 decimals.
+    """
+    if isinstance(claims, str):
+        claims = [claims]
+    results = []
+    for c in claims:
+        # A text-classification pipeline takes sentence pairs as
+        # {"text": ..., "text_pair": ...}; it has no hypothesis= or
+        # sequence_pair= arguments. The evidence is the premise and the claim
+        # the hypothesis. Passing a one-element list keeps the return value a
+        # list, so out[0] below is valid.
+        pair = {"text": evidence_text, "text_pair": c}
+        out = nli_pipeline([pair], truncation=True)
+        results.append({"claim": c, "label": out[0]["label"], "score": round(out[0]["score"], 3)})
+    return results
 # ---------------------------
+# Unified Predict Function
+# ---------------------------
+def predict(page_text="", selected_text="", evidence_text=""):
+    """
+    Run claim extraction, AI detection and fact-checking for one request.
+
+    1. Extract claims from page_text
+    2. Run AI Detection on claims + selected_text
+    3. Run Fact-Checking on claims + selected_text against evidence_text,
+       only if evidence_text is provided
+
+    Returns a dict with the keys "claims", "ai_detection" and
+    "fact_checking"; each value is a list and is empty when the matching
+    step had nothing to process.
+    """
+    # Extract claims
+    claims = extract_claims(page_text) if page_text else []
+
+    # Combine claims + selected text; the same items feed both later steps
+    targets = claims.copy()
+    if selected_text:
+        targets.append(selected_text)
+    ai_results = detect_ai(targets) if targets else []
+
+    # Fact-checking: only if evidence is provided
+    fc_results = fact_check(targets, evidence_text) if evidence_text else []
+
+    return {
+        "claims": claims,
+        "ai_detection": ai_results,
+        "fact_checking": fc_results
+    }
+
+# ---------------------------
+# Gradio UI
+# ---------------------------
+with gr.Blocks() as demo:
+    gr.Markdown("## EduShield AI Backend - Predict API & UI")
+
+    # Three free-text inputs; they are passed positionally to predict() in
+    # the order listed in `inputs=` below.
+    page_text_input = gr.Textbox(label="Full Page Text", lines=10, placeholder="Paste page text here...")
+    selected_text_input = gr.Textbox(label="Selected Text", lines=5, placeholder="Paste selected text here...")
+    evidence_input = gr.Textbox(label="Evidence Text", lines=5, placeholder="Paste evidence text here...")
+    predict_btn = gr.Button("Run Predict")
+    # The dict returned by predict() is rendered in this JSON component.
+    output_json = gr.JSON(label="Predict Results")
+    predict_btn.click(predict, inputs=[page_text_input, selected_text_input, evidence_input], outputs=output_json)
+
+# ---------------------------
+# Launch
+# ---------------------------
+if __name__ == "__main__":
+    # 0.0.0.0 makes the server listen on all network interfaces, not just
+    # localhost.
+    demo.launch(server_name="0.0.0.0")