Update app.py
Browse files
app.py
CHANGED
|
@@ -29,22 +29,19 @@ GOOGLE_DAILY_LIMIT = 100
|
|
| 29 |
# ---------------------------
|
| 30 |
def extract_claims(page_text, max_claims=20, batch_size=50):
|
| 31 |
"""
|
| 32 |
-
Extract top claims from
|
| 33 |
- Split on '.' first, then split on ',' and ';' but skip numeric/money commas.
|
| 34 |
- Use zero-shot classification to get factual claim, opinion, or personal anecdote.
|
| 35 |
-
- Threaded processing for efficiency.
|
| 36 |
"""
|
| 37 |
# Step 1: Split text on '.'
|
| 38 |
sentences = [s.strip() for s in page_text.split('.') if len(s.strip().split()) > 4]
|
| 39 |
|
| 40 |
# Step 2: Function to safely split a sentence on ',' and ';'
|
| 41 |
def safe_split(s):
|
| 42 |
-
|
| 43 |
-
pattern = r'(?<![\d\$]),|;' # split on comma not preceded by digit or $ or on semicolon
|
| 44 |
chunks = re.split(pattern, s)
|
| 45 |
return [c.strip() for c in chunks if len(c.split()) > 4]
|
| 46 |
|
| 47 |
-
# Apply safe splitting
|
| 48 |
refined_sentences = []
|
| 49 |
for s in sentences:
|
| 50 |
refined_sentences.extend(safe_split(s))
|
|
@@ -52,7 +49,6 @@ def extract_claims(page_text, max_claims=20, batch_size=50):
|
|
| 52 |
# Step 3: Function to classify a single sentence
|
| 53 |
def classify_sentence(s):
|
| 54 |
out = claim_classifier(s, claim_labels)
|
| 55 |
-
# Pick the most important label (factual > opinion > personal anecdote)
|
| 56 |
label_priority = ["factual claim", "opinion", "personal anecdote"]
|
| 57 |
for lbl in label_priority:
|
| 58 |
if lbl in out["labels"]:
|
|
@@ -107,38 +103,58 @@ def fetch_google_search(claim):
|
|
| 107 |
except Exception:
|
| 108 |
return []
|
| 109 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
# ---------------------------
|
| 111 |
# Unified Predict Function
|
| 112 |
# ---------------------------
|
| 113 |
-
def predict(
|
| 114 |
"""
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
4. Store evidence directly inside fact_checking (claim → list of 3 summaries)
|
| 119 |
"""
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
|
| 124 |
-
#
|
|
|
|
|
|
|
|
|
|
| 125 |
fact_checking = {c["text"]: fetch_google_search(c["text"]) for c in claims_data}
|
| 126 |
|
| 127 |
return {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 128 |
"claims": claims_data,
|
| 129 |
-
"
|
|
|
|
| 130 |
"google_quota_used": google_quota["count"],
|
| 131 |
-
"google_quota_reset": str(datetime.datetime.combine(
|
| 132 |
-
|
|
|
|
|
|
|
| 133 |
}
|
| 134 |
|
| 135 |
# ---------------------------
|
| 136 |
# Gradio UI
|
| 137 |
# ---------------------------
|
| 138 |
with gr.Blocks() as demo:
|
| 139 |
-
gr.Markdown("## EduShield AI Backend -
|
| 140 |
|
| 141 |
-
page_text_input = gr.Textbox(label="
|
| 142 |
predict_btn = gr.Button("Run Predict")
|
| 143 |
output_json = gr.JSON(label="Predict Results")
|
| 144 |
|
|
|
|
| 29 |
# ---------------------------
|
| 30 |
def extract_claims(page_text, max_claims=20, batch_size=50):
|
| 31 |
"""
|
| 32 |
+
Extract top claims from text:
|
| 33 |
- Split on '.' first, then split on ',' and ';' but skip numeric/money commas.
|
| 34 |
- Use zero-shot classification to get factual claim, opinion, or personal anecdote.
|
|
|
|
| 35 |
"""
|
| 36 |
# Step 1: Split text on '.'
|
| 37 |
sentences = [s.strip() for s in page_text.split('.') if len(s.strip().split()) > 4]
|
| 38 |
|
| 39 |
# Step 2: Function to safely split a sentence on ',' and ';'
|
| 40 |
def safe_split(s):
|
| 41 |
+
pattern = r'(?<![\d\$]),|;' # avoid commas in numbers like 1,000
|
|
|
|
| 42 |
chunks = re.split(pattern, s)
|
| 43 |
return [c.strip() for c in chunks if len(c.split()) > 4]
|
| 44 |
|
|
|
|
| 45 |
refined_sentences = []
|
| 46 |
for s in sentences:
|
| 47 |
refined_sentences.extend(safe_split(s))
|
|
|
|
| 49 |
# Step 3: Function to classify a single sentence
|
| 50 |
def classify_sentence(s):
|
| 51 |
out = claim_classifier(s, claim_labels)
|
|
|
|
| 52 |
label_priority = ["factual claim", "opinion", "personal anecdote"]
|
| 53 |
for lbl in label_priority:
|
| 54 |
if lbl in out["labels"]:
|
|
|
|
| 103 |
except Exception:
|
| 104 |
return []
|
| 105 |
|
| 106 |
# ---------------------------
# Dot-split helper for raw text
# ---------------------------
def split_on_dots(text):
    """Split *text* on periods and keep only fragments longer than four words."""
    fragments = []
    for piece in text.split('.'):
        piece = piece.strip()
        if len(piece.split()) > 4:
            fragments.append(piece)
    return fragments
|
| 111 |
+
|
| 112 |
# ---------------------------
# Unified Predict Function
# ---------------------------
def predict(user_text=""):
    """
    Runs both:
    1. Full-text analysis (AI detection on entire input + dot-split fact-check)
    2. Claim-extracted analysis (claim split + AI detection + fact-check)
    """
    # Guard clause: nothing to analyze.
    if not user_text.strip():
        return {"error": "No text provided."}

    # --- Full text analysis ---
    whole_text_ai = detect_ai(user_text)
    sentence_evidence = {}
    for sentence in split_on_dots(user_text):
        sentence_evidence[sentence] = fetch_google_search(sentence)

    # --- Claim-based analysis ---
    extracted = extract_claims(user_text)
    claim_texts = [claim["text"] for claim in extracted]
    # detect_ai is only invoked when there is at least one claim to score.
    per_claim_ai = detect_ai(claim_texts) if claim_texts else []
    claim_evidence = {claim["text"]: fetch_google_search(claim["text"]) for claim in extracted}

    # Quota resets at midnight of the day after the recorded quota date.
    next_reset = datetime.datetime.combine(
        google_quota["date"] + datetime.timedelta(days=1),
        datetime.time.min,
    )

    return {
        "full_text": {
            "input": user_text,
            "ai_detection": whole_text_ai,
            "fact_checking": sentence_evidence,
        },
        "claims": extracted,
        "claims_ai_detection": per_claim_ai,
        "claims_fact_checking": claim_evidence,
        "google_quota_used": google_quota["count"],
        "google_quota_reset": str(next_reset),
    }
|
| 150 |
|
| 151 |
# ---------------------------
|
| 152 |
# Gradio UI
|
| 153 |
# ---------------------------
|
| 154 |
with gr.Blocks() as demo:
|
| 155 |
+
gr.Markdown("## EduShield AI Backend - Dual Mode (Full-text + Claims)")
|
| 156 |
|
| 157 |
+
page_text_input = gr.Textbox(label="Input Text", lines=10, placeholder="Paste text here...")
|
| 158 |
predict_btn = gr.Button("Run Predict")
|
| 159 |
output_json = gr.JSON(label="Predict Results")
|
| 160 |
|