Spaces:

Mohammedmarzuk17
/

Edushield-AI-Backend

Sleeping

App Files Files Community

Mohammedmarzuk17 committed on Sep 19, 2025

Commit

0619122

verified ·

1 Parent(s): 71be51b

Update app.py

Browse files

Files changed (1) hide show

app.py +21 -28

app.py CHANGED Viewed

@@ -25,28 +25,28 @@ google_quota = {"count": 0, "date": datetime.date.today()}
 GOOGLE_DAILY_LIMIT = 100
 # ---------------------------
-# Claim Extraction (Skip splitting numeric/money commas)
 # ---------------------------
 def extract_claims(page_text, max_claims=20, batch_size=50):
     """
     Extract top claims from text:
-    - Split on '.' first, then split on ',' and ';' but skip numeric/money commas.
-    - Use zero-shot classification to get factual claim, opinion, or personal anecdote.
     """
-    # Step 1: Split text on '.'
-    sentences = [s.strip() for s in page_text.split('.') if len(s.strip().split()) > 4]
-    # Step 2: Function to safely split a sentence on ',' and ';'
-    def safe_split(s):
-        pattern = r'(?<![\d\$]),|;'  # avoid commas in numbers like 1,000
-        chunks = re.split(pattern, s)
-        return [c.strip() for c in chunks if len(c.split()) > 4]
-    refined_sentences = []
-    for s in sentences:
-        refined_sentences.extend(safe_split(s))
-    # Step 3: Function to classify a single sentence
     def classify_sentence(s):
         out = claim_classifier(s, claim_labels)
         label_priority = ["factual claim", "opinion", "personal anecdote"]
@@ -55,16 +55,15 @@ def extract_claims(page_text, max_claims=20, batch_size=50):
                 return {"text": s, "label": lbl, "score": round(out["scores"][out["labels"].index(lbl)], 3)}
         return None
-    # Step 4: Threaded classification
     results = []
     with ThreadPoolExecutor() as executor:
-        for r in executor.map(classify_sentence, refined_sentences):
             if r:
                 results.append(r)
-    # Step 5: Limit top claims
     results = sorted(results, key=lambda x: -len(x["text"]))[:max_claims]
     return results
 # ---------------------------
@@ -103,19 +102,13 @@ def fetch_google_search(claim):
     except Exception:
         return []
-# ---------------------------
-# Dot-split helper for raw text
-# ---------------------------
-def split_on_dots(text):
-    return [s.strip() for s in text.split('.') if len(s.strip().split()) > 4]
 # ---------------------------
 # Unified Predict Function
 # ---------------------------
 def predict(user_text=""):
     """
     Runs both:
-    1. Full-text analysis (AI detection on entire input + dot-split fact-check)
     2. Claim-extracted analysis (claim split + AI detection + fact-check)
     """
     if not user_text.strip():
@@ -123,7 +116,7 @@ def predict(user_text=""):
     # --- Full text analysis ---
     full_ai_result = detect_ai(user_text)
-    dot_sentences = split_on_dots(user_text)
     full_fact_checking = {s: fetch_google_search(s) for s in dot_sentences}
     # --- Claim-based analysis ---

 GOOGLE_DAILY_LIMIT = 100
 # ---------------------------
+# Safe Split Helpers
+# ---------------------------
+def safe_split_text(text):
+    """
+    Split text on '.', ',' or ';' and return the stripped pieces longer than 4 words.
+    A '.' or ',' directly followed by a digit (e.g., 1.41, 1,200, $1,200) is kept intact.
+    """
+    pattern = r'[.,](?!\d)|;'  # no lookbehind: "in 2020. Next" must still split after the number
+    return [s.strip() for s in re.split(pattern, text) if len(s.strip().split()) > 4]
+# ---------------------------
+# Claim Extraction
 # ---------------------------
 def extract_claims(page_text, max_claims=20, batch_size=50):
     """
     Extract top claims from text:
+    - Uses safe_split_text for splitting.
+    - Classifies each piece into factual claim, opinion, or anecdote.
     """
+    sentences = safe_split_text(page_text)
+    # Step 1: Function to classify a single sentence
     def classify_sentence(s):
         out = claim_classifier(s, claim_labels)
         label_priority = ["factual claim", "opinion", "personal anecdote"]
                 return {"text": s, "label": lbl, "score": round(out["scores"][out["labels"].index(lbl)], 3)}
         return None
+    # Step 2: Threaded classification
     results = []
     with ThreadPoolExecutor() as executor:
+        for r in executor.map(classify_sentence, sentences):
             if r:
                 results.append(r)
+    # Step 3: Limit top claims
     results = sorted(results, key=lambda x: -len(x["text"]))[:max_claims]
     return results
 # ---------------------------
     except Exception:
         return []
 # ---------------------------
 # Unified Predict Function
 # ---------------------------
 def predict(user_text=""):
     """
     Runs both:
+    1. Full-text analysis (AI detection on entire input + safe-split fact-check)
     2. Claim-extracted analysis (claim split + AI detection + fact-check)
     """
     if not user_text.strip():
     # --- Full text analysis ---
     full_ai_result = detect_ai(user_text)
+    dot_sentences = safe_split_text(user_text)
     full_fact_checking = {s: fetch_google_search(s) for s in dot_sentences}
     # --- Claim-based analysis ---