Mohammedmarzuk17 commited on
Commit
71be51b
·
verified ·
1 Parent(s): 965563c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -20
app.py CHANGED
@@ -29,22 +29,19 @@ GOOGLE_DAILY_LIMIT = 100
29
  # ---------------------------
30
  def extract_claims(page_text, max_claims=20, batch_size=50):
31
  """
32
- Extract top claims from page text:
33
  - Split on '.' first, then split on ',' and ';' but skip numeric/money commas.
34
  - Use zero-shot classification to get factual claim, opinion, or personal anecdote.
35
- - Threaded processing for efficiency.
36
  """
37
  # Step 1: Split text on '.'
38
  sentences = [s.strip() for s in page_text.split('.') if len(s.strip().split()) > 4]
39
 
40
  # Step 2: Function to safely split a sentence on ',' and ';'
41
  def safe_split(s):
42
- # Avoid splitting commas inside numbers like 1,000 or $7,000
43
- pattern = r'(?<![\d\$]),|;' # split on comma not preceded by digit or $ or on semicolon
44
  chunks = re.split(pattern, s)
45
  return [c.strip() for c in chunks if len(c.split()) > 4]
46
 
47
- # Apply safe splitting
48
  refined_sentences = []
49
  for s in sentences:
50
  refined_sentences.extend(safe_split(s))
@@ -52,7 +49,6 @@ def extract_claims(page_text, max_claims=20, batch_size=50):
52
  # Step 3: Function to classify a single sentence
53
  def classify_sentence(s):
54
  out = claim_classifier(s, claim_labels)
55
- # Pick the most important label (factual > opinion > personal anecdote)
56
  label_priority = ["factual claim", "opinion", "personal anecdote"]
57
  for lbl in label_priority:
58
  if lbl in out["labels"]:
@@ -107,38 +103,58 @@ def fetch_google_search(claim):
107
  except Exception:
108
  return []
109
 
 
 
 
 
 
 
110
  # ---------------------------
111
  # Unified Predict Function
112
  # ---------------------------
113
- def predict(page_text=""):
114
  """
115
- 1. Extract claims from page_text
116
- 2. Run AI Detection
117
- 3. Gather evidence (Google only, with quota)
118
- 4. Store evidence directly inside fact_checking (claim → list of 3 summaries)
119
  """
120
- claims_data = extract_claims(page_text) if page_text else []
121
- claims_texts = [c["text"] for c in claims_data]
122
- ai_results = detect_ai(claims_texts) if claims_texts else []
 
 
 
 
123
 
124
- # Gather evidence per claim → stored directly in fact_checking
 
 
 
125
  fact_checking = {c["text"]: fetch_google_search(c["text"]) for c in claims_data}
126
 
127
  return {
 
 
 
 
 
128
  "claims": claims_data,
129
- "ai_detection": ai_results,
 
130
  "google_quota_used": google_quota["count"],
131
- "google_quota_reset": str(datetime.datetime.combine(google_quota["date"] + datetime.timedelta(days=1), datetime.time.min)),
132
- "fact_checking": fact_checking
 
 
133
  }
134
 
135
  # ---------------------------
136
  # Gradio UI
137
  # ---------------------------
138
  with gr.Blocks() as demo:
139
- gr.Markdown("## EduShield AI Backend - Google-only Mode (Safe Claim Splitting & Fact-Check)")
140
 
141
- page_text_input = gr.Textbox(label="Full Page Text", lines=10, placeholder="Paste page text here...")
142
  predict_btn = gr.Button("Run Predict")
143
  output_json = gr.JSON(label="Predict Results")
144
 
 
29
  # ---------------------------
30
  def extract_claims(page_text, max_claims=20, batch_size=50):
31
  """
32
+ Extract top claims from text:
33
  - Split on '.' first, then split on ',' and ';' but skip numeric/money commas.
34
  - Use zero-shot classification to get factual claim, opinion, or personal anecdote.
 
35
  """
36
  # Step 1: Split text on '.'
37
  sentences = [s.strip() for s in page_text.split('.') if len(s.strip().split()) > 4]
38
 
39
  # Step 2: Function to safely split a sentence on ',' and ';'
40
  def safe_split(s):
41
+ pattern = r'(?<![\d\$]),|;' # avoid commas in numbers like 1,000
 
42
  chunks = re.split(pattern, s)
43
  return [c.strip() for c in chunks if len(c.split()) > 4]
44
 
 
45
  refined_sentences = []
46
  for s in sentences:
47
  refined_sentences.extend(safe_split(s))
 
49
  # Step 3: Function to classify a single sentence
50
  def classify_sentence(s):
51
  out = claim_classifier(s, claim_labels)
 
52
  label_priority = ["factual claim", "opinion", "personal anecdote"]
53
  for lbl in label_priority:
54
  if lbl in out["labels"]:
 
103
  except Exception:
104
  return []
105
 
106
# ---------------------------
# Dot-split helper for raw text
# ---------------------------
def split_on_dots(text):
    """Split *text* on periods and return the trimmed fragments
    that contain more than four words (shorter pieces are noise)."""
    fragments = []
    for piece in text.split('.'):
        trimmed = piece.strip()
        if len(trimmed.split()) > 4:
            fragments.append(trimmed)
    return fragments
111
+
112
# ---------------------------
# Unified Predict Function
# ---------------------------
def predict(user_text=""):
    """
    Run both analysis passes over the input text.

    1. Full-text analysis: AI detection on the entire input plus a
       fact-check of each dot-split sentence.
    2. Claim-based analysis: claim extraction, AI detection on the
       extracted claims, and a fact-check per claim.

    Returns a dict with both result sets plus Google-quota bookkeeping,
    or {"error": ...} when no usable text was supplied.
    """
    # Guard against None as well as empty/whitespace-only input
    # (the previous check crashed on None via .strip()).
    if not user_text or not user_text.strip():
        return {"error": "No text provided."}

    # Cache search results so the same query never hits the Google API
    # twice within one call: a claim's text often equals a dot-split
    # sentence, and every request counts against the daily quota.
    search_cache = {}

    def _cached_search(query):
        # NOTE(review): assumes fetch_google_search is deterministic
        # within a single predict() call — confirm if that changes.
        if query not in search_cache:
            search_cache[query] = fetch_google_search(query)
        return search_cache[query]

    # --- Full text analysis ---
    full_ai_result = detect_ai(user_text)
    dot_sentences = split_on_dots(user_text)
    full_fact_checking = {s: _cached_search(s) for s in dot_sentences}

    # --- Claim-based analysis ---
    claims_data = extract_claims(user_text)
    claims_texts = [c["text"] for c in claims_data]
    # detect_ai receives a list here but the full string above —
    # presumably it accepts both; verify against its definition.
    claims_ai_results = detect_ai(claims_texts) if claims_texts else []
    fact_checking = {c["text"]: _cached_search(c["text"]) for c in claims_data}

    return {
        "full_text": {
            "input": user_text,
            "ai_detection": full_ai_result,
            "fact_checking": full_fact_checking
        },
        "claims": claims_data,
        "claims_ai_detection": claims_ai_results,
        "claims_fact_checking": fact_checking,
        "google_quota_used": google_quota["count"],
        # Quota resets at midnight of the day after the recorded date.
        "google_quota_reset": str(datetime.datetime.combine(
            google_quota["date"] + datetime.timedelta(days=1),
            datetime.time.min
        ))
    }
150
 
151
  # ---------------------------
152
  # Gradio UI
153
  # ---------------------------
154
  with gr.Blocks() as demo:
155
+ gr.Markdown("## EduShield AI Backend - Dual Mode (Full-text + Claims)")
156
 
157
+ page_text_input = gr.Textbox(label="Input Text", lines=10, placeholder="Paste text here...")
158
  predict_btn = gr.Button("Run Predict")
159
  output_json = gr.JSON(label="Predict Results")
160