yasserrmd committed on
Commit
4e7fa9f
·
verified ·
1 Parent(s): 602cd0f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -28
app.py CHANGED
@@ -29,45 +29,58 @@ def check_plagiarism(text):
29
 
30
  # Tokenize and process the input text
31
  inputs = tokenizer(text, return_tensors="pt").to(DEVICE)
32
-
 
33
  # Generate output with model, capturing scores (logits)
34
  with torch.no_grad():
35
  outputs = model.generate(
36
  inputs['input_ids'],
37
- max_length=inputs['input_ids'].shape[1] + 50, # Generate up to 50 additional tokens
38
  output_scores=True,
39
  return_dict_in_generate=True
40
  )
41
 
42
- # Initialize empty string to store highlighted output
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  highlighted_text = ""
44
- is_watermarked = False
45
-
46
- try:
47
- # Extract generated tokens and their scores
48
- generated_tokens = outputs.sequences[0]
49
- token_scores = outputs.scores
50
-
51
- # Loop through each generated token and its corresponding score
52
- for token_id, score in zip(generated_tokens, token_scores):
53
- # Apply SynthIDLogitsProcessor to each score by calling it with 'scores=score'
54
- processed_score = logits_processor(scores=score)
55
- token_text = tokenizer.decode(token_id.unsqueeze(0)) # Decode token_id for individual token text
56
-
57
- # If processed score indicates watermark, highlight this token
58
- if processed_score.mean().item() > 0.5:
59
- is_watermarked = True
60
- highlighted_text += f"<mark>{token_text}</mark>" # Highlight AI-generated content
61
- else:
62
- highlighted_text += token_text
63
-
64
- if is_watermarked:
65
- return f"Flagged as AI-generated content (Academic Integrity Warning): {highlighted_text}"
66
  else:
67
- return "Content appears to be human-generated."
 
 
 
 
 
 
68
 
69
- except Exception as e:
70
- return f"Error in detection process: {e}"
71
 
72
  # Define the Gradio interface
73
  def create_plagiarism_checker():
 
29
 
30
  # Tokenize and process the input text
31
  inputs = tokenizer(text, return_tensors="pt").to(DEVICE)
32
+ inputs_len = inputs['input_ids'].shape[1]
33
+
34
  # Generate output with model, capturing scores (logits)
35
  with torch.no_grad():
36
  outputs = model.generate(
37
  inputs['input_ids'],
38
+ max_length=inputs_len + 50, # Generate up to 50 additional tokens
39
  output_scores=True,
40
  return_dict_in_generate=True
41
  )
42
 
43
+ # Extract the generated tokens from the model's predictions
44
+ generated_tokens = outputs.sequences[:, inputs_len:]
45
+
46
+ # Compute masks for watermark detection
47
+ eos_token_mask = logits_processor.compute_eos_token_mask(
48
+ input_ids=generated_tokens,
49
+ eos_token_id=tokenizer.eos_token_id,
50
+ )[:, CONFIG['ngram_len'] - 1 :]
51
+
52
+ context_repetition_mask = logits_processor.compute_context_repetition_mask(
53
+ input_ids=generated_tokens
54
+ )
55
+
56
+ # Combine the masks
57
+ combined_mask = context_repetition_mask * eos_token_mask
58
+
59
+ # Compute G values for the generated text
60
+ g_values = logits_processor.compute_g_values(input_ids=generated_tokens)
61
+
62
+ # Score the G values with the combined mask
63
+ score = detector_mean.mean_score(g_values.cpu().numpy(), combined_mask.cpu().numpy())
64
+
65
+ # Initialize string to store highlighted output
66
  highlighted_text = ""
67
+
68
+ # Loop through each token and apply highlighting if it meets the watermark criteria
69
+ for token_id, g_val, mask in zip(generated_tokens[0], g_values[0], combined_mask[0]):
70
+ token_text = tokenizer.decode(token_id.unsqueeze(0))
71
+
72
+ # If the token is part of the watermark (based on mask and g_value), highlight it
73
+ if mask.item() and g_val.item() > 0.5:
74
+ highlighted_text += f"<mark>{token_text}</mark>" # Highlight watermarked content
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  else:
76
+ highlighted_text += token_text
77
+
78
+ # Return the highlighted text and overall watermark score
79
+ if score > 0.5:
80
+ return f"Flagged as AI-generated content (Academic Integrity Warning): {highlighted_text}"
81
+ else:
82
+ return f"Content appears to be human-generated. {highlighted_text}"
83
 
 
 
84
 
85
  # Define the Gradio interface
86
  def create_plagiarism_checker():