Spaces:

yasserrmd
/

IntegrityChecker

Sleeping

yasserrmd committed on Nov 4, 2024

Commit

7f059db

verified ·

1 Parent(s): 40f65bf

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -76,12 +76,11 @@ def check_plagiarism(text):
     # Initialize string to store highlighted output
     highlighted_text = ""
-    # Loop through each token and apply highlighting if it meets the watermark criteria
     for token_id, g_val, mask in zip(generated_tokens[0], g_values[0], combined_mask[0]):
         token_text = tokenizer.decode(token_id.unsqueeze(0))
-        # If the token is part of the watermark (based on mask and g_value), highlight it
-        if mask.item() and g_val.item() > 0.5:
             highlighted_text += f"<mark>{token_text}</mark>"  # Highlight watermarked content
         else:
             highlighted_text += token_text

     # Initialize string to store highlighted output
     highlighted_text = ""
     for token_id, g_val, mask in zip(generated_tokens[0], g_values[0], combined_mask[0]):
         token_text = tokenizer.decode(token_id.unsqueeze(0))
+        # If the token is part of the watermark, use a mean or max threshold on g_val if it's multi-element
+        if mask.item() and g_val.mean().item() > 0.5:  # Use .mean() to get a scalar value
             highlighted_text += f"<mark>{token_text}</mark>"  # Highlight watermarked content
         else:
             highlighted_text += token_text