Spaces:

yasserrmd
/

IntegrityChecker

Sleeping

App Files Community

yasserrmd committed on Nov 4, 2024

Commit

87aa0e9

verified ·

1 Parent(s): 7ada44c

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -32

app.py CHANGED Viewed

@@ -50,40 +50,40 @@ def check_plagiarism(text):
         )
-    # Extract the generated tokens from the model's predictions
-    generated_tokens = outputs.sequences[:, inputs_len:]
-    # Compute masks for watermark detection
-    eos_token_mask = logits_processor.compute_eos_token_mask(
-        input_ids=generated_tokens,
-        eos_token_id=tokenizer.eos_token_id,
-    )[:, CONFIG['ngram_len'] - 1 :]
-    context_repetition_mask = logits_processor.compute_context_repetition_mask(
-        input_ids=generated_tokens
-    )
-    # Combine the masks
-    combined_mask = context_repetition_mask * eos_token_mask
-    # Compute G values for the generated text
-    g_values = logits_processor.compute_g_values(input_ids=generated_tokens)
-    # Score the G values with the combined mask
-    score = detector_mean.mean_score(g_values.cpu().numpy(), combined_mask.cpu().numpy())
-    # Initialize string to store highlighted output
-    highlighted_text = ""
-    # Loop through each token and apply highlighting if it meets the watermark criteria
-    for token_id, g_val, mask in zip(generated_tokens[0], g_values[0], combined_mask[0]):
-        token_text = tokenizer.decode(token_id.unsqueeze(0))
-        # If the token is part of the watermark (based on mask and g_value), highlight it
-        if mask.item() and g_val.item() > 0.5:
-            highlighted_text += f"<mark>{token_text}</mark>"  # Highlight watermarked content
-        else:
-            highlighted_text += token_text
     # Return the highlighted text and overall watermark score
     if score > 0.5:

         )
+        # Extract the generated tokens from the model's predictions
+        generated_tokens = outputs.sequences[:, inputs_len:]
+        # Compute masks for watermark detection
+        eos_token_mask = logits_processor.compute_eos_token_mask(
+            input_ids=generated_tokens,
+            eos_token_id=tokenizer.eos_token_id,
+        )[:, CONFIG['ngram_len'] - 1 :]
+        context_repetition_mask = logits_processor.compute_context_repetition_mask(
+            input_ids=generated_tokens
+        )
+        # Combine the masks
+        combined_mask = context_repetition_mask * eos_token_mask
+        # Compute G values for the generated text
+        g_values = logits_processor.compute_g_values(input_ids=generated_tokens)
+        # Score the G values with the combined mask
+        score = detector_mean.mean_score(g_values.cpu().numpy(), combined_mask.cpu().numpy())
+        # Initialize string to store highlighted output
+        highlighted_text = ""
+        # Loop through each token and apply highlighting if it meets the watermark criteria
+        for token_id, g_val, mask in zip(generated_tokens[0], g_values[0], combined_mask[0]):
+            token_text = tokenizer.decode(token_id.unsqueeze(0))
+            # If the token is part of the watermark (based on mask and g_value), highlight it
+            if mask.item() and g_val.item() > 0.5:
+                highlighted_text += f"<mark>{token_text}</mark>"  # Highlight watermarked content
+            else:
+                highlighted_text += token_text
     # Return the highlighted text and overall watermark score
     if score > 0.5: