Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -76,12 +76,11 @@ def check_plagiarism(text):
|
|
| 76 |
# Initialize string to store highlighted output
|
| 77 |
highlighted_text = ""
|
| 78 |
|
| 79 |
-
# Loop through each token and apply highlighting if it meets the watermark criteria
|
| 80 |
for token_id, g_val, mask in zip(generated_tokens[0], g_values[0], combined_mask[0]):
|
| 81 |
token_text = tokenizer.decode(token_id.unsqueeze(0))
|
| 82 |
|
| 83 |
-
# If the token is part of the watermark
|
| 84 |
-
if mask.item() and g_val.item() > 0.5:
|
| 85 |
highlighted_text += f"<mark>{token_text}</mark>" # Highlight watermarked content
|
| 86 |
else:
|
| 87 |
highlighted_text += token_text
|
|
|
|
| 76 |
# Initialize string to store highlighted output
|
| 77 |
highlighted_text = ""
|
| 78 |
|
|
|
|
| 79 |
for token_id, g_val, mask in zip(generated_tokens[0], g_values[0], combined_mask[0]):
|
| 80 |
token_text = tokenizer.decode(token_id.unsqueeze(0))
|
| 81 |
|
| 82 |
+
# Highlight the token when its mask is set and the mean of its g-values exceeds 0.5 (g_val may be multi-element, so it is reduced to a scalar first)
|
| 83 |
+
if mask.item() and g_val.mean().item() > 0.5: # Use .mean() to get a scalar value
|
| 84 |
highlighted_text += f"<mark>{token_text}</mark>" # Highlight watermarked content
|
| 85 |
else:
|
| 86 |
highlighted_text += token_text
|