yasserrmd committed on
Commit
87aa0e9
·
verified ·
1 Parent(s): 7ada44c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -32
app.py CHANGED
@@ -50,40 +50,40 @@ def check_plagiarism(text):
50
  )
51
 
52
 
53
- # Extract the generated tokens from the model's predictions
54
- generated_tokens = outputs.sequences[:, inputs_len:]
55
-
56
- # Compute masks for watermark detection
57
- eos_token_mask = logits_processor.compute_eos_token_mask(
58
- input_ids=generated_tokens,
59
- eos_token_id=tokenizer.eos_token_id,
60
- )[:, CONFIG['ngram_len'] - 1 :]
61
-
62
- context_repetition_mask = logits_processor.compute_context_repetition_mask(
63
- input_ids=generated_tokens
64
- )
65
-
66
- # Combine the masks
67
- combined_mask = context_repetition_mask * eos_token_mask
68
-
69
- # Compute G values for the generated text
70
- g_values = logits_processor.compute_g_values(input_ids=generated_tokens)
71
-
72
- # Score the G values with the combined mask
73
- score = detector_mean.mean_score(g_values.cpu().numpy(), combined_mask.cpu().numpy())
74
-
75
- # Initialize string to store highlighted output
76
- highlighted_text = ""
77
 
78
- # Loop through each token and apply highlighting if it meets the watermark criteria
79
- for token_id, g_val, mask in zip(generated_tokens[0], g_values[0], combined_mask[0]):
80
- token_text = tokenizer.decode(token_id.unsqueeze(0))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
 
82
- # If the token is part of the watermark (based on mask and g_value), highlight it
83
- if mask.item() and g_val.item() > 0.5:
84
- highlighted_text += f"<mark>{token_text}</mark>" # Highlight watermarked content
85
- else:
86
- highlighted_text += token_text
 
 
 
 
87
 
88
  # Return the highlighted text and overall watermark score
89
  if score > 0.5:
 
50
  )
51
 
52
 
53
+ # Extract the generated tokens from the model's predictions
54
+ generated_tokens = outputs.sequences[:, inputs_len:]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
 
56
+ # Compute masks for watermark detection
57
+ eos_token_mask = logits_processor.compute_eos_token_mask(
58
+ input_ids=generated_tokens,
59
+ eos_token_id=tokenizer.eos_token_id,
60
+ )[:, CONFIG['ngram_len'] - 1 :]
61
+
62
+ context_repetition_mask = logits_processor.compute_context_repetition_mask(
63
+ input_ids=generated_tokens
64
+ )
65
+
66
+ # Combine the masks
67
+ combined_mask = context_repetition_mask * eos_token_mask
68
+
69
+ # Compute G values for the generated text
70
+ g_values = logits_processor.compute_g_values(input_ids=generated_tokens)
71
+
72
+ # Score the G values with the combined mask
73
+ score = detector_mean.mean_score(g_values.cpu().numpy(), combined_mask.cpu().numpy())
74
+
75
+ # Initialize string to store highlighted output
76
+ highlighted_text = ""
77
 
78
+ # Loop through each token and apply highlighting if it meets the watermark criteria
79
+ for token_id, g_val, mask in zip(generated_tokens[0], g_values[0], combined_mask[0]):
80
+ token_text = tokenizer.decode(token_id.unsqueeze(0))
81
+
82
+ # If the token is part of the watermark (based on mask and g_value), highlight it
83
+ if mask.item() and g_val.item() > 0.5:
84
+ highlighted_text += f"<mark>{token_text}</mark>" # Highlight watermarked content
85
+ else:
86
+ highlighted_text += token_text
87
 
88
  # Return the highlighted text and overall watermark score
89
  if score > 0.5: