yasserrmd committed on
Commit
40f65bf
·
verified ·
1 Parent(s): 0bb1e67

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -14
app.py CHANGED
@@ -3,6 +3,7 @@ import torch
3
  import transformers
4
  import spaces
5
  from synthid_text import synthid_mixin, logits_processing
 
6
 
7
  # Configurations and model selection
8
  MODEL_NAME = "google/gemma-7b-it"
@@ -69,21 +70,21 @@ def check_plagiarism(text):
69
  # Compute G values for the generated text
70
  g_values = logits_processor.compute_g_values(input_ids=generated_tokens)
71
 
72
- # Score the G values with the combined mask
73
- score = detector_mean.mean_score(g_values.cpu().numpy(), combined_mask.cpu().numpy())
74
-
75
- # Initialize string to store highlighted output
76
- highlighted_text = ""
 
 
 
 
77
 
78
- # Loop through each token and apply highlighting if it meets the watermark criteria
79
- for token_id, g_val, mask in zip(generated_tokens[0], g_values[0], combined_mask[0]):
80
- token_text = tokenizer.decode(token_id.unsqueeze(0))
81
-
82
- # If the token is part of the watermark (based on mask and g_value), highlight it
83
- if mask.item() and g_val.item() > 0.5:
84
- highlighted_text += f"<mark>{token_text}</mark>" # Highlight watermarked content
85
- else:
86
- highlighted_text += token_text
87
 
88
  # Return the highlighted text and overall watermark score
89
  if score > 0.5:
 
3
  import transformers
4
  import spaces
5
  from synthid_text import synthid_mixin, logits_processing
6
+ from synthid_text.detector_mean import mean_score
7
 
8
  # Configurations and model selection
9
  MODEL_NAME = "google/gemma-7b-it"
 
70
  # Compute G values for the generated text
71
  g_values = logits_processor.compute_g_values(input_ids=generated_tokens)
72
 
73
+ # Score the G values with the combined mask
74
+ score = mean_score(g_values.cpu().numpy(), combined_mask.cpu().numpy())
75
+
76
+ # Initialize string to store highlighted output
77
+ highlighted_text = ""
78
+
79
+ # Loop through each token and apply highlighting if it meets the watermark criteria
80
+ for token_id, g_val, mask in zip(generated_tokens[0], g_values[0], combined_mask[0]):
81
+ token_text = tokenizer.decode(token_id.unsqueeze(0))
82
 
83
+ # If the token is part of the watermark (based on mask and g_value), highlight it
84
+ if mask.item() and g_val.item() > 0.5:
85
+ highlighted_text += f"<mark>{token_text}</mark>" # Highlight watermarked content
86
+ else:
87
+ highlighted_text += token_text
 
 
 
 
88
 
89
  # Return the highlighted text and overall watermark score
90
  if score > 0.5: