Spaces:

yasserrmd
/

IntegrityChecker

Sleeping

yasserrmd committed on Nov 4, 2024

Commit

40f65bf

verified ·

1 Parent(s): 0bb1e67

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -3,6 +3,7 @@ import torch
 import transformers
 import spaces
 from synthid_text import synthid_mixin, logits_processing
 # Configurations and model selection
 MODEL_NAME = "google/gemma-7b-it"
@@ -69,21 +70,21 @@ def check_plagiarism(text):
         # Compute G values for the generated text
         g_values = logits_processor.compute_g_values(input_ids=generated_tokens)
-        # Score the G values with the combined mask
-        score = detector_mean.mean_score(g_values.cpu().numpy(), combined_mask.cpu().numpy())
-        # Initialize string to store highlighted output
-        highlighted_text = ""
-        # Loop through each token and apply highlighting if it meets the watermark criteria
-        for token_id, g_val, mask in zip(generated_tokens[0], g_values[0], combined_mask[0]):
-            token_text = tokenizer.decode(token_id.unsqueeze(0))
-            # If the token is part of the watermark (based on mask and g_value), highlight it
-            if mask.item() and g_val.item() > 0.5:
-                highlighted_text += f"<mark>{token_text}</mark>"  # Highlight watermarked content
-            else:
-                highlighted_text += token_text
     # Return the highlighted text and overall watermark score
     if score > 0.5:

 import transformers
 import spaces
 from synthid_text import synthid_mixin, logits_processing
+from synthid_text.detector_mean import mean_score
 # Configurations and model selection
 MODEL_NAME = "google/gemma-7b-it"
         # Compute G values for the generated text
         g_values = logits_processor.compute_g_values(input_ids=generated_tokens)
+    # Score the G values with the combined mask
+    score = mean_score(g_values.cpu().numpy(), combined_mask.cpu().numpy())
+    # Initialize string to store highlighted output
+    highlighted_text = ""
+    # Loop through each token and apply highlighting if it meets the watermark criteria
+    for token_id, g_val, mask in zip(generated_tokens[0], g_values[0], combined_mask[0]):
+        token_text = tokenizer.decode(token_id.unsqueeze(0))
+        # If the token is part of the watermark (based on mask and g_value), highlight it
+        if mask.item() and g_val.item() > 0.5:
+            highlighted_text += f"<mark>{token_text}</mark>"  # Highlight watermarked content
+        else:
+            highlighted_text += token_text
     # Return the highlighted text and overall watermark score
     if score > 0.5: