Spaces:

yasserrmd
/

IntegrityChecker

Sleeping

App Files Files Community

yasserrmd committed on Nov 4, 2024

Commit

addd1a8

verified ·

1 Parent(s): c41212f

Update app.py

Browse files

Files changed (1) hide show

app.py +28 -15

app.py CHANGED Viewed

@@ -2,14 +2,12 @@ import gradio as gr
 import torch
 import transformers
 import spaces
-from synthid_text import detector_bayesian, synthid_mixin, logits_processing
 # Configurations and model selection
-MODEL_NAME = "google/gemma-7b-it"  # Choose the model (Gemma models used in SynthID)
 DEVICE = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
 # Initialize model and tokenizer
 model = transformers.AutoModelForCausalLM.from_pretrained(MODEL_NAME).to(DEVICE)
 tokenizer = transformers.AutoTokenizer.from_pretrained(MODEL_NAME)
@@ -21,15 +19,14 @@ CONFIG = synthid_mixin.DEFAULT_WATERMARKING_CONFIG
-# Function to check for AI-generated content using SynthID
 @spaces.GPU
 def check_plagiarism(text):
     # Logits processor for SynthID
     logits_processor = logits_processing.SynthIDLogitsProcessor(
         **CONFIG, top_k=40, temperature=0.5
     )
     # Tokenize and process the input text
     inputs = tokenizer(text, return_tensors="pt").to(DEVICE)
@@ -42,16 +39,32 @@ def check_plagiarism(text):
             return_dict_in_generate=True
         )
-    # Process logits through SynthID to check for watermark presence
     try:
-        # Pass logits (scores) to the SynthIDLogitsProcessor
-        logits = outputs.scores  # Extract logits from the generation output
-        is_watermarked = logits_processor(inputs['input_ids'], logits=logits).mean().item() > 0.5
         if is_watermarked:
-            return "Flagged as AI-generated content (Academic Integrity Warning)."
         else:
             return "Content appears to be human-generated."
     except Exception as e:
         return f"Error in detection process: {e}"
@@ -64,8 +77,8 @@ def create_plagiarism_checker():
         # Input textbox for users to paste text
         text_input = gr.Textbox(placeholder="Paste your text here...", label="Input Text", lines=10)
-        # Output box to display the result
-        output = gr.Textbox(label="Integrity Check Result", interactive=False)
         # Button to initiate the check
         check_button = gr.Button("Check Text")

 import torch
 import transformers
 import spaces
+from synthid_text import synthid_mixin, logits_processing
 # Configurations and model selection
+MODEL_NAME = "google/gemma-7b-it"
 DEVICE = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
 # Initialize model and tokenizer
 model = transformers.AutoModelForCausalLM.from_pretrained(MODEL_NAME).to(DEVICE)
 tokenizer = transformers.AutoTokenizer.from_pretrained(MODEL_NAME)
+# Function to check for AI-generated content using SynthID and highlight watermark
 @spaces.GPU
 def check_plagiarism(text):
     # Logits processor for SynthID
     logits_processor = logits_processing.SynthIDLogitsProcessor(
         **CONFIG, top_k=40, temperature=0.5
     )
     # Tokenize and process the input text
     inputs = tokenizer(text, return_tensors="pt").to(DEVICE)
             return_dict_in_generate=True
         )
+    # Initialize empty string to store highlighted output
+    highlighted_text = ""
+    is_watermarked = False
     try:
+        # Extract generated tokens and their scores
+        generated_tokens = outputs.sequences[0]
+        token_scores = outputs.scores
+        # Loop through each generated token and its corresponding score
+        for token, score in zip(generated_tokens, token_scores):
+            processed_score = logits_processor(score)
+            token_text = tokenizer.decode(token)
+            # If processed score indicates watermark, highlight this token
+            if processed_score.mean().item() > 0.5:
+                is_watermarked = True
+                highlighted_text += f"<mark>{token_text}</mark>"  # Highlight AI-generated content
+            else:
+                highlighted_text += token_text
         if is_watermarked:
+            return f"Flagged as AI-generated content (Academic Integrity Warning): {highlighted_text}"
         else:
             return "Content appears to be human-generated."
     except Exception as e:
         return f"Error in detection process: {e}"
         # Input textbox for users to paste text
         text_input = gr.Textbox(placeholder="Paste your text here...", label="Input Text", lines=10)
+        # Output box to display the result with highlighted watermark
+        output = gr.HTML(label="Integrity Check Result")
         # Button to initiate the check
         check_button = gr.Button("Check Text")