yasserrmd committed on
Commit
addd1a8
·
verified ·
1 Parent(s): c41212f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -15
app.py CHANGED
@@ -2,14 +2,12 @@ import gradio as gr
2
  import torch
3
  import transformers
4
  import spaces
5
- from synthid_text import detector_bayesian, synthid_mixin, logits_processing
6
-
7
 
8
  # Configurations and model selection
9
- MODEL_NAME = "google/gemma-7b-it" # Choose the model (Gemma models used in SynthID)
10
  DEVICE = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
11
 
12
-
13
  # Initialize model and tokenizer
14
  model = transformers.AutoModelForCausalLM.from_pretrained(MODEL_NAME).to(DEVICE)
15
  tokenizer = transformers.AutoTokenizer.from_pretrained(MODEL_NAME)
@@ -21,15 +19,14 @@ CONFIG = synthid_mixin.DEFAULT_WATERMARKING_CONFIG
21
 
22
 
23
 
24
- # Function to check for AI-generated content using SynthID
25
  @spaces.GPU
26
  def check_plagiarism(text):
 
27
  # Logits processor for SynthID
28
  logits_processor = logits_processing.SynthIDLogitsProcessor(
29
  **CONFIG, top_k=40, temperature=0.5
30
  )
31
-
32
-
33
  # Tokenize and process the input text
34
  inputs = tokenizer(text, return_tensors="pt").to(DEVICE)
35
 
@@ -42,16 +39,32 @@ def check_plagiarism(text):
42
  return_dict_in_generate=True
43
  )
44
 
45
- # Process logits through SynthID to check for watermark presence
 
 
 
46
  try:
47
- # Pass logits (scores) to the SynthIDLogitsProcessor
48
- logits = outputs.scores # Extract logits from the generation output
49
- is_watermarked = logits_processor(inputs['input_ids'], logits=logits).mean().item() > 0.5
50
-
 
 
 
 
 
 
 
 
 
 
 
 
51
  if is_watermarked:
52
- return "Flagged as AI-generated content (Academic Integrity Warning)."
53
  else:
54
  return "Content appears to be human-generated."
 
55
  except Exception as e:
56
  return f"Error in detection process: {e}"
57
 
@@ -64,8 +77,8 @@ def create_plagiarism_checker():
64
  # Input textbox for users to paste text
65
  text_input = gr.Textbox(placeholder="Paste your text here...", label="Input Text", lines=10)
66
 
67
- # Output box to display the result
68
- output = gr.Textbox(label="Integrity Check Result", interactive=False)
69
 
70
  # Button to initiate the check
71
  check_button = gr.Button("Check Text")
 
2
  import torch
3
  import transformers
4
  import spaces
5
+ from synthid_text import synthid_mixin, logits_processing
 
6
 
7
  # Configurations and model selection
8
+ MODEL_NAME = "google/gemma-7b-it"
9
  DEVICE = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
10
 
 
11
  # Initialize model and tokenizer
12
  model = transformers.AutoModelForCausalLM.from_pretrained(MODEL_NAME).to(DEVICE)
13
  tokenizer = transformers.AutoTokenizer.from_pretrained(MODEL_NAME)
 
19
 
20
 
21
 
22
+ # Function to check for AI-generated content using SynthID and highlight watermark
23
  @spaces.GPU
24
  def check_plagiarism(text):
25
+
26
  # Logits processor for SynthID
27
  logits_processor = logits_processing.SynthIDLogitsProcessor(
28
  **CONFIG, top_k=40, temperature=0.5
29
  )
 
 
30
  # Tokenize and process the input text
31
  inputs = tokenizer(text, return_tensors="pt").to(DEVICE)
32
 
 
39
  return_dict_in_generate=True
40
  )
41
 
42
+ # Initialize empty string to store highlighted output
43
+ highlighted_text = ""
44
+ is_watermarked = False
45
+
46
  try:
47
+ # Extract generated tokens and their scores
48
+ generated_tokens = outputs.sequences[0]
49
+ token_scores = outputs.scores
50
+
51
+ # Loop through each generated token and its corresponding score
52
+ for token, score in zip(generated_tokens, token_scores):
53
+ processed_score = logits_processor(score)
54
+ token_text = tokenizer.decode(token)
55
+
56
+ # If processed score indicates watermark, highlight this token
57
+ if processed_score.mean().item() > 0.5:
58
+ is_watermarked = True
59
+ highlighted_text += f"<mark>{token_text}</mark>" # Highlight AI-generated content
60
+ else:
61
+ highlighted_text += token_text
62
+
63
  if is_watermarked:
64
+ return f"Flagged as AI-generated content (Academic Integrity Warning): {highlighted_text}"
65
  else:
66
  return "Content appears to be human-generated."
67
+
68
  except Exception as e:
69
  return f"Error in detection process: {e}"
70
 
 
77
  # Input textbox for users to paste text
78
  text_input = gr.Textbox(placeholder="Paste your text here...", label="Input Text", lines=10)
79
 
80
+ # Output box to display the result with highlighted watermark
81
+ output = gr.HTML(label="Integrity Check Result")
82
 
83
  # Button to initiate the check
84
  check_button = gr.Button("Check Text")