Spaces:

WordLift
/

synthID

Running

App Files Community

cyberandy committed on Oct 25, 2024

Commit

180ea05

verified ·

1 Parent(s): ebd0825

Update app.py

Browse files

Files changed (1) hide show

app.py +17 -53

app.py CHANGED Viewed

@@ -7,37 +7,23 @@ from transformers import (
 )
 # Initialize model and tokenizer
-MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.2"  # Changed to open-access model
-print(f"Loading model and tokenizer from {MODEL_NAME}...")
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-model = AutoModelForCausalLM.from_pretrained(
-    MODEL_NAME,
-    torch_dtype=torch.float16,  # Use half precision to reduce memory usage
-    device_map="auto"  # Automatically handle device placement
-)
 # Configure watermarking
 WATERMARK_KEYS = [654, 400, 836, 123, 340, 443, 597, 160, 57, 789]  # Example keys
 watermarking_config = SynthIDTextWatermarkingConfig(
     keys=WATERMARK_KEYS,
     ngram_len=5,
-    gamma=0.5,  # Controls watermark strength
 )
-def format_prompt(text):
-    """Format the prompt for Mistral instruction model."""
-    return f"<s>[INST] {text} [/INST]"
 def apply_watermark(text):
     """Apply SynthID watermark to input text."""
     try:
-        # Format the prompt for Mistral
-        formatted_text = format_prompt(text)
         # Tokenize input
-        inputs = tokenizer(formatted_text, return_tensors="pt", truncation=True, max_length=512)
-        inputs = {k: v.to(model.device) for k, v in inputs.items()}
         # Generate with watermark
         with torch.no_grad():
@@ -45,16 +31,14 @@ def apply_watermark(text):
                 **inputs,
                 watermarking_config=watermarking_config,
                 do_sample=True,
-                max_length=len(inputs["input_ids"][0]) + 200,  # Add more tokens for generation
                 pad_token_id=tokenizer.eos_token_id,
-                temperature=0.7,
                 top_p=0.9
             )
         # Decode output
         watermarked_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-        # Remove the instruction prompt from the output
-        watermarked_text = watermarked_text.replace(text, "").strip()
         return watermarked_text, "Watermark applied successfully!"
     except Exception as e:
         return text, f"Error applying watermark: {str(e)}"
@@ -62,20 +46,17 @@ def apply_watermark(text):
 def analyze_text(text):
     """Analyze text characteristics that might indicate watermarking."""
     try:
-        # Basic text analysis
         total_words = len(text.split())
         avg_word_length = sum(len(word) for word in text.split()) / total_words if total_words > 0 else 0
-        sentences = text.split('.')
-        avg_sentence_length = sum(len(s.split()) for s in sentences if s.strip()) / len(sentences) if sentences else 0
         # Create analysis report
         analysis = f"""Text Analysis:
 - Total words: {total_words}
 - Average word length: {avg_word_length:.2f}
-- Average sentence length: {avg_sentence_length:.2f} words
 Note: This is a basic analysis. The official SynthID detector is not yet available in the public transformers package.
-For proper watermark detection, please refer to the official implementation when it becomes available."""
         return analysis
     except Exception as e:
@@ -84,47 +65,30 @@ For proper watermark detection, please refer to the official implementation when
 # Create Gradio interface
 with gr.Blocks(title="SynthID Text Watermarking Tool") as app:
     gr.Markdown("# SynthID Text Watermarking Tool")
-    gr.Markdown("""This demo shows how to apply SynthID watermarks to text using Mistral-7B-Instruct-v0.2.
                 Note: The official detector is not yet publicly available.""")
     with gr.Tab("Apply Watermark"):
         with gr.Row():
-            input_text = gr.Textbox(
-                label="Input Text (Prompt)",
-                lines=5,
-                placeholder="Enter text you want to watermark..."
-            )
-            output_text = gr.Textbox(
-                label="Generated Text with Watermark",
-                lines=5
-            )
             status = gr.Textbox(label="Status")
-        apply_btn = gr.Button("Generate with Watermark")
         apply_btn.click(apply_watermark, inputs=[input_text], outputs=[output_text, status])
     with gr.Tab("Analyze Text"):
         with gr.Row():
-            analyze_input = gr.Textbox(
-                label="Text to Analyze",
-                lines=5,
-                placeholder="Enter text to analyze..."
-            )
             analyze_result = gr.Textbox(label="Analysis Result", lines=5)
         analyze_btn = gr.Button("Analyze Text")
         analyze_btn.click(analyze_text, inputs=[analyze_input], outputs=[analyze_result])
     gr.Markdown("""
-    ### Usage Notes:
-    1. Enter a prompt in the "Input Text" box
-    2. Click "Generate with Watermark" to create watermarked text
-    3. The model will generate a response with an embedded watermark
-    4. The watermark is designed to be imperceptible to humans
-    ### Technical Notes:
-    - Using Mistral-7B-Instruct-v0.2 model
-    - Half-precision (float16) for efficient memory usage
-    - Automatic device placement (CPU/GPU)
     - The official detector will be available in future releases
     """)
 # Launch the app

 )
 # Initialize model and tokenizer
+MODEL_NAME = "google/gemma-2b"  # You can change this to your preferred model
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
 # Configure watermarking
 WATERMARK_KEYS = [654, 400, 836, 123, 340, 443, 597, 160, 57, 789]  # Example keys
 watermarking_config = SynthIDTextWatermarkingConfig(
     keys=WATERMARK_KEYS,
     ngram_len=5,
+    gamma=0.5,  # Additional parameter to control watermark strength
 )
 def apply_watermark(text):
     """Apply SynthID watermark to input text."""
     try:
         # Tokenize input
+        inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
         # Generate with watermark
         with torch.no_grad():
                 **inputs,
                 watermarking_config=watermarking_config,
                 do_sample=True,
+                max_length=len(inputs["input_ids"][0]) + 100,  # Add some extra tokens
                 pad_token_id=tokenizer.eos_token_id,
+                temperature=0.7,  # Add some randomness to generation
                 top_p=0.9
             )
         # Decode output
         watermarked_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
         return watermarked_text, "Watermark applied successfully!"
     except Exception as e:
         return text, f"Error applying watermark: {str(e)}"
 def analyze_text(text):
     """Analyze text characteristics that might indicate watermarking."""
     try:
+        # Basic text analysis (since we don't have access to the detector yet)
         total_words = len(text.split())
         avg_word_length = sum(len(word) for word in text.split()) / total_words if total_words > 0 else 0
         # Create analysis report
         analysis = f"""Text Analysis:
 - Total words: {total_words}
 - Average word length: {avg_word_length:.2f}
 Note: This is a basic analysis. The official SynthID detector is not yet available in the public transformers package.
+For proper watermark detection, please refer to the official Google DeepMind implementation when it becomes available."""
         return analysis
     except Exception as e:
 # Create Gradio interface
 with gr.Blocks(title="SynthID Text Watermarking Tool") as app:
     gr.Markdown("# SynthID Text Watermarking Tool")
+    gr.Markdown("""This demo shows how to apply SynthID watermarks to text.
                 Note: The official detector is not yet publicly available.""")
     with gr.Tab("Apply Watermark"):
         with gr.Row():
+            input_text = gr.Textbox(label="Input Text", lines=5)
+            output_text = gr.Textbox(label="Watermarked Text", lines=5)
             status = gr.Textbox(label="Status")
+        apply_btn = gr.Button("Apply Watermark")
         apply_btn.click(apply_watermark, inputs=[input_text], outputs=[output_text, status])
     with gr.Tab("Analyze Text"):
         with gr.Row():
+            analyze_input = gr.Textbox(label="Text to Analyze", lines=5)
             analyze_result = gr.Textbox(label="Analysis Result", lines=5)
         analyze_btn = gr.Button("Analyze Text")
         analyze_btn.click(analyze_text, inputs=[analyze_input], outputs=[analyze_result])
     gr.Markdown("""
+    ### Notes:
+    - The watermark is designed to be imperceptible to humans
+    - This demo only implements watermark application
     - The official detector will be available in future releases
+    - For production use, use your own secure watermark keys
     """)
 # Launch the app