# Hugging Face Space: zazaman/guardrails-final (status: Sleeping)
# File size: 5,562 bytes

# main.py
import os
# Disable TensorFlow oneDNN custom ops (and the startup notice they emit).
# NOTE(review): these variables are set before `backend` is imported below,
# presumably because the ML libraries read them at import time -- confirm.
os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"
# Disable torch.compile / TorchDynamo warnings and optimizations, intended for
# CPU-only devices.
os.environ["TORCH_COMPILE_DISABLE"] = "1"
os.environ["TORCHDYNAMO_DISABLE"] = "1"

import sys
import time

from backend import Backend
import config


def _read_multiline_output():
    """
    Reads lines from stdin until two consecutive empty lines are entered.

    A single empty line is kept as part of the text; only the second empty
    line in a row ends the input. The collected lines are joined with
    newlines and surrounding whitespace is stripped, so the terminating
    blank line never reaches the caller.

    Raises:
        EOFError: Propagated from ``input()`` when stdin is closed.
    """
    lines = []
    empty_line_count = 0

    while True:
        line = input()
        if line:
            empty_line_count = 0
        else:
            empty_line_count += 1
            if empty_line_count >= 2:
                break
        lines.append(line)

    return "\n".join(lines).strip()


def run_output_test_mode():
    """
    Runs the application in output testing mode for testing modular output guardrails.

    Creates ``Backend(output_test_mode=True)`` and then loops: read a prompt,
    read a multi-line candidate LLM output, pass both to
    ``Backend.test_output_guardrails`` and print whether the output was
    approved (and whether it was modified) or blocked.

    The loop ends when the user types 'exit' or 'quit', presses Ctrl+C, or
    stdin reaches end-of-file. Any other exception raised while handling one
    test is reported and the loop continues with the next test.

    Exits the process with status 1 if the backend cannot be initialized.
    """
    print("\n\n" + "=" * 60)
    print("\n--- OUTPUT GUARDRAIL TESTING MODE ---")
    print("🔍 This mode allows you to test modular output guardrails with manual input")
    print("   You can enter both a prompt and the LLM's response to test filtering")
    print("=" * 60)

    try:
        # Initialize backend in output test mode
        app_backend = Backend(output_test_mode=True)
    except Exception as e:
        print(f"\n❌ Error initializing output testing backend: {e}")
        print("   Make sure you have the presidio libraries installed for PII detection.")
        sys.exit(1)

    while True:
        try:
            print(f"\n{'='*60}")
            print("📝 OUTPUT GUARDRAIL TEST")
            print(f"{'='*60}")

            # Get prompt from user
            prompt = input("\n💭 Enter the input prompt (or 'exit' to quit): ")
            if prompt.lower() in ["exit", "quit"]:
                print("\n👋 Exiting output test mode. Goodbye!")
                break

            # Get manual output from user
            print("\n🤖 Enter the LLM output you want to test:")
            print("(Press Enter twice to finish your input)\n")

            manual_output = _read_multiline_output()
            if not manual_output:
                print("❌ No output provided. Please try again.")
                continue

            print(f"\n✅ Testing output ({len(manual_output)} characters) against modular guardrails...\n")

            # Test the output against guardrails
            processed_output, is_safe = app_backend.test_output_guardrails(prompt, manual_output)

            print(f"\n{'='*60}")
            print("📊 GUARDRAIL TEST RESULTS")
            print(f"{'='*60}")

            if is_safe:
                print("✅ Result: OUTPUT APPROVED")
                print("\n📄 Final output after guardrail processing:")
                print(f"'{processed_output}'")

                if processed_output != manual_output:
                    print("\n⚠️  Note: Output was modified by guardrails")
                    print(f"   Original length: {len(manual_output)} characters")
                    print(f"   Modified length: {len(processed_output)} characters")
            else:
                print("🔒 Result: OUTPUT BLOCKED")
                print(f"\n❌ Reason: {processed_output}")

        except (KeyboardInterrupt, EOFError):
            # EOFError must end the session: once stdin is closed every later
            # input() call raises it again, so falling through to the generic
            # handler below would loop forever printing the same error.
            print("\n👋 Exiting output test mode. Goodbye!")
            break
        except Exception as e:
            print(f"\n\n❌ An error occurred: {e}")


def run_interactive_mode(app_backend: Backend):
    """
    Runs the application in interactive mode, accepting user input.

    Each prompt is passed to ``app_backend.process_request(prompt, stream=True)``.
    If the request is flagged as unsafe, the first returned value is printed
    as a system message; otherwise it is iterated and each chunk is printed
    as it arrives.

    The loop ends when the user types 'exit' or 'quit', presses Ctrl+C, or
    stdin reaches end-of-file. Any other exception raised while handling a
    prompt is reported and the loop continues.

    Args:
        app_backend: Initialized backend used to process each prompt.
    """
    print("\n\n" + "=" * 60)
    print("\n--- INTERACTIVE MODE ---")
    print("🔒 AI Detection: Finetuned model will scan all prompts for attacks")
    print("Enter your prompt below. Type 'exit' or 'quit' to end the session.")
    print("=" * 60)

    while True:
        try:
            prompt = input("\n👤 You: ")
            if prompt.lower() in ["exit", "quit"]:
                print("\n👋 Exiting interactive mode. Goodbye!")
                break

            # The third returned value (the processed prompt) is not used here.
            response_stream, is_safe, _ = app_backend.process_request(
                prompt, stream=True
            )

            if not is_safe:
                print(f"   🔒 System: {response_stream}")
                continue

            print("\n🤖 Chatbot (streaming): ", end="")
            for chunk in response_stream:
                print(chunk, end="", flush=True)
                # Short pause between chunks to pace the on-screen output.
                time.sleep(0.05)
            print()  # For the newline

        except (KeyboardInterrupt, EOFError):
            # EOFError must end the session: once stdin is closed every later
            # input() call raises it again, so falling through to the generic
            # handler below would loop forever printing the same error.
            print("\n👋 Exiting interactive mode. Goodbye!")
            break
        except Exception as e:
            print(f"\n\n❌ An error occurred: {e}")


def main():
    """
    Program entry point: pick a mode from the command line and start it.

    When the first command-line argument is ``output_test``, the output
    guardrail testing mode is run and the function returns. Otherwise a banner
    is printed, a default ``Backend`` is created (exiting with status 1 if
    that fails), and the interactive session is started.
    """
    # Check if we should run in output testing mode
    cli_args = sys.argv[1:]
    if cli_args and cli_args[0] == "output_test":
        run_output_test_mode()
        return

    rule = "=" * 60
    for banner_line in (
        rule,
        "  Guardrails System",
        "  🔒 AI-powered attack detection with finetuned model",
        rule,
    ):
        print(banner_line)

    try:
        app_backend = Backend()
    except Exception as init_error:
        print(f"\n❌ Error initializing backend: {init_error}")
        print("   Make sure you have the transformers library installed for AI Detection Mode.")
        sys.exit(1)
    else:
        # Only reached when the backend was constructed successfully.
        run_interactive_mode(app_backend)


# Start the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()