jonmabe committed on
Commit
bd30e03
·
verified ·
1 Parent(s): 1c9bb7e

Initial Gradio demo upload

Browse files
Files changed (3) hide show
  1. README.md +32 -5
  2. app.py +184 -0
  3. requirements.txt +4 -0
README.md CHANGED
@@ -1,12 +1,39 @@
1
  ---
2
- title: Privacy Classifier Demo
3
- emoji: 🏢
4
- colorFrom: blue
5
  colorTo: green
6
  sdk: gradio
7
- sdk_version: 6.5.1
8
  app_file: app.py
9
  pinned: false
 
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: Privacy Classifier
3
+ emoji: 🔒
4
+ colorFrom: red
5
  colorTo: green
6
  sdk: gradio
7
+ sdk_version: 4.44.0
8
  app_file: app.py
9
  pinned: false
10
+ license: apache-2.0
11
  ---
12
 
13
+ # Privacy Classifier Demo
14
+
15
+ Classify prompts to determine if they contain sensitive information that should stay local or if they're safe to send to cloud LLM services.
16
+
17
+ ## Classifications
18
+
19
+ - **🔴 KEEP_LOCAL**: Contains PII, sensitive data, or private information
20
+ - **🟢 ALLOW_CLOUD**: Safe to process with cloud-based AI services
21
+
22
+ ## Model
23
+
24
+ This demo uses [jonmabe/privacy-classifier-electra](https://huggingface.co/jonmabe/privacy-classifier-electra), an ELECTRA-based classifier fine-tuned to detect sensitive information in prompts.
25
+
26
+ ## Use Cases
27
+
28
+ - Privacy-aware prompt routing
29
+ - Data loss prevention for LLM applications
30
+ - Compliance with data protection regulations
31
+
32
+ ## Examples
33
+
34
+ | Prompt | Classification |
35
+ |--------|---------------|
36
+ | "What is the capital of France?" | ALLOW_CLOUD |
37
+ | "My SSN is 123-45-6789" | KEEP_LOCAL |
38
+ | "Write me a poem about the ocean" | ALLOW_CLOUD |
39
+ | "My password is hunter2" | KEEP_LOCAL |
app.py ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Privacy Classifier Demo - Classifies prompts as KEEP_LOCAL vs ALLOW_CLOUD
3
+ """
4
+
5
+ import gradio as gr
6
+ import torch
7
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer
8
+
9
# Hub checkpoint served by this demo.
MODEL_ID = "jonmabe/privacy-classifier-electra"
# Class names ordered by logit index: 0 -> safe (ALLOW_CLOUD), 1 -> sensitive (KEEP_LOCAL).
LABELS = ["ALLOW_CLOUD", "KEEP_LOCAL"]

# Choose the inference device up front: CUDA when available, otherwise CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the tokenizer and classifier once, at import time, so every request
# reuses the same objects.
print("Loading model...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID)
model.eval()  # switch the module to evaluation mode before serving
model = model.to(device)
print(f"Model loaded on {device}")
24
+
25
+
26
def classify_prompt(text: str) -> tuple[str, dict]:
    """Classify a prompt as KEEP_LOCAL or ALLOW_CLOUD.

    Args:
        text: The prompt to classify. ``None``, an empty string, or a
            whitespace-only string is treated as "nothing to classify".

    Returns:
        A 2-tuple of:

        - a summary string of the form ``"<LABEL> (<pct> confidence)"``, or
          a hint asking for input when ``text`` is blank;
        - a dict mapping each entry of ``LABELS`` to its softmax probability
          (empty when ``text`` is blank), for the label output component.
    """
    # Guard against None as well as blank strings: calling .strip() on None
    # would raise AttributeError before anything useful happens.
    if not text or not text.strip():
        return "Please enter a prompt to classify.", {}

    # Tokenize the single prompt, truncating to at most 512 tokens.
    encoded = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=512,
        padding=True,
    )
    # The input tensors must live on the same device as the model.
    encoded = {name: tensor.to(device) for name, tensor in encoded.items()}

    # Forward pass without gradient tracking; only one prompt is passed in,
    # so row 0 of the softmax output holds its class probabilities.
    with torch.no_grad():
        logits = model(**encoded).logits
        probs = torch.softmax(logits, dim=-1)[0]

    # The most probable class index selects the reported label.
    pred_idx = probs.argmax().item()
    pred_label = LABELS[pred_idx]
    confidence = probs[pred_idx].item()

    # Per-class probabilities as plain floats, keyed by label name.
    prob_dict = {label: float(probs[i]) for i, label in enumerate(LABELS)}

    return f"{pred_label} ({confidence:.1%} confidence)", prob_dict
62
+
63
+
64
def get_color_for_label(label: str) -> str:
    """Map a classification string to a display color name.

    Args:
        label: Text that may contain a class name, e.g. the summary string
            produced for a classified prompt.

    Returns:
        ``"red"`` if ``label`` contains ``KEEP_LOCAL``, ``"green"`` if it
        contains ``ALLOW_CLOUD``, and ``"gray"`` otherwise. ``KEEP_LOCAL``
        wins when both substrings are present. Matching is case-sensitive.
    """
    # Ordered pairs: earlier markers take precedence over later ones.
    palette = (
        ("KEEP_LOCAL", "red"),
        ("ALLOW_CLOUD", "green"),
    )
    for marker, color in palette:
        if marker in label:
            return color
    return "gray"
71
+
72
+
73
# Sample prompts for the examples widget: harmless questions interleaved with
# prompts that carry personal or confidential details.
_EXAMPLE_PROMPTS = (
    "What is the capital of France?",
    "My social security number is 123-45-6789, can you help me file taxes?",
    "Write me a poem about the ocean.",
    "Here's my password: hunter2, please remember it.",
    "Explain how photosynthesis works.",
    "My credit card number is 4111-1111-1111-1111, check if it's valid.",
    "What are some good restaurants in Seattle?",
    "My medical records show I have diabetes. What should I eat?",
    "Translate 'hello world' to Spanish.",
    "My home address is 123 Main St, Anytown USA. Send me a pizza.",
    "How do I sort a list in Python?",
    "My employee ID is E12345 and my salary is $85,000.",
)

# One single-element row per prompt (a list of lists).
EXAMPLES = [[prompt] for prompt in _EXAMPLE_PROMPTS]
88
+
89
+
90
# Custom CSS passed to gr.Blocks. It defines red/green gradient classes for
# the two outcomes and a generic result box style.
css = """
.keep-local {
    background: linear-gradient(135deg, #ff6b6b 0%, #ee5a5a 100%) !important;
    color: white !important;
    font-weight: bold !important;
}
.allow-cloud {
    background: linear-gradient(135deg, #51cf66 0%, #40c057 100%) !important;
    color: white !important;
    font-weight: bold !important;
}
.result-box {
    font-size: 1.2em;
    padding: 20px;
    border-radius: 10px;
    text-align: center;
}
"""

# Build the Gradio interface: intro text, an input column, a results column,
# clickable examples, and an "about" footer.
with gr.Blocks(css=css, title="Privacy Classifier") as demo:
    gr.Markdown("""
    # 🔒 Privacy Classifier

    Classify prompts to determine if they contain sensitive information that should stay local
    or if they're safe to send to cloud LLM services.

    - **🔴 KEEP_LOCAL**: Contains PII, sensitive data, or private information
    - **🟢 ALLOW_CLOUD**: Safe to process with cloud-based AI services

    This model helps route requests in privacy-aware AI systems.
    """)

    with gr.Row():
        # Left column: prompt entry and the submit button.
        with gr.Column(scale=2):
            input_text = gr.Textbox(
                label="Enter your prompt",
                placeholder="Type a prompt to classify...",
                lines=3,
            )
            classify_btn = gr.Button("🔍 Classify", variant="primary", size="lg")

        # Right column: textual verdict plus per-class probabilities.
        with gr.Column(scale=1):
            result_label = gr.Textbox(
                label="Classification Result",
                interactive=False,
                lines=2,
            )
            confidence_chart = gr.Label(
                label="Confidence Scores",
                num_top_classes=2,
            )

    gr.Markdown("### 📝 Example Prompts")
    gr.Examples(
        examples=EXAMPLES,
        inputs=input_text,
        outputs=[result_label, confidence_chart],
        fn=classify_prompt,
        cache_examples=False,
    )

    # Event handlers: the button click and pressing Enter in the textbox both
    # run the same classification with identical inputs and outputs.
    for register in (classify_btn.click, input_text.submit):
        register(
            fn=classify_prompt,
            inputs=input_text,
            outputs=[result_label, confidence_chart],
        )

    gr.Markdown("""
    ---
    ### About This Model

    **Model**: [jonmabe/privacy-classifier-electra](https://huggingface.co/jonmabe/privacy-classifier-electra)

    This is an ELECTRA-based classifier fine-tuned to detect sensitive information in prompts.
    Use cases include:
    - Privacy-aware prompt routing
    - Data loss prevention for LLM applications
    - Compliance with data protection regulations

    ⚠️ **Disclaimer**: This model is for demonstration purposes. Always verify classifications
    for production use cases involving sensitive data.
    """)


if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ gradio>=4.0.0
2
+ torch>=2.0.0
3
+ transformers>=4.35.0
4
+ accelerate>=0.24.0