80cols committed on
Commit
efc037d
·
verified ·
1 Parent(s): d8484b9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -1091
app.py CHANGED
@@ -1,1094 +1,3 @@
1
- # # """A Gradio app for anonymizing text data using FHE."""
2
-
3
- # # import os
4
- # # import re
5
- # # import subprocess
6
- # # import time
7
- # # import uuid
8
- # # from typing import Dict, List
9
-
10
- # # import numpy
11
- # # import pandas as pd
12
- # # import requests
13
- # # from fhe_anonymizer import FHEAnonymizer
14
- # # from utils_demo import *
15
-
16
- # # from concrete.ml.deployment import FHEModelClient
17
-
18
-
19
-
20
- # import gradio as gr
21
- # from predictor import predict, key_already_generated, pre_process_encrypt_send_purchase, decrypt_prediction
22
- # import base64
23
-
24
- # def key_generated():
25
- # """
26
- # Check if the evaluation keys have already been generated.
27
- # Returns:
28
- # bool: True if the evaluation keys have already been generated, False otherwise.
29
- # """
30
- # if not key_already_generated():
31
- # error_message = (
32
- # f"Error Encountered While generating the evaluation keys."
33
- # )
34
- # print(error_message)
35
- # return {gen_key_btn: gr.update(value=error_message)}
36
- # else:
37
- # print("Keys have been generated ✅")
38
- # return {gen_key_btn: gr.update(value="Keys have been generated ✅")}
39
-
40
-
41
- # # demo = gr.Blocks(css=".markdown-body { font-size: 18px; }")
42
-
43
-
44
-
45
-
46
-
47
-
48
-
49
-
50
-
51
-
52
-
53
-
54
-
55
-
56
-
57
-
58
-
59
- # # # Ensure the directory is clean before starting processes or reading files
60
- # # clean_directory()
61
-
62
- # # anonymizer = FHEAnonymizer()
63
-
64
- # # # Start the Uvicorn server hosting the FastAPI app
65
- # # subprocess.Popen(["uvicorn", "server:app"], cwd=CURRENT_DIR)
66
- # # time.sleep(3)
67
-
68
- # # # Load data from files required for the application
69
- # # UUID_MAP = read_json(MAPPING_UUID_PATH)
70
- # # ANONYMIZED_DOCUMENT = read_txt(ANONYMIZED_FILE_PATH)
71
- # # MAPPING_ANONYMIZED_SENTENCES = read_pickle(MAPPING_ANONYMIZED_SENTENCES_PATH)
72
- # # MAPPING_ENCRYPTED_SENTENCES = read_pickle(MAPPING_ENCRYPTED_SENTENCES_PATH)
73
- # # ORIGINAL_DOCUMENT = read_txt(ORIGINAL_FILE_PATH).split("\n\n")
74
- # # MAPPING_DOC_EMBEDDING = read_pickle(MAPPING_DOC_EMBEDDING_PATH)
75
-
76
- # # print(f"{ORIGINAL_DOCUMENT=}\n")
77
- # # print(f"{MAPPING_DOC_EMBEDDING.keys()=}")
78
-
79
- # # # 4. Data Processing and Operations (No specific operations shown here, assuming it's part of anonymizer or client usage)
80
-
81
- # # # 5. Utilizing External Services or APIs
82
- # # # (Assuming client initialization and anonymizer setup are parts of using external services or application-specific logic)
83
-
84
- # # # Generate a random user ID for this session
85
- # # USER_ID = numpy.random.randint(0, 2**32)
86
-
87
-
88
-
89
-
90
-
91
-
92
-
93
-
94
-
95
-
96
- # # def select_static_anonymized_sentences_fn(selected_sentences: List):
97
-
98
- # # selected_sentences = [MAPPING_ANONYMIZED_SENTENCES[sentence] for sentence in selected_sentences]
99
-
100
- # # anonymized_selected_sentence = sorted(selected_sentences, key=lambda x: x[0])
101
-
102
- # # anonymized_selected_sentence = [sentence for _, sentence in anonymized_selected_sentence]
103
-
104
- # # return "\n\n".join(anonymized_selected_sentence)
105
-
106
-
107
- # # def key_gen_fn() -> Dict:
108
- # # """Generate keys for a given user."""
109
-
110
- # # print("------------ Step 1: Key Generation:")
111
-
112
- # # print(f"Your user ID is: {USER_ID}....")
113
-
114
-
115
- # # client = FHEModelClient(path_dir=DEPLOYMENT_DIR, key_dir=KEYS_DIR / f"{USER_ID}")
116
- # # client.load()
117
-
118
- # # # Creates the private and evaluation keys on the client side
119
- # # client.generate_private_and_evaluation_keys()
120
-
121
- # # # Get the serialized evaluation keys
122
- # # serialized_evaluation_keys = client.get_serialized_evaluation_keys()
123
- # # assert isinstance(serialized_evaluation_keys, bytes)
124
-
125
- # # # Save the evaluation key
126
- # # evaluation_key_path = KEYS_DIR / f"{USER_ID}/evaluation_key"
127
-
128
- # # write_bytes(evaluation_key_path, serialized_evaluation_keys)
129
-
130
- # # # anonymizer.generate_key()
131
-
132
- # # if not evaluation_key_path.is_file():
133
- # # error_message = (
134
- # # f"Error Encountered While generating the evaluation {evaluation_key_path.is_file()=}"
135
- # # )
136
- # # print(error_message)
137
- # # return {gen_key_btn: gr.update(value=error_message)}
138
- # # else:
139
- # # print("Keys have been generated ✅")
140
- # # return {gen_key_btn: gr.update(value="Keys have been generated ✅")}
141
-
142
-
143
- # # def encrypt_doc_fn(doc):
144
-
145
- # # print(f"\n------------ Step 2.1: Doc encryption: {doc=}")
146
-
147
- # # if not (KEYS_DIR / f"{USER_ID}/evaluation_key").is_file():
148
- # # return {encrypted_doc_box: gr.update(value="Error ❌: Please generate the key first!", lines=10)}
149
-
150
- # # # Retrieve the client API
151
- # # client = FHEModelClient(path_dir=DEPLOYMENT_DIR, key_dir=KEYS_DIR / f"{USER_ID}")
152
- # # client.load()
153
-
154
- # # encrypted_tokens = []
155
- # # tokens = re.findall(r"(\b[\w\.\/\-@]+\b|[\s,.!?;:'\"-]+|\$\d+(?:\.\d+)?|\€\d+(?:\.\d+)?)", ' '.join(doc))
156
-
157
- # # for token in tokens:
158
- # # if token.strip() and re.match(r"\w+", token):
159
- # # emb_x = MAPPING_DOC_EMBEDDING[token]
160
- # # assert emb_x.shape == (1, 1024)
161
- # # encrypted_x = client.quantize_encrypt_serialize(emb_x)
162
- # # assert isinstance(encrypted_x, bytes)
163
- # # encrypted_tokens.append(encrypted_x)
164
-
165
- # # print("Doc encrypted ✅ on Client Side")
166
-
167
- # # # No need to save it
168
- # # # write_bytes(KEYS_DIR / f"{USER_ID}/encrypted_doc", b"".join(encrypted_tokens))
169
-
170
- # # encrypted_quant_tokens_hex = [token.hex()[500:510] for token in encrypted_tokens]
171
-
172
- # # return {
173
- # # encrypted_doc_box: gr.update(value=" ".join(encrypted_quant_tokens_hex), lines=10),
174
- # # anonymized_doc_output: gr.update(visible=True, value=None),
175
- # # }
176
-
177
-
178
- # # def encrypt_query_fn(query):
179
-
180
- # # print(f"\n------------ Step 2: Query encryption: {query=}")
181
-
182
- # # if not (KEYS_DIR / f"{USER_ID}/evaluation_key").is_file():
183
- # # return {output_encrypted_box: gr.update(value="Error ❌: Please generate the key first!", lines=8)}
184
-
185
- # # if is_user_query_valid(query):
186
- # # return {
187
- # # query_box: gr.update(
188
- # # value=(
189
- # # "Unable to process ❌: The request exceeds the length limit or falls "
190
- # # "outside the scope of this document. Please refine your query."
191
- # # )
192
- # # )
193
- # # }
194
-
195
- # # # Retrieve the client API
196
- # # client = FHEModelClient(path_dir=DEPLOYMENT_DIR, key_dir=KEYS_DIR / f"{USER_ID}")
197
- # # client.load()
198
-
199
- # # encrypted_tokens = []
200
-
201
- # # # Pattern to identify words and non-words (including punctuation, spaces, etc.)
202
- # # tokens = re.findall(r"(\b[\w\.\/\-@]+\b|[\s,.!?;:'\"-]+)", query)
203
-
204
- # # for token in tokens:
205
-
206
- # # # 1- Ignore non-words tokens
207
- # # if bool(re.match(r"^\s+$", token)):
208
- # # continue
209
-
210
- # # # 2- Directly append non-word tokens or whitespace to processed_tokens
211
-
212
- # # # Prediction for each word
213
- # # emb_x = get_batch_text_representation([token], EMBEDDINGS_MODEL, TOKENIZER)
214
- # # encrypted_x = client.quantize_encrypt_serialize(emb_x)
215
- # # assert isinstance(encrypted_x, bytes)
216
-
217
- # # encrypted_tokens.append(encrypted_x)
218
-
219
- # # print("Data encrypted ✅ on Client Side")
220
-
221
- # # assert len({len(token) for token in encrypted_tokens}) == 1
222
-
223
- # # write_bytes(KEYS_DIR / f"{USER_ID}/encrypted_input", b"".join(encrypted_tokens))
224
- # # write_bytes(
225
- # # KEYS_DIR / f"{USER_ID}/encrypted_input_len", len(encrypted_tokens[0]).to_bytes(10, "big")
226
- # # )
227
-
228
- # # encrypted_quant_tokens_hex = [token.hex()[500:580] for token in encrypted_tokens]
229
-
230
- # # return {
231
- # # output_encrypted_box: gr.update(value=" ".join(encrypted_quant_tokens_hex), lines=8),
232
- # # anonymized_query_output: gr.update(visible=True, value=None),
233
- # # identified_words_output_df: gr.update(visible=False, value=None),
234
- # # }
235
-
236
-
237
- # # def send_input_fn(query) -> Dict:
238
- # # """Send the encrypted data and the evaluation key to the server."""
239
-
240
- # # print("------------ Step 3.1: Send encrypted_data to the Server")
241
-
242
- # # evaluation_key_path = KEYS_DIR / f"{USER_ID}/evaluation_key"
243
- # # encrypted_input_path = KEYS_DIR / f"{USER_ID}/encrypted_input"
244
- # # encrypted_input_len_path = KEYS_DIR / f"{USER_ID}/encrypted_input_len"
245
-
246
- # # if not evaluation_key_path.is_file():
247
- # # error_message = (
248
- # # "Error Encountered While Sending Data to the Server: "
249
- # # f"The key has been generated correctly - {evaluation_key_path.is_file()=}"
250
- # # )
251
- # # return {anonymized_query_output: gr.update(value=error_message)}
252
-
253
- # # if not encrypted_input_path.is_file():
254
- # # error_message = (
255
- # # "Error Encountered While Sending Data to the Server: The data has not been encrypted "
256
- # # f"correctly on the client side - {encrypted_input_path.is_file()=}"
257
- # # )
258
- # # return {anonymized_query_output: gr.update(value=error_message)}
259
-
260
- # # # Define the data and files to post
261
- # # data = {"user_id": USER_ID, "input": query}
262
-
263
- # # files = [
264
- # # ("files", open(evaluation_key_path, "rb")),
265
- # # ("files", open(encrypted_input_path, "rb")),
266
- # # ("files", open(encrypted_input_len_path, "rb")),
267
- # # ]
268
-
269
- # # # Send the encrypted input and evaluation key to the server
270
- # # url = SERVER_URL + "send_input"
271
-
272
- # # with requests.post(
273
- # # url=url,
274
- # # data=data,
275
- # # files=files,
276
- # # ) as resp:
277
- # # print("Data sent to the server ✅" if resp.ok else "Error ❌ in sending data to the server")
278
-
279
-
280
- # # def run_fhe_in_server_fn() -> Dict:
281
- # # """Run in FHE the anonymization of the query"""
282
-
283
- # # print("------------ Step 3.2: Run in FHE on the Server Side")
284
-
285
- # # evaluation_key_path = KEYS_DIR / f"{USER_ID}/evaluation_key"
286
- # # encrypted_input_path = KEYS_DIR / f"{USER_ID}/encrypted_input"
287
-
288
- # # if not evaluation_key_path.is_file():
289
- # # error_message = (
290
- # # "Error Encountered While Sending Data to the Server: "
291
- # # f"The key has been generated correctly - {evaluation_key_path.is_file()=}"
292
- # # )
293
- # # return {anonymized_query_output: gr.update(value=error_message)}
294
-
295
- # # if not encrypted_input_path.is_file():
296
- # # error_message = (
297
- # # "Error Encountered While Sending Data to the Server: The data has not been encrypted "
298
- # # f"correctly on the client side - {encrypted_input_path.is_file()=}"
299
- # # )
300
- # # return {anonymized_query_output: gr.update(value=error_message)}
301
-
302
- # # data = {
303
- # # "user_id": USER_ID,
304
- # # }
305
-
306
- # # url = SERVER_URL + "run_fhe"
307
-
308
- # # with requests.post(
309
- # # url=url,
310
- # # data=data,
311
- # # ) as response:
312
- # # if not response.ok:
313
- # # return {
314
- # # anonymized_query_output: gr.update(
315
- # # value=(
316
- # # "⚠️ An error occurred on the Server Side. "
317
- # # "Please check connectivity and data transmission."
318
- # # ),
319
- # # ),
320
- # # }
321
- # # else:
322
- # # time.sleep(1)
323
- # # print(f"The query anonymization was computed in {response.json():.2f} s per token.")
324
-
325
-
326
- # # def get_output_fn() -> Dict:
327
-
328
- # # print("------------ Step 3.3: Get the output from the Server Side")
329
-
330
- # # if not (KEYS_DIR / f"{USER_ID}/evaluation_key").is_file():
331
- # # error_message = (
332
- # # "Error Encountered While Sending Data to the Server: "
333
- # # "The key has not been generated correctly"
334
- # # )
335
- # # return {anonymized_query_output: gr.update(value=error_message)}
336
-
337
- # # if not (KEYS_DIR / f"{USER_ID}/encrypted_input").is_file():
338
- # # error_message = (
339
- # # "Error Encountered While Sending Data to the Server: "
340
- # # "The data has not been encrypted correctly on the client side"
341
- # # )
342
- # # return {anonymized_query_output: gr.update(value=error_message)}
343
-
344
- # # data = {
345
- # # "user_id": USER_ID,
346
- # # }
347
-
348
- # # # Retrieve the encrypted output
349
- # # url = SERVER_URL + "get_output"
350
- # # with requests.post(
351
- # # url=url,
352
- # # data=data,
353
- # # ) as response:
354
- # # if response.ok:
355
- # # print("Data received ✅ from the remote Server")
356
- # # response_data = response.json()
357
- # # encrypted_output_base64 = response_data["encrypted_output"]
358
- # # length_encrypted_output_base64 = response_data["length"]
359
-
360
- # # # Decode the base64 encoded data
361
- # # encrypted_output = base64.b64decode(encrypted_output_base64)
362
- # # length_encrypted_output = base64.b64decode(length_encrypted_output_base64)
363
-
364
- # # # Save the encrypted output to bytes in a file as it is too large to pass through
365
- # # # regular Gradio buttons (see https://github.com/gradio-app/gradio/issues/1877)
366
-
367
- # # write_bytes(CLIENT_DIR / f"{USER_ID}_encrypted_output", encrypted_output)
368
- # # write_bytes(CLIENT_DIR / f"{USER_ID}_encrypted_output_len", length_encrypted_output)
369
-
370
- # # else:
371
- # # print("Error ❌ in getting data to the server")
372
-
373
-
374
- # # def decrypt_fn(text) -> Dict:
375
- # # """Dencrypt the data on the `Client Side`."""
376
-
377
- # # print("------------ Step 4: Dencrypt the data on the `Client Side`")
378
-
379
- # # # Get the encrypted output path
380
- # # encrypted_output_path = CLIENT_DIR / f"{USER_ID}_encrypted_output"
381
-
382
- # # if not encrypted_output_path.is_file():
383
- # # error_message = """⚠️ Please ensure that: \n
384
- # # - the connectivity \n
385
- # # - the query has been submitted \n
386
- # # - the evaluation key has been generated \n
387
- # # - the server processed the encrypted data \n
388
- # # - the Client received the data from the Server before decrypting the prediction
389
- # # """
390
- # # print(error_message)
391
-
392
- # # return error_message, None
393
-
394
- # # # Retrieve the client API
395
- # # client = FHEModelClient(path_dir=DEPLOYMENT_DIR, key_dir=KEYS_DIR / f"{USER_ID}")
396
- # # client.load()
397
-
398
- # # # Load the encrypted output as bytes
399
- # # encrypted_output = read_bytes(CLIENT_DIR / f"{USER_ID}_encrypted_output")
400
- # # length = int.from_bytes(read_bytes(CLIENT_DIR / f"{USER_ID}_encrypted_output_len"), "big")
401
-
402
- # # tokens = re.findall(r"(\b[\w\.\/\-@]+\b|[\s,.!?;:'\"-]+)", text)
403
-
404
- # # decrypted_output, identified_words_with_prob = [], []
405
-
406
- # # i = 0
407
- # # for token in tokens:
408
-
409
- # # # Directly append non-word tokens or whitespace to processed_tokens
410
- # # if bool(re.match(r"^\s+$", token)):
411
- # # continue
412
- # # else:
413
- # # encrypted_token = encrypted_output[i : i + length]
414
- # # prediction_proba = client.deserialize_decrypt_dequantize(encrypted_token)
415
- # # probability = prediction_proba[0][1]
416
- # # i += length
417
-
418
- # # if probability >= 0.77:
419
- # # identified_words_with_prob.append((token, probability))
420
-
421
- # # # Use the existing UUID if available, otherwise generate a new one
422
- # # tmp_uuid = UUID_MAP.get(token, str(uuid.uuid4())[:8])
423
- # # decrypted_output.append(tmp_uuid)
424
- # # UUID_MAP[token] = tmp_uuid
425
- # # else:
426
- # # decrypted_output.append(token)
427
-
428
- # # # Update the UUID map with query.
429
- # # write_json(MAPPING_UUID_PATH, UUID_MAP)
430
-
431
- # # # Removing Spaces Before Punctuation:
432
- # # anonymized_text = re.sub(r"\s([,.!?;:])", r"\1", " ".join(decrypted_output))
433
-
434
- # # # Convert the list of identified words and probabilities into a DataFrame
435
- # # if identified_words_with_prob:
436
- # # identified_df = pd.DataFrame(
437
- # # identified_words_with_prob, columns=["Identified Words", "Probability"]
438
- # # )
439
- # # else:
440
- # # identified_df = pd.DataFrame(columns=["Identified Words", "Probability"])
441
-
442
- # # print("Decryption done ✅ on Client Side")
443
-
444
- # # return anonymized_text, identified_df
445
-
446
-
447
- # # def anonymization_with_fn(selected_sentences, query):
448
-
449
- # # encrypt_query_fn(query)
450
-
451
- # # send_input_fn(query)
452
-
453
- # # run_fhe_in_server_fn()
454
-
455
- # # get_output_fn()
456
-
457
- # # anonymized_text, identified_df = decrypt_fn(query)
458
-
459
- # # return {
460
- # # anonymized_doc_output: gr.update(value=select_static_anonymized_sentences_fn(selected_sentences)),
461
- # # anonymized_query_output: gr.update(value=anonymized_text),
462
- # # identified_words_output_df: gr.update(value=identified_df, visible=False),
463
- # # }
464
-
465
-
466
- # # def query_chatgpt_fn(anonymized_query, anonymized_document):
467
-
468
- # # print("------------ Step 5: ChatGPT communication")
469
-
470
- # # if not (KEYS_DIR / f"{USER_ID}/evaluation_key").is_file():
471
- # # error_message = "Error ❌: Please generate the key first!"
472
- # # return {chatgpt_response_anonymized: gr.update(value=error_message)}
473
-
474
- # # if not (CLIENT_DIR / f"{USER_ID}_encrypted_output").is_file():
475
- # # error_message = "Error ❌: Please encrypt your query first!"
476
- # # return {chatgpt_response_anonymized: gr.update(value=error_message)}
477
-
478
- # # context_prompt = read_txt(PROMPT_PATH)
479
-
480
- # # # Prepare prompt
481
- # # query = (
482
- # # "Document content:\n```\n"
483
- # # + anonymized_document
484
- # # + "\n\n```"
485
- # # + "Query:\n```\n"
486
- # # + anonymized_query
487
- # # + "\n```"
488
- # # )
489
- # # print(f'Prompt of CHATGPT:\n{query}')
490
-
491
- # # completion = client.chat.completions.create(
492
- # # model="gpt-4-1106-preview", # Replace with "gpt-4" if available
493
- # # messages=[
494
- # # {"role": "system", "content": context_prompt},
495
- # # {"role": "user", "content": query},
496
- # # ],
497
- # # )
498
- # # anonymized_response = completion.choices[0].message.content
499
- # # uuid_map = read_json(MAPPING_UUID_PATH)
500
-
501
- # # inverse_uuid_map = {
502
- # # v: k for k, v in uuid_map.items()
503
- # # } # TODO load the inverse mapping from disk for efficiency
504
-
505
- # # # Pattern to identify words and non-words (including punctuation, spaces, etc.)
506
- # # tokens = re.findall(r"(\b[\w\.\/\-@]+\b|[\s,.!?;:'\"-]+)", anonymized_response)
507
- # # processed_tokens = []
508
-
509
- # # for token in tokens:
510
- # # # Directly append non-word tokens or whitespace to processed_tokens
511
- # # if not token.strip() or not re.match(r"\w+", token):
512
- # # processed_tokens.append(token)
513
- # # continue
514
-
515
- # # if token in inverse_uuid_map:
516
- # # processed_tokens.append(inverse_uuid_map[token])
517
- # # else:
518
- # # processed_tokens.append(token)
519
- # # deanonymized_response = "".join(processed_tokens)
520
-
521
- # # return {chatgpt_response_anonymized: gr.update(value=anonymized_response),
522
- # # chatgpt_response_deanonymized: gr.update(value=deanonymized_response)}
523
-
524
-
525
-
526
-
527
-
528
-
529
-
530
-
531
-
532
-
533
-
534
-
535
-
536
-
537
-
538
-
539
-
540
-
541
-
542
-
543
-
544
-
545
-
546
-
547
-
548
-
549
-
550
-
551
-
552
-
553
-
554
-
555
-
556
- # demo = gr.Blocks(css=".markdown-body { font-size: 18px; }")
557
-
558
- # with demo:
559
-
560
- # # gr.Markdown(
561
- # # """
562
- # # <p align="center">
563
- # # <img width=200 src="https://user-images.githubusercontent.com/5758427/197816413-d9cddad3-ba38-4793-847d-120975e1da11.png">
564
- # # </p>
565
- # # """)
566
-
567
- # gr.Markdown(
568
- # f"""
569
- # <div style="display: flex; justify-content: center; align-items: center;">
570
- # <img style="margin-right: 50px;" width=200 src="https://huggingface.co/spaces/Tenefix/private-fhe-fraud-detection/resolve/main/Img/zama.png">
571
- # <img width=200 src="https://huggingface.co/spaces/Tenefix/private-fhe-fraud-detection/resolve/main/Img/Epita.png">
572
- # </div>
573
- # """
574
- # )
575
- # gr.Markdown(
576
- # """
577
- # <h1 style="text-align: center;">Fraud Detection with FHE Model</h1>
578
- # <p align="center">
579
- # <a href="https://github.com/CirSandro/private-fhe-fraud-detection">
580
- # <span style="vertical-align: middle; display:inline-block; margin-right: 3px;">💳</span>private-fhe-fraud-detection
581
- # </a>
582
- # —
583
- # <a href="https://docs.zama.ai/concrete-ml">
584
- # <span style="vertical-align: middle; display:inline-block; margin-right: 3px;">🔒</span>Documentation Concrete-ML
585
- # </a>
586
- # </p>
587
- # """
588
- # )
589
-
590
- # gr.Markdown(
591
- # """
592
- # <p align="center" style="font-size: 16px;">
593
- # How to detect bank fraud without using your personal data ?</p>
594
- # """
595
- # )
596
-
597
- # # gr.Markdown(
598
- # # """
599
- # # <h1 style="text-align: center;">Encrypted Anonymization Using Fully Homomorphic Encryption</h1>
600
- # # <p align="center">
601
- # # <a href="https://github.com/zama-ai/concrete-ml"> <img style="vertical-align: middle; display:inline-block; margin-right: 3px;" width=15 src="file/images/logos/github.png">Concrete-ML</a>
602
- # # —
603
- # # <a href="https://docs.zama.ai/concrete-ml"> <img style="vertical-align: middle; display:inline-block; margin-right: 3px;" width=15 src="file/images/logos/documentation.png">Documentation</a>
604
- # # —
605
- # # <a href=" https://community.zama.ai/c/concrete-ml/8"> <img style="vertical-align: middle; display:inline-block; margin-right: 3px;" width=15 src="file/images/logos/community.png">Community</a>
606
- # # —
607
- # # <a href="https://twitter.com/zama_fhe"> <img style="vertical-align: middle; display:inline-block; margin-right: 3px;" width=15 src="file/images/logos/x.png">@zama_fhe</a>
608
- # # </p>
609
- # # """
610
- # # )
611
-
612
- # # gr.Markdown(
613
- # # """
614
- # # <p align="center" style="font-size: 16px;">
615
- # # Anonymization is the process of removing personally identifiable information (PII) data from
616
- # # a document in order to protect individual privacy.</p>
617
-
618
- # # <p align="center" style="font-size: 16px;">
619
- # # Encrypted anonymization uses Fully Homomorphic Encryption (FHE) to anonymize personally
620
- # # identifiable information (PII) within encrypted documents, enabling computations to be
621
- # # performed on the encrypted data.</p>
622
-
623
- # # <p align="center" style="font-size: 16px;">
624
- # # In the example above, we're showing how encrypted anonymization can be leveraged to use LLM
625
- # # services such as ChatGPT in a privacy-preserving manner.</p>
626
- # # """
627
- # # )
628
-
629
- # # gr.Markdown(
630
- # # """
631
- # # <p align="center">
632
- # # <img width="75%" height="30%" src="https://raw.githubusercontent.com/kcelia/Img/main/fhe_anonymization_banner.png">
633
- # # </p>
634
- # # """
635
- # # )
636
-
637
- # with gr.Accordion("What is bank fraud detection?", open=False):
638
- # gr.Markdown(
639
- # """
640
- # Bank fraud detection is the process of identifying fraudulent activities or transactions
641
- # that may pose a risk to a bank or its customers. It is essential to detect fraudulent
642
- # activities to prevent financial losses and protect the integrity of the banking system.
643
- # """
644
- # )
645
-
646
- # with gr.Accordion("Why is it important to protect this data?", open=False):
647
- # gr.Markdown(
648
- # """
649
- # Banking and financial data often contain sensitive personal information, such as income,
650
- # spending habits, and account numbers. Protecting this information ensures that customers'
651
- # privacy is respected and safeguarded from unauthorized access.
652
- # """
653
- # )
654
-
655
- # with gr.Accordion("Why is Fully Homomorphic Encryption (FHE) a good solution?", open=False):
656
- # gr.Markdown(
657
- # """
658
- # Fully Homomorphic Encryption (FHE) is a powerful technique for enhancing privacy and accuracy
659
- # in the context of fraud detection, particularly when dealing with sensitive banking data. FHE
660
- # allows for the encryption of data, which can then be processed and analyzed without ever needing
661
- # to decrypt it.
662
- # Each party involved in the detection process can collaborate without compromising user privacy,
663
- # minimizing the risk of data leaks or breaches. The data remains confidential throughout the entire
664
- # process, ensuring that the privacy of users is maintained.
665
- # """
666
- # )
667
-
668
- # gr.Markdown(
669
- # """
670
- # <p style="text-align: center;">
671
- # Below, we will explain the flow in the image by simulating a purchase you've just made, and show you how our fraud detection model processes the transaction.
672
- # </p>
673
- # """
674
- # )
675
-
676
-
677
- # gr.Markdown(
678
- # f"""
679
- # <p align="center">
680
- # <img width="75%" height="30%" src="https://huggingface.co/spaces/Tenefix/private-fhe-fraud-detection/resolve/main/Img/schema.png">
681
- # </p>
682
- # """
683
- # )
684
-
685
- # gr.Markdown("<hr />")
686
-
687
- # ########################## Key Gen Part ##########################
688
-
689
- # gr.Markdown(
690
- # "## Step 1: Generate the keys\n\n"
691
- # """In Fully Homomorphic Encryption (FHE) methods, two types of keys are created. The first
692
- # type, called secret keys, are used to encrypt and decrypt the user's data. The second type,
693
- # called evaluation keys, enables a server to work on the encrypted data without seeing the
694
- # actual data.
695
- # """
696
- # )
697
-
698
- # gen_key_btn = gr.Button("Generate the secret and evaluation keys")
699
-
700
- # gen_key_btn.click(
701
- # key_generated,
702
- # inputs=[],
703
- # outputs=[gen_key_btn],
704
- # )#547
705
-
706
- # gr.Markdown("<hr />")
707
-
708
- # ########################## Encrypt Data ##########################
709
-
710
- # gr.Markdown(
711
- # "## Step 2: Make your purchase\n\n"
712
- # """
713
- # 🛍️ It's time to shop! To simulate your latest purchase, please provide the details of your most recent transaction.
714
-
715
- # If you don't have an idea, you can pre-fill with an example of fraud or non-fraud.
716
- # """
717
- # )
718
-
719
- # def prefill_fraud():
720
- # return 34, 50, 3, False, False, False, True
721
-
722
- # def prefill_no_fraud():
723
- # return 12, 2, 0.7, True, False, True, False
724
-
725
- # with gr.Row():
726
- # prefill_button = gr.Button("Exemple Fraud")
727
- # prefill_button_no = gr.Button("Exemple No-Fraud")
728
-
729
- # with gr.Row():
730
- # with gr.Column():
731
- # distance_home = gr.Number(
732
- # minimum=float(0),
733
- # maximum=float(22000),
734
- # step=1,
735
- # value=10,
736
- # label="Distance from Home",
737
- # info="How far was the purchase from your home (in km)?"
738
- # )
739
- # distance_last = gr.Number(
740
- # minimum=float(0),
741
- # maximum=float(22000),
742
- # step=1,
743
- # value=1,
744
- # label="Distance from Last Transaction",
745
- # info="Distance between this purchase and the last one (in km)?"
746
- # )
747
- # ratio = gr.Number(
748
- # minimum=float(0),
749
- # maximum=float(10000),
750
- # step=0.1,
751
- # value=1,
752
- # label="Ratio to Median Purchase Price",
753
- # info="Purchase ratio compared to your average purchase",
754
- # )
755
- # repeat_retailer = gr.Checkbox(
756
- # label="Repeat Retailer",
757
- # info="Check if you are purchasing from the same retailer as your last transaction"
758
- # )
759
- # used_chip = gr.Checkbox(
760
- # label="Used Chip",
761
- # info="Check if you used a chip card for this transaction"
762
- # )
763
- # used_pin_number = gr.Checkbox(
764
- # label="Used Pin Number",
765
- # info="Check if you used your PIN number during the transaction"
766
- # )
767
- # online = gr.Checkbox(
768
- # label="Online Order",
769
- # info="Check if you made your purchase online"
770
- # )
771
-
772
-
773
- # prefill_button.click(
774
- # fn=prefill_fraud,
775
- # inputs=[],
776
- # outputs=[
777
- # distance_home,
778
- # distance_last,
779
- # ratio,
780
- # repeat_retailer,
781
- # used_chip,
782
- # used_pin_number,
783
- # online
784
- # ]
785
- # )
786
-
787
- # prefill_button_no.click(
788
- # fn=prefill_no_fraud,
789
- # inputs=[],
790
- # outputs=[
791
- # distance_home,
792
- # distance_last,
793
- # ratio,
794
- # repeat_retailer,
795
- # used_chip,
796
- # used_pin_number,
797
- # online
798
- # ]
799
- # )
800
-
801
- # with gr.Row():
802
- # with gr.Column(scale=2):
803
- # encrypt_button_applicant = gr.Button("Encrypt the inputs and send to server.")
804
-
805
- # encrypted_input_applicant = gr.Textbox(
806
- # label="Encrypted input representation:", max_lines=2, interactive=False
807
- # )
808
-
809
- # encrypt_button_applicant.click(
810
- # pre_process_encrypt_send_purchase,
811
- # inputs=[distance_home, distance_last, ratio, repeat_retailer, used_chip, used_pin_number, \
812
- # online],
813
- # outputs=[encrypted_input_applicant, encrypt_button_applicant],
814
- # )
815
-
816
- # gr.Markdown("<hr />")
817
-
818
- # ########################## Model Prediction ##########################
819
-
820
- # gr.Markdown("## Step 3: Run the FHE evaluation.")
821
- # gr.Markdown("<span style='color:grey'>Server Side</span>")
822
- # gr.Markdown(
823
- # """
824
- # It's high time to launch our prediction, by pressing the button you will launch the
825
- # fraud analysis that our fictitious bank offers you.
826
- # This server employs a [Random Forest (by Concrete-ML)](https://github.com/zama-ai/concrete-ml/blob/release/1.8.x/docs/references/api/concrete.ml.sklearn.rf.md#class-randomforestclassifier)
827
- # classifier model that has been trained on a synthetic data-set.
828
- # """
829
- # )
830
-
831
- # execute_fhe_button = gr.Button("Run the FHE evaluation.")
832
- # fhe_execution_time = gr.Textbox(
833
- # label="Total FHE execution time (in seconds):", max_lines=1, interactive=False
834
- # )
835
-
836
- # # Button to send the encodings to the server using post method
837
- # execute_fhe_button.click(predict, outputs=[fhe_execution_time, execute_fhe_button])
838
-
839
- # gr.Markdown("<hr />")
840
-
841
- # ######################### Decrypt Prediction ##########################
842
-
843
- # gr.Markdown("## Step 4: Receive the encrypted output from the server and decrypt.")
844
- # gr.Markdown(
845
- # """
846
- # 🔔 You will receive a notification! Is this a Fraud? The message is decrypted by pressing the button.
847
- # """
848
- # )
849
-
850
- # get_output_button = gr.Button("Decrypt the prediction.")
851
- # prediction_output = gr.Textbox(
852
- # label="Prediction", max_lines=1, interactive=False
853
- # )
854
- # prediction_bar = gr.HTML(label="Prediction Bar") # For the percentage bar
855
-
856
- # get_output_button.click(
857
- # decrypt_prediction,
858
- # outputs=[prediction_output, get_output_button, prediction_bar],
859
- # )
860
-
861
-
862
- # gr.Markdown(
863
- # """
864
- # You now know that it is possible to detect bank fraud without knowing your personal information.
865
- # """
866
- # )
867
-
868
- # gr.Markdown(
869
- # "The app was built with [Concrete-ML](https://github.com/zama-ai/concrete-ml), a "
870
- # "Privacy-Preserving Machine Learning (PPML) open-source set of tools by [Zama](https://zama.ai/). "
871
- # "Try it yourself and don't forget to star on Github &#11088;."
872
- # )
873
-
874
-
875
-
876
-
877
-
878
-
879
-
880
-
881
-
882
-
883
-
884
-
885
-
886
-
887
-
888
-
889
-
890
-
891
-
892
-
893
-
894
-
895
-
896
-
897
-
898
-
899
-
900
-
901
- # # ########################## Key Gen Part ##########################
902
-
903
- # # gr.Markdown(
904
- # # "## Step 1: Generate the keys\n\n"
905
- # # """In Fully Homomorphic Encryption (FHE) methods, two types of keys are created. The first
906
- # # type, called secret keys, are used to encrypt and decrypt the user's data. The second type,
907
- # # called evaluation keys, enables a server to work on the encrypted data without seeing the
908
- # # actual data.
909
- # # """
910
- # # )
911
-
912
- # # gen_key_btn = gr.Button("Generate the secret and evaluation keys")
913
-
914
- # # gen_key_btn.click(
915
- # # key_gen_fn,
916
- # # inputs=[],
917
- # # outputs=[gen_key_btn],
918
- # # )
919
-
920
- # # ########################## Main document Part ##########################
921
-
922
- # # gr.Markdown("<hr />")
923
- # # gr.Markdown("## Step 2.1: Select the document you want to encrypt\n\n"
924
- # # """To make it simple, we pre-compiled the following document, but you are free to choose
925
- # # on which part you want to run this example.
926
- # # """
927
- # # )
928
-
929
- # # with gr.Row():
930
- # # with gr.Column(scale=5):
931
- # # original_sentences_box = gr.CheckboxGroup(
932
- # # ORIGINAL_DOCUMENT,
933
- # # value=ORIGINAL_DOCUMENT,
934
- # # label="Contract:",
935
- # # show_label=True,
936
- # # )
937
-
938
- # # with gr.Column(scale=1, min_width=6):
939
- # # gr.HTML("<div style='height: 77px;'></div>")
940
- # # encrypt_doc_btn = gr.Button("Encrypt the document")
941
-
942
- # # with gr.Column(scale=5):
943
- # # encrypted_doc_box = gr.Textbox(
944
- # # label="Encrypted document:", show_label=True, interactive=False, lines=10
945
- # # )
946
-
947
-
948
- # # ########################## User Query Part ##########################
949
-
950
- # # gr.Markdown("<hr />")
951
- # # gr.Markdown("## Step 2.2: Select the prompt you want to encrypt\n\n"
952
- # # """Please choose from the predefined options in
953
- # # <span style='color:grey'>“Prompt examples”</span> or craft a custom question in
954
- # # the <span style='color:grey'>“Customized prompt”</span> text box.
955
- # # Remain concise and relevant to the context. Any off-topic query will not be processed.""")
956
-
957
- # # with gr.Row():
958
- # # with gr.Column(scale=5):
959
-
960
- # # with gr.Column(scale=5):
961
- # # default_query_box = gr.Dropdown(
962
- # # list(DEFAULT_QUERIES.values()), label="PROMPT EXAMPLES:"
963
- # # )
964
-
965
- # # gr.Markdown("Or")
966
-
967
- # # query_box = gr.Textbox(
968
- # # value="What is Kate international bank account number?", label="CUSTOMIZED PROMPT:", interactive=True
969
- # # )
970
-
971
- # # default_query_box.change(
972
- # # fn=lambda default_query_box: default_query_box,
973
- # # inputs=[default_query_box],
974
- # # outputs=[query_box],
975
- # # )
976
-
977
- # # with gr.Column(scale=1, min_width=6):
978
- # # gr.HTML("<div style='height: 77px;'></div>")
979
- # # encrypt_query_btn = gr.Button("Encrypt the prompt")
980
- # # # gr.HTML("<div style='height: 50px;'></div>")
981
-
982
- # # with gr.Column(scale=5):
983
- # # output_encrypted_box = gr.Textbox(
984
- # # label="Encrypted anonymized query that will be sent to the anonymization server:",
985
- # # lines=8,
986
- # # )
987
-
988
- # # ########################## FHE processing Part ##########################
989
-
990
- # # gr.Markdown("<hr />")
991
- # # gr.Markdown("## Step 3: Anonymize the document and the prompt using FHE")
992
- # # gr.Markdown(
993
- # # """Once the client encrypts the document and the prompt locally, it will be sent to a remote
994
- # # server to perform the anonymization on encrypted data. When the computation is done, the
995
- # # server will return the result to the client for decryption.
996
- # # """
997
- # # )
998
-
999
- # # run_fhe_btn = gr.Button("Anonymize using FHE")
1000
-
1001
- # # with gr.Row():
1002
- # # with gr.Column(scale=5):
1003
-
1004
- # # anonymized_doc_output = gr.Textbox(
1005
- # # label="Decrypted and anonymized document", lines=10, interactive=True
1006
- # # )
1007
-
1008
- # # with gr.Column(scale=5):
1009
-
1010
- # # anonymized_query_output = gr.Textbox(
1011
- # # label="Decrypted and anonymized prompt", lines=10, interactive=True
1012
- # # )
1013
-
1014
-
1015
- # # identified_words_output_df = gr.Dataframe(label="Identified words:", visible=False)
1016
-
1017
- # # encrypt_doc_btn.click(
1018
- # # fn=encrypt_doc_fn,
1019
- # # inputs=[original_sentences_box],
1020
- # # outputs=[encrypted_doc_box, anonymized_doc_output],
1021
- # # )
1022
-
1023
- # # encrypt_query_btn.click(
1024
- # # fn=encrypt_query_fn,
1025
- # # inputs=[query_box],
1026
- # # outputs=[
1027
- # # query_box,
1028
- # # output_encrypted_box,
1029
- # # anonymized_query_output,
1030
- # # identified_words_output_df,
1031
- # # ],
1032
- # # )
1033
-
1034
- # # run_fhe_btn.click(
1035
- # # anonymization_with_fn,
1036
- # # inputs=[original_sentences_box, query_box],
1037
- # # outputs=[anonymized_doc_output, anonymized_query_output, identified_words_output_df],
1038
- # # )
1039
-
1040
- # # ########################## ChatGpt Part ##########################
1041
-
1042
- # # gr.Markdown("<hr />")
1043
- # # gr.Markdown("## Step 4: Send anonymized prompt to ChatGPT")
1044
- # # gr.Markdown(
1045
- # # """After securely anonymizing the query with FHE,
1046
- # # you can forward it to ChatGPT without having any concern about information leakage."""
1047
- # # )
1048
-
1049
- # # chatgpt_button = gr.Button("Query ChatGPT")
1050
-
1051
- # # with gr.Row():
1052
- # # chatgpt_response_anonymized = gr.Textbox(label="ChatGPT's anonymized response:", lines=5)
1053
- # # chatgpt_response_deanonymized = gr.Textbox(
1054
- # # label="ChatGPT's non-anonymized response:", lines=5
1055
- # # )
1056
-
1057
- # # chatgpt_button.click(
1058
- # # query_chatgpt_fn,
1059
- # # inputs=[anonymized_query_output, anonymized_doc_output],
1060
- # # outputs=[chatgpt_response_anonymized, chatgpt_response_deanonymized],
1061
- # # )
1062
-
1063
- # # gr.Markdown(
1064
- # # """**Please note**: As this space is intended solely for demonstration purposes, some
1065
- # # private information may be missed during by the anonymization algorithm. Please validate the
1066
- # # following query before sending it to ChatGPT."""
1067
- # # )
1068
- # # Launch the app
1069
- # # demo.launch(share=False)
1070
-
1071
-
1072
- # if __name__ == "__main__":
1073
- # demo.launch()
1074
-
1075
-
1076
-
1077
-
1078
-
1079
-
1080
-
1081
-
1082
-
1083
-
1084
-
1085
-
1086
-
1087
-
1088
-
1089
-
1090
-
1091
-
1092
  import gradio as gr
1093
  from predictor import predict, key_already_generated, pre_process_encrypt_send_purchase, decrypt_prediction
1094
  import base64
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
  from predictor import predict, key_already_generated, pre_process_encrypt_send_purchase, decrypt_prediction
3
  import base64