80cols commited on
Commit
7ad09dd
·
verified ·
1 Parent(s): 1d2f8e9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +826 -791
app.py CHANGED
@@ -1,712 +1,747 @@
1
- # """A Gradio app for anonymizing text data using FHE."""
2
 
3
- # import os
4
- # import re
5
- # import subprocess
6
- # import time
7
- # import uuid
8
- # from typing import Dict, List
9
 
10
- # import numpy
11
- # import pandas as pd
12
- # import requests
13
- # from fhe_anonymizer import FHEAnonymizer
14
- # from utils_demo import *
15
 
16
- # from concrete.ml.deployment import FHEModelClient
17
 
18
 
19
 
20
- # import gradio as gr
21
- # import base64
 
22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
- # # Ensure the directory is clean before starting processes or reading files
25
- # clean_directory()
26
 
27
- # anonymizer = FHEAnonymizer()
28
 
29
- # # Start the Uvicorn server hosting the FastAPI app
30
- # subprocess.Popen(["uvicorn", "server:app"], cwd=CURRENT_DIR)
31
- # time.sleep(3)
32
 
33
- # # Load data from files required for the application
34
- # UUID_MAP = read_json(MAPPING_UUID_PATH)
35
- # ANONYMIZED_DOCUMENT = read_txt(ANONYMIZED_FILE_PATH)
36
- # MAPPING_ANONYMIZED_SENTENCES = read_pickle(MAPPING_ANONYMIZED_SENTENCES_PATH)
37
- # MAPPING_ENCRYPTED_SENTENCES = read_pickle(MAPPING_ENCRYPTED_SENTENCES_PATH)
38
- # ORIGINAL_DOCUMENT = read_txt(ORIGINAL_FILE_PATH).split("\n\n")
39
- # MAPPING_DOC_EMBEDDING = read_pickle(MAPPING_DOC_EMBEDDING_PATH)
40
 
41
- # print(f"{ORIGINAL_DOCUMENT=}\n")
42
- # print(f"{MAPPING_DOC_EMBEDDING.keys()=}")
43
 
44
- # # 4. Data Processing and Operations (No specific operations shown here, assuming it's part of anonymizer or client usage)
45
 
46
- # # 5. Utilizing External Services or APIs
47
- # # (Assuming client initialization and anonymizer setup are parts of using external services or application-specific logic)
48
 
49
- # # Generate a random user ID for this session
50
- # USER_ID = numpy.random.randint(0, 2**32)
51
 
52
 
53
- # def select_static_anonymized_sentences_fn(selected_sentences: List):
54
 
55
- # selected_sentences = [MAPPING_ANONYMIZED_SENTENCES[sentence] for sentence in selected_sentences]
56
 
57
- # anonymized_selected_sentence = sorted(selected_sentences, key=lambda x: x[0])
58
 
59
- # anonymized_selected_sentence = [sentence for _, sentence in anonymized_selected_sentence]
60
 
61
- # return "\n\n".join(anonymized_selected_sentence)
62
 
63
 
64
- # def key_gen_fn() -> Dict:
65
- # """Generate keys for a given user."""
66
 
67
- # print("------------ Step 1: Key Generation:")
68
 
69
- # print(f"Your user ID is: {USER_ID}....")
 
70
 
 
71
 
72
- # client = FHEModelClient(path_dir=DEPLOYMENT_DIR, key_dir=KEYS_DIR / f"{USER_ID}")
73
- # client.load()
 
74
 
75
- # # Creates the private and evaluation keys on the client side
76
- # client.generate_private_and_evaluation_keys()
 
 
 
 
 
77
 
78
- # # Get the serialized evaluation keys
79
- # serialized_evaluation_keys = client.get_serialized_evaluation_keys()
80
- # assert isinstance(serialized_evaluation_keys, bytes)
81
 
82
- # # Save the evaluation key
83
- # evaluation_key_path = KEYS_DIR / f"{USER_ID}/evaluation_key"
84
 
85
- # write_bytes(evaluation_key_path, serialized_evaluation_keys)
 
86
 
87
- # # anonymizer.generate_key()
 
 
 
 
 
 
 
 
 
 
 
 
88
 
89
- # if not evaluation_key_path.is_file():
90
- # error_message = (
91
- # f"Error Encountered While generating the evaluation {evaluation_key_path.is_file()=}"
92
- # )
93
- # print(error_message)
94
- # return {gen_key_btn: gr.update(value=error_message)}
95
- # else:
96
- # print("Keys have been generated ✅")
97
- # return {gen_key_btn: gr.update(value="Keys have been generated ✅")}
98
 
 
 
99
 
100
- # def encrypt_doc_fn(doc):
101
 
102
- # print(f"\n------------ Step 2.1: Doc encryption: {doc=}")
103
 
104
- # if not (KEYS_DIR / f"{USER_ID}/evaluation_key").is_file():
105
- # return {encrypted_doc_box: gr.update(value="Error ❌: Please generate the key first!", lines=10)}
106
 
107
- # # Retrieve the client API
108
- # client = FHEModelClient(path_dir=DEPLOYMENT_DIR, key_dir=KEYS_DIR / f"{USER_ID}")
109
- # client.load()
110
 
111
- # encrypted_tokens = []
112
- # tokens = re.findall(r"(\b[\w\.\/\-@]+\b|[\s,.!?;:'\"-]+|\$\d+(?:\.\d+)?|\€\d+(?:\.\d+)?)", ' '.join(doc))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
 
114
- # for token in tokens:
115
- # if token.strip() and re.match(r"\w+", token):
116
- # emb_x = MAPPING_DOC_EMBEDDING[token]
117
- # assert emb_x.shape == (1, 1024)
118
- # encrypted_x = client.quantize_encrypt_serialize(emb_x)
119
- # assert isinstance(encrypted_x, bytes)
120
- # encrypted_tokens.append(encrypted_x)
121
 
122
- # print("Doc encrypted ✅ on Client Side")
123
 
124
- # # No need to save it
125
- # # write_bytes(KEYS_DIR / f"{USER_ID}/encrypted_doc", b"".join(encrypted_tokens))
126
 
127
- # encrypted_quant_tokens_hex = [token.hex()[500:510] for token in encrypted_tokens]
128
 
129
- # return {
130
- # encrypted_doc_box: gr.update(value=" ".join(encrypted_quant_tokens_hex), lines=10),
131
- # anonymized_doc_output: gr.update(visible=True, value=None),
132
- # }
133
 
134
 
135
- # def encrypt_query_fn(query):
136
 
137
- # print(f"\n------------ Step 2: Query encryption: {query=}")
138
 
139
- # if not (KEYS_DIR / f"{USER_ID}/evaluation_key").is_file():
140
- # return {output_encrypted_box: gr.update(value="Error ❌: Please generate the key first!", lines=8)}
141
 
142
- # if is_user_query_valid(query):
143
- # return {
144
- # query_box: gr.update(
145
- # value=(
146
- # "Unable to process ❌: The request exceeds the length limit or falls "
147
- # "outside the scope of this document. Please refine your query."
148
- # )
149
- # )
150
- # }
151
 
152
- # # Retrieve the client API
153
- # client = FHEModelClient(path_dir=DEPLOYMENT_DIR, key_dir=KEYS_DIR / f"{USER_ID}")
154
- # client.load()
155
 
156
- # encrypted_tokens = []
157
 
158
- # # Pattern to identify words and non-words (including punctuation, spaces, etc.)
159
- # tokens = re.findall(r"(\b[\w\.\/\-@]+\b|[\s,.!?;:'\"-]+)", query)
160
 
161
- # for token in tokens:
162
 
163
- # # 1- Ignore non-words tokens
164
- # if bool(re.match(r"^\s+$", token)):
165
- # continue
166
 
167
- # # 2- Directly append non-word tokens or whitespace to processed_tokens
168
 
169
- # # Prediction for each word
170
- # emb_x = get_batch_text_representation([token], EMBEDDINGS_MODEL, TOKENIZER)
171
- # encrypted_x = client.quantize_encrypt_serialize(emb_x)
172
- # assert isinstance(encrypted_x, bytes)
173
 
174
- # encrypted_tokens.append(encrypted_x)
175
 
176
- # print("Data encrypted ✅ on Client Side")
177
 
178
- # assert len({len(token) for token in encrypted_tokens}) == 1
179
 
180
- # write_bytes(KEYS_DIR / f"{USER_ID}/encrypted_input", b"".join(encrypted_tokens))
181
- # write_bytes(
182
- # KEYS_DIR / f"{USER_ID}/encrypted_input_len", len(encrypted_tokens[0]).to_bytes(10, "big")
183
- # )
184
 
185
- # encrypted_quant_tokens_hex = [token.hex()[500:580] for token in encrypted_tokens]
186
 
187
- # return {
188
- # output_encrypted_box: gr.update(value=" ".join(encrypted_quant_tokens_hex), lines=8),
189
- # anonymized_query_output: gr.update(visible=True, value=None),
190
- # identified_words_output_df: gr.update(visible=False, value=None),
191
- # }
192
 
193
 
194
- # def send_input_fn(query) -> Dict:
195
- # """Send the encrypted data and the evaluation key to the server."""
196
 
197
- # print("------------ Step 3.1: Send encrypted_data to the Server")
198
 
199
- # evaluation_key_path = KEYS_DIR / f"{USER_ID}/evaluation_key"
200
- # encrypted_input_path = KEYS_DIR / f"{USER_ID}/encrypted_input"
201
- # encrypted_input_len_path = KEYS_DIR / f"{USER_ID}/encrypted_input_len"
202
 
203
- # if not evaluation_key_path.is_file():
204
- # error_message = (
205
- # "Error Encountered While Sending Data to the Server: "
206
- # f"The key has been generated correctly - {evaluation_key_path.is_file()=}"
207
- # )
208
- # return {anonymized_query_output: gr.update(value=error_message)}
209
 
210
- # if not encrypted_input_path.is_file():
211
- # error_message = (
212
- # "Error Encountered While Sending Data to the Server: The data has not been encrypted "
213
- # f"correctly on the client side - {encrypted_input_path.is_file()=}"
214
- # )
215
- # return {anonymized_query_output: gr.update(value=error_message)}
216
 
217
- # # Define the data and files to post
218
- # data = {"user_id": USER_ID, "input": query}
219
 
220
- # files = [
221
- # ("files", open(evaluation_key_path, "rb")),
222
- # ("files", open(encrypted_input_path, "rb")),
223
- # ("files", open(encrypted_input_len_path, "rb")),
224
- # ]
225
 
226
- # # Send the encrypted input and evaluation key to the server
227
- # url = SERVER_URL + "send_input"
228
 
229
- # with requests.post(
230
- # url=url,
231
- # data=data,
232
- # files=files,
233
- # ) as resp:
234
- # print("Data sent to the server ✅" if resp.ok else "Error ❌ in sending data to the server")
235
 
236
 
237
- # def run_fhe_in_server_fn() -> Dict:
238
- # """Run in FHE the anonymization of the query"""
239
 
240
- # print("------------ Step 3.2: Run in FHE on the Server Side")
241
 
242
- # evaluation_key_path = KEYS_DIR / f"{USER_ID}/evaluation_key"
243
- # encrypted_input_path = KEYS_DIR / f"{USER_ID}/encrypted_input"
244
 
245
- # if not evaluation_key_path.is_file():
246
- # error_message = (
247
- # "Error Encountered While Sending Data to the Server: "
248
- # f"The key has been generated correctly - {evaluation_key_path.is_file()=}"
249
- # )
250
- # return {anonymized_query_output: gr.update(value=error_message)}
251
 
252
- # if not encrypted_input_path.is_file():
253
- # error_message = (
254
- # "Error Encountered While Sending Data to the Server: The data has not been encrypted "
255
- # f"correctly on the client side - {encrypted_input_path.is_file()=}"
256
- # )
257
- # return {anonymized_query_output: gr.update(value=error_message)}
258
 
259
- # data = {
260
- # "user_id": USER_ID,
261
- # }
262
 
263
- # url = SERVER_URL + "run_fhe"
264
 
265
- # with requests.post(
266
- # url=url,
267
- # data=data,
268
- # ) as response:
269
- # if not response.ok:
270
- # return {
271
- # anonymized_query_output: gr.update(
272
- # value=(
273
- # "⚠️ An error occurred on the Server Side. "
274
- # "Please check connectivity and data transmission."
275
- # ),
276
- # ),
277
- # }
278
- # else:
279
- # time.sleep(1)
280
- # print(f"The query anonymization was computed in {response.json():.2f} s per token.")
281
 
282
 
283
- # def get_output_fn() -> Dict:
284
 
285
- # print("------------ Step 3.3: Get the output from the Server Side")
286
 
287
- # if not (KEYS_DIR / f"{USER_ID}/evaluation_key").is_file():
288
- # error_message = (
289
- # "Error Encountered While Sending Data to the Server: "
290
- # "The key has not been generated correctly"
291
- # )
292
- # return {anonymized_query_output: gr.update(value=error_message)}
293
 
294
- # if not (KEYS_DIR / f"{USER_ID}/encrypted_input").is_file():
295
- # error_message = (
296
- # "Error Encountered While Sending Data to the Server: "
297
- # "The data has not been encrypted correctly on the client side"
298
- # )
299
- # return {anonymized_query_output: gr.update(value=error_message)}
300
-
301
- # data = {
302
- # "user_id": USER_ID,
303
- # }
304
-
305
- # # Retrieve the encrypted output
306
- # url = SERVER_URL + "get_output"
307
- # with requests.post(
308
- # url=url,
309
- # data=data,
310
- # ) as response:
311
- # if response.ok:
312
- # print("Data received ✅ from the remote Server")
313
- # response_data = response.json()
314
- # encrypted_output_base64 = response_data["encrypted_output"]
315
- # length_encrypted_output_base64 = response_data["length"]
316
-
317
- # # Decode the base64 encoded data
318
- # encrypted_output = base64.b64decode(encrypted_output_base64)
319
- # length_encrypted_output = base64.b64decode(length_encrypted_output_base64)
320
-
321
- # # Save the encrypted output to bytes in a file as it is too large to pass through
322
- # # regular Gradio buttons (see https://github.com/gradio-app/gradio/issues/1877)
323
-
324
- # write_bytes(CLIENT_DIR / f"{USER_ID}_encrypted_output", encrypted_output)
325
- # write_bytes(CLIENT_DIR / f"{USER_ID}_encrypted_output_len", length_encrypted_output)
326
-
327
- # else:
328
- # print("Error ❌ in getting data to the server")
329
-
330
-
331
- # def decrypt_fn(text) -> Dict:
332
- # """Dencrypt the data on the `Client Side`."""
333
-
334
- # print("------------ Step 4: Dencrypt the data on the `Client Side`")
335
-
336
- # # Get the encrypted output path
337
- # encrypted_output_path = CLIENT_DIR / f"{USER_ID}_encrypted_output"
338
-
339
- # if not encrypted_output_path.is_file():
340
- # error_message = """⚠️ Please ensure that: \n
341
- # - the connectivity \n
342
- # - the query has been submitted \n
343
- # - the evaluation key has been generated \n
344
- # - the server processed the encrypted data \n
345
- # - the Client received the data from the Server before decrypting the prediction
346
- # """
347
- # print(error_message)
348
 
349
- # return error_message, None
350
 
351
- # # Retrieve the client API
352
- # client = FHEModelClient(path_dir=DEPLOYMENT_DIR, key_dir=KEYS_DIR / f"{USER_ID}")
353
- # client.load()
354
 
355
- # # Load the encrypted output as bytes
356
- # encrypted_output = read_bytes(CLIENT_DIR / f"{USER_ID}_encrypted_output")
357
- # length = int.from_bytes(read_bytes(CLIENT_DIR / f"{USER_ID}_encrypted_output_len"), "big")
358
 
359
- # tokens = re.findall(r"(\b[\w\.\/\-@]+\b|[\s,.!?;:'\"-]+)", text)
360
 
361
- # decrypted_output, identified_words_with_prob = [], []
362
 
363
- # i = 0
364
- # for token in tokens:
365
 
366
- # # Directly append non-word tokens or whitespace to processed_tokens
367
- # if bool(re.match(r"^\s+$", token)):
368
- # continue
369
- # else:
370
- # encrypted_token = encrypted_output[i : i + length]
371
- # prediction_proba = client.deserialize_decrypt_dequantize(encrypted_token)
372
- # probability = prediction_proba[0][1]
373
- # i += length
374
 
375
- # if probability >= 0.77:
376
- # identified_words_with_prob.append((token, probability))
377
 
378
- # # Use the existing UUID if available, otherwise generate a new one
379
- # tmp_uuid = UUID_MAP.get(token, str(uuid.uuid4())[:8])
380
- # decrypted_output.append(tmp_uuid)
381
- # UUID_MAP[token] = tmp_uuid
382
- # else:
383
- # decrypted_output.append(token)
384
 
385
- # # Update the UUID map with query.
386
- # write_json(MAPPING_UUID_PATH, UUID_MAP)
387
 
388
- # # Removing Spaces Before Punctuation:
389
- # anonymized_text = re.sub(r"\s([,.!?;:])", r"\1", " ".join(decrypted_output))
390
 
391
- # # Convert the list of identified words and probabilities into a DataFrame
392
- # if identified_words_with_prob:
393
- # identified_df = pd.DataFrame(
394
- # identified_words_with_prob, columns=["Identified Words", "Probability"]
395
- # )
396
- # else:
397
- # identified_df = pd.DataFrame(columns=["Identified Words", "Probability"])
398
 
399
- # print("Decryption done ✅ on Client Side")
400
 
401
- # return anonymized_text, identified_df
402
 
403
 
404
- # def anonymization_with_fn(selected_sentences, query):
405
 
406
- # encrypt_query_fn(query)
407
 
408
- # send_input_fn(query)
409
 
410
- # run_fhe_in_server_fn()
411
 
412
- # get_output_fn()
413
 
414
- # anonymized_text, identified_df = decrypt_fn(query)
415
 
416
- # return {
417
- # anonymized_doc_output: gr.update(value=select_static_anonymized_sentences_fn(selected_sentences)),
418
- # anonymized_query_output: gr.update(value=anonymized_text),
419
- # identified_words_output_df: gr.update(value=identified_df, visible=False),
420
- # }
421
 
422
 
423
- # def query_chatgpt_fn(anonymized_query, anonymized_document):
424
 
425
- # print("------------ Step 5: ChatGPT communication")
426
 
427
- # if not (KEYS_DIR / f"{USER_ID}/evaluation_key").is_file():
428
- # error_message = "Error ❌: Please generate the key first!"
429
- # return {chatgpt_response_anonymized: gr.update(value=error_message)}
430
 
431
- # if not (CLIENT_DIR / f"{USER_ID}_encrypted_output").is_file():
432
- # error_message = "Error ❌: Please encrypt your query first!"
433
- # return {chatgpt_response_anonymized: gr.update(value=error_message)}
434
 
435
- # context_prompt = read_txt(PROMPT_PATH)
436
 
437
- # # Prepare prompt
438
- # query = (
439
- # "Document content:\n```\n"
440
- # + anonymized_document
441
- # + "\n\n```"
442
- # + "Query:\n```\n"
443
- # + anonymized_query
444
- # + "\n```"
445
- # )
446
- # print(f'Prompt of CHATGPT:\n{query}')
447
-
448
- # completion = client.chat.completions.create(
449
- # model="gpt-4-1106-preview", # Replace with "gpt-4" if available
450
- # messages=[
451
- # {"role": "system", "content": context_prompt},
452
- # {"role": "user", "content": query},
453
- # ],
454
- # )
455
- # anonymized_response = completion.choices[0].message.content
456
- # uuid_map = read_json(MAPPING_UUID_PATH)
457
 
458
- # inverse_uuid_map = {
459
- # v: k for k, v in uuid_map.items()
460
- # } # TODO load the inverse mapping from disk for efficiency
461
 
462
- # # Pattern to identify words and non-words (including punctuation, spaces, etc.)
463
- # tokens = re.findall(r"(\b[\w\.\/\-@]+\b|[\s,.!?;:'\"-]+)", anonymized_response)
464
- # processed_tokens = []
465
 
466
- # for token in tokens:
467
- # # Directly append non-word tokens or whitespace to processed_tokens
468
- # if not token.strip() or not re.match(r"\w+", token):
469
- # processed_tokens.append(token)
470
- # continue
471
 
472
- # if token in inverse_uuid_map:
473
- # processed_tokens.append(inverse_uuid_map[token])
474
- # else:
475
- # processed_tokens.append(token)
476
- # deanonymized_response = "".join(processed_tokens)
477
 
478
- # return {chatgpt_response_anonymized: gr.update(value=anonymized_response),
479
- # chatgpt_response_deanonymized: gr.update(value=deanonymized_response)}
480
 
481
 
482
- # demo = gr.Blocks(css=".markdown-body { font-size: 18px; }")
483
 
484
- # with demo:
485
 
486
- # gr.Markdown(
487
- # """
488
- # <p align="center">
489
- # <img width=200 src="https://user-images.githubusercontent.com/5758427/197816413-d9cddad3-ba38-4793-847d-120975e1da11.png">
490
- # </p>
491
- # """)
492
 
493
- # gr.Markdown(
494
- # """
495
- # <h1 style="text-align: center;">Encrypted Anonymization Using Fully Homomorphic Encryption</h1>
496
- # <p align="center">
497
- # <a href="https://github.com/zama-ai/concrete-ml"> <img style="vertical-align: middle; display:inline-block; margin-right: 3px;" width=15 src="file/images/logos/github.png">Concrete-ML</a>
498
- #
499
- # <a href="https://docs.zama.ai/concrete-ml"> <img style="vertical-align: middle; display:inline-block; margin-right: 3px;" width=15 src="file/images/logos/documentation.png">Documentation</a>
500
- #
501
- # <a href=" https://community.zama.ai/c/concrete-ml/8"> <img style="vertical-align: middle; display:inline-block; margin-right: 3px;" width=15 src="file/images/logos/community.png">Community</a>
502
- #
503
- # <a href="https://twitter.com/zama_fhe"> <img style="vertical-align: middle; display:inline-block; margin-right: 3px;" width=15 src="file/images/logos/x.png">@zama_fhe</a>
504
- # </p>
505
- # """
506
- # )
507
 
508
- # gr.Markdown(
509
- # """
510
- # <p align="center" style="font-size: 16px;">
511
- # Anonymization is the process of removing personally identifiable information (PII) data from
512
- # a document in order to protect individual privacy.</p>
513
 
514
- # <p align="center" style="font-size: 16px;">
515
- # Encrypted anonymization uses Fully Homomorphic Encryption (FHE) to anonymize personally
516
- # identifiable information (PII) within encrypted documents, enabling computations to be
517
- # performed on the encrypted data.</p>
518
 
519
- # <p align="center" style="font-size: 16px;">
520
- # In the example above, we're showing how encrypted anonymization can be leveraged to use LLM
521
- # services such as ChatGPT in a privacy-preserving manner.</p>
522
- # """
523
- # )
524
 
525
- # # gr.Markdown(
526
- # # """
527
- # # <p align="center">
528
- # # <img width="75%" height="30%" src="https://raw.githubusercontent.com/kcelia/Img/main/fhe_anonymization_banner.png">
529
- # # </p>
530
- # # """
531
- # # )
532
- # gr.Markdown(
533
- # f"""
534
- # <p align="center">
535
- # <img width="75%" height="30%" src="https://huggingface.co/spaces/Tenefix/private-fhe-fraud-detection/resolve/main/Img/schema.png">
536
- # </p>
537
- # """
538
- # )
539
 
540
 
541
- # ########################## Key Gen Part ##########################
542
 
543
- # gr.Markdown(
544
- # "## Step 1: Generate the keys\n\n"
545
- # """In Fully Homomorphic Encryption (FHE) methods, two types of keys are created. The first
546
- # type, called secret keys, are used to encrypt and decrypt the user's data. The second type,
547
- # called evaluation keys, enables a server to work on the encrypted data without seeing the
548
- # actual data.
549
- # """
550
- # )
551
 
552
- # gen_key_btn = gr.Button("Generate the secret and evaluation keys")
553
 
554
- # gen_key_btn.click(
555
- # key_gen_fn,
556
- # inputs=[],
557
- # outputs=[gen_key_btn],
558
- # )
559
 
560
- # ########################## Main document Part ##########################
561
 
562
- # gr.Markdown("<hr />")
563
- # gr.Markdown("## Step 2.1: Select the document you want to encrypt\n\n"
564
- # """To make it simple, we pre-compiled the following document, but you are free to choose
565
- # on which part you want to run this example.
566
- # """
567
- # )
568
 
569
- # with gr.Row():
570
- # with gr.Column(scale=5):
571
- # original_sentences_box = gr.CheckboxGroup(
572
- # ORIGINAL_DOCUMENT,
573
- # value=ORIGINAL_DOCUMENT,
574
- # label="Contract:",
575
- # show_label=True,
576
- # )
577
 
578
- # with gr.Column(scale=1, min_width=6):
579
- # gr.HTML("<div style='height: 77px;'></div>")
580
- # encrypt_doc_btn = gr.Button("Encrypt the document")
581
 
582
- # with gr.Column(scale=5):
583
- # encrypted_doc_box = gr.Textbox(
584
- # label="Encrypted document:", show_label=True, interactive=False, lines=10
585
- # )
586
 
587
 
588
- # ########################## User Query Part ##########################
589
 
590
- # gr.Markdown("<hr />")
591
- # gr.Markdown("## Step 2.2: Select the prompt you want to encrypt\n\n"
592
- # """Please choose from the predefined options in
593
- # <span style='color:grey'>“Prompt examples”</span> or craft a custom question in
594
- # the <span style='color:grey'>“Customized prompt”</span> text box.
595
- # Remain concise and relevant to the context. Any off-topic query will not be processed.""")
596
 
597
- # with gr.Row():
598
- # with gr.Column(scale=5):
599
 
600
- # with gr.Column(scale=5):
601
- # default_query_box = gr.Dropdown(
602
- # list(DEFAULT_QUERIES.values()), label="PROMPT EXAMPLES:"
603
- # )
604
 
605
- # gr.Markdown("Or")
606
 
607
- # query_box = gr.Textbox(
608
- # value="What is Kate international bank account number?", label="CUSTOMIZED PROMPT:", interactive=True
609
- # )
610
 
611
- # default_query_box.change(
612
- # fn=lambda default_query_box: default_query_box,
613
- # inputs=[default_query_box],
614
- # outputs=[query_box],
615
- # )
616
 
617
- # with gr.Column(scale=1, min_width=6):
618
- # gr.HTML("<div style='height: 77px;'></div>")
619
- # encrypt_query_btn = gr.Button("Encrypt the prompt")
620
- # # gr.HTML("<div style='height: 50px;'></div>")
621
 
622
- # with gr.Column(scale=5):
623
- # output_encrypted_box = gr.Textbox(
624
- # label="Encrypted anonymized query that will be sent to the anonymization server:",
625
- # lines=8,
626
- # )
627
 
628
- # ########################## FHE processing Part ##########################
629
 
630
- # gr.Markdown("<hr />")
631
- # gr.Markdown("## Step 3: Anonymize the document and the prompt using FHE")
632
- # gr.Markdown(
633
- # """Once the client encrypts the document and the prompt locally, it will be sent to a remote
634
- # server to perform the anonymization on encrypted data. When the computation is done, the
635
- # server will return the result to the client for decryption.
636
- # """
637
- # )
638
 
639
- # run_fhe_btn = gr.Button("Anonymize using FHE")
640
 
641
- # with gr.Row():
642
- # with gr.Column(scale=5):
643
 
644
- # anonymized_doc_output = gr.Textbox(
645
- # label="Decrypted and anonymized document", lines=10, interactive=True
646
- # )
647
 
648
- # with gr.Column(scale=5):
649
 
650
- # anonymized_query_output = gr.Textbox(
651
- # label="Decrypted and anonymized prompt", lines=10, interactive=True
652
- # )
653
 
654
 
655
- # identified_words_output_df = gr.Dataframe(label="Identified words:", visible=False)
656
 
657
- # encrypt_doc_btn.click(
658
- # fn=encrypt_doc_fn,
659
- # inputs=[original_sentences_box],
660
- # outputs=[encrypted_doc_box, anonymized_doc_output],
661
- # )
662
 
663
- # encrypt_query_btn.click(
664
- # fn=encrypt_query_fn,
665
- # inputs=[query_box],
666
- # outputs=[
667
- # query_box,
668
- # output_encrypted_box,
669
- # anonymized_query_output,
670
- # identified_words_output_df,
671
- # ],
672
- # )
673
 
674
- # run_fhe_btn.click(
675
- # anonymization_with_fn,
676
- # inputs=[original_sentences_box, query_box],
677
- # outputs=[anonymized_doc_output, anonymized_query_output, identified_words_output_df],
678
- # )
679
 
680
- # ########################## ChatGpt Part ##########################
681
 
682
- # gr.Markdown("<hr />")
683
- # gr.Markdown("## Step 4: Send anonymized prompt to ChatGPT")
684
- # gr.Markdown(
685
- # """After securely anonymizing the query with FHE,
686
- # you can forward it to ChatGPT without having any concern about information leakage."""
687
- # )
688
 
689
- # chatgpt_button = gr.Button("Query ChatGPT")
690
 
691
- # with gr.Row():
692
- # chatgpt_response_anonymized = gr.Textbox(label="ChatGPT's anonymized response:", lines=5)
693
- # chatgpt_response_deanonymized = gr.Textbox(
694
- # label="ChatGPT's non-anonymized response:", lines=5
695
- # )
696
 
697
- # chatgpt_button.click(
698
- # query_chatgpt_fn,
699
- # inputs=[anonymized_query_output, anonymized_doc_output],
700
- # outputs=[chatgpt_response_anonymized, chatgpt_response_deanonymized],
701
- # )
702
 
703
- # gr.Markdown(
704
- # """**Please note**: As this space is intended solely for demonstration purposes, some
705
- # private information may be missed during by the anonymization algorithm. Please validate the
706
- # following query before sending it to ChatGPT."""
707
- # )
708
- # # Launch the app
709
- # demo.launch(share=False)
710
 
711
 
712
 
@@ -714,298 +749,298 @@
714
 
715
 
716
 
717
- import gradio as gr
718
- from predictor import predict, key_already_generated, pre_process_encrypt_send_purchase, decrypt_prediction
719
- import base64
720
 
721
- def key_generated():
722
- """
723
- Check if the evaluation keys have already been generated.
724
- Returns:
725
- bool: True if the evaluation keys have already been generated, False otherwise.
726
- """
727
- if not key_already_generated():
728
- error_message = (
729
- f"Error Encountered While generating the evaluation keys."
730
- )
731
- print(error_message)
732
- return {gen_key_btn: gr.update(value=error_message)}
733
- else:
734
- print("Keys have been generated ✅")
735
- return {gen_key_btn: gr.update(value="Keys have been generated ✅")}
736
 
737
 
738
- demo = gr.Blocks(css=".markdown-body { font-size: 18px; }")
739
 
740
- with demo:
741
- with gr.Row():
742
- with gr.Column(elem_id="center_column"):
743
- gr.Image("Img/zama.png", width=200, show_label=False)
744
- with gr.Column(elem_id="center_column"):
745
- gr.Image("Img/Epita.png", width=200, show_label=False)
746
 
747
 
748
 
749
- gr.Markdown(
750
- """
751
- <h1 style="text-align: center;">Fraud Detection with FHE Model</h1>
752
- <p align="center">
753
- <a href="https://github.com/CirSandro/private-fhe-fraud-detection">
754
- <span style="vertical-align: middle; display:inline-block; margin-right: 3px;">💳</span>private-fhe-fraud-detection
755
- </a>
756
-
757
- <a href="https://docs.zama.ai/concrete-ml">
758
- <span style="vertical-align: middle; display:inline-block; margin-right: 3px;">🔒</span>Documentation Concrete-ML
759
- </a>
760
- </p>
761
- """
762
- )
763
 
764
- gr.Markdown(
765
- """
766
- <p align="center" style="font-size: 16px;">
767
- How to detect bank fraud without using your personal data ?</p>
768
- """
769
- )
770
 
771
- with gr.Accordion("What is bank fraud detection?", open=False):
772
- gr.Markdown(
773
- """
774
- Bank fraud detection is the process of identifying fraudulent activities or transactions
775
- that may pose a risk to a bank or its customers. It is essential to detect fraudulent
776
- activities to prevent financial losses and protect the integrity of the banking system.
777
- """
778
- )
779
 
780
- with gr.Accordion("Why is it important to protect this data?", open=False):
781
- gr.Markdown(
782
- """
783
- Banking and financial data often contain sensitive personal information, such as income,
784
- spending habits, and account numbers. Protecting this information ensures that customers'
785
- privacy is respected and safeguarded from unauthorized access.
786
- """
787
- )
788
 
789
- with gr.Accordion("Why is Fully Homomorphic Encryption (FHE) a good solution?", open=False):
790
- gr.Markdown(
791
- """
792
- Fully Homomorphic Encryption (FHE) is a powerful technique for enhancing privacy and accuracy
793
- in the context of fraud detection, particularly when dealing with sensitive banking data. FHE
794
- allows for the encryption of data, which can then be processed and analyzed without ever needing
795
- to decrypt it.
796
- Each party involved in the detection process can collaborate without compromising user privacy,
797
- minimizing the risk of data leaks or breaches. The data remains confidential throughout the entire
798
- process, ensuring that the privacy of users is maintained.
799
- """
800
- )
801
 
802
- gr.Markdown(
803
- """
804
- <p style="text-align: center;">
805
- Below, we will explain the flow in the image by simulating a purchase you've just made, and show you how our fraud detection model processes the transaction.
806
- </p>
807
- """
808
- )
809
 
810
- # gr.Markdown(
811
- # f"""
812
- # <p align="center">
813
- # <img width="75%" height="30%" src="https://huggingface.co/spaces/Tenefix/private-fhe-fraud-detection/resolve/main/Img/schema.png">
814
- # </p>
815
- # """
816
- # )
817
- with gr.Row():
818
- with gr.Column(elem_id="center_column"):
819
- gr.Image("Img/schema.png", width=200, show_label=False)
820
 
821
- gr.Markdown("<hr />")
822
 
823
- ########################## Key Gen Part ##########################
824
 
825
- gr.Markdown(
826
- "## Step 1: Generate the keys\n\n"
827
- """In Fully Homomorphic Encryption (FHE) methods, two types of keys are created. The first
828
- type, called secret keys, are used to encrypt and decrypt the user's data. The second type,
829
- called evaluation keys, enables a server to work on the encrypted data without seeing the
830
- actual data.
831
- """
832
- )
833
 
834
- gen_key_btn = gr.Button("Generate the secret and evaluation keys")
835
 
836
- gen_key_btn.click(
837
- key_generated,
838
- inputs=[],
839
- outputs=[gen_key_btn],
840
- )#547
841
 
842
- gr.Markdown("<hr />")
843
 
844
- ########################## Encrypt Data ##########################
845
 
846
- gr.Markdown(
847
- "## Step 2: Make your purchase\n\n"
848
- """
849
- 🛍️ It's time to shop! To simulate your latest purchase, please provide the details of your most recent transaction.
850
 
851
- If you don't have an idea, you can pre-fill with an example of fraud or non-fraud.
852
- """
853
- )
854
 
855
- def prefill_fraud():
856
- return 34, 50, 3, False, False, False, True
857
 
858
- def prefill_no_fraud():
859
- return 12, 2, 0.7, True, False, True, False
860
 
861
- with gr.Row():
862
- prefill_button = gr.Button("Exemple Fraud")
863
- prefill_button_no = gr.Button("Exemple No-Fraud")
864
 
865
- with gr.Row():
866
- with gr.Column():
867
- distance_home = gr.Number(
868
- minimum=float(0),
869
- maximum=float(22000),
870
- step=1,
871
- value=10,
872
- label="Distance from Home",
873
- info="How far was the purchase from your home (in km)?"
874
- )
875
- distance_last = gr.Number(
876
- minimum=float(0),
877
- maximum=float(22000),
878
- step=1,
879
- value=1,
880
- label="Distance from Last Transaction",
881
- info="Distance between this purchase and the last one (in km)?"
882
- )
883
- ratio = gr.Number(
884
- minimum=float(0),
885
- maximum=float(10000),
886
- step=0.1,
887
- value=1,
888
- label="Ratio to Median Purchase Price",
889
- info="Purchase ratio compared to your average purchase",
890
- )
891
- repeat_retailer = gr.Checkbox(
892
- label="Repeat Retailer",
893
- info="Check if you are purchasing from the same retailer as your last transaction"
894
- )
895
- used_chip = gr.Checkbox(
896
- label="Used Chip",
897
- info="Check if you used a chip card for this transaction"
898
- )
899
- used_pin_number = gr.Checkbox(
900
- label="Used Pin Number",
901
- info="Check if you used your PIN number during the transaction"
902
- )
903
- online = gr.Checkbox(
904
- label="Online Order",
905
- info="Check if you made your purchase online"
906
- )
907
 
908
 
909
- prefill_button.click(
910
- fn=prefill_fraud,
911
- inputs=[],
912
- outputs=[
913
- distance_home,
914
- distance_last,
915
- ratio,
916
- repeat_retailer,
917
- used_chip,
918
- used_pin_number,
919
- online
920
- ]
921
- )
922
 
923
- prefill_button_no.click(
924
- fn=prefill_no_fraud,
925
- inputs=[],
926
- outputs=[
927
- distance_home,
928
- distance_last,
929
- ratio,
930
- repeat_retailer,
931
- used_chip,
932
- used_pin_number,
933
- online
934
- ]
935
- )
936
 
937
- with gr.Row():
938
- with gr.Column(scale=2):
939
- encrypt_button_applicant = gr.Button("Encrypt the inputs and send to server.")
940
 
941
- encrypted_input_applicant = gr.Textbox(
942
- label="Encrypted input representation:", max_lines=2, interactive=False
943
- )
944
 
945
- encrypt_button_applicant.click(
946
- pre_process_encrypt_send_purchase,
947
- inputs=[distance_home, distance_last, ratio, repeat_retailer, used_chip, used_pin_number, \
948
- online],
949
- outputs=[encrypted_input_applicant, encrypt_button_applicant],
950
- )
951
 
952
- gr.Markdown("<hr />")
953
 
954
- ########################## Model Prediction ##########################
955
 
956
- gr.Markdown("## Step 3: Run the FHE evaluation.")
957
- gr.Markdown("<span style='color:grey'>Server Side</span>")
958
- gr.Markdown(
959
- """
960
- It's high time to launch our prediction, by pressing the button you will launch the
961
- fraud analysis that our fictitious bank offers you.
962
- This server employs a [Random Forest (by Concrete-ML)](https://github.com/zama-ai/concrete-ml/blob/release/1.8.x/docs/references/api/concrete.ml.sklearn.rf.md#class-randomforestclassifier)
963
- classifier model that has been trained on a synthetic data-set.
964
- """
965
- )
966
 
967
- execute_fhe_button = gr.Button("Run the FHE evaluation.")
968
- fhe_execution_time = gr.Textbox(
969
- label="Total FHE execution time (in seconds):", max_lines=1, interactive=False
970
- )
971
 
972
- # Button to send the encodings to the server using post method
973
- execute_fhe_button.click(predict, outputs=[fhe_execution_time, execute_fhe_button])
974
 
975
- gr.Markdown("<hr />")
976
 
977
- ########################## Decrypt Prediction ##########################
978
 
979
- gr.Markdown("## Step 4: Receive the encrypted output from the server and decrypt.")
980
- gr.Markdown(
981
- """
982
- 🔔 You will receive a notification! Is this a Fraud? The message is decrypted by pressing the button.
983
- """
984
- )
985
 
986
- get_output_button = gr.Button("Decrypt the prediction.")
987
- prediction_output = gr.Textbox(
988
- label="Prediction", max_lines=1, interactive=False
989
- )
990
- prediction_bar = gr.HTML(label="Prediction Bar") # For the percentage bar
991
 
992
- get_output_button.click(
993
- decrypt_prediction,
994
- outputs=[prediction_output, get_output_button, prediction_bar],
995
- )
996
 
997
 
998
- gr.Markdown(
999
- """
1000
- You now know that it is possible to detect bank fraud without knowing your personal information.
1001
- """
1002
- )
1003
 
1004
- gr.Markdown(
1005
- "The app was built with [Concrete-ML](https://github.com/zama-ai/concrete-ml), a "
1006
- "Privacy-Preserving Machine Learning (PPML) open-source set of tools by [Zama](https://zama.ai/). "
1007
- "Try it yourself and don't forget to star on Github &#11088;."
1008
- )
1009
 
1010
- if __name__ == "__main__":
1011
- demo.launch()
 
1
+ """A Gradio app for anonymizing text data using FHE."""
2
 
3
+ import os
4
+ import re
5
+ import subprocess
6
+ import time
7
+ import uuid
8
+ from typing import Dict, List
9
 
10
+ import numpy
11
+ import pandas as pd
12
+ import requests
13
+ from fhe_anonymizer import FHEAnonymizer
14
+ from utils_demo import *
15
 
16
+ from concrete.ml.deployment import FHEModelClient
17
 
18
 
19
 
20
+ import gradio as gr
21
+ from predictor import predict, key_already_generated, pre_process_encrypt_send_purchase, decrypt_prediction
22
+ import base64
23
 
24
+ def key_generated():
25
+ """
26
+ Check if the evaluation keys have already been generated.
27
+ Returns:
28
+ bool: True if the evaluation keys have already been generated, False otherwise.
29
+ """
30
+ if not key_already_generated():
31
+ error_message = (
32
+ f"Error Encountered While generating the evaluation keys."
33
+ )
34
+ print(error_message)
35
+ return {gen_key_btn: gr.update(value=error_message)}
36
+ else:
37
+ print("Keys have been generated ✅")
38
+ return {gen_key_btn: gr.update(value="Keys have been generated ✅")}
39
 
40
+
41
+ # demo = gr.Blocks(css=".markdown-body { font-size: 18px; }")
42
 
 
43
 
 
 
 
44
 
 
 
 
 
 
 
 
45
 
 
 
46
 
 
47
 
 
 
48
 
 
 
49
 
50
 
 
51
 
 
52
 
 
53
 
 
54
 
 
55
 
56
 
 
 
57
 
 
58
 
59
+ # Ensure the directory is clean before starting processes or reading files
60
+ clean_directory()
61
 
62
+ anonymizer = FHEAnonymizer()
63
 
64
+ # Start the Uvicorn server hosting the FastAPI app
65
+ subprocess.Popen(["uvicorn", "server:app"], cwd=CURRENT_DIR)
66
+ time.sleep(3)
67
 
68
+ # Load data from files required for the application
69
+ UUID_MAP = read_json(MAPPING_UUID_PATH)
70
+ ANONYMIZED_DOCUMENT = read_txt(ANONYMIZED_FILE_PATH)
71
+ MAPPING_ANONYMIZED_SENTENCES = read_pickle(MAPPING_ANONYMIZED_SENTENCES_PATH)
72
+ MAPPING_ENCRYPTED_SENTENCES = read_pickle(MAPPING_ENCRYPTED_SENTENCES_PATH)
73
+ ORIGINAL_DOCUMENT = read_txt(ORIGINAL_FILE_PATH).split("\n\n")
74
+ MAPPING_DOC_EMBEDDING = read_pickle(MAPPING_DOC_EMBEDDING_PATH)
75
 
76
+ print(f"{ORIGINAL_DOCUMENT=}\n")
77
+ print(f"{MAPPING_DOC_EMBEDDING.keys()=}")
 
78
 
79
+ # 4. Data Processing and Operations (No specific operations shown here, assuming it's part of anonymizer or client usage)
 
80
 
81
+ # 5. Utilizing External Services or APIs
82
+ # (Assuming client initialization and anonymizer setup are parts of using external services or application-specific logic)
83
 
84
+ # Generate a random user ID for this session
85
+ USER_ID = numpy.random.randint(0, 2**32)
86
+
87
+
88
+ def select_static_anonymized_sentences_fn(selected_sentences: List):
89
+
90
+ selected_sentences = [MAPPING_ANONYMIZED_SENTENCES[sentence] for sentence in selected_sentences]
91
+
92
+ anonymized_selected_sentence = sorted(selected_sentences, key=lambda x: x[0])
93
+
94
+ anonymized_selected_sentence = [sentence for _, sentence in anonymized_selected_sentence]
95
+
96
+ return "\n\n".join(anonymized_selected_sentence)
97
 
 
 
 
 
 
 
 
 
 
98
 
99
+ def key_gen_fn() -> Dict:
100
+ """Generate keys for a given user."""
101
 
102
+ print("------------ Step 1: Key Generation:")
103
 
104
+ print(f"Your user ID is: {USER_ID}....")
105
 
 
 
106
 
107
+ client = FHEModelClient(path_dir=DEPLOYMENT_DIR, key_dir=KEYS_DIR / f"{USER_ID}")
108
+ client.load()
 
109
 
110
+ # Creates the private and evaluation keys on the client side
111
+ client.generate_private_and_evaluation_keys()
112
+
113
+ # Get the serialized evaluation keys
114
+ serialized_evaluation_keys = client.get_serialized_evaluation_keys()
115
+ assert isinstance(serialized_evaluation_keys, bytes)
116
+
117
+ # Save the evaluation key
118
+ evaluation_key_path = KEYS_DIR / f"{USER_ID}/evaluation_key"
119
+
120
+ write_bytes(evaluation_key_path, serialized_evaluation_keys)
121
+
122
+ # anonymizer.generate_key()
123
+
124
+ if not evaluation_key_path.is_file():
125
+ error_message = (
126
+ f"Error Encountered While generating the evaluation {evaluation_key_path.is_file()=}"
127
+ )
128
+ print(error_message)
129
+ return {gen_key_btn: gr.update(value=error_message)}
130
+ else:
131
+ print("Keys have been generated ✅")
132
+ return {gen_key_btn: gr.update(value="Keys have been generated ✅")}
133
+
134
+
135
+ def encrypt_doc_fn(doc):
136
+
137
+ print(f"\n------------ Step 2.1: Doc encryption: {doc=}")
138
+
139
+ if not (KEYS_DIR / f"{USER_ID}/evaluation_key").is_file():
140
+ return {encrypted_doc_box: gr.update(value="Error ❌: Please generate the key first!", lines=10)}
141
+
142
+ # Retrieve the client API
143
+ client = FHEModelClient(path_dir=DEPLOYMENT_DIR, key_dir=KEYS_DIR / f"{USER_ID}")
144
+ client.load()
145
+
146
+ encrypted_tokens = []
147
+ tokens = re.findall(r"(\b[\w\.\/\-@]+\b|[\s,.!?;:'\"-]+|\$\d+(?:\.\d+)?|\€\d+(?:\.\d+)?)", ' '.join(doc))
148
 
149
+ for token in tokens:
150
+ if token.strip() and re.match(r"\w+", token):
151
+ emb_x = MAPPING_DOC_EMBEDDING[token]
152
+ assert emb_x.shape == (1, 1024)
153
+ encrypted_x = client.quantize_encrypt_serialize(emb_x)
154
+ assert isinstance(encrypted_x, bytes)
155
+ encrypted_tokens.append(encrypted_x)
156
 
157
+ print("Doc encrypted ✅ on Client Side")
158
 
159
+ # No need to save it
160
+ # write_bytes(KEYS_DIR / f"{USER_ID}/encrypted_doc", b"".join(encrypted_tokens))
161
 
162
+ encrypted_quant_tokens_hex = [token.hex()[500:510] for token in encrypted_tokens]
163
 
164
+ return {
165
+ encrypted_doc_box: gr.update(value=" ".join(encrypted_quant_tokens_hex), lines=10),
166
+ anonymized_doc_output: gr.update(visible=True, value=None),
167
+ }
168
 
169
 
170
+ def encrypt_query_fn(query):
171
 
172
+ print(f"\n------------ Step 2: Query encryption: {query=}")
173
 
174
+ if not (KEYS_DIR / f"{USER_ID}/evaluation_key").is_file():
175
+ return {output_encrypted_box: gr.update(value="Error ❌: Please generate the key first!", lines=8)}
176
 
177
+ if is_user_query_valid(query):
178
+ return {
179
+ query_box: gr.update(
180
+ value=(
181
+ "Unable to process ❌: The request exceeds the length limit or falls "
182
+ "outside the scope of this document. Please refine your query."
183
+ )
184
+ )
185
+ }
186
 
187
+ # Retrieve the client API
188
+ client = FHEModelClient(path_dir=DEPLOYMENT_DIR, key_dir=KEYS_DIR / f"{USER_ID}")
189
+ client.load()
190
 
191
+ encrypted_tokens = []
192
 
193
+ # Pattern to identify words and non-words (including punctuation, spaces, etc.)
194
+ tokens = re.findall(r"(\b[\w\.\/\-@]+\b|[\s,.!?;:'\"-]+)", query)
195
 
196
+ for token in tokens:
197
 
198
+ # 1- Ignore non-words tokens
199
+ if bool(re.match(r"^\s+$", token)):
200
+ continue
201
 
202
+ # 2- Directly append non-word tokens or whitespace to processed_tokens
203
 
204
+ # Prediction for each word
205
+ emb_x = get_batch_text_representation([token], EMBEDDINGS_MODEL, TOKENIZER)
206
+ encrypted_x = client.quantize_encrypt_serialize(emb_x)
207
+ assert isinstance(encrypted_x, bytes)
208
 
209
+ encrypted_tokens.append(encrypted_x)
210
 
211
+ print("Data encrypted ✅ on Client Side")
212
 
213
+ assert len({len(token) for token in encrypted_tokens}) == 1
214
 
215
+ write_bytes(KEYS_DIR / f"{USER_ID}/encrypted_input", b"".join(encrypted_tokens))
216
+ write_bytes(
217
+ KEYS_DIR / f"{USER_ID}/encrypted_input_len", len(encrypted_tokens[0]).to_bytes(10, "big")
218
+ )
219
 
220
+ encrypted_quant_tokens_hex = [token.hex()[500:580] for token in encrypted_tokens]
221
 
222
+ return {
223
+ output_encrypted_box: gr.update(value=" ".join(encrypted_quant_tokens_hex), lines=8),
224
+ anonymized_query_output: gr.update(visible=True, value=None),
225
+ identified_words_output_df: gr.update(visible=False, value=None),
226
+ }
227
 
228
 
229
+ def send_input_fn(query) -> Dict:
230
+ """Send the encrypted data and the evaluation key to the server."""
231
 
232
+ print("------------ Step 3.1: Send encrypted_data to the Server")
233
 
234
+ evaluation_key_path = KEYS_DIR / f"{USER_ID}/evaluation_key"
235
+ encrypted_input_path = KEYS_DIR / f"{USER_ID}/encrypted_input"
236
+ encrypted_input_len_path = KEYS_DIR / f"{USER_ID}/encrypted_input_len"
237
 
238
+ if not evaluation_key_path.is_file():
239
+ error_message = (
240
+ "Error Encountered While Sending Data to the Server: "
241
+ f"The key has been generated correctly - {evaluation_key_path.is_file()=}"
242
+ )
243
+ return {anonymized_query_output: gr.update(value=error_message)}
244
 
245
+ if not encrypted_input_path.is_file():
246
+ error_message = (
247
+ "Error Encountered While Sending Data to the Server: The data has not been encrypted "
248
+ f"correctly on the client side - {encrypted_input_path.is_file()=}"
249
+ )
250
+ return {anonymized_query_output: gr.update(value=error_message)}
251
 
252
+ # Define the data and files to post
253
+ data = {"user_id": USER_ID, "input": query}
254
 
255
+ files = [
256
+ ("files", open(evaluation_key_path, "rb")),
257
+ ("files", open(encrypted_input_path, "rb")),
258
+ ("files", open(encrypted_input_len_path, "rb")),
259
+ ]
260
 
261
+ # Send the encrypted input and evaluation key to the server
262
+ url = SERVER_URL + "send_input"
263
 
264
+ with requests.post(
265
+ url=url,
266
+ data=data,
267
+ files=files,
268
+ ) as resp:
269
+ print("Data sent to the server ✅" if resp.ok else "Error ❌ in sending data to the server")
270
 
271
 
272
+ def run_fhe_in_server_fn() -> Dict:
273
+ """Run in FHE the anonymization of the query"""
274
 
275
+ print("------------ Step 3.2: Run in FHE on the Server Side")
276
 
277
+ evaluation_key_path = KEYS_DIR / f"{USER_ID}/evaluation_key"
278
+ encrypted_input_path = KEYS_DIR / f"{USER_ID}/encrypted_input"
279
 
280
+ if not evaluation_key_path.is_file():
281
+ error_message = (
282
+ "Error Encountered While Sending Data to the Server: "
283
+ f"The key has been generated correctly - {evaluation_key_path.is_file()=}"
284
+ )
285
+ return {anonymized_query_output: gr.update(value=error_message)}
286
 
287
+ if not encrypted_input_path.is_file():
288
+ error_message = (
289
+ "Error Encountered While Sending Data to the Server: The data has not been encrypted "
290
+ f"correctly on the client side - {encrypted_input_path.is_file()=}"
291
+ )
292
+ return {anonymized_query_output: gr.update(value=error_message)}
293
 
294
+ data = {
295
+ "user_id": USER_ID,
296
+ }
297
 
298
+ url = SERVER_URL + "run_fhe"
299
 
300
+ with requests.post(
301
+ url=url,
302
+ data=data,
303
+ ) as response:
304
+ if not response.ok:
305
+ return {
306
+ anonymized_query_output: gr.update(
307
+ value=(
308
+ "⚠️ An error occurred on the Server Side. "
309
+ "Please check connectivity and data transmission."
310
+ ),
311
+ ),
312
+ }
313
+ else:
314
+ time.sleep(1)
315
+ print(f"The query anonymization was computed in {response.json():.2f} s per token.")
316
 
317
 
318
+ def get_output_fn() -> Dict:
319
 
320
+ print("------------ Step 3.3: Get the output from the Server Side")
321
 
322
+ if not (KEYS_DIR / f"{USER_ID}/evaluation_key").is_file():
323
+ error_message = (
324
+ "Error Encountered While Sending Data to the Server: "
325
+ "The key has not been generated correctly"
326
+ )
327
+ return {anonymized_query_output: gr.update(value=error_message)}
328
 
329
+ if not (KEYS_DIR / f"{USER_ID}/encrypted_input").is_file():
330
+ error_message = (
331
+ "Error Encountered While Sending Data to the Server: "
332
+ "The data has not been encrypted correctly on the client side"
333
+ )
334
+ return {anonymized_query_output: gr.update(value=error_message)}
335
+
336
+ data = {
337
+ "user_id": USER_ID,
338
+ }
339
+
340
+ # Retrieve the encrypted output
341
+ url = SERVER_URL + "get_output"
342
+ with requests.post(
343
+ url=url,
344
+ data=data,
345
+ ) as response:
346
+ if response.ok:
347
+ print("Data received ✅ from the remote Server")
348
+ response_data = response.json()
349
+ encrypted_output_base64 = response_data["encrypted_output"]
350
+ length_encrypted_output_base64 = response_data["length"]
351
+
352
+ # Decode the base64 encoded data
353
+ encrypted_output = base64.b64decode(encrypted_output_base64)
354
+ length_encrypted_output = base64.b64decode(length_encrypted_output_base64)
355
+
356
+ # Save the encrypted output to bytes in a file as it is too large to pass through
357
+ # regular Gradio buttons (see https://github.com/gradio-app/gradio/issues/1877)
358
+
359
+ write_bytes(CLIENT_DIR / f"{USER_ID}_encrypted_output", encrypted_output)
360
+ write_bytes(CLIENT_DIR / f"{USER_ID}_encrypted_output_len", length_encrypted_output)
361
+
362
+ else:
363
+ print("Error ❌ in getting data to the server")
364
+
365
+
366
+ def decrypt_fn(text) -> Dict:
367
+ """Dencrypt the data on the `Client Side`."""
368
+
369
+ print("------------ Step 4: Dencrypt the data on the `Client Side`")
370
+
371
+ # Get the encrypted output path
372
+ encrypted_output_path = CLIENT_DIR / f"{USER_ID}_encrypted_output"
373
+
374
+ if not encrypted_output_path.is_file():
375
+ error_message = """⚠️ Please ensure that: \n
376
+ - the connectivity \n
377
+ - the query has been submitted \n
378
+ - the evaluation key has been generated \n
379
+ - the server processed the encrypted data \n
380
+ - the Client received the data from the Server before decrypting the prediction
381
+ """
382
+ print(error_message)
383
 
384
+ return error_message, None
385
 
386
+ # Retrieve the client API
387
+ client = FHEModelClient(path_dir=DEPLOYMENT_DIR, key_dir=KEYS_DIR / f"{USER_ID}")
388
+ client.load()
389
 
390
+ # Load the encrypted output as bytes
391
+ encrypted_output = read_bytes(CLIENT_DIR / f"{USER_ID}_encrypted_output")
392
+ length = int.from_bytes(read_bytes(CLIENT_DIR / f"{USER_ID}_encrypted_output_len"), "big")
393
 
394
+ tokens = re.findall(r"(\b[\w\.\/\-@]+\b|[\s,.!?;:'\"-]+)", text)
395
 
396
+ decrypted_output, identified_words_with_prob = [], []
397
 
398
+ i = 0
399
+ for token in tokens:
400
 
401
+ # Directly append non-word tokens or whitespace to processed_tokens
402
+ if bool(re.match(r"^\s+$", token)):
403
+ continue
404
+ else:
405
+ encrypted_token = encrypted_output[i : i + length]
406
+ prediction_proba = client.deserialize_decrypt_dequantize(encrypted_token)
407
+ probability = prediction_proba[0][1]
408
+ i += length
409
 
410
+ if probability >= 0.77:
411
+ identified_words_with_prob.append((token, probability))
412
 
413
+ # Use the existing UUID if available, otherwise generate a new one
414
+ tmp_uuid = UUID_MAP.get(token, str(uuid.uuid4())[:8])
415
+ decrypted_output.append(tmp_uuid)
416
+ UUID_MAP[token] = tmp_uuid
417
+ else:
418
+ decrypted_output.append(token)
419
 
420
+ # Update the UUID map with query.
421
+ write_json(MAPPING_UUID_PATH, UUID_MAP)
422
 
423
+ # Removing Spaces Before Punctuation:
424
+ anonymized_text = re.sub(r"\s([,.!?;:])", r"\1", " ".join(decrypted_output))
425
 
426
+ # Convert the list of identified words and probabilities into a DataFrame
427
+ if identified_words_with_prob:
428
+ identified_df = pd.DataFrame(
429
+ identified_words_with_prob, columns=["Identified Words", "Probability"]
430
+ )
431
+ else:
432
+ identified_df = pd.DataFrame(columns=["Identified Words", "Probability"])
433
 
434
+ print("Decryption done ✅ on Client Side")
435
 
436
+ return anonymized_text, identified_df
437
 
438
 
439
+ def anonymization_with_fn(selected_sentences, query):
440
 
441
+ encrypt_query_fn(query)
442
 
443
+ send_input_fn(query)
444
 
445
+ run_fhe_in_server_fn()
446
 
447
+ get_output_fn()
448
 
449
+ anonymized_text, identified_df = decrypt_fn(query)
450
 
451
+ return {
452
+ anonymized_doc_output: gr.update(value=select_static_anonymized_sentences_fn(selected_sentences)),
453
+ anonymized_query_output: gr.update(value=anonymized_text),
454
+ identified_words_output_df: gr.update(value=identified_df, visible=False),
455
+ }
456
 
457
 
458
+ def query_chatgpt_fn(anonymized_query, anonymized_document):
459
 
460
+ print("------------ Step 5: ChatGPT communication")
461
 
462
+ if not (KEYS_DIR / f"{USER_ID}/evaluation_key").is_file():
463
+ error_message = "Error ❌: Please generate the key first!"
464
+ return {chatgpt_response_anonymized: gr.update(value=error_message)}
465
 
466
+ if not (CLIENT_DIR / f"{USER_ID}_encrypted_output").is_file():
467
+ error_message = "Error ❌: Please encrypt your query first!"
468
+ return {chatgpt_response_anonymized: gr.update(value=error_message)}
469
 
470
+ context_prompt = read_txt(PROMPT_PATH)
471
 
472
+ # Prepare prompt
473
+ query = (
474
+ "Document content:\n```\n"
475
+ + anonymized_document
476
+ + "\n\n```"
477
+ + "Query:\n```\n"
478
+ + anonymized_query
479
+ + "\n```"
480
+ )
481
+ print(f'Prompt of CHATGPT:\n{query}')
482
+
483
+ completion = client.chat.completions.create(
484
+ model="gpt-4-1106-preview", # Replace with "gpt-4" if available
485
+ messages=[
486
+ {"role": "system", "content": context_prompt},
487
+ {"role": "user", "content": query},
488
+ ],
489
+ )
490
+ anonymized_response = completion.choices[0].message.content
491
+ uuid_map = read_json(MAPPING_UUID_PATH)
492
 
493
+ inverse_uuid_map = {
494
+ v: k for k, v in uuid_map.items()
495
+ } # TODO load the inverse mapping from disk for efficiency
496
 
497
+ # Pattern to identify words and non-words (including punctuation, spaces, etc.)
498
+ tokens = re.findall(r"(\b[\w\.\/\-@]+\b|[\s,.!?;:'\"-]+)", anonymized_response)
499
+ processed_tokens = []
500
 
501
+ for token in tokens:
502
+ # Directly append non-word tokens or whitespace to processed_tokens
503
+ if not token.strip() or not re.match(r"\w+", token):
504
+ processed_tokens.append(token)
505
+ continue
506
 
507
+ if token in inverse_uuid_map:
508
+ processed_tokens.append(inverse_uuid_map[token])
509
+ else:
510
+ processed_tokens.append(token)
511
+ deanonymized_response = "".join(processed_tokens)
512
 
513
+ return {chatgpt_response_anonymized: gr.update(value=anonymized_response),
514
+ chatgpt_response_deanonymized: gr.update(value=deanonymized_response)}
515
 
516
 
517
+ demo = gr.Blocks(css=".markdown-body { font-size: 18px; }")
518
 
519
+ with demo:
520
 
521
+ gr.Markdown(
522
+ """
523
+ <p align="center">
524
+ <img width=200 src="https://user-images.githubusercontent.com/5758427/197816413-d9cddad3-ba38-4793-847d-120975e1da11.png">
525
+ </p>
526
+ """)
527
 
528
+ gr.Markdown(
529
+ """
530
+ <h1 style="text-align: center;">Encrypted Anonymization Using Fully Homomorphic Encryption</h1>
531
+ <p align="center">
532
+ <a href="https://github.com/zama-ai/concrete-ml"> <img style="vertical-align: middle; display:inline-block; margin-right: 3px;" width=15 src="file/images/logos/github.png">Concrete-ML</a>
533
+
534
+ <a href="https://docs.zama.ai/concrete-ml"> <img style="vertical-align: middle; display:inline-block; margin-right: 3px;" width=15 src="file/images/logos/documentation.png">Documentation</a>
535
+
536
+ <a href=" https://community.zama.ai/c/concrete-ml/8"> <img style="vertical-align: middle; display:inline-block; margin-right: 3px;" width=15 src="file/images/logos/community.png">Community</a>
537
+
538
+ <a href="https://twitter.com/zama_fhe"> <img style="vertical-align: middle; display:inline-block; margin-right: 3px;" width=15 src="file/images/logos/x.png">@zama_fhe</a>
539
+ </p>
540
+ """
541
+ )
542
 
543
+ gr.Markdown(
544
+ """
545
+ <p align="center" style="font-size: 16px;">
546
+ Anonymization is the process of removing personally identifiable information (PII) data from
547
+ a document in order to protect individual privacy.</p>
548
 
549
+ <p align="center" style="font-size: 16px;">
550
+ Encrypted anonymization uses Fully Homomorphic Encryption (FHE) to anonymize personally
551
+ identifiable information (PII) within encrypted documents, enabling computations to be
552
+ performed on the encrypted data.</p>
553
 
554
+ <p align="center" style="font-size: 16px;">
555
+ In the example above, we're showing how encrypted anonymization can be leveraged to use LLM
556
+ services such as ChatGPT in a privacy-preserving manner.</p>
557
+ """
558
+ )
559
 
560
+ # gr.Markdown(
561
+ # """
562
+ # <p align="center">
563
+ # <img width="75%" height="30%" src="https://raw.githubusercontent.com/kcelia/Img/main/fhe_anonymization_banner.png">
564
+ # </p>
565
+ # """
566
+ # )
567
+ gr.Markdown(
568
+ f"""
569
+ <p align="center">
570
+ <img width="75%" height="30%" src="https://huggingface.co/spaces/Tenefix/private-fhe-fraud-detection/resolve/main/Img/schema.png">
571
+ </p>
572
+ """
573
+ )
574
 
575
 
576
+ ########################## Key Gen Part ##########################
577
 
578
+ gr.Markdown(
579
+ "## Step 1: Generate the keys\n\n"
580
+ """In Fully Homomorphic Encryption (FHE) methods, two types of keys are created. The first
581
+ type, called secret keys, are used to encrypt and decrypt the user's data. The second type,
582
+ called evaluation keys, enables a server to work on the encrypted data without seeing the
583
+ actual data.
584
+ """
585
+ )
586
 
587
+ gen_key_btn = gr.Button("Generate the secret and evaluation keys")
588
 
589
+ gen_key_btn.click(
590
+ key_gen_fn,
591
+ inputs=[],
592
+ outputs=[gen_key_btn],
593
+ )
594
 
595
+ ########################## Main document Part ##########################
596
 
597
+ gr.Markdown("<hr />")
598
+ gr.Markdown("## Step 2.1: Select the document you want to encrypt\n\n"
599
+ """To make it simple, we pre-compiled the following document, but you are free to choose
600
+ on which part you want to run this example.
601
+ """
602
+ )
603
 
604
+ with gr.Row():
605
+ with gr.Column(scale=5):
606
+ original_sentences_box = gr.CheckboxGroup(
607
+ ORIGINAL_DOCUMENT,
608
+ value=ORIGINAL_DOCUMENT,
609
+ label="Contract:",
610
+ show_label=True,
611
+ )
612
 
613
+ with gr.Column(scale=1, min_width=6):
614
+ gr.HTML("<div style='height: 77px;'></div>")
615
+ encrypt_doc_btn = gr.Button("Encrypt the document")
616
 
617
+ with gr.Column(scale=5):
618
+ encrypted_doc_box = gr.Textbox(
619
+ label="Encrypted document:", show_label=True, interactive=False, lines=10
620
+ )
621
 
622
 
623
+ ########################## User Query Part ##########################
624
 
625
+ gr.Markdown("<hr />")
626
+ gr.Markdown("## Step 2.2: Select the prompt you want to encrypt\n\n"
627
+ """Please choose from the predefined options in
628
+ <span style='color:grey'>“Prompt examples”</span> or craft a custom question in
629
+ the <span style='color:grey'>“Customized prompt”</span> text box.
630
+ Remain concise and relevant to the context. Any off-topic query will not be processed.""")
631
 
632
+ with gr.Row():
633
+ with gr.Column(scale=5):
634
 
635
+ with gr.Column(scale=5):
636
+ default_query_box = gr.Dropdown(
637
+ list(DEFAULT_QUERIES.values()), label="PROMPT EXAMPLES:"
638
+ )
639
 
640
+ gr.Markdown("Or")
641
 
642
+ query_box = gr.Textbox(
643
+ value="What is Kate international bank account number?", label="CUSTOMIZED PROMPT:", interactive=True
644
+ )
645
 
646
+ default_query_box.change(
647
+ fn=lambda default_query_box: default_query_box,
648
+ inputs=[default_query_box],
649
+ outputs=[query_box],
650
+ )
651
 
652
+ with gr.Column(scale=1, min_width=6):
653
+ gr.HTML("<div style='height: 77px;'></div>")
654
+ encrypt_query_btn = gr.Button("Encrypt the prompt")
655
+ # gr.HTML("<div style='height: 50px;'></div>")
656
 
657
+ with gr.Column(scale=5):
658
+ output_encrypted_box = gr.Textbox(
659
+ label="Encrypted anonymized query that will be sent to the anonymization server:",
660
+ lines=8,
661
+ )
662
 
663
+ ########################## FHE processing Part ##########################
664
 
665
+ gr.Markdown("<hr />")
666
+ gr.Markdown("## Step 3: Anonymize the document and the prompt using FHE")
667
+ gr.Markdown(
668
+ """Once the client encrypts the document and the prompt locally, it will be sent to a remote
669
+ server to perform the anonymization on encrypted data. When the computation is done, the
670
+ server will return the result to the client for decryption.
671
+ """
672
+ )
673
 
674
+ run_fhe_btn = gr.Button("Anonymize using FHE")
675
 
676
+ with gr.Row():
677
+ with gr.Column(scale=5):
678
 
679
+ anonymized_doc_output = gr.Textbox(
680
+ label="Decrypted and anonymized document", lines=10, interactive=True
681
+ )
682
 
683
+ with gr.Column(scale=5):
684
 
685
+ anonymized_query_output = gr.Textbox(
686
+ label="Decrypted and anonymized prompt", lines=10, interactive=True
687
+ )
688
 
689
 
690
+ identified_words_output_df = gr.Dataframe(label="Identified words:", visible=False)
691
 
692
+ encrypt_doc_btn.click(
693
+ fn=encrypt_doc_fn,
694
+ inputs=[original_sentences_box],
695
+ outputs=[encrypted_doc_box, anonymized_doc_output],
696
+ )
697
 
698
+ encrypt_query_btn.click(
699
+ fn=encrypt_query_fn,
700
+ inputs=[query_box],
701
+ outputs=[
702
+ query_box,
703
+ output_encrypted_box,
704
+ anonymized_query_output,
705
+ identified_words_output_df,
706
+ ],
707
+ )
708
 
709
+ run_fhe_btn.click(
710
+ anonymization_with_fn,
711
+ inputs=[original_sentences_box, query_box],
712
+ outputs=[anonymized_doc_output, anonymized_query_output, identified_words_output_df],
713
+ )
714
 
715
+ ########################## ChatGpt Part ##########################
716
 
717
+ gr.Markdown("<hr />")
718
+ gr.Markdown("## Step 4: Send anonymized prompt to ChatGPT")
719
+ gr.Markdown(
720
+ """After securely anonymizing the query with FHE,
721
+ you can forward it to ChatGPT without having any concern about information leakage."""
722
+ )
723
 
724
+ chatgpt_button = gr.Button("Query ChatGPT")
725
 
726
+ with gr.Row():
727
+ chatgpt_response_anonymized = gr.Textbox(label="ChatGPT's anonymized response:", lines=5)
728
+ chatgpt_response_deanonymized = gr.Textbox(
729
+ label="ChatGPT's non-anonymized response:", lines=5
730
+ )
731
 
732
+ chatgpt_button.click(
733
+ query_chatgpt_fn,
734
+ inputs=[anonymized_query_output, anonymized_doc_output],
735
+ outputs=[chatgpt_response_anonymized, chatgpt_response_deanonymized],
736
+ )
737
 
738
+ gr.Markdown(
739
+ """**Please note**: As this space is intended solely for demonstration purposes, some
740
+ private information may be missed by the anonymization algorithm. Please validate the
741
+ following query before sending it to ChatGPT."""
742
+ )
743
+ # Launch the app
744
+ demo.launch(share=False)
745
 
746
 
747
 
 
749
 
750
 
751
 
752
+ # import gradio as gr
753
+ # from predictor import predict, key_already_generated, pre_process_encrypt_send_purchase, decrypt_prediction
754
+ # import base64
755
 
756
+ # def key_generated():
757
+ # """
758
+ # Check if the evaluation keys have already been generated.
759
+ # Returns:
760
+ # bool: True if the evaluation keys have already been generated, False otherwise.
761
+ # """
762
+ # if not key_already_generated():
763
+ # error_message = (
764
+ # f"Error Encountered While generating the evaluation keys."
765
+ # )
766
+ # print(error_message)
767
+ # return {gen_key_btn: gr.update(value=error_message)}
768
+ # else:
769
+ # print("Keys have been generated ✅")
770
+ # return {gen_key_btn: gr.update(value="Keys have been generated ✅")}
771
 
772
 
773
+ # demo = gr.Blocks(css=".markdown-body { font-size: 18px; }")
774
 
775
+ # with demo:
776
+ # with gr.Row():
777
+ # with gr.Column(elem_id="center_column"):
778
+ # gr.Image("Img/zama.png", width=200, show_label=False)
779
+ # with gr.Column(elem_id="center_column"):
780
+ # gr.Image("Img/Epita.png", width=200, show_label=False)
781
 
782
 
783
 
784
+ # gr.Markdown(
785
+ # """
786
+ # <h1 style="text-align: center;">Fraud Detection with FHE Model</h1>
787
+ # <p align="center">
788
+ # <a href="https://github.com/CirSandro/private-fhe-fraud-detection">
789
+ # <span style="vertical-align: middle; display:inline-block; margin-right: 3px;">💳</span>private-fhe-fraud-detection
790
+ # </a>
791
+ #
792
+ # <a href="https://docs.zama.ai/concrete-ml">
793
+ # <span style="vertical-align: middle; display:inline-block; margin-right: 3px;">🔒</span>Documentation Concrete-ML
794
+ # </a>
795
+ # </p>
796
+ # """
797
+ # )
798
 
799
+ # gr.Markdown(
800
+ # """
801
+ # <p align="center" style="font-size: 16px;">
802
+ # How to detect bank fraud without using your personal data?</p>
803
+ # """
804
+ # )
805
 
806
+ # with gr.Accordion("What is bank fraud detection?", open=False):
807
+ # gr.Markdown(
808
+ # """
809
+ # Bank fraud detection is the process of identifying fraudulent activities or transactions
810
+ # that may pose a risk to a bank or its customers. It is essential to detect fraudulent
811
+ # activities to prevent financial losses and protect the integrity of the banking system.
812
+ # """
813
+ # )
814
 
815
+ # with gr.Accordion("Why is it important to protect this data?", open=False):
816
+ # gr.Markdown(
817
+ # """
818
+ # Banking and financial data often contain sensitive personal information, such as income,
819
+ # spending habits, and account numbers. Protecting this information ensures that customers'
820
+ # privacy is respected and safeguarded from unauthorized access.
821
+ # """
822
+ # )
823
 
824
+ # with gr.Accordion("Why is Fully Homomorphic Encryption (FHE) a good solution?", open=False):
825
+ # gr.Markdown(
826
+ # """
827
+ # Fully Homomorphic Encryption (FHE) is a powerful technique for enhancing privacy and accuracy
828
+ # in the context of fraud detection, particularly when dealing with sensitive banking data. FHE
829
+ # allows for the encryption of data, which can then be processed and analyzed without ever needing
830
+ # to decrypt it.
831
+ # Each party involved in the detection process can collaborate without compromising user privacy,
832
+ # minimizing the risk of data leaks or breaches. The data remains confidential throughout the entire
833
+ # process, ensuring that the privacy of users is maintained.
834
+ # """
835
+ # )
836
 
837
+ # gr.Markdown(
838
+ # """
839
+ # <p style="text-align: center;">
840
+ # Below, we will explain the flow in the image by simulating a purchase you've just made, and show you how our fraud detection model processes the transaction.
841
+ # </p>
842
+ # """
843
+ # )
844
 
845
+ # # gr.Markdown(
846
+ # # f"""
847
+ # # <p align="center">
848
+ # # <img width="75%" height="30%" src="https://huggingface.co/spaces/Tenefix/private-fhe-fraud-detection/resolve/main/Img/schema.png">
849
+ # # </p>
850
+ # # """
851
+ # # )
852
+ # with gr.Row():
853
+ # with gr.Column(elem_id="center_column"):
854
+ # gr.Image("Img/schema.png", width=200, show_label=False)
855
 
856
+ # gr.Markdown("<hr />")
857
 
858
+ # ########################## Key Gen Part ##########################
859
 
860
+ # gr.Markdown(
861
+ # "## Step 1: Generate the keys\n\n"
862
+ # """In Fully Homomorphic Encryption (FHE) methods, two types of keys are created. The first
863
+ # type, called secret keys, are used to encrypt and decrypt the user's data. The second type,
864
+ # called evaluation keys, enables a server to work on the encrypted data without seeing the
865
+ # actual data.
866
+ # """
867
+ # )
868
 
869
+ # gen_key_btn = gr.Button("Generate the secret and evaluation keys")
870
 
871
+ # gen_key_btn.click(
872
+ # key_generated,
873
+ # inputs=[],
874
+ # outputs=[gen_key_btn],
875
+ # )#547
876
 
877
+ # gr.Markdown("<hr />")
878
 
879
+ # ########################## Encrypt Data ##########################
880
 
881
+ # gr.Markdown(
882
+ # "## Step 2: Make your purchase\n\n"
883
+ # """
884
+ # 🛍️ It's time to shop! To simulate your latest purchase, please provide the details of your most recent transaction.
885
 
886
+ # If you don't have an idea, you can pre-fill with an example of fraud or non-fraud.
887
+ # """
888
+ # )
889
 
890
+ # def prefill_fraud():
891
+ # return 34, 50, 3, False, False, False, True
892
 
893
+ # def prefill_no_fraud():
894
+ # return 12, 2, 0.7, True, False, True, False
895
 
896
+ # with gr.Row():
897
+ # prefill_button = gr.Button("Example Fraud")
898
+ # prefill_button_no = gr.Button("Example No-Fraud")
899
 
900
+ # with gr.Row():
901
+ # with gr.Column():
902
+ # distance_home = gr.Number(
903
+ # minimum=float(0),
904
+ # maximum=float(22000),
905
+ # step=1,
906
+ # value=10,
907
+ # label="Distance from Home",
908
+ # info="How far was the purchase from your home (in km)?"
909
+ # )
910
+ # distance_last = gr.Number(
911
+ # minimum=float(0),
912
+ # maximum=float(22000),
913
+ # step=1,
914
+ # value=1,
915
+ # label="Distance from Last Transaction",
916
+ # info="Distance between this purchase and the last one (in km)?"
917
+ # )
918
+ # ratio = gr.Number(
919
+ # minimum=float(0),
920
+ # maximum=float(10000),
921
+ # step=0.1,
922
+ # value=1,
923
+ # label="Ratio to Median Purchase Price",
924
+ # info="Purchase ratio compared to your average purchase",
925
+ # )
926
+ # repeat_retailer = gr.Checkbox(
927
+ # label="Repeat Retailer",
928
+ # info="Check if you are purchasing from the same retailer as your last transaction"
929
+ # )
930
+ # used_chip = gr.Checkbox(
931
+ # label="Used Chip",
932
+ # info="Check if you used a chip card for this transaction"
933
+ # )
934
+ # used_pin_number = gr.Checkbox(
935
+ # label="Used Pin Number",
936
+ # info="Check if you used your PIN number during the transaction"
937
+ # )
938
+ # online = gr.Checkbox(
939
+ # label="Online Order",
940
+ # info="Check if you made your purchase online"
941
+ # )
942
 
943
 
944
+ # prefill_button.click(
945
+ # fn=prefill_fraud,
946
+ # inputs=[],
947
+ # outputs=[
948
+ # distance_home,
949
+ # distance_last,
950
+ # ratio,
951
+ # repeat_retailer,
952
+ # used_chip,
953
+ # used_pin_number,
954
+ # online
955
+ # ]
956
+ # )
957
 
958
+ # prefill_button_no.click(
959
+ # fn=prefill_no_fraud,
960
+ # inputs=[],
961
+ # outputs=[
962
+ # distance_home,
963
+ # distance_last,
964
+ # ratio,
965
+ # repeat_retailer,
966
+ # used_chip,
967
+ # used_pin_number,
968
+ # online
969
+ # ]
970
+ # )
971
 
972
+ # with gr.Row():
973
+ # with gr.Column(scale=2):
974
+ # encrypt_button_applicant = gr.Button("Encrypt the inputs and send to server.")
975
 
976
+ # encrypted_input_applicant = gr.Textbox(
977
+ # label="Encrypted input representation:", max_lines=2, interactive=False
978
+ # )
979
 
980
+ # encrypt_button_applicant.click(
981
+ # pre_process_encrypt_send_purchase,
982
+ # inputs=[distance_home, distance_last, ratio, repeat_retailer, used_chip, used_pin_number, \
983
+ # online],
984
+ # outputs=[encrypted_input_applicant, encrypt_button_applicant],
985
+ # )
986
 
987
+ # gr.Markdown("<hr />")
988
 
989
+ # ########################## Model Prediction ##########################
990
 
991
+ # gr.Markdown("## Step 3: Run the FHE evaluation.")
992
+ # gr.Markdown("<span style='color:grey'>Server Side</span>")
993
+ # gr.Markdown(
994
+ # """
995
+ # It's high time to launch our prediction, by pressing the button you will launch the
996
+ # fraud analysis that our fictitious bank offers you.
997
+ # This server employs a [Random Forest (by Concrete-ML)](https://github.com/zama-ai/concrete-ml/blob/release/1.8.x/docs/references/api/concrete.ml.sklearn.rf.md#class-randomforestclassifier)
998
+ # classifier model that has been trained on a synthetic data-set.
999
+ # """
1000
+ # )
1001
 
1002
+ # execute_fhe_button = gr.Button("Run the FHE evaluation.")
1003
+ # fhe_execution_time = gr.Textbox(
1004
+ # label="Total FHE execution time (in seconds):", max_lines=1, interactive=False
1005
+ # )
1006
 
1007
+ # # Button to send the encodings to the server using post method
1008
+ # execute_fhe_button.click(predict, outputs=[fhe_execution_time, execute_fhe_button])
1009
 
1010
+ # gr.Markdown("<hr />")
1011
 
1012
+ # ########################## Decrypt Prediction ##########################
1013
 
1014
+ # gr.Markdown("## Step 4: Receive the encrypted output from the server and decrypt.")
1015
+ # gr.Markdown(
1016
+ # """
1017
+ # 🔔 You will receive a notification! Is this a Fraud? The message is decrypted by pressing the button.
1018
+ # """
1019
+ # )
1020
 
1021
+ # get_output_button = gr.Button("Decrypt the prediction.")
1022
+ # prediction_output = gr.Textbox(
1023
+ # label="Prediction", max_lines=1, interactive=False
1024
+ # )
1025
+ # prediction_bar = gr.HTML(label="Prediction Bar") # For the percentage bar
1026
 
1027
+ # get_output_button.click(
1028
+ # decrypt_prediction,
1029
+ # outputs=[prediction_output, get_output_button, prediction_bar],
1030
+ # )
1031
 
1032
 
1033
+ # gr.Markdown(
1034
+ # """
1035
+ # You now know that it is possible to detect bank fraud without knowing your personal information.
1036
+ # """
1037
+ # )
1038
 
1039
+ # gr.Markdown(
1040
+ # "The app was built with [Concrete-ML](https://github.com/zama-ai/concrete-ml), a "
1041
+ # "Privacy-Preserving Machine Learning (PPML) open-source set of tools by [Zama](https://zama.ai/). "
1042
+ # "Try it yourself and don't forget to star on Github &#11088;."
1043
+ # )
1044
 
1045
+ # if __name__ == "__main__":
1046
+ # demo.launch()