80cols commited on
Commit
d8484b9
·
verified ·
1 Parent(s): 5d26570

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +935 -622
app.py CHANGED
@@ -1,44 +1,44 @@
1
- # """A Gradio app for anonymizing text data using FHE."""
2
 
3
- # import os
4
- # import re
5
- # import subprocess
6
- # import time
7
- # import uuid
8
- # from typing import Dict, List
9
 
10
- # import numpy
11
- # import pandas as pd
12
- # import requests
13
- # from fhe_anonymizer import FHEAnonymizer
14
- # from utils_demo import *
15
 
16
- # from concrete.ml.deployment import FHEModelClient
17
 
18
 
19
 
20
- import gradio as gr
21
- from predictor import predict, key_already_generated, pre_process_encrypt_send_purchase, decrypt_prediction
22
- import base64
23
 
24
- def key_generated():
25
- """
26
- Check if the evaluation keys have already been generated.
27
- Returns:
28
- bool: True if the evaluation keys have already been generated, False otherwise.
29
- """
30
- if not key_already_generated():
31
- error_message = (
32
- f"Error Encountered While generating the evaluation keys."
33
- )
34
- print(error_message)
35
- return {gen_key_btn: gr.update(value=error_message)}
36
- else:
37
- print("Keys have been generated ✅")
38
- return {gen_key_btn: gr.update(value="Keys have been generated ✅")}
39
 
40
 
41
- # demo = gr.Blocks(css=".markdown-body { font-size: 18px; }")
42
 
43
 
44
 
@@ -56,33 +56,33 @@ def key_generated():
56
 
57
 
58
 
59
- # # Ensure the directory is clean before starting processes or reading files
60
- # clean_directory()
61
 
62
- # anonymizer = FHEAnonymizer()
63
 
64
- # # Start the Uvicorn server hosting the FastAPI app
65
- # subprocess.Popen(["uvicorn", "server:app"], cwd=CURRENT_DIR)
66
- # time.sleep(3)
67
 
68
- # # Load data from files required for the application
69
- # UUID_MAP = read_json(MAPPING_UUID_PATH)
70
- # ANONYMIZED_DOCUMENT = read_txt(ANONYMIZED_FILE_PATH)
71
- # MAPPING_ANONYMIZED_SENTENCES = read_pickle(MAPPING_ANONYMIZED_SENTENCES_PATH)
72
- # MAPPING_ENCRYPTED_SENTENCES = read_pickle(MAPPING_ENCRYPTED_SENTENCES_PATH)
73
- # ORIGINAL_DOCUMENT = read_txt(ORIGINAL_FILE_PATH).split("\n\n")
74
- # MAPPING_DOC_EMBEDDING = read_pickle(MAPPING_DOC_EMBEDDING_PATH)
75
 
76
- # print(f"{ORIGINAL_DOCUMENT=}\n")
77
- # print(f"{MAPPING_DOC_EMBEDDING.keys()=}")
78
 
79
- # # 4. Data Processing and Operations (No specific operations shown here, assuming it's part of anonymizer or client usage)
80
 
81
- # # 5. Utilizing External Services or APIs
82
- # # (Assuming client initialization and anonymizer setup are parts of using external services or application-specific logic)
83
 
84
- # # Generate a random user ID for this session
85
- # USER_ID = numpy.random.randint(0, 2**32)
86
 
87
 
88
 
@@ -93,433 +93,788 @@ def key_generated():
93
 
94
 
95
 
96
- # def select_static_anonymized_sentences_fn(selected_sentences: List):
97
 
98
- # selected_sentences = [MAPPING_ANONYMIZED_SENTENCES[sentence] for sentence in selected_sentences]
99
 
100
- # anonymized_selected_sentence = sorted(selected_sentences, key=lambda x: x[0])
101
 
102
- # anonymized_selected_sentence = [sentence for _, sentence in anonymized_selected_sentence]
103
 
104
- # return "\n\n".join(anonymized_selected_sentence)
105
 
106
 
107
- # def key_gen_fn() -> Dict:
108
- # """Generate keys for a given user."""
109
 
110
- # print("------------ Step 1: Key Generation:")
111
 
112
- # print(f"Your user ID is: {USER_ID}....")
113
 
114
 
115
- # client = FHEModelClient(path_dir=DEPLOYMENT_DIR, key_dir=KEYS_DIR / f"{USER_ID}")
116
- # client.load()
117
 
118
- # # Creates the private and evaluation keys on the client side
119
- # client.generate_private_and_evaluation_keys()
120
 
121
- # # Get the serialized evaluation keys
122
- # serialized_evaluation_keys = client.get_serialized_evaluation_keys()
123
- # assert isinstance(serialized_evaluation_keys, bytes)
124
 
125
- # # Save the evaluation key
126
- # evaluation_key_path = KEYS_DIR / f"{USER_ID}/evaluation_key"
127
 
128
- # write_bytes(evaluation_key_path, serialized_evaluation_keys)
129
 
130
- # # anonymizer.generate_key()
131
 
132
- # if not evaluation_key_path.is_file():
133
- # error_message = (
134
- # f"Error Encountered While generating the evaluation {evaluation_key_path.is_file()=}"
135
- # )
136
- # print(error_message)
137
- # return {gen_key_btn: gr.update(value=error_message)}
138
- # else:
139
- # print("Keys have been generated ✅")
140
- # return {gen_key_btn: gr.update(value="Keys have been generated ✅")}
141
 
142
 
143
- # def encrypt_doc_fn(doc):
144
 
145
- # print(f"\n------------ Step 2.1: Doc encryption: {doc=}")
146
 
147
- # if not (KEYS_DIR / f"{USER_ID}/evaluation_key").is_file():
148
- # return {encrypted_doc_box: gr.update(value="Error ❌: Please generate the key first!", lines=10)}
149
 
150
- # # Retrieve the client API
151
- # client = FHEModelClient(path_dir=DEPLOYMENT_DIR, key_dir=KEYS_DIR / f"{USER_ID}")
152
- # client.load()
153
 
154
- # encrypted_tokens = []
155
- # tokens = re.findall(r"(\b[\w\.\/\-@]+\b|[\s,.!?;:'\"-]+|\$\d+(?:\.\d+)?|\€\d+(?:\.\d+)?)", ' '.join(doc))
156
 
157
- # for token in tokens:
158
- # if token.strip() and re.match(r"\w+", token):
159
- # emb_x = MAPPING_DOC_EMBEDDING[token]
160
- # assert emb_x.shape == (1, 1024)
161
- # encrypted_x = client.quantize_encrypt_serialize(emb_x)
162
- # assert isinstance(encrypted_x, bytes)
163
- # encrypted_tokens.append(encrypted_x)
164
 
165
- # print("Doc encrypted ✅ on Client Side")
166
 
167
- # # No need to save it
168
- # # write_bytes(KEYS_DIR / f"{USER_ID}/encrypted_doc", b"".join(encrypted_tokens))
169
 
170
- # encrypted_quant_tokens_hex = [token.hex()[500:510] for token in encrypted_tokens]
171
 
172
- # return {
173
- # encrypted_doc_box: gr.update(value=" ".join(encrypted_quant_tokens_hex), lines=10),
174
- # anonymized_doc_output: gr.update(visible=True, value=None),
175
- # }
176
 
177
 
178
- # def encrypt_query_fn(query):
179
 
180
- # print(f"\n------------ Step 2: Query encryption: {query=}")
181
 
182
- # if not (KEYS_DIR / f"{USER_ID}/evaluation_key").is_file():
183
- # return {output_encrypted_box: gr.update(value="Error ❌: Please generate the key first!", lines=8)}
184
 
185
- # if is_user_query_valid(query):
186
- # return {
187
- # query_box: gr.update(
188
- # value=(
189
- # "Unable to process ❌: The request exceeds the length limit or falls "
190
- # "outside the scope of this document. Please refine your query."
191
- # )
192
- # )
193
- # }
194
 
195
- # # Retrieve the client API
196
- # client = FHEModelClient(path_dir=DEPLOYMENT_DIR, key_dir=KEYS_DIR / f"{USER_ID}")
197
- # client.load()
198
 
199
- # encrypted_tokens = []
200
 
201
- # # Pattern to identify words and non-words (including punctuation, spaces, etc.)
202
- # tokens = re.findall(r"(\b[\w\.\/\-@]+\b|[\s,.!?;:'\"-]+)", query)
203
 
204
- # for token in tokens:
205
 
206
- # # 1- Ignore non-words tokens
207
- # if bool(re.match(r"^\s+$", token)):
208
- # continue
209
 
210
- # # 2- Directly append non-word tokens or whitespace to processed_tokens
211
 
212
- # # Prediction for each word
213
- # emb_x = get_batch_text_representation([token], EMBEDDINGS_MODEL, TOKENIZER)
214
- # encrypted_x = client.quantize_encrypt_serialize(emb_x)
215
- # assert isinstance(encrypted_x, bytes)
216
 
217
- # encrypted_tokens.append(encrypted_x)
218
 
219
- # print("Data encrypted ✅ on Client Side")
220
 
221
- # assert len({len(token) for token in encrypted_tokens}) == 1
222
 
223
- # write_bytes(KEYS_DIR / f"{USER_ID}/encrypted_input", b"".join(encrypted_tokens))
224
- # write_bytes(
225
- # KEYS_DIR / f"{USER_ID}/encrypted_input_len", len(encrypted_tokens[0]).to_bytes(10, "big")
226
- # )
227
 
228
- # encrypted_quant_tokens_hex = [token.hex()[500:580] for token in encrypted_tokens]
229
 
230
- # return {
231
- # output_encrypted_box: gr.update(value=" ".join(encrypted_quant_tokens_hex), lines=8),
232
- # anonymized_query_output: gr.update(visible=True, value=None),
233
- # identified_words_output_df: gr.update(visible=False, value=None),
234
- # }
235
 
236
 
237
- # def send_input_fn(query) -> Dict:
238
- # """Send the encrypted data and the evaluation key to the server."""
239
 
240
- # print("------------ Step 3.1: Send encrypted_data to the Server")
241
 
242
- # evaluation_key_path = KEYS_DIR / f"{USER_ID}/evaluation_key"
243
- # encrypted_input_path = KEYS_DIR / f"{USER_ID}/encrypted_input"
244
- # encrypted_input_len_path = KEYS_DIR / f"{USER_ID}/encrypted_input_len"
245
 
246
- # if not evaluation_key_path.is_file():
247
- # error_message = (
248
- # "Error Encountered While Sending Data to the Server: "
249
- # f"The key has been generated correctly - {evaluation_key_path.is_file()=}"
250
- # )
251
- # return {anonymized_query_output: gr.update(value=error_message)}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
252
 
253
- # if not encrypted_input_path.is_file():
254
- # error_message = (
255
- # "Error Encountered While Sending Data to the Server: The data has not been encrypted "
256
- # f"correctly on the client side - {encrypted_input_path.is_file()=}"
257
- # )
258
- # return {anonymized_query_output: gr.update(value=error_message)}
259
 
260
- # # Define the data and files to post
261
- # data = {"user_id": USER_ID, "input": query}
 
 
 
 
 
 
 
 
 
 
 
 
262
 
263
- # files = [
264
- # ("files", open(evaluation_key_path, "rb")),
265
- # ("files", open(encrypted_input_path, "rb")),
266
- # ("files", open(encrypted_input_len_path, "rb")),
267
- # ]
 
 
 
 
 
268
 
269
- # # Send the encrypted input and evaluation key to the server
270
- # url = SERVER_URL + "send_input"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
271
 
272
- # with requests.post(
273
- # url=url,
274
- # data=data,
275
- # files=files,
276
- # ) as resp:
277
- # print("Data sent to the server ✅" if resp.ok else "Error ❌ in sending data to the server")
278
 
 
 
279
 
280
- # def run_fhe_in_server_fn() -> Dict:
281
- # """Run in FHE the anonymization of the query"""
282
 
283
- # print("------------ Step 3.2: Run in FHE on the Server Side")
284
 
285
- # evaluation_key_path = KEYS_DIR / f"{USER_ID}/evaluation_key"
286
- # encrypted_input_path = KEYS_DIR / f"{USER_ID}/encrypted_input"
287
 
288
- # if not evaluation_key_path.is_file():
289
- # error_message = (
290
- # "Error Encountered While Sending Data to the Server: "
291
- # f"The key has been generated correctly - {evaluation_key_path.is_file()=}"
292
- # )
293
- # return {anonymized_query_output: gr.update(value=error_message)}
294
 
295
- # if not encrypted_input_path.is_file():
296
- # error_message = (
297
- # "Error Encountered While Sending Data to the Server: The data has not been encrypted "
298
- # f"correctly on the client side - {encrypted_input_path.is_file()=}"
299
- # )
300
- # return {anonymized_query_output: gr.update(value=error_message)}
301
 
302
- # data = {
303
- # "user_id": USER_ID,
304
- # }
 
 
 
 
 
 
305
 
306
- # url = SERVER_URL + "run_fhe"
307
 
308
- # with requests.post(
309
- # url=url,
310
- # data=data,
311
- # ) as response:
312
- # if not response.ok:
313
- # return {
314
- # anonymized_query_output: gr.update(
315
- # value=(
316
- # "⚠️ An error occurred on the Server Side. "
317
- # "Please check connectivity and data transmission."
318
- # ),
319
- # ),
320
- # }
321
- # else:
322
- # time.sleep(1)
323
- # print(f"The query anonymization was computed in {response.json():.2f} s per token.")
324
 
 
 
 
325
 
326
- # def get_output_fn() -> Dict:
 
 
 
 
 
 
 
 
 
 
327
 
328
- # print("------------ Step 3.3: Get the output from the Server Side")
 
329
 
330
- # if not (KEYS_DIR / f"{USER_ID}/evaluation_key").is_file():
331
- # error_message = (
332
- # "Error Encountered While Sending Data to the Server: "
333
- # "The key has not been generated correctly"
334
- # )
335
- # return {anonymized_query_output: gr.update(value=error_message)}
336
 
337
- # if not (KEYS_DIR / f"{USER_ID}/encrypted_input").is_file():
338
- # error_message = (
339
- # "Error Encountered While Sending Data to the Server: "
340
- # "The data has not been encrypted correctly on the client side"
341
- # )
342
- # return {anonymized_query_output: gr.update(value=error_message)}
343
-
344
- # data = {
345
- # "user_id": USER_ID,
346
- # }
347
-
348
- # # Retrieve the encrypted output
349
- # url = SERVER_URL + "get_output"
350
- # with requests.post(
351
- # url=url,
352
- # data=data,
353
- # ) as response:
354
- # if response.ok:
355
- # print("Data received ✅ from the remote Server")
356
- # response_data = response.json()
357
- # encrypted_output_base64 = response_data["encrypted_output"]
358
- # length_encrypted_output_base64 = response_data["length"]
359
-
360
- # # Decode the base64 encoded data
361
- # encrypted_output = base64.b64decode(encrypted_output_base64)
362
- # length_encrypted_output = base64.b64decode(length_encrypted_output_base64)
363
-
364
- # # Save the encrypted output to bytes in a file as it is too large to pass through
365
- # # regular Gradio buttons (see https://github.com/gradio-app/gradio/issues/1877)
366
-
367
- # write_bytes(CLIENT_DIR / f"{USER_ID}_encrypted_output", encrypted_output)
368
- # write_bytes(CLIENT_DIR / f"{USER_ID}_encrypted_output_len", length_encrypted_output)
369
-
370
- # else:
371
- # print("Error in getting data to the server")
372
-
373
-
374
- # def decrypt_fn(text) -> Dict:
375
- # """Dencrypt the data on the `Client Side`."""
376
-
377
- # print("------------ Step 4: Dencrypt the data on the `Client Side`")
378
-
379
- # # Get the encrypted output path
380
- # encrypted_output_path = CLIENT_DIR / f"{USER_ID}_encrypted_output"
381
-
382
- # if not encrypted_output_path.is_file():
383
- # error_message = """⚠️ Please ensure that: \n
384
- # - the connectivity \n
385
- # - the query has been submitted \n
386
- # - the evaluation key has been generated \n
387
- # - the server processed the encrypted data \n
388
- # - the Client received the data from the Server before decrypting the prediction
389
- # """
390
- # print(error_message)
391
 
392
- # return error_message, None
393
 
394
- # # Retrieve the client API
395
- # client = FHEModelClient(path_dir=DEPLOYMENT_DIR, key_dir=KEYS_DIR / f"{USER_ID}")
396
- # client.load()
397
 
398
- # # Load the encrypted output as bytes
399
- # encrypted_output = read_bytes(CLIENT_DIR / f"{USER_ID}_encrypted_output")
400
- # length = int.from_bytes(read_bytes(CLIENT_DIR / f"{USER_ID}_encrypted_output_len"), "big")
401
 
402
- # tokens = re.findall(r"(\b[\w\.\/\-@]+\b|[\s,.!?;:'\"-]+)", text)
 
 
403
 
404
- # decrypted_output, identified_words_with_prob = [], []
 
 
405
 
406
- # i = 0
407
- # for token in tokens:
408
 
409
- # # Directly append non-word tokens or whitespace to processed_tokens
410
- # if bool(re.match(r"^\s+$", token)):
411
- # continue
412
- # else:
413
- # encrypted_token = encrypted_output[i : i + length]
414
- # prediction_proba = client.deserialize_decrypt_dequantize(encrypted_token)
415
- # probability = prediction_proba[0][1]
416
- # i += length
 
 
417
 
418
- # if probability >= 0.77:
419
- # identified_words_with_prob.append((token, probability))
 
 
 
 
 
 
 
420
 
421
- # # Use the existing UUID if available, otherwise generate a new one
422
- # tmp_uuid = UUID_MAP.get(token, str(uuid.uuid4())[:8])
423
- # decrypted_output.append(tmp_uuid)
424
- # UUID_MAP[token] = tmp_uuid
425
- # else:
426
- # decrypted_output.append(token)
427
 
428
- # # Update the UUID map with query.
429
- # write_json(MAPPING_UUID_PATH, UUID_MAP)
 
430
 
431
- # # Removing Spaces Before Punctuation:
432
- # anonymized_text = re.sub(r"\s([,.!?;:])", r"\1", " ".join(decrypted_output))
 
 
 
433
 
434
- # # Convert the list of identified words and probabilities into a DataFrame
435
- # if identified_words_with_prob:
436
- # identified_df = pd.DataFrame(
437
- # identified_words_with_prob, columns=["Identified Words", "Probability"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
438
  # )
439
- # else:
440
- # identified_df = pd.DataFrame(columns=["Identified Words", "Probability"])
441
 
442
- # print("Decryption done on Client Side")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
443
 
444
- # return anonymized_text, identified_df
445
 
 
446
 
447
- # def anonymization_with_fn(selected_sentences, query):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
448
 
449
- # encrypt_query_fn(query)
 
 
 
 
 
 
450
 
451
- # send_input_fn(query)
 
 
 
 
 
452
 
453
- # run_fhe_in_server_fn()
454
 
455
- # get_output_fn()
456
 
457
- # anonymized_text, identified_df = decrypt_fn(query)
 
 
 
 
 
 
 
 
 
458
 
459
- # return {
460
- # anonymized_doc_output: gr.update(value=select_static_anonymized_sentences_fn(selected_sentences)),
461
- # anonymized_query_output: gr.update(value=anonymized_text),
462
- # identified_words_output_df: gr.update(value=identified_df, visible=False),
463
- # }
464
 
 
 
465
 
466
- # def query_chatgpt_fn(anonymized_query, anonymized_document):
467
 
468
- # print("------------ Step 5: ChatGPT communication")
469
 
470
- # if not (KEYS_DIR / f"{USER_ID}/evaluation_key").is_file():
471
- # error_message = "Error ❌: Please generate the key first!"
472
- # return {chatgpt_response_anonymized: gr.update(value=error_message)}
 
 
 
473
 
474
- # if not (CLIENT_DIR / f"{USER_ID}_encrypted_output").is_file():
475
- # error_message = "Error ❌: Please encrypt your query first!"
476
- # return {chatgpt_response_anonymized: gr.update(value=error_message)}
 
 
477
 
478
- # context_prompt = read_txt(PROMPT_PATH)
 
 
 
 
479
 
480
- # # Prepare prompt
481
- # query = (
482
- # "Document content:\n```\n"
483
- # + anonymized_document
484
- # + "\n\n```"
485
- # + "Query:\n```\n"
486
- # + anonymized_query
487
- # + "\n```"
488
  # )
489
- # print(f'Prompt of CHATGPT:\n{query}')
490
-
491
- # completion = client.chat.completions.create(
492
- # model="gpt-4-1106-preview", # Replace with "gpt-4" if available
493
- # messages=[
494
- # {"role": "system", "content": context_prompt},
495
- # {"role": "user", "content": query},
496
- # ],
497
  # )
498
- # anonymized_response = completion.choices[0].message.content
499
- # uuid_map = read_json(MAPPING_UUID_PATH)
500
 
501
- # inverse_uuid_map = {
502
- # v: k for k, v in uuid_map.items()
503
- # } # TODO load the inverse mapping from disk for efficiency
504
 
505
- # # Pattern to identify words and non-words (including punctuation, spaces, etc.)
506
- # tokens = re.findall(r"(\b[\w\.\/\-@]+\b|[\s,.!?;:'\"-]+)", anonymized_response)
507
- # processed_tokens = []
508
 
509
- # for token in tokens:
510
- # # Directly append non-word tokens or whitespace to processed_tokens
511
- # if not token.strip() or not re.match(r"\w+", token):
512
- # processed_tokens.append(token)
513
- # continue
514
 
515
- # if token in inverse_uuid_map:
516
- # processed_tokens.append(inverse_uuid_map[token])
517
- # else:
518
- # processed_tokens.append(token)
519
- # deanonymized_response = "".join(processed_tokens)
520
 
521
- # return {chatgpt_response_anonymized: gr.update(value=anonymized_response),
522
- # chatgpt_response_deanonymized: gr.update(value=deanonymized_response)}
523
 
524
 
525
 
@@ -540,30 +895,224 @@ def key_generated():
540
 
541
 
542
 
 
543
 
544
 
 
 
 
 
 
 
 
 
 
 
 
 
545
 
 
 
 
 
 
546
 
 
547
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
548
 
 
 
 
 
 
549
 
 
550
 
 
 
 
 
 
 
 
 
551
 
 
552
 
 
 
553
 
 
 
 
554
 
 
555
 
556
- demo = gr.Blocks(css=".markdown-body { font-size: 18px; }")
 
 
557
 
558
- with demo:
559
 
560
- # gr.Markdown(
561
- # """
562
- # <p align="center">
563
- # <img width=200 src="https://user-images.githubusercontent.com/5758427/197816413-d9cddad3-ba38-4793-847d-120975e1da11.png">
564
- # </p>
565
- # """)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
566
 
 
 
 
567
  gr.Markdown(
568
  f"""
569
  <div style="display: flex; justify-content: center; align-items: center;">
@@ -572,6 +1121,9 @@ with demo:
572
  </div>
573
  """
574
  )
 
 
 
575
  gr.Markdown(
576
  """
577
  <h1 style="text-align: center;">Fraud Detection with FHE Model</h1>
@@ -594,46 +1146,6 @@ with demo:
594
  """
595
  )
596
 
597
- # gr.Markdown(
598
- # """
599
- # <h1 style="text-align: center;">Encrypted Anonymization Using Fully Homomorphic Encryption</h1>
600
- # <p align="center">
601
- # <a href="https://github.com/zama-ai/concrete-ml"> <img style="vertical-align: middle; display:inline-block; margin-right: 3px;" width=15 src="file/images/logos/github.png">Concrete-ML</a>
602
- # —
603
- # <a href="https://docs.zama.ai/concrete-ml"> <img style="vertical-align: middle; display:inline-block; margin-right: 3px;" width=15 src="file/images/logos/documentation.png">Documentation</a>
604
- # —
605
- # <a href=" https://community.zama.ai/c/concrete-ml/8"> <img style="vertical-align: middle; display:inline-block; margin-right: 3px;" width=15 src="file/images/logos/community.png">Community</a>
606
- # —
607
- # <a href="https://twitter.com/zama_fhe"> <img style="vertical-align: middle; display:inline-block; margin-right: 3px;" width=15 src="file/images/logos/x.png">@zama_fhe</a>
608
- # </p>
609
- # """
610
- # )
611
-
612
- # gr.Markdown(
613
- # """
614
- # <p align="center" style="font-size: 16px;">
615
- # Anonymization is the process of removing personally identifiable information (PII) data from
616
- # a document in order to protect individual privacy.</p>
617
-
618
- # <p align="center" style="font-size: 16px;">
619
- # Encrypted anonymization uses Fully Homomorphic Encryption (FHE) to anonymize personally
620
- # identifiable information (PII) within encrypted documents, enabling computations to be
621
- # performed on the encrypted data.</p>
622
-
623
- # <p align="center" style="font-size: 16px;">
624
- # In the example above, we're showing how encrypted anonymization can be leveraged to use LLM
625
- # services such as ChatGPT in a privacy-preserving manner.</p>
626
- # """
627
- # )
628
-
629
- # gr.Markdown(
630
- # """
631
- # <p align="center">
632
- # <img width="75%" height="30%" src="https://raw.githubusercontent.com/kcelia/Img/main/fhe_anonymization_banner.png">
633
- # </p>
634
- # """
635
- # )
636
-
637
  with gr.Accordion("What is bank fraud detection?", open=False):
638
  gr.Markdown(
639
  """
@@ -673,7 +1185,6 @@ with demo:
673
  """
674
  )
675
 
676
-
677
  gr.Markdown(
678
  f"""
679
  <p align="center">
@@ -838,7 +1349,7 @@ with demo:
838
 
839
  gr.Markdown("<hr />")
840
 
841
- ######################### Decrypt Prediction ##########################
842
 
843
  gr.Markdown("## Step 4: Receive the encrypted output from the server and decrypt.")
844
  gr.Markdown(
@@ -871,203 +1382,5 @@ with demo:
871
  "Try it yourself and don't forget to star on Github &#11088;."
872
  )
873
 
874
-
875
-
876
-
877
-
878
-
879
-
880
-
881
-
882
-
883
-
884
-
885
-
886
-
887
-
888
-
889
-
890
-
891
-
892
-
893
-
894
-
895
-
896
-
897
-
898
-
899
-
900
-
901
- # ########################## Key Gen Part ##########################
902
-
903
- # gr.Markdown(
904
- # "## Step 1: Generate the keys\n\n"
905
- # """In Fully Homomorphic Encryption (FHE) methods, two types of keys are created. The first
906
- # type, called secret keys, are used to encrypt and decrypt the user's data. The second type,
907
- # called evaluation keys, enables a server to work on the encrypted data without seeing the
908
- # actual data.
909
- # """
910
- # )
911
-
912
- # gen_key_btn = gr.Button("Generate the secret and evaluation keys")
913
-
914
- # gen_key_btn.click(
915
- # key_gen_fn,
916
- # inputs=[],
917
- # outputs=[gen_key_btn],
918
- # )
919
-
920
- # ########################## Main document Part ##########################
921
-
922
- # gr.Markdown("<hr />")
923
- # gr.Markdown("## Step 2.1: Select the document you want to encrypt\n\n"
924
- # """To make it simple, we pre-compiled the following document, but you are free to choose
925
- # on which part you want to run this example.
926
- # """
927
- # )
928
-
929
- # with gr.Row():
930
- # with gr.Column(scale=5):
931
- # original_sentences_box = gr.CheckboxGroup(
932
- # ORIGINAL_DOCUMENT,
933
- # value=ORIGINAL_DOCUMENT,
934
- # label="Contract:",
935
- # show_label=True,
936
- # )
937
-
938
- # with gr.Column(scale=1, min_width=6):
939
- # gr.HTML("<div style='height: 77px;'></div>")
940
- # encrypt_doc_btn = gr.Button("Encrypt the document")
941
-
942
- # with gr.Column(scale=5):
943
- # encrypted_doc_box = gr.Textbox(
944
- # label="Encrypted document:", show_label=True, interactive=False, lines=10
945
- # )
946
-
947
-
948
- # ########################## User Query Part ##########################
949
-
950
- # gr.Markdown("<hr />")
951
- # gr.Markdown("## Step 2.2: Select the prompt you want to encrypt\n\n"
952
- # """Please choose from the predefined options in
953
- # <span style='color:grey'>“Prompt examples”</span> or craft a custom question in
954
- # the <span style='color:grey'>“Customized prompt”</span> text box.
955
- # Remain concise and relevant to the context. Any off-topic query will not be processed.""")
956
-
957
- # with gr.Row():
958
- # with gr.Column(scale=5):
959
-
960
- # with gr.Column(scale=5):
961
- # default_query_box = gr.Dropdown(
962
- # list(DEFAULT_QUERIES.values()), label="PROMPT EXAMPLES:"
963
- # )
964
-
965
- # gr.Markdown("Or")
966
-
967
- # query_box = gr.Textbox(
968
- # value="What is Kate international bank account number?", label="CUSTOMIZED PROMPT:", interactive=True
969
- # )
970
-
971
- # default_query_box.change(
972
- # fn=lambda default_query_box: default_query_box,
973
- # inputs=[default_query_box],
974
- # outputs=[query_box],
975
- # )
976
-
977
- # with gr.Column(scale=1, min_width=6):
978
- # gr.HTML("<div style='height: 77px;'></div>")
979
- # encrypt_query_btn = gr.Button("Encrypt the prompt")
980
- # # gr.HTML("<div style='height: 50px;'></div>")
981
-
982
- # with gr.Column(scale=5):
983
- # output_encrypted_box = gr.Textbox(
984
- # label="Encrypted anonymized query that will be sent to the anonymization server:",
985
- # lines=8,
986
- # )
987
-
988
- # ########################## FHE processing Part ##########################
989
-
990
- # gr.Markdown("<hr />")
991
- # gr.Markdown("## Step 3: Anonymize the document and the prompt using FHE")
992
- # gr.Markdown(
993
- # """Once the client encrypts the document and the prompt locally, it will be sent to a remote
994
- # server to perform the anonymization on encrypted data. When the computation is done, the
995
- # server will return the result to the client for decryption.
996
- # """
997
- # )
998
-
999
- # run_fhe_btn = gr.Button("Anonymize using FHE")
1000
-
1001
- # with gr.Row():
1002
- # with gr.Column(scale=5):
1003
-
1004
- # anonymized_doc_output = gr.Textbox(
1005
- # label="Decrypted and anonymized document", lines=10, interactive=True
1006
- # )
1007
-
1008
- # with gr.Column(scale=5):
1009
-
1010
- # anonymized_query_output = gr.Textbox(
1011
- # label="Decrypted and anonymized prompt", lines=10, interactive=True
1012
- # )
1013
-
1014
-
1015
- # identified_words_output_df = gr.Dataframe(label="Identified words:", visible=False)
1016
-
1017
- # encrypt_doc_btn.click(
1018
- # fn=encrypt_doc_fn,
1019
- # inputs=[original_sentences_box],
1020
- # outputs=[encrypted_doc_box, anonymized_doc_output],
1021
- # )
1022
-
1023
- # encrypt_query_btn.click(
1024
- # fn=encrypt_query_fn,
1025
- # inputs=[query_box],
1026
- # outputs=[
1027
- # query_box,
1028
- # output_encrypted_box,
1029
- # anonymized_query_output,
1030
- # identified_words_output_df,
1031
- # ],
1032
- # )
1033
-
1034
- # run_fhe_btn.click(
1035
- # anonymization_with_fn,
1036
- # inputs=[original_sentences_box, query_box],
1037
- # outputs=[anonymized_doc_output, anonymized_query_output, identified_words_output_df],
1038
- # )
1039
-
1040
- # ########################## ChatGpt Part ##########################
1041
-
1042
- # gr.Markdown("<hr />")
1043
- # gr.Markdown("## Step 4: Send anonymized prompt to ChatGPT")
1044
- # gr.Markdown(
1045
- # """After securely anonymizing the query with FHE,
1046
- # you can forward it to ChatGPT without having any concern about information leakage."""
1047
- # )
1048
-
1049
- # chatgpt_button = gr.Button("Query ChatGPT")
1050
-
1051
- # with gr.Row():
1052
- # chatgpt_response_anonymized = gr.Textbox(label="ChatGPT's anonymized response:", lines=5)
1053
- # chatgpt_response_deanonymized = gr.Textbox(
1054
- # label="ChatGPT's non-anonymized response:", lines=5
1055
- # )
1056
-
1057
- # chatgpt_button.click(
1058
- # query_chatgpt_fn,
1059
- # inputs=[anonymized_query_output, anonymized_doc_output],
1060
- # outputs=[chatgpt_response_anonymized, chatgpt_response_deanonymized],
1061
- # )
1062
-
1063
- # gr.Markdown(
1064
- # """**Please note**: As this space is intended solely for demonstration purposes, some
1065
- # private information may be missed during by the anonymization algorithm. Please validate the
1066
- # following query before sending it to ChatGPT."""
1067
- # )
1068
- # Launch the app
1069
- # demo.launch(share=False)
1070
-
1071
-
1072
  if __name__ == "__main__":
1073
- demo.launch()
 
1
+ # # """A Gradio app for anonymizing text data using FHE."""
2
 
3
+ # # import os
4
+ # # import re
5
+ # # import subprocess
6
+ # # import time
7
+ # # import uuid
8
+ # # from typing import Dict, List
9
 
10
+ # # import numpy
11
+ # # import pandas as pd
12
+ # # import requests
13
+ # # from fhe_anonymizer import FHEAnonymizer
14
+ # # from utils_demo import *
15
 
16
+ # # from concrete.ml.deployment import FHEModelClient
17
 
18
 
19
 
20
+ # import gradio as gr
21
+ # from predictor import predict, key_already_generated, pre_process_encrypt_send_purchase, decrypt_prediction
22
+ # import base64
23
 
24
+ # def key_generated():
25
+ # """
26
+ # Check if the evaluation keys have already been generated.
27
+ # Returns:
28
+ # bool: True if the evaluation keys have already been generated, False otherwise.
29
+ # """
30
+ # if not key_already_generated():
31
+ # error_message = (
32
+ # f"Error Encountered While generating the evaluation keys."
33
+ # )
34
+ # print(error_message)
35
+ # return {gen_key_btn: gr.update(value=error_message)}
36
+ # else:
37
+ # print("Keys have been generated ✅")
38
+ # return {gen_key_btn: gr.update(value="Keys have been generated ✅")}
39
 
40
 
41
+ # # demo = gr.Blocks(css=".markdown-body { font-size: 18px; }")
42
 
43
 
44
 
 
56
 
57
 
58
 
59
+ # # # Ensure the directory is clean before starting processes or reading files
60
+ # # clean_directory()
61
 
62
+ # # anonymizer = FHEAnonymizer()
63
 
64
+ # # # Start the Uvicorn server hosting the FastAPI app
65
+ # # subprocess.Popen(["uvicorn", "server:app"], cwd=CURRENT_DIR)
66
+ # # time.sleep(3)
67
 
68
+ # # # Load data from files required for the application
69
+ # # UUID_MAP = read_json(MAPPING_UUID_PATH)
70
+ # # ANONYMIZED_DOCUMENT = read_txt(ANONYMIZED_FILE_PATH)
71
+ # # MAPPING_ANONYMIZED_SENTENCES = read_pickle(MAPPING_ANONYMIZED_SENTENCES_PATH)
72
+ # # MAPPING_ENCRYPTED_SENTENCES = read_pickle(MAPPING_ENCRYPTED_SENTENCES_PATH)
73
+ # # ORIGINAL_DOCUMENT = read_txt(ORIGINAL_FILE_PATH).split("\n\n")
74
+ # # MAPPING_DOC_EMBEDDING = read_pickle(MAPPING_DOC_EMBEDDING_PATH)
75
 
76
+ # # print(f"{ORIGINAL_DOCUMENT=}\n")
77
+ # # print(f"{MAPPING_DOC_EMBEDDING.keys()=}")
78
 
79
+ # # # 4. Data Processing and Operations (No specific operations shown here, assuming it's part of anonymizer or client usage)
80
 
81
+ # # # 5. Utilizing External Services or APIs
82
+ # # # (Assuming client initialization and anonymizer setup are parts of using external services or application-specific logic)
83
 
84
+ # # # Generate a random user ID for this session
85
+ # # USER_ID = numpy.random.randint(0, 2**32)
86
 
87
 
88
 
 
93
 
94
 
95
 
96
+ # # def select_static_anonymized_sentences_fn(selected_sentences: List):
97
 
98
+ # # selected_sentences = [MAPPING_ANONYMIZED_SENTENCES[sentence] for sentence in selected_sentences]
99
 
100
+ # # anonymized_selected_sentence = sorted(selected_sentences, key=lambda x: x[0])
101
 
102
+ # # anonymized_selected_sentence = [sentence for _, sentence in anonymized_selected_sentence]
103
 
104
+ # # return "\n\n".join(anonymized_selected_sentence)
105
 
106
 
107
+ # # def key_gen_fn() -> Dict:
108
+ # # """Generate keys for a given user."""
109
 
110
+ # # print("------------ Step 1: Key Generation:")
111
 
112
+ # # print(f"Your user ID is: {USER_ID}....")
113
 
114
 
115
+ # # client = FHEModelClient(path_dir=DEPLOYMENT_DIR, key_dir=KEYS_DIR / f"{USER_ID}")
116
+ # # client.load()
117
 
118
+ # # # Creates the private and evaluation keys on the client side
119
+ # # client.generate_private_and_evaluation_keys()
120
 
121
+ # # # Get the serialized evaluation keys
122
+ # # serialized_evaluation_keys = client.get_serialized_evaluation_keys()
123
+ # # assert isinstance(serialized_evaluation_keys, bytes)
124
 
125
+ # # # Save the evaluation key
126
+ # # evaluation_key_path = KEYS_DIR / f"{USER_ID}/evaluation_key"
127
 
128
+ # # write_bytes(evaluation_key_path, serialized_evaluation_keys)
129
 
130
+ # # # anonymizer.generate_key()
131
 
132
+ # # if not evaluation_key_path.is_file():
133
+ # # error_message = (
134
+ # # f"Error Encountered While generating the evaluation {evaluation_key_path.is_file()=}"
135
+ # # )
136
+ # # print(error_message)
137
+ # # return {gen_key_btn: gr.update(value=error_message)}
138
+ # # else:
139
+ # # print("Keys have been generated ✅")
140
+ # # return {gen_key_btn: gr.update(value="Keys have been generated ✅")}
141
 
142
 
143
+ # # def encrypt_doc_fn(doc):
144
 
145
+ # # print(f"\n------------ Step 2.1: Doc encryption: {doc=}")
146
 
147
+ # # if not (KEYS_DIR / f"{USER_ID}/evaluation_key").is_file():
148
+ # # return {encrypted_doc_box: gr.update(value="Error ❌: Please generate the key first!", lines=10)}
149
 
150
+ # # # Retrieve the client API
151
+ # # client = FHEModelClient(path_dir=DEPLOYMENT_DIR, key_dir=KEYS_DIR / f"{USER_ID}")
152
+ # # client.load()
153
 
154
+ # # encrypted_tokens = []
155
+ # # tokens = re.findall(r"(\b[\w\.\/\-@]+\b|[\s,.!?;:'\"-]+|\$\d+(?:\.\d+)?|\€\d+(?:\.\d+)?)", ' '.join(doc))
156
 
157
+ # # for token in tokens:
158
+ # # if token.strip() and re.match(r"\w+", token):
159
+ # # emb_x = MAPPING_DOC_EMBEDDING[token]
160
+ # # assert emb_x.shape == (1, 1024)
161
+ # # encrypted_x = client.quantize_encrypt_serialize(emb_x)
162
+ # # assert isinstance(encrypted_x, bytes)
163
+ # # encrypted_tokens.append(encrypted_x)
164
 
165
+ # # print("Doc encrypted ✅ on Client Side")
166
 
167
+ # # # No need to save it
168
+ # # # write_bytes(KEYS_DIR / f"{USER_ID}/encrypted_doc", b"".join(encrypted_tokens))
169
 
170
+ # # encrypted_quant_tokens_hex = [token.hex()[500:510] for token in encrypted_tokens]
171
 
172
+ # # return {
173
+ # # encrypted_doc_box: gr.update(value=" ".join(encrypted_quant_tokens_hex), lines=10),
174
+ # # anonymized_doc_output: gr.update(visible=True, value=None),
175
+ # # }
176
 
177
 
178
+ # # def encrypt_query_fn(query):
179
 
180
+ # # print(f"\n------------ Step 2: Query encryption: {query=}")
181
 
182
+ # # if not (KEYS_DIR / f"{USER_ID}/evaluation_key").is_file():
183
+ # # return {output_encrypted_box: gr.update(value="Error ❌: Please generate the key first!", lines=8)}
184
 
185
+ # # if is_user_query_valid(query):
186
+ # # return {
187
+ # # query_box: gr.update(
188
+ # # value=(
189
+ # # "Unable to process ❌: The request exceeds the length limit or falls "
190
+ # # "outside the scope of this document. Please refine your query."
191
+ # # )
192
+ # # )
193
+ # # }
194
 
195
+ # # # Retrieve the client API
196
+ # # client = FHEModelClient(path_dir=DEPLOYMENT_DIR, key_dir=KEYS_DIR / f"{USER_ID}")
197
+ # # client.load()
198
 
199
+ # # encrypted_tokens = []
200
 
201
+ # # # Pattern to identify words and non-words (including punctuation, spaces, etc.)
202
+ # # tokens = re.findall(r"(\b[\w\.\/\-@]+\b|[\s,.!?;:'\"-]+)", query)
203
 
204
+ # # for token in tokens:
205
 
206
+ # # # 1- Ignore non-words tokens
207
+ # # if bool(re.match(r"^\s+$", token)):
208
+ # # continue
209
 
210
+ # # # 2- Directly append non-word tokens or whitespace to processed_tokens
211
 
212
+ # # # Prediction for each word
213
+ # # emb_x = get_batch_text_representation([token], EMBEDDINGS_MODEL, TOKENIZER)
214
+ # # encrypted_x = client.quantize_encrypt_serialize(emb_x)
215
+ # # assert isinstance(encrypted_x, bytes)
216
 
217
+ # # encrypted_tokens.append(encrypted_x)
218
 
219
+ # # print("Data encrypted ✅ on Client Side")
220
 
221
+ # # assert len({len(token) for token in encrypted_tokens}) == 1
222
 
223
+ # # write_bytes(KEYS_DIR / f"{USER_ID}/encrypted_input", b"".join(encrypted_tokens))
224
+ # # write_bytes(
225
+ # # KEYS_DIR / f"{USER_ID}/encrypted_input_len", len(encrypted_tokens[0]).to_bytes(10, "big")
226
+ # # )
227
 
228
+ # # encrypted_quant_tokens_hex = [token.hex()[500:580] for token in encrypted_tokens]
229
 
230
+ # # return {
231
+ # # output_encrypted_box: gr.update(value=" ".join(encrypted_quant_tokens_hex), lines=8),
232
+ # # anonymized_query_output: gr.update(visible=True, value=None),
233
+ # # identified_words_output_df: gr.update(visible=False, value=None),
234
+ # # }
235
 
236
 
237
+ # # def send_input_fn(query) -> Dict:
238
+ # # """Send the encrypted data and the evaluation key to the server."""
239
 
240
+ # # print("------------ Step 3.1: Send encrypted_data to the Server")
241
 
242
+ # # evaluation_key_path = KEYS_DIR / f"{USER_ID}/evaluation_key"
243
+ # # encrypted_input_path = KEYS_DIR / f"{USER_ID}/encrypted_input"
244
+ # # encrypted_input_len_path = KEYS_DIR / f"{USER_ID}/encrypted_input_len"
245
 
246
+ # # if not evaluation_key_path.is_file():
247
+ # # error_message = (
248
+ # # "Error Encountered While Sending Data to the Server: "
249
+ # # f"The key has been generated correctly - {evaluation_key_path.is_file()=}"
250
+ # # )
251
+ # # return {anonymized_query_output: gr.update(value=error_message)}
252
+
253
+ # # if not encrypted_input_path.is_file():
254
+ # # error_message = (
255
+ # # "Error Encountered While Sending Data to the Server: The data has not been encrypted "
256
+ # # f"correctly on the client side - {encrypted_input_path.is_file()=}"
257
+ # # )
258
+ # # return {anonymized_query_output: gr.update(value=error_message)}
259
+
260
+ # # # Define the data and files to post
261
+ # # data = {"user_id": USER_ID, "input": query}
262
+
263
+ # # files = [
264
+ # # ("files", open(evaluation_key_path, "rb")),
265
+ # # ("files", open(encrypted_input_path, "rb")),
266
+ # # ("files", open(encrypted_input_len_path, "rb")),
267
+ # # ]
268
+
269
+ # # # Send the encrypted input and evaluation key to the server
270
+ # # url = SERVER_URL + "send_input"
271
+
272
+ # # with requests.post(
273
+ # # url=url,
274
+ # # data=data,
275
+ # # files=files,
276
+ # # ) as resp:
277
+ # # print("Data sent to the server ✅" if resp.ok else "Error ❌ in sending data to the server")
278
 
 
 
 
 
 
 
279
 
280
+ # # def run_fhe_in_server_fn() -> Dict:
281
+ # # """Run in FHE the anonymization of the query"""
282
+
283
+ # # print("------------ Step 3.2: Run in FHE on the Server Side")
284
+
285
+ # # evaluation_key_path = KEYS_DIR / f"{USER_ID}/evaluation_key"
286
+ # # encrypted_input_path = KEYS_DIR / f"{USER_ID}/encrypted_input"
287
+
288
+ # # if not evaluation_key_path.is_file():
289
+ # # error_message = (
290
+ # # "Error Encountered While Sending Data to the Server: "
291
+ # # f"The key has been generated correctly - {evaluation_key_path.is_file()=}"
292
+ # # )
293
+ # # return {anonymized_query_output: gr.update(value=error_message)}
294
 
295
+ # # if not encrypted_input_path.is_file():
296
+ # # error_message = (
297
+ # # "Error Encountered While Sending Data to the Server: The data has not been encrypted "
298
+ # # f"correctly on the client side - {encrypted_input_path.is_file()=}"
299
+ # # )
300
+ # # return {anonymized_query_output: gr.update(value=error_message)}
301
+
302
+ # # data = {
303
+ # # "user_id": USER_ID,
304
+ # # }
305
 
306
+ # # url = SERVER_URL + "run_fhe"
307
+
308
+ # # with requests.post(
309
+ # # url=url,
310
+ # # data=data,
311
+ # # ) as response:
312
+ # # if not response.ok:
313
+ # # return {
314
+ # # anonymized_query_output: gr.update(
315
+ # # value=(
316
+ # # "⚠️ An error occurred on the Server Side. "
317
+ # # "Please check connectivity and data transmission."
318
+ # # ),
319
+ # # ),
320
+ # # }
321
+ # # else:
322
+ # # time.sleep(1)
323
+ # # print(f"The query anonymization was computed in {response.json():.2f} s per token.")
324
+
325
+
326
+ # # def get_output_fn() -> Dict:
327
+
328
+ # # print("------------ Step 3.3: Get the output from the Server Side")
329
+
330
+ # # if not (KEYS_DIR / f"{USER_ID}/evaluation_key").is_file():
331
+ # # error_message = (
332
+ # # "Error Encountered While Sending Data to the Server: "
333
+ # # "The key has not been generated correctly"
334
+ # # )
335
+ # # return {anonymized_query_output: gr.update(value=error_message)}
336
+
337
+ # # if not (KEYS_DIR / f"{USER_ID}/encrypted_input").is_file():
338
+ # # error_message = (
339
+ # # "Error Encountered While Sending Data to the Server: "
340
+ # # "The data has not been encrypted correctly on the client side"
341
+ # # )
342
+ # # return {anonymized_query_output: gr.update(value=error_message)}
343
+
344
+ # # data = {
345
+ # # "user_id": USER_ID,
346
+ # # }
347
+
348
+ # # # Retrieve the encrypted output
349
+ # # url = SERVER_URL + "get_output"
350
+ # # with requests.post(
351
+ # # url=url,
352
+ # # data=data,
353
+ # # ) as response:
354
+ # # if response.ok:
355
+ # # print("Data received ✅ from the remote Server")
356
+ # # response_data = response.json()
357
+ # # encrypted_output_base64 = response_data["encrypted_output"]
358
+ # # length_encrypted_output_base64 = response_data["length"]
359
 
360
+ # # # Decode the base64 encoded data
361
+ # # encrypted_output = base64.b64decode(encrypted_output_base64)
362
+ # # length_encrypted_output = base64.b64decode(length_encrypted_output_base64)
363
+
364
+ # # # Save the encrypted output to bytes in a file as it is too large to pass through
365
+ # # # regular Gradio buttons (see https://github.com/gradio-app/gradio/issues/1877)
366
 
367
+ # # write_bytes(CLIENT_DIR / f"{USER_ID}_encrypted_output", encrypted_output)
368
+ # # write_bytes(CLIENT_DIR / f"{USER_ID}_encrypted_output_len", length_encrypted_output)
369
 
370
+ # # else:
371
+ # # print("Error in getting data to the server")
372
 
 
373
 
374
+ # # def decrypt_fn(text) -> Dict:
375
+ # # """Dencrypt the data on the `Client Side`."""
376
 
377
+ # # print("------------ Step 4: Dencrypt the data on the `Client Side`")
 
 
 
 
 
378
 
379
+ # # # Get the encrypted output path
380
+ # # encrypted_output_path = CLIENT_DIR / f"{USER_ID}_encrypted_output"
 
 
 
 
381
 
382
+ # # if not encrypted_output_path.is_file():
383
+ # # error_message = """⚠️ Please ensure that: \n
384
+ # # - the connectivity \n
385
+ # # - the query has been submitted \n
386
+ # # - the evaluation key has been generated \n
387
+ # # - the server processed the encrypted data \n
388
+ # # - the Client received the data from the Server before decrypting the prediction
389
+ # # """
390
+ # # print(error_message)
391
 
392
+ # # return error_message, None
393
 
394
+ # # # Retrieve the client API
395
+ # # client = FHEModelClient(path_dir=DEPLOYMENT_DIR, key_dir=KEYS_DIR / f"{USER_ID}")
396
+ # # client.load()
397
+
398
+ # # # Load the encrypted output as bytes
399
+ # # encrypted_output = read_bytes(CLIENT_DIR / f"{USER_ID}_encrypted_output")
400
+ # # length = int.from_bytes(read_bytes(CLIENT_DIR / f"{USER_ID}_encrypted_output_len"), "big")
 
 
 
 
 
 
 
 
 
401
 
402
+ # # tokens = re.findall(r"(\b[\w\.\/\-@]+\b|[\s,.!?;:'\"-]+)", text)
403
+
404
+ # # decrypted_output, identified_words_with_prob = [], []
405
 
406
+ # # i = 0
407
+ # # for token in tokens:
408
+
409
+ # # # Directly append non-word tokens or whitespace to processed_tokens
410
+ # # if bool(re.match(r"^\s+$", token)):
411
+ # # continue
412
+ # # else:
413
+ # # encrypted_token = encrypted_output[i : i + length]
414
+ # # prediction_proba = client.deserialize_decrypt_dequantize(encrypted_token)
415
+ # # probability = prediction_proba[0][1]
416
+ # # i += length
417
 
418
+ # # if probability >= 0.77:
419
+ # # identified_words_with_prob.append((token, probability))
420
 
421
+ # # # Use the existing UUID if available, otherwise generate a new one
422
+ # # tmp_uuid = UUID_MAP.get(token, str(uuid.uuid4())[:8])
423
+ # # decrypted_output.append(tmp_uuid)
424
+ # # UUID_MAP[token] = tmp_uuid
425
+ # # else:
426
+ # # decrypted_output.append(token)
427
 
428
+ # # # Update the UUID map with query.
429
+ # # write_json(MAPPING_UUID_PATH, UUID_MAP)
430
+
431
+ # # # Removing Spaces Before Punctuation:
432
+ # # anonymized_text = re.sub(r"\s([,.!?;:])", r"\1", " ".join(decrypted_output))
433
+
434
+ # # # Convert the list of identified words and probabilities into a DataFrame
435
+ # # if identified_words_with_prob:
436
+ # # identified_df = pd.DataFrame(
437
+ # # identified_words_with_prob, columns=["Identified Words", "Probability"]
438
+ # # )
439
+ # # else:
440
+ # # identified_df = pd.DataFrame(columns=["Identified Words", "Probability"])
441
+
442
+ # # print("Decryption done ✅ on Client Side")
443
+
444
+ # # return anonymized_text, identified_df
445
+
446
+
447
+ # # def anonymization_with_fn(selected_sentences, query):
448
+
449
+ # # encrypt_query_fn(query)
450
+
451
+ # # send_input_fn(query)
452
+
453
+ # # run_fhe_in_server_fn()
454
+
455
+ # # get_output_fn()
456
+
457
+ # # anonymized_text, identified_df = decrypt_fn(query)
458
+
459
+ # # return {
460
+ # # anonymized_doc_output: gr.update(value=select_static_anonymized_sentences_fn(selected_sentences)),
461
+ # # anonymized_query_output: gr.update(value=anonymized_text),
462
+ # # identified_words_output_df: gr.update(value=identified_df, visible=False),
463
+ # # }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
464
 
 
465
 
466
+ # # def query_chatgpt_fn(anonymized_query, anonymized_document):
 
 
467
 
468
+ # # print("------------ Step 5: ChatGPT communication")
 
 
469
 
470
+ # # if not (KEYS_DIR / f"{USER_ID}/evaluation_key").is_file():
471
+ # # error_message = "Error ❌: Please generate the key first!"
472
+ # # return {chatgpt_response_anonymized: gr.update(value=error_message)}
473
 
474
+ # # if not (CLIENT_DIR / f"{USER_ID}_encrypted_output").is_file():
475
+ # # error_message = "Error ❌: Please encrypt your query first!"
476
+ # # return {chatgpt_response_anonymized: gr.update(value=error_message)}
477
 
478
+ # # context_prompt = read_txt(PROMPT_PATH)
 
479
 
480
+ # # # Prepare prompt
481
+ # # query = (
482
+ # # "Document content:\n```\n"
483
+ # # + anonymized_document
484
+ # # + "\n\n```"
485
+ # # + "Query:\n```\n"
486
+ # # + anonymized_query
487
+ # # + "\n```"
488
+ # # )
489
+ # # print(f'Prompt of CHATGPT:\n{query}')
490
 
491
+ # # completion = client.chat.completions.create(
492
+ # # model="gpt-4-1106-preview", # Replace with "gpt-4" if available
493
+ # # messages=[
494
+ # # {"role": "system", "content": context_prompt},
495
+ # # {"role": "user", "content": query},
496
+ # # ],
497
+ # # )
498
+ # # anonymized_response = completion.choices[0].message.content
499
+ # # uuid_map = read_json(MAPPING_UUID_PATH)
500
 
501
+ # # inverse_uuid_map = {
502
+ # # v: k for k, v in uuid_map.items()
503
+ # # } # TODO load the inverse mapping from disk for efficiency
 
 
 
504
 
505
+ # # # Pattern to identify words and non-words (including punctuation, spaces, etc.)
506
+ # # tokens = re.findall(r"(\b[\w\.\/\-@]+\b|[\s,.!?;:'\"-]+)", anonymized_response)
507
+ # # processed_tokens = []
508
 
509
+ # # for token in tokens:
510
+ # # # Directly append non-word tokens or whitespace to processed_tokens
511
+ # # if not token.strip() or not re.match(r"\w+", token):
512
+ # # processed_tokens.append(token)
513
+ # # continue
514
 
515
+ # # if token in inverse_uuid_map:
516
+ # # processed_tokens.append(inverse_uuid_map[token])
517
+ # # else:
518
+ # # processed_tokens.append(token)
519
+ # # deanonymized_response = "".join(processed_tokens)
520
+
521
+ # # return {chatgpt_response_anonymized: gr.update(value=anonymized_response),
522
+ # # chatgpt_response_deanonymized: gr.update(value=deanonymized_response)}
523
+
524
+
525
+
526
+
527
+
528
+
529
+
530
+
531
+
532
+
533
+
534
+
535
+
536
+
537
+
538
+
539
+
540
+
541
+
542
+
543
+
544
+
545
+
546
+
547
+
548
+
549
+
550
+
551
+
552
+
553
+
554
+
555
+
556
+ # demo = gr.Blocks(css=".markdown-body { font-size: 18px; }")
557
+
558
+ # with demo:
559
+
560
+ # # gr.Markdown(
561
+ # # """
562
+ # # <p align="center">
563
+ # # <img width=200 src="https://user-images.githubusercontent.com/5758427/197816413-d9cddad3-ba38-4793-847d-120975e1da11.png">
564
+ # # </p>
565
+ # # """)
566
+
567
+ # gr.Markdown(
568
+ # f"""
569
+ # <div style="display: flex; justify-content: center; align-items: center;">
570
+ # <img style="margin-right: 50px;" width=200 src="https://huggingface.co/spaces/Tenefix/private-fhe-fraud-detection/resolve/main/Img/zama.png">
571
+ # <img width=200 src="https://huggingface.co/spaces/Tenefix/private-fhe-fraud-detection/resolve/main/Img/Epita.png">
572
+ # </div>
573
+ # """
574
+ # )
575
+ # gr.Markdown(
576
+ # """
577
+ # <h1 style="text-align: center;">Fraud Detection with FHE Model</h1>
578
+ # <p align="center">
579
+ # <a href="https://github.com/CirSandro/private-fhe-fraud-detection">
580
+ # <span style="vertical-align: middle; display:inline-block; margin-right: 3px;">💳</span>private-fhe-fraud-detection
581
+ # </a>
582
+ # —
583
+ # <a href="https://docs.zama.ai/concrete-ml">
584
+ # <span style="vertical-align: middle; display:inline-block; margin-right: 3px;">🔒</span>Documentation Concrete-ML
585
+ # </a>
586
+ # </p>
587
+ # """
588
+ # )
589
+
590
+ # gr.Markdown(
591
+ # """
592
+ # <p align="center" style="font-size: 16px;">
593
+ # How to detect bank fraud without using your personal data ?</p>
594
+ # """
595
+ # )
596
+
597
+ # # gr.Markdown(
598
+ # # """
599
+ # # <h1 style="text-align: center;">Encrypted Anonymization Using Fully Homomorphic Encryption</h1>
600
+ # # <p align="center">
601
+ # # <a href="https://github.com/zama-ai/concrete-ml"> <img style="vertical-align: middle; display:inline-block; margin-right: 3px;" width=15 src="file/images/logos/github.png">Concrete-ML</a>
602
+ # # —
603
+ # # <a href="https://docs.zama.ai/concrete-ml"> <img style="vertical-align: middle; display:inline-block; margin-right: 3px;" width=15 src="file/images/logos/documentation.png">Documentation</a>
604
+ # # —
605
+ # # <a href=" https://community.zama.ai/c/concrete-ml/8"> <img style="vertical-align: middle; display:inline-block; margin-right: 3px;" width=15 src="file/images/logos/community.png">Community</a>
606
+ # # —
607
+ # # <a href="https://twitter.com/zama_fhe"> <img style="vertical-align: middle; display:inline-block; margin-right: 3px;" width=15 src="file/images/logos/x.png">@zama_fhe</a>
608
+ # # </p>
609
+ # # """
610
+ # # )
611
+
612
+ # # gr.Markdown(
613
+ # # """
614
+ # # <p align="center" style="font-size: 16px;">
615
+ # # Anonymization is the process of removing personally identifiable information (PII) data from
616
+ # # a document in order to protect individual privacy.</p>
617
+
618
+ # # <p align="center" style="font-size: 16px;">
619
+ # # Encrypted anonymization uses Fully Homomorphic Encryption (FHE) to anonymize personally
620
+ # # identifiable information (PII) within encrypted documents, enabling computations to be
621
+ # # performed on the encrypted data.</p>
622
+
623
+ # # <p align="center" style="font-size: 16px;">
624
+ # # In the example above, we're showing how encrypted anonymization can be leveraged to use LLM
625
+ # # services such as ChatGPT in a privacy-preserving manner.</p>
626
+ # # """
627
+ # # )
628
+
629
+ # # gr.Markdown(
630
+ # # """
631
+ # # <p align="center">
632
+ # # <img width="75%" height="30%" src="https://raw.githubusercontent.com/kcelia/Img/main/fhe_anonymization_banner.png">
633
+ # # </p>
634
+ # # """
635
+ # # )
636
+
637
+ # with gr.Accordion("What is bank fraud detection?", open=False):
638
+ # gr.Markdown(
639
+ # """
640
+ # Bank fraud detection is the process of identifying fraudulent activities or transactions
641
+ # that may pose a risk to a bank or its customers. It is essential to detect fraudulent
642
+ # activities to prevent financial losses and protect the integrity of the banking system.
643
+ # """
644
  # )
 
 
645
 
646
+ # with gr.Accordion("Why is it important to protect this data?", open=False):
647
+ # gr.Markdown(
648
+ # """
649
+ # Banking and financial data often contain sensitive personal information, such as income,
650
+ # spending habits, and account numbers. Protecting this information ensures that customers'
651
+ # privacy is respected and safeguarded from unauthorized access.
652
+ # """
653
+ # )
654
+
655
+ # with gr.Accordion("Why is Fully Homomorphic Encryption (FHE) a good solution?", open=False):
656
+ # gr.Markdown(
657
+ # """
658
+ # Fully Homomorphic Encryption (FHE) is a powerful technique for enhancing privacy and accuracy
659
+ # in the context of fraud detection, particularly when dealing with sensitive banking data. FHE
660
+ # allows for the encryption of data, which can then be processed and analyzed without ever needing
661
+ # to decrypt it.
662
+ # Each party involved in the detection process can collaborate without compromising user privacy,
663
+ # minimizing the risk of data leaks or breaches. The data remains confidential throughout the entire
664
+ # process, ensuring that the privacy of users is maintained.
665
+ # """
666
+ # )
667
+
668
+ # gr.Markdown(
669
+ # """
670
+ # <p style="text-align: center;">
671
+ # Below, we will explain the flow in the image by simulating a purchase you've just made, and show you how our fraud detection model processes the transaction.
672
+ # </p>
673
+ # """
674
+ # )
675
+
676
+
677
+ # gr.Markdown(
678
+ # f"""
679
+ # <p align="center">
680
+ # <img width="75%" height="30%" src="https://huggingface.co/spaces/Tenefix/private-fhe-fraud-detection/resolve/main/Img/schema.png">
681
+ # </p>
682
+ # """
683
+ # )
684
+
685
+ # gr.Markdown("<hr />")
686
+
687
+ # ########################## Key Gen Part ##########################
688
+
689
+ # gr.Markdown(
690
+ # "## Step 1: Generate the keys\n\n"
691
+ # """In Fully Homomorphic Encryption (FHE) methods, two types of keys are created. The first
692
+ # type, called secret keys, are used to encrypt and decrypt the user's data. The second type,
693
+ # called evaluation keys, enables a server to work on the encrypted data without seeing the
694
+ # actual data.
695
+ # """
696
+ # )
697
+
698
+ # gen_key_btn = gr.Button("Generate the secret and evaluation keys")
699
+
700
+ # gen_key_btn.click(
701
+ # key_generated,
702
+ # inputs=[],
703
+ # outputs=[gen_key_btn],
704
+ # )#547
705
 
706
+ # gr.Markdown("<hr />")
707
 
708
+ # ########################## Encrypt Data ##########################
709
 
710
+ # gr.Markdown(
711
+ # "## Step 2: Make your purchase\n\n"
712
+ # """
713
+ # 🛍️ It's time to shop! To simulate your latest purchase, please provide the details of your most recent transaction.
714
+
715
+ # If you don't have an idea, you can pre-fill with an example of fraud or non-fraud.
716
+ # """
717
+ # )
718
+
719
+ # def prefill_fraud():
720
+ # return 34, 50, 3, False, False, False, True
721
+
722
+ # def prefill_no_fraud():
723
+ # return 12, 2, 0.7, True, False, True, False
724
+
725
+ # with gr.Row():
726
+ # prefill_button = gr.Button("Exemple Fraud")
727
+ # prefill_button_no = gr.Button("Exemple No-Fraud")
728
+
729
+ # with gr.Row():
730
+ # with gr.Column():
731
+ # distance_home = gr.Number(
732
+ # minimum=float(0),
733
+ # maximum=float(22000),
734
+ # step=1,
735
+ # value=10,
736
+ # label="Distance from Home",
737
+ # info="How far was the purchase from your home (in km)?"
738
+ # )
739
+ # distance_last = gr.Number(
740
+ # minimum=float(0),
741
+ # maximum=float(22000),
742
+ # step=1,
743
+ # value=1,
744
+ # label="Distance from Last Transaction",
745
+ # info="Distance between this purchase and the last one (in km)?"
746
+ # )
747
+ # ratio = gr.Number(
748
+ # minimum=float(0),
749
+ # maximum=float(10000),
750
+ # step=0.1,
751
+ # value=1,
752
+ # label="Ratio to Median Purchase Price",
753
+ # info="Purchase ratio compared to your average purchase",
754
+ # )
755
+ # repeat_retailer = gr.Checkbox(
756
+ # label="Repeat Retailer",
757
+ # info="Check if you are purchasing from the same retailer as your last transaction"
758
+ # )
759
+ # used_chip = gr.Checkbox(
760
+ # label="Used Chip",
761
+ # info="Check if you used a chip card for this transaction"
762
+ # )
763
+ # used_pin_number = gr.Checkbox(
764
+ # label="Used Pin Number",
765
+ # info="Check if you used your PIN number during the transaction"
766
+ # )
767
+ # online = gr.Checkbox(
768
+ # label="Online Order",
769
+ # info="Check if you made your purchase online"
770
+ # )
771
+
772
+
773
+ # prefill_button.click(
774
+ # fn=prefill_fraud,
775
+ # inputs=[],
776
+ # outputs=[
777
+ # distance_home,
778
+ # distance_last,
779
+ # ratio,
780
+ # repeat_retailer,
781
+ # used_chip,
782
+ # used_pin_number,
783
+ # online
784
+ # ]
785
+ # )
786
+
787
+ # prefill_button_no.click(
788
+ # fn=prefill_no_fraud,
789
+ # inputs=[],
790
+ # outputs=[
791
+ # distance_home,
792
+ # distance_last,
793
+ # ratio,
794
+ # repeat_retailer,
795
+ # used_chip,
796
+ # used_pin_number,
797
+ # online
798
+ # ]
799
+ # )
800
 
801
+ # with gr.Row():
802
+ # with gr.Column(scale=2):
803
+ # encrypt_button_applicant = gr.Button("Encrypt the inputs and send to server.")
804
+
805
+ # encrypted_input_applicant = gr.Textbox(
806
+ # label="Encrypted input representation:", max_lines=2, interactive=False
807
+ # )
808
 
809
+ # encrypt_button_applicant.click(
810
+ # pre_process_encrypt_send_purchase,
811
+ # inputs=[distance_home, distance_last, ratio, repeat_retailer, used_chip, used_pin_number, \
812
+ # online],
813
+ # outputs=[encrypted_input_applicant, encrypt_button_applicant],
814
+ # )
815
 
816
+ # gr.Markdown("<hr />")
817
 
818
+ # ########################## Model Prediction ##########################
819
 
820
+ # gr.Markdown("## Step 3: Run the FHE evaluation.")
821
+ # gr.Markdown("<span style='color:grey'>Server Side</span>")
822
+ # gr.Markdown(
823
+ # """
824
+ # It's high time to launch our prediction, by pressing the button you will launch the
825
+ # fraud analysis that our fictitious bank offers you.
826
+ # This server employs a [Random Forest (by Concrete-ML)](https://github.com/zama-ai/concrete-ml/blob/release/1.8.x/docs/references/api/concrete.ml.sklearn.rf.md#class-randomforestclassifier)
827
+ # classifier model that has been trained on a synthetic data-set.
828
+ # """
829
+ # )
830
 
831
+ # execute_fhe_button = gr.Button("Run the FHE evaluation.")
832
+ # fhe_execution_time = gr.Textbox(
833
+ # label="Total FHE execution time (in seconds):", max_lines=1, interactive=False
834
+ # )
 
835
 
836
+ # # Button to send the encodings to the server using post method
837
+ # execute_fhe_button.click(predict, outputs=[fhe_execution_time, execute_fhe_button])
838
 
839
+ # gr.Markdown("<hr />")
840
 
841
+ # ######################### Decrypt Prediction ##########################
842
 
843
+ # gr.Markdown("## Step 4: Receive the encrypted output from the server and decrypt.")
844
+ # gr.Markdown(
845
+ # """
846
+ # 🔔 You will receive a notification! Is this a Fraud? The message is decrypted by pressing the button.
847
+ # """
848
+ # )
849
 
850
+ # get_output_button = gr.Button("Decrypt the prediction.")
851
+ # prediction_output = gr.Textbox(
852
+ # label="Prediction", max_lines=1, interactive=False
853
+ # )
854
+ # prediction_bar = gr.HTML(label="Prediction Bar") # For the percentage bar
855
 
856
+ # get_output_button.click(
857
+ # decrypt_prediction,
858
+ # outputs=[prediction_output, get_output_button, prediction_bar],
859
+ # )
860
+
861
 
862
+ # gr.Markdown(
863
+ # """
864
+ # You now know that it is possible to detect bank fraud without knowing your personal information.
865
+ # """
 
 
 
 
866
  # )
867
+
868
+ # gr.Markdown(
869
+ # "The app was built with [Concrete-ML](https://github.com/zama-ai/concrete-ml), a "
870
+ # "Privacy-Preserving Machine Learning (PPML) open-source set of tools by [Zama](https://zama.ai/). "
871
+ # "Try it yourself and don't forget to star on Github &#11088;."
 
 
 
872
  # )
 
 
873
 
 
 
 
874
 
 
 
 
875
 
 
 
 
 
 
876
 
 
 
 
 
 
877
 
 
 
878
 
879
 
880
 
 
895
 
896
 
897
 
898
+
899
 
900
 
901
+ # # ########################## Key Gen Part ##########################
902
+
903
+ # # gr.Markdown(
904
+ # # "## Step 1: Generate the keys\n\n"
905
+ # # """In Fully Homomorphic Encryption (FHE) methods, two types of keys are created. The first
906
+ # # type, called secret keys, are used to encrypt and decrypt the user's data. The second type,
907
+ # # called evaluation keys, enables a server to work on the encrypted data without seeing the
908
+ # # actual data.
909
+ # # """
910
+ # # )
911
+
912
+ # # gen_key_btn = gr.Button("Generate the secret and evaluation keys")
913
 
914
+ # # gen_key_btn.click(
915
+ # # key_gen_fn,
916
+ # # inputs=[],
917
+ # # outputs=[gen_key_btn],
918
+ # # )
919
 
920
+ # # ########################## Main document Part ##########################
921
 
922
+ # # gr.Markdown("<hr />")
923
+ # # gr.Markdown("## Step 2.1: Select the document you want to encrypt\n\n"
924
+ # # """To make it simple, we pre-compiled the following document, but you are free to choose
925
+ # # on which part you want to run this example.
926
+ # # """
927
+ # # )
928
+
929
+ # # with gr.Row():
930
+ # # with gr.Column(scale=5):
931
+ # # original_sentences_box = gr.CheckboxGroup(
932
+ # # ORIGINAL_DOCUMENT,
933
+ # # value=ORIGINAL_DOCUMENT,
934
+ # # label="Contract:",
935
+ # # show_label=True,
936
+ # # )
937
+
938
+ # # with gr.Column(scale=1, min_width=6):
939
+ # # gr.HTML("<div style='height: 77px;'></div>")
940
+ # # encrypt_doc_btn = gr.Button("Encrypt the document")
941
+
942
+ # # with gr.Column(scale=5):
943
+ # # encrypted_doc_box = gr.Textbox(
944
+ # # label="Encrypted document:", show_label=True, interactive=False, lines=10
945
+ # # )
946
+
947
+
948
+ # # ########################## User Query Part ##########################
949
+
950
+ # # gr.Markdown("<hr />")
951
+ # # gr.Markdown("## Step 2.2: Select the prompt you want to encrypt\n\n"
952
+ # # """Please choose from the predefined options in
953
+ # # <span style='color:grey'>“Prompt examples”</span> or craft a custom question in
954
+ # # the <span style='color:grey'>“Customized prompt”</span> text box.
955
+ # # Remain concise and relevant to the context. Any off-topic query will not be processed.""")
956
+
957
+ # # with gr.Row():
958
+ # # with gr.Column(scale=5):
959
+
960
+ # # with gr.Column(scale=5):
961
+ # # default_query_box = gr.Dropdown(
962
+ # # list(DEFAULT_QUERIES.values()), label="PROMPT EXAMPLES:"
963
+ # # )
964
+
965
+ # # gr.Markdown("Or")
966
+
967
+ # # query_box = gr.Textbox(
968
+ # # value="What is Kate international bank account number?", label="CUSTOMIZED PROMPT:", interactive=True
969
+ # # )
970
+
971
+ # # default_query_box.change(
972
+ # # fn=lambda default_query_box: default_query_box,
973
+ # # inputs=[default_query_box],
974
+ # # outputs=[query_box],
975
+ # # )
976
+
977
+ # # with gr.Column(scale=1, min_width=6):
978
+ # # gr.HTML("<div style='height: 77px;'></div>")
979
+ # # encrypt_query_btn = gr.Button("Encrypt the prompt")
980
+ # # # gr.HTML("<div style='height: 50px;'></div>")
981
 
982
+ # # with gr.Column(scale=5):
983
+ # # output_encrypted_box = gr.Textbox(
984
+ # # label="Encrypted anonymized query that will be sent to the anonymization server:",
985
+ # # lines=8,
986
+ # # )
987
 
988
+ # # ########################## FHE processing Part ##########################
989
 
990
+ # # gr.Markdown("<hr />")
991
+ # # gr.Markdown("## Step 3: Anonymize the document and the prompt using FHE")
992
+ # # gr.Markdown(
993
+ # # """Once the client encrypts the document and the prompt locally, it will be sent to a remote
994
+ # # server to perform the anonymization on encrypted data. When the computation is done, the
995
+ # # server will return the result to the client for decryption.
996
+ # # """
997
+ # # )
998
 
999
+ # # run_fhe_btn = gr.Button("Anonymize using FHE")
1000
 
1001
+ # # with gr.Row():
1002
+ # # with gr.Column(scale=5):
1003
 
1004
+ # # anonymized_doc_output = gr.Textbox(
1005
+ # # label="Decrypted and anonymized document", lines=10, interactive=True
1006
+ # # )
1007
 
1008
+ # # with gr.Column(scale=5):
1009
 
1010
+ # # anonymized_query_output = gr.Textbox(
1011
+ # # label="Decrypted and anonymized prompt", lines=10, interactive=True
1012
+ # # )
1013
 
 
1014
 
1015
+ # # identified_words_output_df = gr.Dataframe(label="Identified words:", visible=False)
1016
+
1017
+ # # encrypt_doc_btn.click(
1018
+ # # fn=encrypt_doc_fn,
1019
+ # # inputs=[original_sentences_box],
1020
+ # # outputs=[encrypted_doc_box, anonymized_doc_output],
1021
+ # # )
1022
+
1023
+ # # encrypt_query_btn.click(
1024
+ # # fn=encrypt_query_fn,
1025
+ # # inputs=[query_box],
1026
+ # # outputs=[
1027
+ # # query_box,
1028
+ # # output_encrypted_box,
1029
+ # # anonymized_query_output,
1030
+ # # identified_words_output_df,
1031
+ # # ],
1032
+ # # )
1033
+
1034
+ # # run_fhe_btn.click(
1035
+ # # anonymization_with_fn,
1036
+ # # inputs=[original_sentences_box, query_box],
1037
+ # # outputs=[anonymized_doc_output, anonymized_query_output, identified_words_output_df],
1038
+ # # )
1039
+
1040
+ # # ########################## ChatGpt Part ##########################
1041
+
1042
+ # # gr.Markdown("<hr />")
1043
+ # # gr.Markdown("## Step 4: Send anonymized prompt to ChatGPT")
1044
+ # # gr.Markdown(
1045
+ # # """After securely anonymizing the query with FHE,
1046
+ # # you can forward it to ChatGPT without having any concern about information leakage."""
1047
+ # # )
1048
+
1049
+ # # chatgpt_button = gr.Button("Query ChatGPT")
1050
+
1051
+ # # with gr.Row():
1052
+ # # chatgpt_response_anonymized = gr.Textbox(label="ChatGPT's anonymized response:", lines=5)
1053
+ # # chatgpt_response_deanonymized = gr.Textbox(
1054
+ # # label="ChatGPT's non-anonymized response:", lines=5
1055
+ # # )
1056
+
1057
+ # # chatgpt_button.click(
1058
+ # # query_chatgpt_fn,
1059
+ # # inputs=[anonymized_query_output, anonymized_doc_output],
1060
+ # # outputs=[chatgpt_response_anonymized, chatgpt_response_deanonymized],
1061
+ # # )
1062
+
1063
+ # # gr.Markdown(
1064
+ # # """**Please note**: As this space is intended solely for demonstration purposes, some
1065
+ # # private information may be missed during by the anonymization algorithm. Please validate the
1066
+ # # following query before sending it to ChatGPT."""
1067
+ # # )
1068
+ # # Launch the app
1069
+ # # demo.launch(share=False)
1070
+
1071
+
1072
+ # if __name__ == "__main__":
1073
+ # demo.launch()
1074
+
1075
+
1076
+
1077
+
1078
+
1079
+
1080
+
1081
+
1082
+
1083
+
1084
+
1085
+
1086
+
1087
+
1088
+
1089
+
1090
+
1091
+
1092
+ import gradio as gr
1093
+ from predictor import predict, key_already_generated, pre_process_encrypt_send_purchase, decrypt_prediction
1094
+ import base64
1095
+
1096
def key_generated():
    """Report whether the evaluation keys have been generated.

    Calls ``key_already_generated()`` (from the ``predictor`` module) and maps
    the outcome onto the ``gen_key_btn`` Gradio button label so the user sees
    the key-generation status directly in the UI.

    Returns:
        dict: A Gradio update mapping for ``gen_key_btn`` — a success label
        when the keys exist, otherwise an error message.
        (NOTE(review): the original docstring claimed a ``bool`` return; the
        function has always returned this update dict.)
    """
    if not key_already_generated():
        # No keys available: surface the failure on the button itself.
        error_message = "Error Encountered While generating the evaluation keys."
        print(error_message)
        return {gen_key_btn: gr.update(value=error_message)}

    # Keys exist: confirm on the button.
    print("Keys have been generated ✅")
    return {gen_key_btn: gr.update(value="Keys have been generated ✅")}
1111
+
1112
 
1113
+ demo = gr.Blocks(css=".markdown-body { font-size: 18px; }")
1114
+
1115
+ with demo:
1116
  gr.Markdown(
1117
  f"""
1118
  <div style="display: flex; justify-content: center; align-items: center;">
 
1121
  </div>
1122
  """
1123
  )
1124
+
1125
+
1126
+
1127
  gr.Markdown(
1128
  """
1129
  <h1 style="text-align: center;">Fraud Detection with FHE Model</h1>
 
1146
  """
1147
  )
1148
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1149
  with gr.Accordion("What is bank fraud detection?", open=False):
1150
  gr.Markdown(
1151
  """
 
1185
  """
1186
  )
1187
 
 
1188
  gr.Markdown(
1189
  f"""
1190
  <p align="center">
 
1349
 
1350
  gr.Markdown("<hr />")
1351
 
1352
+ ########################## Decrypt Prediction ##########################
1353
 
1354
  gr.Markdown("## Step 4: Receive the encrypted output from the server and decrypt.")
1355
  gr.Markdown(
 
1382
  "Try it yourself and don't forget to star on Github &#11088;."
1383
  )
1384
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1385
# Script entry point: start the Gradio app only when run directly,
# not when this module is imported.
if __name__ == "__main__":
    demo.launch()