Sparkonix committed on
Commit
5407d4c
·
1 Parent(s): 8a0711e

changed api for fetching unmasked email

Browse files
Dockerfile CHANGED
@@ -15,9 +15,12 @@ COPY . .
15
  ENV PORT=7860
16
  ENV MODEL_PATH="Sparkonix/email-classifier-model"
17
 
18
- # Change this to point to SQLite database location
19
  ENV DATABASE_PATH="/data/emails.db"
20
 
 
 
 
21
  # Add this line to set cache location to a writable directory
22
  ENV HF_HOME="/app/.cache/huggingface"
23
 
 
15
  ENV PORT=7860
16
  ENV MODEL_PATH="Sparkonix/email-classifier-model"
17
 
18
+ # SQLite database path
19
  ENV DATABASE_PATH="/data/emails.db"
20
 
21
+ # Global access key for email retrieval
22
+ ENV EMAIL_ACCESS_KEY="access_key_123"
23
+
24
  # Add this line to set cache location to a writable directory
25
  ENV HF_HOME="/app/.cache/huggingface"
26
 
Email_Classification_API_Tests.postman_collection.json CHANGED
@@ -12,20 +12,14 @@
12
  "type": "string"
13
  },
14
  {
15
- "key": "emailId",
16
- "value": ""
 
17
  },
18
  {
19
  "key": "accessKey",
20
- "value": ""
21
- },
22
- {
23
- "key": "piiEmailId",
24
- "value": ""
25
- },
26
- {
27
- "key": "piiAccessKey",
28
- "value": ""
29
  }
30
  ],
31
  "item": [
@@ -125,15 +119,7 @@
125
  "pm.test(\"Category is valid\", function() {",
126
  " var jsonData = pm.response.json();",
127
  " pm.expect([\"Incident\", \"Request\", \"Change\", \"Problem\"]).to.include(jsonData.category_of_the_email);",
128
- "});",
129
- "",
130
- "// Store the email_id and access_key for database retrieval tests",
131
- "var responseJson = pm.response.json();",
132
- "if (responseJson.email_id && responseJson.access_key) {",
133
- " pm.collectionVariables.set(\"emailId\", responseJson.email_id);",
134
- " pm.collectionVariables.set(\"accessKey\", responseJson.access_key);",
135
- " console.log(\"Stored email_id and access_key for retrieval tests\");",
136
- "}"
137
  ],
138
  "type": "text/javascript"
139
  }
@@ -191,13 +177,10 @@
191
  " pm.expect(jsonData.list_of_masked_entities).to.be.an(\"array\").that.is.not.empty;",
192
  "});",
193
  "",
194
- "// Store the email_id and access_key for database retrieval tests specifically for PII emails",
195
  "var responseJson = pm.response.json();",
196
- "if (responseJson.email_id && responseJson.access_key) {",
197
- " pm.collectionVariables.set(\"piiEmailId\", responseJson.email_id);",
198
- " pm.collectionVariables.set(\"piiAccessKey\", responseJson.access_key);",
199
- " console.log(\"Stored PII email_id and access_key for retrieval tests\");",
200
- "}"
201
  ],
202
  "type": "text/javascript"
203
  }
@@ -450,13 +433,13 @@
450
  "response": []
451
  },
452
  {
453
- "name": "Retrieve Original Email",
454
  "request": {
455
  "method": "POST",
456
  "header": [],
457
  "body": {
458
  "mode": "raw",
459
- "raw": "{\n \"email_id\": \"{{piiEmailId}}\",\n \"access_key\": \"{{piiAccessKey}}\"\n}",
460
  "options": {
461
  "raw": {
462
  "language": "json"
@@ -464,18 +447,17 @@
464
  }
465
  },
466
  "url": {
467
- "raw": "{{baseUrl}}/api/v1/original-email/retrieve",
468
  "host": [
469
  "{{baseUrl}}"
470
  ],
471
  "path": [
472
  "api",
473
  "v1",
474
- "original-email",
475
- "retrieve"
476
  ]
477
  },
478
- "description": "Retrieve the original unmasked email from the database"
479
  },
480
  "event": [
481
  {
@@ -503,8 +485,9 @@
503
  " pm.expect(jsonData.data).to.have.property(\"masked_email\");",
504
  " pm.expect(jsonData.data).to.have.property(\"masked_entities\");",
505
  " ",
506
- " // Check that the original email contains the PII (for the PII test email)",
507
- " if (pm.collectionVariables.get(\"piiEmailId\")) {",
 
508
  " pm.expect(jsonData.data.original_email).to.include(\"John Smith\");",
509
  " pm.expect(jsonData.data.original_email).to.include(\"john.smith@example.com\");",
510
  " pm.expect(jsonData.data.original_email).to.include(\"555-123-4567\");",
@@ -518,13 +501,13 @@
518
  "response": []
519
  },
520
  {
521
- "name": "Retrieve With Invalid Access Key",
522
  "request": {
523
  "method": "POST",
524
  "header": [],
525
  "body": {
526
  "mode": "raw",
527
- "raw": "{\n \"email_id\": \"{{piiEmailId}}\",\n \"access_key\": \"invalid_access_key_123456\"\n}",
528
  "options": {
529
  "raw": {
530
  "language": "json"
@@ -532,26 +515,75 @@
532
  }
533
  },
534
  "url": {
535
- "raw": "{{baseUrl}}/api/v1/original-email/retrieve",
536
  "host": [
537
  "{{baseUrl}}"
538
  ],
539
  "path": [
540
  "api",
541
  "v1",
542
- "original-email",
543
- "retrieve"
544
  ]
545
  },
546
- "description": "Test security by attempting to retrieve email with invalid access key"
547
  },
548
  "event": [
549
  {
550
  "listen": "test",
551
  "script": {
552
  "exec": [
553
- "// Check that we get an error (404) for invalid access key",
554
  "pm.test(\"Should return error for invalid access key\", function() {",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
555
  " pm.expect(pm.response.code).to.equal(404);",
556
  "});",
557
  "",
 
12
  "type": "string"
13
  },
14
  {
15
+ "key": "maskedEmailWithPII",
16
+ "value": "",
17
+ "type": "string"
18
  },
19
  {
20
  "key": "accessKey",
21
+ "value": "local_dev_secure_access_key_20240516",
22
+ "type": "string"
 
 
 
 
 
 
 
23
  }
24
  ],
25
  "item": [
 
119
  "pm.test(\"Category is valid\", function() {",
120
  " var jsonData = pm.response.json();",
121
  " pm.expect([\"Incident\", \"Request\", \"Change\", \"Problem\"]).to.include(jsonData.category_of_the_email);",
122
+ "});"
 
 
 
 
 
 
 
 
123
  ],
124
  "type": "text/javascript"
125
  }
 
177
  " pm.expect(jsonData.list_of_masked_entities).to.be.an(\"array\").that.is.not.empty;",
178
  "});",
179
  "",
180
+ "// Store the masked email for later unmasking test",
181
  "var responseJson = pm.response.json();",
182
+ "pm.collectionVariables.set(\"maskedEmailWithPII\", responseJson.masked_email);",
183
+ "console.log(\"Stored masked email for unmasking tests\");"
 
 
 
184
  ],
185
  "type": "text/javascript"
186
  }
 
433
  "response": []
434
  },
435
  {
436
+ "name": "Unmask Email by Content",
437
  "request": {
438
  "method": "POST",
439
  "header": [],
440
  "body": {
441
  "mode": "raw",
442
+ "raw": "{\n \"masked_email\": \"{{maskedEmailWithPII}}\",\n \"access_key\": \"{{accessKey}}\"\n}",
443
  "options": {
444
  "raw": {
445
  "language": "json"
 
447
  }
448
  },
449
  "url": {
450
+ "raw": "{{baseUrl}}/api/v1/unmask-email",
451
  "host": [
452
  "{{baseUrl}}"
453
  ],
454
  "path": [
455
  "api",
456
  "v1",
457
+ "unmask-email"
 
458
  ]
459
  },
460
+ "description": "Retrieve the original unmasked email using the masked content"
461
  },
462
  "event": [
463
  {
 
485
  " pm.expect(jsonData.data).to.have.property(\"masked_email\");",
486
  " pm.expect(jsonData.data).to.have.property(\"masked_entities\");",
487
  " ",
488
+ " // Check that the original email contains the PII",
489
+ " var maskedEmailWithPII = pm.collectionVariables.get(\"maskedEmailWithPII\");",
490
+ " if (maskedEmailWithPII) {",
491
  " pm.expect(jsonData.data.original_email).to.include(\"John Smith\");",
492
  " pm.expect(jsonData.data.original_email).to.include(\"john.smith@example.com\");",
493
  " pm.expect(jsonData.data.original_email).to.include(\"555-123-4567\");",
 
501
  "response": []
502
  },
503
  {
504
+ "name": "Unmask Email with Invalid Access Key",
505
  "request": {
506
  "method": "POST",
507
  "header": [],
508
  "body": {
509
  "mode": "raw",
510
+ "raw": "{\n \"masked_email\": \"{{maskedEmailWithPII}}\",\n \"access_key\": \"invalid_access_key_123456\"\n}",
511
  "options": {
512
  "raw": {
513
  "language": "json"
 
515
  }
516
  },
517
  "url": {
518
+ "raw": "{{baseUrl}}/api/v1/unmask-email",
519
  "host": [
520
  "{{baseUrl}}"
521
  ],
522
  "path": [
523
  "api",
524
  "v1",
525
+ "unmask-email"
 
526
  ]
527
  },
528
+ "description": "Test security by attempting to unmask email with invalid access key"
529
  },
530
  "event": [
531
  {
532
  "listen": "test",
533
  "script": {
534
  "exec": [
535
+ "// Check that we get an error (401) for invalid access key",
536
  "pm.test(\"Should return error for invalid access key\", function() {",
537
+ " pm.expect(pm.response.code).to.equal(401);",
538
+ "});",
539
+ "",
540
+ "// Check error message",
541
+ "pm.test(\"Response contains appropriate error message\", function() {",
542
+ " var jsonData = pm.response.json();",
543
+ " pm.expect(jsonData).to.have.property(\"detail\");",
544
+ " pm.expect(jsonData.detail).to.include(\"Invalid access key\");",
545
+ "});"
546
+ ],
547
+ "type": "text/javascript"
548
+ }
549
+ }
550
+ ],
551
+ "response": []
552
+ },
553
+ {
554
+ "name": "Unmask with Non-existent Email Content",
555
+ "request": {
556
+ "method": "POST",
557
+ "header": [],
558
+ "body": {
559
+ "mode": "raw",
560
+ "raw": "{\n \"masked_email\": \"This is a masked email that does not exist in the database [FULL_NAME].\",\n \"access_key\": \"{{accessKey}}\"\n}",
561
+ "options": {
562
+ "raw": {
563
+ "language": "json"
564
+ }
565
+ }
566
+ },
567
+ "url": {
568
+ "raw": "{{baseUrl}}/api/v1/unmask-email",
569
+ "host": [
570
+ "{{baseUrl}}"
571
+ ],
572
+ "path": [
573
+ "api",
574
+ "v1",
575
+ "unmask-email"
576
+ ]
577
+ },
578
+ "description": "Test error handling when masked email content doesn't exist"
579
+ },
580
+ "event": [
581
+ {
582
+ "listen": "test",
583
+ "script": {
584
+ "exec": [
585
+ "// Check that we get a 404 error for non-existent email content",
586
+ "pm.test(\"Should return 404 for non-existent email content\", function() {",
587
  " pm.expect(pm.response.code).to.equal(404);",
588
  "});",
589
  "",
database.py CHANGED
@@ -7,7 +7,6 @@ import sqlite3
7
  from typing import Dict, Any, Optional, List, Tuple
8
  from datetime import datetime
9
  import uuid
10
- import hashlib
11
 
12
 
13
  class EmailDatabase:
@@ -30,6 +29,9 @@ class EmailDatabase:
30
  "/data/emails.db" # This path persists in Hugging Face Spaces
31
  )
32
 
 
 
 
33
  # Ensure the data directory exists
34
  self._ensure_data_directory()
35
 
@@ -64,14 +66,10 @@ class EmailDatabase:
64
  masked_email TEXT NOT NULL,
65
  masked_entities TEXT NOT NULL,
66
  category TEXT,
67
- created_at TEXT NOT NULL,
68
- access_key TEXT NOT NULL
69
  )
70
  ''')
71
 
72
- # Create an index on the access_key field
73
- cursor.execute('CREATE INDEX IF NOT EXISTS idx_access_key ON emails (access_key)')
74
-
75
  conn.commit()
76
  except Exception as e:
77
  conn.rollback()
@@ -83,17 +81,8 @@ class EmailDatabase:
83
  """Generate a unique ID for the email record."""
84
  return str(uuid.uuid4())
85
 
86
- def _generate_access_key(self, email_id: str) -> str:
87
- """
88
- Generate an access key for retrieving the original email.
89
- This acts as a security measure to prevent unauthorized access.
90
- """
91
- # Use a combination of the email ID and a timestamp, hashed
92
- data = f"{email_id}:{datetime.now().isoformat()}:{os.urandom(8).hex()}"
93
- return hashlib.sha256(data.encode()).hexdigest()
94
-
95
  def store_email(self, original_email: str, masked_email: str,
96
- masked_entities: List[Dict[str, Any]], category: Optional[str] = None) -> Tuple[str, str]:
97
  """
98
  Store the original email along with its masked version and related information.
99
 
@@ -104,32 +93,30 @@ class EmailDatabase:
104
  category: Optional category of the email
105
 
106
  Returns:
107
- Tuple of (email_id, access_key) for future reference
108
  """
109
  conn = self._get_connection()
110
  try:
111
  cursor = conn.cursor()
112
 
113
  email_id = self._generate_id()
114
- access_key = self._generate_access_key(email_id)
115
 
116
  # Store the email data
117
  cursor.execute(
118
- 'INSERT INTO emails (id, original_email, masked_email, masked_entities, category, created_at, access_key) '
119
- 'VALUES (?, ?, ?, ?, ?, ?, ?)',
120
  (
121
  email_id,
122
  original_email,
123
  masked_email,
124
  json.dumps(masked_entities), # Convert to JSON string for SQLite
125
  category,
126
- datetime.now().isoformat(),
127
- access_key
128
  )
129
  )
130
 
131
  conn.commit()
132
- return email_id, access_key
133
  except Exception as e:
134
  conn.rollback()
135
  raise e
@@ -147,14 +134,18 @@ class EmailDatabase:
147
  Returns:
148
  Dictionary with email data or None if not found or access_key is invalid
149
  """
 
 
 
 
150
  conn = self._get_connection()
151
  try:
152
  cursor = conn.cursor()
153
 
154
  cursor.execute(
155
  'SELECT id, original_email, masked_email, masked_entities, category, created_at '
156
- 'FROM emails WHERE id = ? AND access_key = ?',
157
- (email_id, access_key)
158
  )
159
 
160
  row = cursor.fetchone()
@@ -203,5 +194,40 @@ class EmailDatabase:
203
  "category": row[3],
204
  "created_at": row[4]
205
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
206
  finally:
207
  conn.close()
 
7
  from typing import Dict, Any, Optional, List, Tuple
8
  from datetime import datetime
9
  import uuid
 
10
 
11
 
12
  class EmailDatabase:
 
29
  "/data/emails.db" # This path persists in Hugging Face Spaces
30
  )
31
 
32
+ # Get the global access key from environment variables
33
+ self.access_key = os.environ.get("EMAIL_ACCESS_KEY", "default_secure_access_key")
34
+
35
  # Ensure the data directory exists
36
  self._ensure_data_directory()
37
 
 
66
  masked_email TEXT NOT NULL,
67
  masked_entities TEXT NOT NULL,
68
  category TEXT,
69
+ created_at TEXT NOT NULL
 
70
  )
71
  ''')
72
 
 
 
 
73
  conn.commit()
74
  except Exception as e:
75
  conn.rollback()
 
81
  """Generate a unique ID for the email record."""
82
  return str(uuid.uuid4())
83
 
 
 
 
 
 
 
 
 
 
84
  def store_email(self, original_email: str, masked_email: str,
85
+ masked_entities: List[Dict[str, Any]], category: Optional[str] = None) -> str:
86
  """
87
  Store the original email along with its masked version and related information.
88
 
 
93
  category: Optional category of the email
94
 
95
  Returns:
96
+ email_id for future reference
97
  """
98
  conn = self._get_connection()
99
  try:
100
  cursor = conn.cursor()
101
 
102
  email_id = self._generate_id()
 
103
 
104
  # Store the email data
105
  cursor.execute(
106
+ 'INSERT INTO emails (id, original_email, masked_email, masked_entities, category, created_at) '
107
+ 'VALUES (?, ?, ?, ?, ?, ?)',
108
  (
109
  email_id,
110
  original_email,
111
  masked_email,
112
  json.dumps(masked_entities), # Convert to JSON string for SQLite
113
  category,
114
+ datetime.now().isoformat()
 
115
  )
116
  )
117
 
118
  conn.commit()
119
+ return email_id
120
  except Exception as e:
121
  conn.rollback()
122
  raise e
 
134
  Returns:
135
  Dictionary with email data or None if not found or access_key is invalid
136
  """
137
+ # Verify the access key matches the global access key
138
+ if access_key != self.access_key:
139
+ return None
140
+
141
  conn = self._get_connection()
142
  try:
143
  cursor = conn.cursor()
144
 
145
  cursor.execute(
146
  'SELECT id, original_email, masked_email, masked_entities, category, created_at '
147
+ 'FROM emails WHERE id = ?',
148
+ (email_id,)
149
  )
150
 
151
  row = cursor.fetchone()
 
194
  "category": row[3],
195
  "created_at": row[4]
196
  }
197
+ finally:
198
+ conn.close()
199
+
200
+ def get_email_by_masked_content(self, masked_email: str) -> Optional[Dict[str, Any]]:
201
+ """
202
+ Retrieve the original email using the masked email content.
203
+
204
+ Args:
205
+ masked_email: The masked version of the email to search for
206
+
207
+ Returns:
208
+ Dictionary with full email data or None if not found
209
+ """
210
+ conn = self._get_connection()
211
+ try:
212
+ cursor = conn.cursor()
213
+
214
+ cursor.execute(
215
+ 'SELECT id, original_email, masked_email, masked_entities, category, created_at '
216
+ 'FROM emails WHERE masked_email = ?',
217
+ (masked_email,)
218
+ )
219
+
220
+ row = cursor.fetchone()
221
+ if not row:
222
+ return None
223
+
224
+ return {
225
+ "id": row[0],
226
+ "original_email": row[1],
227
+ "masked_email": row[2],
228
+ "masked_entities": json.loads(row[3]), # Convert from JSON string back to Python dict
229
+ "category": row[4],
230
+ "created_at": row[5]
231
+ }
232
  finally:
233
  conn.close()
main.py CHANGED
@@ -46,9 +46,9 @@ class EmailOutput(BaseModel):
46
  masked_email: str
47
  category_of_the_email: str
48
 
49
- class EmailRetrievalInput(BaseModel):
50
- """Input model for retrieving original email"""
51
- email_id: str
52
  access_key: str
53
 
54
  @app.post("/classify", response_model=EmailOutput)
@@ -63,7 +63,7 @@ async def classify_email(email_input: EmailInput) -> Dict[str, Any]:
63
  The classified email data with masked PII
64
  """
65
  try:
66
- # Process the email to mask PII
67
  processed_data = pii_masker.process_email(email_input.input_email_body)
68
 
69
  # Classify the masked email
@@ -79,35 +79,44 @@ async def classify_email(email_input: EmailInput) -> Dict[str, Any]:
79
  except Exception as e:
80
  raise HTTPException(status_code=500, detail=f"Error processing email: {str(e)}")
81
 
82
- @app.post("/api/v1/original-email/retrieve", response_model=Dict[str, Any])
83
- async def retrieve_original_email_v1(retrieval_input: EmailRetrievalInput) -> Dict[str, Any]:
84
  """
85
- New API endpoint to retrieve the original unmasked email from SQLite database.
86
 
87
  Args:
88
- retrieval_input: The email ID and access key
89
 
90
  Returns:
91
  The original email data with PII information
92
  """
93
  try:
94
- email_data = pii_masker.get_original_email(
95
- retrieval_input.email_id,
96
- retrieval_input.access_key
97
- )
 
 
98
 
99
  if not email_data:
100
- raise HTTPException(status_code=404, detail="Email not found or invalid access key")
101
 
102
  return {
103
  "status": "success",
104
- "data": email_data,
 
 
 
 
 
 
 
105
  "message": "Original email retrieved successfully"
106
  }
107
  except Exception as e:
108
  if isinstance(e, HTTPException):
109
  raise e
110
- raise HTTPException(status_code=500, detail=f"Error retrieving email: {str(e)}")
111
 
112
  @app.get("/health")
113
  async def health_check():
 
46
  masked_email: str
47
  category_of_the_email: str
48
 
49
+ class MaskedEmailInput(BaseModel):
50
+ """Input model for retrieving original email by masked email content"""
51
+ masked_email: str
52
  access_key: str
53
 
54
  @app.post("/classify", response_model=EmailOutput)
 
63
  The classified email data with masked PII
64
  """
65
  try:
66
+ # Process the email to mask PII and store original in database
67
  processed_data = pii_masker.process_email(email_input.input_email_body)
68
 
69
  # Classify the masked email
 
79
  except Exception as e:
80
  raise HTTPException(status_code=500, detail=f"Error processing email: {str(e)}")
81
 
82
+ @app.post("/api/v1/unmask-email", response_model=Dict[str, Any])
83
+ async def unmask_email(masked_email_input: MaskedEmailInput) -> Dict[str, Any]:
84
  """
85
+ Retrieve the original unmasked email using the masked email content from the classify response.
86
 
87
  Args:
88
+ masked_email_input: Contains the masked email and access key
89
 
90
  Returns:
91
  The original email data with PII information
92
  """
93
  try:
94
+ # Verify access key matches the global access key
95
+ if masked_email_input.access_key != os.environ.get("EMAIL_ACCESS_KEY", "default_secure_access_key"):
96
+ raise HTTPException(status_code=401, detail="Invalid access key")
97
+
98
+ # Retrieve the original email using the masked content
99
+ email_data = pii_masker.get_original_by_masked_email(masked_email_input.masked_email)
100
 
101
  if not email_data:
102
+ raise HTTPException(status_code=404, detail="Original email not found for the provided masked email")
103
 
104
  return {
105
  "status": "success",
106
+ "data": {
107
+ "id": email_data["id"],
108
+ "original_email": email_data["original_email"],
109
+ "masked_email": email_data["masked_email"],
110
+ "masked_entities": email_data["masked_entities"],
111
+ "category": email_data.get("category", ""),
112
+ "created_at": email_data.get("created_at", "")
113
+ },
114
  "message": "Original email retrieved successfully"
115
  }
116
  except Exception as e:
117
  if isinstance(e, HTTPException):
118
  raise e
119
+ raise HTTPException(status_code=500, detail=f"Error retrieving original email: {str(e)}")
120
 
121
  @app.get("/health")
122
  async def health_check():
utils.py CHANGED
@@ -329,21 +329,20 @@ class PIIMasker:
329
  # Mask the email
330
  masked_email, entity_info = self.mask_text(email_text)
331
 
332
- # Store the email in the SQLite database
333
- email_id, access_key = self.db.store_email(
334
  original_email=email_text,
335
  masked_email=masked_email,
336
  masked_entities=entity_info
337
  )
338
 
339
- # Return the processed data with database references
340
  return {
341
- "input_email_body": email_text, # Return original input for compatibility
342
  "list_of_masked_entities": entity_info,
343
  "masked_email": masked_email,
344
  "category_of_the_email": "",
345
- "email_id": email_id,
346
- "access_key": access_key # Include access key for immediate retrieval if needed
347
  }
348
 
349
  def get_original_email(self, email_id: str, access_key: str) -> Optional[Dict[str, Any]]:
@@ -369,4 +368,16 @@ class PIIMasker:
369
  Returns:
370
  The masked email data or None if not found
371
  """
372
- return self.db.get_email_by_id(email_id)
 
 
 
 
 
 
 
 
 
 
 
 
 
329
  # Mask the email
330
  masked_email, entity_info = self.mask_text(email_text)
331
 
332
+ # Store the email in the SQLite database - only get back email_id now
333
+ email_id = self.db.store_email(
334
  original_email=email_text,
335
  masked_email=masked_email,
336
  masked_entities=entity_info
337
  )
338
 
339
+ # Return the processed data with just the email_id
340
  return {
341
+ "input_email_body": email_text, # Return original input for API compatibility
342
  "list_of_masked_entities": entity_info,
343
  "masked_email": masked_email,
344
  "category_of_the_email": "",
345
+ "email_id": email_id
 
346
  }
347
 
348
  def get_original_email(self, email_id: str, access_key: str) -> Optional[Dict[str, Any]]:
 
368
  Returns:
369
  The masked email data or None if not found
370
  """
371
+ return self.db.get_email_by_id(email_id)
372
+
373
+ def get_original_by_masked_email(self, masked_email: str) -> Optional[Dict[str, Any]]:
374
+ """
375
+ Retrieve the original unmasked email using the masked email content.
376
+
377
+ Args:
378
+ masked_email: The masked version of the email to search for
379
+
380
+ Returns:
381
+ The original email data or None if not found
382
+ """
383
+ return self.db.get_email_by_masked_content(masked_email)