RCaz committed on
Commit
cf6a965
·
1 Parent(s): c359aed

encryption of data added

Browse files
.github/workflows/tests_prod.yml ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Continuous integration: install dependencies, regenerate the encrypted
# data set, then run the pytest suite.
name: CI

on:
  push:
    branches: [ main ]
  pull_request:

jobs:
  test:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
          # cryptography and tqdm are imported by data/encryption.py and the
          # test module but are not listed in requirements.txt.
          pip install pytest cryptography tqdm

      - name: Update and encrypt-data for prod
        # The script lives in data/, and its save_key() reads an existing
        # .env, so make sure one is present on a fresh runner.
        # (alternative: bash encrypt-data.sh)
        run: |
          touch .env
          python data/encryption.py

      - name: Run tests
        env:
          PYTHONPATH: .
          ENCRYPTION_KEY: ${{ secrets.TEST_ENCRYPTION_KEY }}
        run: |
          python -m pytest -v
README.md CHANGED
@@ -14,4 +14,4 @@ short_description: a bot that answer questions about professional projets
14
  license: mit
15
  ---
16
 
17
- An example chatbot using [Gradio](https://gradio.app), [`huggingface_hub`](https://huggingface.co/docs/huggingface_hub/v0.22.2/en/index), and the [Hugging Face Inference API](https://huggingface.co/docs/api-inference/index).
 
14
  license: mit
15
  ---
16
 
17
+ A chatbot with RAG (retrieval-augmented generation) for a curriculum vitae.
data/encryption.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from cryptography.fernet import Fernet
3
+ from tqdm import tqdm
4
+
5
def get_key():
    """Create a new Fernet key together with the cipher built from it.

    Returns:
        tuple: ``(key, fernet)`` where ``key`` is the value produced by
        ``Fernet.generate_key()`` and ``fernet`` is ``Fernet(key)``.
    """
    secret = Fernet.generate_key()
    return secret, Fernet(secret)
13
+
14
def encrypt(fernet, paths: list, output_dir: str = 'data/encrypted_data'):
    """Encrypt every regular file found directly inside each folder of *paths*.

    Args:
        fernet: Object exposing ``encrypt(bytes) -> bytes`` (a ``Fernet``
            instance in this module).
        paths: Folders whose direct children are encrypted. Sub-folders are
            skipped rather than opened as files.
        output_dir: Folder receiving the encrypted copies; created if needed.
            Defaults to the location the original implementation used.

    Returns:
        bool: Always ``True`` once every folder has been processed.

    Raises:
        OSError: If a folder cannot be listed or a file cannot be read/written.

    Note:
        The output is flat: two input files sharing a name in different
        folders are written to the same destination, and the later one wins.
    """
    # Loop-invariant, so create the destination once up front.
    os.makedirs(output_dir, exist_ok=True)

    for folder_data in paths:
        # Sorted so the processing (and overwrite) order is deterministic.
        for file in sorted(os.listdir(folder_data)):
            file_path = os.path.join(folder_data, file)

            # os.listdir also returns directories; open() would fail on them.
            if not os.path.isfile(file_path):
                continue

            with open(file_path, 'rb') as f:
                original = f.read()

            encrypted = fernet.encrypt(original)

            with open(os.path.join(output_dir, file), 'wb') as f:
                f.write(encrypted)
    return True
35
+
36
def save_key(key, env_path: str = ".env"):
    """Append the encryption key to a dotenv file as ``secret_key = '<key>'``.

    Args:
        key: Key as ``bytes``; it is decoded before being written.
        env_path: Dotenv file to extend. Defaults to ``.env`` in the current
            working directory, as in the original implementation.

    Returns:
        bool: Always ``True`` after the entry has been written.

    Raises:
        OSError: If the file cannot be opened for appending.
    """
    # Append mode creates the file when it is missing (the previous
    # read-then-rewrite approach raised FileNotFoundError) and leaves the
    # existing entries untouched.
    with open(env_path, "a", encoding="utf-8") as f:
        f.write(f"\n\nsecret_key = '{key.decode()}'")

    return True
48
+
49
+
50
if __name__ == "__main__":
    # Script entry point: make a fresh key, encrypt both data folders with
    # it, then record the key in .env. Paths are relative to the current
    # working directory.
    source_folders = ["data/CV_competences", "data/research_paper"]
    key, fernet = get_key()
    encrypt(fernet, paths=source_folders)
    save_key(key)
dockerfile ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Dev-container Python 3.11 image on Debian bookworm.
FROM mcr.microsoft.com/devcontainers/python:1-3.11-bookworm

WORKDIR /workspace

# Copy only the dependency manifests first; the trailing * is a glob so the
# names are matched as patterns rather than required literally.
COPY packages.txt* requirements.txt* ./

# System packages: installed only when packages.txt was copied in.
RUN if [ -f packages.txt ]; then \
        apt-get update && \
        apt-get upgrade -y && \
        xargs apt-get install -y < packages.txt && \
        rm -rf /var/lib/apt/lists/*; \
    fi

# Python packages: requirements.txt is optional, streamlit is always installed.
RUN if [ -f requirements.txt ]; then \
        pip3 install --user -r requirements.txt; \
    fi && \
    pip3 install --user streamlit

# Application sources.
COPY . .

EXPOSE 8501

# Probe the health endpoint on the port the server is started on below.
HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health || exit 1

CMD ["streamlit", "run", "app/Welcome.py", "--server.enableCORS", "false", "--server.enableXsrfProtection", "false", "--server.port", "8501", "--server.address", "0.0.0.0"]
requirements.txt CHANGED
@@ -1,2 +1,4 @@
1
  langchain-core
2
  gradio
 
 
 
1
  langchain-core
2
  gradio
3
+ pytest-cov
4
+ pytest
tests/test_encrypt_files.py ADDED
@@ -0,0 +1,399 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pytest
3
+ import tempfile
4
+ import shutil
5
+ from pathlib import Path
6
+ from cryptography.fernet import Fernet
7
+
8
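# NOTE: the helper functions defined below are local re-implementations, not
# imports from the production module (data/encryption.py), so these tests do
# not exercise the production code. Their signatures also differ from the
# production ones (e.g. encrypt takes root_path/output_dir here), and decrypt
# and get_all_files are defined only in this file.
# Intended import once the production module exposes the same API:
# from encryption import get_key, encrypt, save_key, decrypt, get_all_files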
15
def get_key():
    """Return ``(key, fernet)``: a newly generated Fernet key and its cipher."""
    secret = Fernet.generate_key()
    return secret, Fernet(secret)
19
+
20
+
21
def get_all_files(root_path: str = "./"):
    """Recursively list every file below *root_path*.

    Args:
        root_path: Directory to walk.

    Returns:
        list: Paths built by joining each walked directory with each of its
        file names; empty when nothing is found.
    """
    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(root_path)
        for name in names
    ]
28
+
29
+
30
def encrypt(fernet, root_path: str = "./", output_dir: str = "./encrypted_data"):
    """Encrypt every file under *root_path* into a mirrored tree in *output_dir*.

    Args:
        fernet: Object exposing ``encrypt(bytes) -> bytes``.
        root_path: Directory walked recursively for input files.
        output_dir: Destination root; each file keeps its path relative to
            *root_path*.

    Returns:
        bool: ``False`` when no input file is found or an unexpected error
        occurs; ``True`` otherwise. Files that cannot be read or written
        (``OSError``) are skipped on a best-effort basis.
    """
    try:
        output_root = os.path.abspath(output_dir)
        files = []
        for root, dirs, filenames in os.walk(root_path):
            # With the default arguments the output directory sits inside the
            # input tree; prune it so a re-run does not encrypt its own
            # previous output into encrypted_data/encrypted_data/...
            dirs[:] = [
                d for d in dirs
                if os.path.abspath(os.path.join(root, d)) != output_root
            ]
            files.extend(os.path.join(root, name) for name in filenames)

        if not files:
            return False

        os.makedirs(output_dir, exist_ok=True)

        for file_path in files:
            try:
                with open(file_path, 'rb') as f:
                    original = f.read()
                encrypted = fernet.encrypt(original)
                relative_path = os.path.relpath(file_path, root_path)
                output_path = os.path.join(output_dir, relative_path)
                os.makedirs(os.path.dirname(output_path), exist_ok=True)
                with open(output_path, 'wb') as f:
                    f.write(encrypted)
            except OSError:
                # Best effort: an unreadable/unwritable file must not stop
                # the others. Non-I/O errors fall through to the outer
                # handler instead of being silently ignored.
                continue
        return True
    except Exception:
        return False
53
+
54
+
55
def save_key(key, env_path: str = ".env"):
    """Add a ``SECRET_KEY='<key>'`` entry to the end of a dotenv file.

    Args:
        key: Key as ``bytes``; decoded before writing.
        env_path: Dotenv file to extend; created when it does not exist.

    Returns:
        bool: ``True`` on success, ``False`` if any exception occurred.
    """
    try:
        existing = []
        if os.path.exists(env_path):
            with open(env_path, "r") as f:
                existing = f.readlines()

        # Terminate an unterminated last line so the new entry starts cleanly.
        if existing and not existing[-1].endswith('\n'):
            existing.append('\n')
        existing.append(f"\nSECRET_KEY='{key.decode()}'\n")

        with open(env_path, "w") as f:
            f.writelines(existing)
    except Exception:
        return False
    return True
72
+
73
+
74
def decrypt(fernet, encrypted_path: str, output_path: str):
    """Decrypt one file and write the plaintext to *output_path*.

    Args:
        fernet: Object exposing ``decrypt(bytes) -> bytes``.
        encrypted_path: File holding the ciphertext.
        output_path: Destination file; missing parent folders are created.

    Returns:
        bool: ``True`` on success, ``False`` if reading, decrypting or
        writing raised an exception (for example when the key is wrong).
    """
    try:
        with open(encrypted_path, 'rb') as f:
            encrypted = f.read()
        decrypted = fernet.decrypt(encrypted)

        # A bare file name has an empty dirname, and os.makedirs('') raises;
        # only create the parent folder when there is one.
        parent = os.path.dirname(output_path)
        if parent:
            os.makedirs(parent, exist_ok=True)

        with open(output_path, 'wb') as f:
            f.write(decrypted)
        return True
    except Exception:
        return False
85
+
86
+
87
+ # ============================================================================
88
+ # UNIT TESTS
89
+ # ============================================================================
90
+
91
class TestGetKey:
    """Checks on the key/cipher pair produced by get_key."""

    def test_get_key_returns_tuple(self):
        """The result is a tuple of exactly two items."""
        pair = get_key()
        assert isinstance(pair, tuple)
        assert len(pair) == 2

    def test_get_key_returns_valid_key(self):
        """The key is bytes and the cipher is a Fernet instance."""
        key, fernet = get_key()
        assert isinstance(key, bytes)
        assert isinstance(fernet, Fernet)

    def test_key_can_encrypt_decrypt(self):
        """A message survives an encrypt/decrypt round trip."""
        _, fernet = get_key()
        message = b"Test message"
        token = fernet.encrypt(message)
        assert fernet.decrypt(token) == message

    def test_keys_are_unique(self):
        """Two consecutive calls yield different keys."""
        first_key, _ = get_key()
        second_key, _ = get_key()
        assert first_key != second_key
119
+
120
+
121
class TestGetAllFiles:
    """Checks on recursive file discovery by get_all_files."""

    @pytest.fixture
    def temp_dir(self):
        """Yield a temp folder holding two files plus one file in a sub-folder."""
        root = tempfile.mkdtemp()

        for name, text in (("file1.txt", "content1"), ("file2.txt", "content2")):
            Path(root, name).write_text(text)

        nested = Path(root, "subdir")
        nested.mkdir()
        Path(nested, "file3.txt").write_text("content3")

        yield root

        # Teardown: remove the whole tree.
        shutil.rmtree(root)

    def test_finds_all_files(self, temp_dir):
        """Top-level and nested files are all reported."""
        assert len(get_all_files(temp_dir)) == 3

    def test_empty_directory(self):
        """An empty folder yields an empty list."""
        with tempfile.TemporaryDirectory() as empty_dir:
            assert get_all_files(empty_dir) == []

    def test_returns_list(self, temp_dir):
        """The return value is a list."""
        assert isinstance(get_all_files(temp_dir), list)
158
+
159
+
160
class TestSaveKey:
    """Checks on how save_key writes the key into a dotenv file."""

    @pytest.fixture
    def temp_env_file(self):
        """Yield the path of an empty temp ``.env`` file and delete it afterwards."""
        handle, env_path = tempfile.mkstemp(suffix=".env")
        os.close(handle)
        yield env_path
        if os.path.exists(env_path):
            os.remove(env_path)

    def test_save_key_creates_file(self, temp_env_file):
        """The file is created when it does not exist yet."""
        os.remove(temp_env_file)  # start from a missing file
        key, _ = get_key()
        assert save_key(key, temp_env_file) is True
        assert os.path.exists(temp_env_file)

    def test_save_key_appends_to_existing(self, temp_env_file):
        """Existing entries are kept and the key is added after them."""
        with open(temp_env_file, "w") as f:
            f.write("EXISTING_VAR=value\n")

        key, _ = get_key()
        save_key(key, temp_env_file)

        with open(temp_env_file, "r") as f:
            written = f.read()

        assert "EXISTING_VAR=value" in written
        assert "SECRET_KEY=" in written

    def test_saved_key_format(self, temp_env_file):
        """The entry is written as SECRET_KEY='<decoded key>'."""
        key, _ = get_key()
        save_key(key, temp_env_file)

        with open(temp_env_file, "r") as f:
            written = f.read()

        assert f"SECRET_KEY='{key.decode()}'" in written
204
+
205
+
206
class TestDecrypt:
    """Checks that decrypt succeeds with the right key and fails with another."""

    @pytest.fixture
    def encrypted_file(self):
        """Yield ``(path, fernet, plaintext)`` for a temp file holding ciphertext."""
        _, fernet = get_key()
        handle, path = tempfile.mkstemp()

        plaintext = b"Test content to encrypt"
        with os.fdopen(handle, 'wb') as f:
            f.write(fernet.encrypt(plaintext))

        yield path, fernet, plaintext

        if os.path.exists(path):
            os.remove(path)

    def test_decrypt_success(self, encrypted_file):
        """Decrypting with the original key restores the plaintext."""
        encrypted_path, fernet, original_content = encrypted_file

        # Reserve an output path; the file itself is removed in ``finally``.
        with tempfile.NamedTemporaryFile(delete=False) as output:
            output_path = output.name

        try:
            assert decrypt(fernet, encrypted_path, output_path) is True

            with open(output_path, 'rb') as f:
                assert f.read() == original_content
        finally:
            if os.path.exists(output_path):
                os.remove(output_path)

    def test_decrypt_wrong_key(self, encrypted_file):
        """Decrypting with an unrelated key reports failure."""
        encrypted_path, _, _ = encrypted_file
        _, wrong_fernet = get_key()  # unrelated key

        with tempfile.NamedTemporaryFile(delete=False) as output:
            output_path = output.name

        try:
            assert decrypt(wrong_fernet, encrypted_path, output_path) is False
        finally:
            if os.path.exists(output_path):
                os.remove(output_path)
259
+
260
+
261
+ # ============================================================================
262
+ # INTEGRATION TESTS
263
+ # ============================================================================
264
+
265
class TestEncryptionWorkflow:
    """End-to-end checks chaining key generation, encryption, saving and decryption."""

    @pytest.fixture
    def test_environment(self):
        """Yield ``(source_dir, output_dir, env_path)`` with three sample files."""
        source_dir = tempfile.mkdtemp()
        output_dir = tempfile.mkdtemp()
        env_file = tempfile.NamedTemporaryFile(delete=False, suffix=".env")
        env_file.close()

        # Two top-level files and one nested file.
        Path(source_dir, "test1.txt").write_text("Content 1")
        Path(source_dir, "test2.csv").write_text("a,b,c\n1,2,3")
        nested = Path(source_dir, "subdir")
        nested.mkdir()
        Path(nested, "test3.json").write_text('{"key": "value"}')

        yield source_dir, output_dir, env_file.name

        # Teardown.
        for folder in (source_dir, output_dir):
            shutil.rmtree(folder, ignore_errors=True)
        if os.path.exists(env_file.name):
            os.remove(env_file.name)

    def test_full_encryption_workflow(self, test_environment):
        """Generate a key, encrypt the tree, then persist the key."""
        source_dir, output_dir, env_file = test_environment
        key, fernet = get_key()

        assert encrypt(fernet, source_dir, output_dir) is True
        assert len(get_all_files(output_dir)) == 3

        assert save_key(key, env_file) is True
        with open(env_file, "r") as f:
            saved = f.read()
        assert "SECRET_KEY=" in saved

    def test_encrypt_decrypt_roundtrip(self, test_environment):
        """A file encrypted then decrypted matches its original content."""
        source_dir, output_dir, _ = test_environment
        _, fernet = get_key()

        encrypt(fernet, source_dir, output_dir)

        encrypted_file = os.path.join(output_dir, "test1.txt")
        decrypted_dir = tempfile.mkdtemp()
        decrypted_file = os.path.join(decrypted_dir, "test1.txt")

        try:
            decrypt(fernet, encrypted_file, decrypted_file)

            with open(decrypted_file, 'r') as f:
                assert f.read() == "Content 1"
        finally:
            shutil.rmtree(decrypted_dir, ignore_errors=True)

    def test_preserves_directory_structure(self, test_environment):
        """Nested files keep their relative location in the output tree."""
        source_dir, output_dir, _ = test_environment
        _, fernet = get_key()

        encrypt(fernet, source_dir, output_dir)

        assert os.path.exists(os.path.join(output_dir, "subdir", "test3.json"))

    def test_empty_directory_handling(self):
        """Encrypting an empty folder reports False."""
        with tempfile.TemporaryDirectory() as source_dir:
            with tempfile.TemporaryDirectory() as output_dir:
                _, fernet = get_key()
                assert encrypt(fernet, source_dir, output_dir) is False
360
+
361
+
362
+ # ============================================================================
363
+ # PARAMETRIZED TESTS
364
+ # ============================================================================
365
+
366
class TestEncryptionWithDifferentFileTypes:
    """Parametrized checks that encryption works for text and binary payloads."""

    @pytest.mark.parametrize("filename,content", [
        ("test.txt", b"Plain text content"),
        ("test.json", b'{"key": "value"}'),
        ("test.csv", b"a,b,c\n1,2,3"),
        ("test.bin", bytes(range(256))),
        ("test.pdf", b"%PDF-1.4\n%\xe2\xe3\xcf\xd3"),
    ])
    def test_encrypt_different_file_types(self, filename, content):
        """Each payload is encrypted to a same-named file and can be decrypted back."""
        with tempfile.TemporaryDirectory() as source_dir:
            with tempfile.TemporaryDirectory() as output_dir:
                # Create the input file.
                Path(source_dir, filename).write_bytes(content)

                key, fernet = get_key()
                result = encrypt(fernet, source_dir, output_dir)
                assert result is True

                # The encrypted copy keeps the original file name.
                encrypted_path = Path(output_dir, filename)
                assert encrypted_path.exists()

                encrypted_content = encrypted_path.read_bytes()
                assert encrypted_content != content

                # "Different from the input" alone would also pass for a
                # corrupted file; confirm it really is valid ciphertext.
                assert fernet.decrypt(encrypted_content) == content
396
+
397
+
398
if __name__ == "__main__":
    # Allow running this file directly: verbose output, short tracebacks.
    pytest_args = [__file__, "-v", "--tb=short"]
    pytest.main(pytest_args)