File size: 973 Bytes
ea9ca44
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
import os
import unittest
from src.preprocess.job_preprocess import preprocess_jobs

class TestJobPreprocessing(unittest.TestCase):
    """Exercise ``preprocess_jobs`` end to end on a single sample job posting."""

    def setUp(self):
        """Create isolated raw/output directories containing one sample posting.

        The fixture lives in a temporary directory rather than the project's
        ``data/jobs`` tree, so a test run leaves no files behind in the
        repository and never hands the real data directory to the code under
        test.
        """
        import tempfile  # local import: only this fixture needs it

        workspace = tempfile.TemporaryDirectory()
        # Register removal first so the directory is deleted even when the
        # remainder of setUp (or the test itself) fails.
        self.addCleanup(workspace.cleanup)

        self.raw_dir = os.path.join(workspace.name, "raw")
        self.out_dir = os.path.join(workspace.name, "preprocessed")
        os.makedirs(self.raw_dir, exist_ok=True)
        # NOTE(review): assumes preprocess_jobs accepts an already-existing
        # output directory -- confirm against its implementation.
        os.makedirs(self.out_dir, exist_ok=True)

        sample_path = os.path.join(self.raw_dir, "sample_job.txt")
        with open(sample_path, "w", encoding="utf-8") as f:
            f.write("Senior Developer!!!\nMust have\tPython, ML & AI skills…")

    def test_job_cleaning(self):
        """The cleaned posting is lowercased and has the ``!!!`` run removed."""
        preprocess_jobs(self.raw_dir, self.out_dir)
        out_file = os.path.join(self.out_dir, "sample_job.txt")
        # Fail with a readable message instead of a raw FileNotFoundError.
        self.assertTrue(
            os.path.isfile(out_file),
            f"preprocess_jobs did not write {out_file}",
        )
        with open(out_file, "r", encoding="utf-8") as f:
            cleaned = f.read()
        self.assertIn("senior developer", cleaned)
        self.assertNotIn("!!!", cleaned)
        self.assertTrue(cleaned.islower())

# Run this module's tests when the file is executed directly as a script;
# importing the module (e.g. via a test runner) does not trigger this.
if __name__ == "__main__":
    unittest.main()