File size: 2,590 Bytes
ea9ca44
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import os
import re
import pytest
from src.extraction.resume_extractor import extract_resume_information_as_lists, process_resume_file

# -------- Helpers -------- #

def is_valid_list_format(block: str) -> bool:
    """
    Report whether ``block`` is bracket-delimited like a Python list literal.

    Surrounding whitespace is ignored. The check is purely textual: the
    trimmed text must begin with ``[`` and end with ``]`` (anything,
    including newlines, may sit in between), e.g. ``["a", "b"]`` or ``[]``.
    The contents are not parsed.
    """
    trimmed = block.strip()
    list_shape = re.compile(r'^\[.*\]$', re.DOTALL)
    return list_shape.match(trimmed) is not None

# -------- Tests -------- #

def test_extract_from_text(monkeypatch):
    """
    Test extracting entities from a small mock resume text.

    The extractor is replaced with a stub through ``monkeypatch`` so the test
    does not hit the OpenAI API. The stub returns a fixed, well-formed
    response; the test then checks that every expected category header is
    present and that each bracketed block has list shape.
    """
    # Import the module (not the function) so the call below looks the
    # attribute up at call time. A name bound earlier with
    # ``from module import func`` keeps pointing at the original function
    # and would bypass the monkeypatch entirely.
    import src.extraction.resume_extractor as resume_extractor

    mock_resume = "John Doe\nSkills: Python, SQL\nEducation: B.Tech in Computer Science"
    mock_output = """Hard Skills:
["Python", "SQL"]

Soft Skills:
[]

Work Experience:
[]

Education:
["B.Tech in Computer Science"]

Certifications:
[]

Projects:
[]"""

    # Monkeypatch function to bypass API
    monkeypatch.setattr(
        resume_extractor,
        "extract_resume_information_as_lists",
        lambda text: mock_output
    )

    # Call via the module so the patched stub is the one that runs.
    extracted = resume_extractor.extract_resume_information_as_lists(mock_resume)

    # Ensure all categories exist
    assert "Hard Skills:" in extracted
    assert "Soft Skills:" in extracted
    assert "Work Experience:" in extracted
    assert "Education:" in extracted
    assert "Certifications:" in extracted
    assert "Projects:" in extracted

    # Validate that list blocks were found and that each one is list-shaped.
    matches = re.findall(r'\[.*?\]', extracted, re.DOTALL)
    # Guard against a vacuous pass: all() over an empty sequence is True.
    assert matches, "expected at least one bracketed list block in the output"
    assert all(is_valid_list_format(m) for m in matches)

def test_process_resume_file(tmp_path, monkeypatch):
    """
    End-to-end check of file processing with the extractor stubbed out.

    A small resume is written under ``tmp_path``, the module-level extractor
    is replaced with a stub returning a canned response, and
    ``process_resume_file`` is run against the file. The test then expects
    ``resume1_entities.txt`` to exist in the output directory and to contain
    the canned category headers and an empty list marker.
    """

    # Canned extractor response used in place of the real extraction
    mock_output = """Hard Skills:
["Python", "SQL"]

Soft Skills:
[]

Work Experience:
[]

Education:
["B.Tech in CS"]

Certifications:
[]

Projects:
[]"""

    def fake_extractor(text):
        # Ignores its input and always returns the canned response.
        return mock_output

    # Swap the extractor on its defining module
    monkeypatch.setattr(
        "src.extraction.resume_extractor.extract_resume_information_as_lists",
        fake_extractor,
    )

    # Build the fake input resume on disk
    source_path = tmp_path / "resume1.txt"
    source_path.write_text("Skills: Python, SQL\nEducation: B.Tech in CS")

    # Run the processing step; paths are passed as plain strings
    target_dir = tmp_path / "entities"
    process_resume_file(str(source_path), str(target_dir))

    # The output file must have been produced
    result_path = target_dir / "resume1_entities.txt"
    assert result_path.exists()

    # ...and must carry the expected markers
    written = result_path.read_text()
    for marker in ("Hard Skills:", "Soft Skills:", "[]"):
        assert marker in written