Spaces:
Sleeping
Sleeping
| import os | |
| import re | |
| import pytest | |
| from src.extraction.resume_extractor import extract_resume_information_as_lists, process_resume_file | |
| # -------- Helpers -------- # | |
def is_valid_list_format(block: str) -> bool:
    """
    Report whether a text block is bracket-delimited like a list literal.

    Surrounding whitespace is ignored. The block passes when, after
    stripping, it starts with ``[`` and ends with ``]``; the text between
    the brackets is not inspected and may span several lines.
    """
    trimmed = block.strip()
    # DOTALL lets '.' cross newlines so multi-line lists are accepted.
    found = re.match(r'^\[.*\]$', trimmed, re.DOTALL)
    return found is not None
| # -------- Tests -------- # | |
def test_extract_from_text(monkeypatch):
    """
    Test the sectioned output format for a small mock resume text.

    The extractor is replaced with a stub so the OpenAI API is not used.
    The stub is installed on the ``src.extraction.resume_extractor`` module,
    so the call below goes through that module attribute. The name bound by
    ``from ... import`` at the top of this file keeps pointing at the
    original function and would bypass the patch.
    """
    # Function-scope import: gives access to the patched module attribute.
    import src.extraction.resume_extractor as resume_extractor

    mock_resume = "John Doe\nSkills: Python, SQL\nEducation: B.Tech in Computer Science"
    mock_output = """Hard Skills:
["Python", "SQL"]
Soft Skills:
[]
Work Experience:
[]
Education:
["B.Tech in Computer Science"]
Certifications:
[]
Projects:
[]"""

    # Monkeypatch function to bypass API
    monkeypatch.setattr(
        resume_extractor,
        "extract_resume_information_as_lists",
        lambda text: mock_output,
    )

    # Look the function up on the module so the stub is what actually runs.
    extracted = resume_extractor.extract_resume_information_as_lists(mock_resume)

    # Ensure all categories exist
    expected_headers = (
        "Hard Skills:",
        "Soft Skills:",
        "Work Experience:",
        "Education:",
        "Certifications:",
        "Projects:",
    )
    for header in expected_headers:
        assert header in extracted

    # Validate that at least one bracketed block exists and that every
    # block found is list-shaped. Without the non-empty check, all() over
    # an empty match list would pass vacuously.
    matches = re.findall(r'\[.*?\]', extracted, re.DOTALL)
    assert matches
    assert all(is_valid_list_format(m) for m in matches)
def test_process_resume_file(tmp_path, monkeypatch):
    """
    Test file processing from an input resume to an output entity file.

    A resume is written under ``tmp_path``, the extractor attribute on
    ``src.extraction.resume_extractor`` is swapped for a stub returning
    canned text, and ``process_resume_file`` is run. The test then checks
    that ``resume1_entities.txt`` exists in the output directory and
    contains the expected fragments.
    """
    # Canned extractor result used in place of the real extraction.
    mock_output = """Hard Skills:
["Python", "SQL"]
Soft Skills:
[]
Work Experience:
[]
Education:
["B.Tech in CS"]
Certifications:
[]
Projects:
[]"""

    def fake_extractor(text):
        return mock_output

    # NOTE(review): presumably process_resume_file resolves the extractor
    # through its own module globals at call time, so this patch reaches it;
    # confirm against the implementation.
    monkeypatch.setattr(
        "src.extraction.resume_extractor.extract_resume_information_as_lists",
        fake_extractor
    )

    # Fake input resume on disk.
    resume_path = tmp_path / "resume1.txt"
    resume_path.write_text("Skills: Python, SQL\nEducation: B.Tech in CS")

    # Run the processing step against a directory inside tmp_path.
    entities_dir = tmp_path / "entities"
    process_resume_file(str(resume_path), str(entities_dir))

    # The output file must exist.
    produced = entities_dir / "resume1_entities.txt"
    assert produced.exists()

    # The output file must contain the expected fragments.
    content = produced.read_text()
    for fragment in ("Hard Skills:", "Soft Skills:", "[]"):
        assert fragment in content