iris_backend / backend /tests /test_extraction.py
Muhammed Sameer
Initial commit - Iris Full (under development)
ea9ca44
import os
import re
import pytest
from src.extraction.resume_extractor import extract_resume_information_as_lists, process_resume_file
# -------- Helpers -------- #
def is_valid_list_format(block: str) -> bool:
    """
    Report whether ``block``, ignoring surrounding whitespace, is wrapped in
    square brackets like a Python list, e.g. ["a", "b"] or [].

    Only the outer brackets are checked; the contents are not parsed.
    """
    trimmed = block.strip()
    return trimmed.startswith("[") and trimmed.endswith("]")
# -------- Tests -------- #
def test_extract_from_text(monkeypatch):
    """
    Test extracting entities from a small mock resume text.

    The extractor is replaced with a stub so that no OpenAI request is made.
    The stub is installed on the ``resume_extractor`` module and the call is
    made through that same module, so the patched attribute is the one that
    actually runs. A name bound with ``from ... import ...`` at the top of a
    file keeps pointing at the original function after ``monkeypatch.setattr``
    and would bypass the stub.
    """
    # Function-scope import: a module handle makes the attribute lookup happen
    # at call time, after the stub has been installed.
    import src.extraction.resume_extractor as resume_extractor

    mock_resume = "John Doe\nSkills: Python, SQL\nEducation: B.Tech in Computer Science"
    mock_output = """Hard Skills:
["Python", "SQL"]
Soft Skills:
[]
Work Experience:
[]
Education:
["B.Tech in Computer Science"]
Certifications:
[]
Projects:
[]"""

    # Monkeypatch function to bypass API
    monkeypatch.setattr(
        resume_extractor,
        "extract_resume_information_as_lists",
        lambda text: mock_output,
    )
    extracted = resume_extractor.extract_resume_information_as_lists(mock_resume)

    # Ensure all categories exist
    expected_headings = (
        "Hard Skills:",
        "Soft Skills:",
        "Work Experience:",
        "Education:",
        "Certifications:",
        "Projects:",
    )
    for heading in expected_headings:
        assert heading in extracted

    # Validate that at least one list block is present and that every block
    # found is list-shaped; all() alone would pass on an empty match list.
    matches = re.findall(r'\[.*?\]', extracted, re.DOTALL)
    assert matches, "expected at least one list block in the extracted text"
    assert all(is_valid_list_format(m) for m in matches)
def test_process_resume_file(tmp_path, monkeypatch):
    """
    End-to-end check of file processing: a resume file goes in and an
    entities file comes out, with the extractor replaced by a stub.
    """
    # Canned extractor response, one heading followed by its list per pair
    canned_lines = [
        "Hard Skills:",
        '["Python", "SQL"]',
        "Soft Skills:",
        "[]",
        "Work Experience:",
        "[]",
        "Education:",
        '["B.Tech in CS"]',
        "Certifications:",
        "[]",
        "Projects:",
        "[]",
    ]
    mock_output = "\n".join(canned_lines)

    def _stub_extractor(text):
        return mock_output

    # Swap the extractor on its defining module before processing the file
    monkeypatch.setattr(
        "src.extraction.resume_extractor.extract_resume_information_as_lists",
        _stub_extractor,
    )

    # Fake input resume written under pytest's temporary directory
    source_path = tmp_path / "resume1.txt"
    source_path.write_text("Skills: Python, SQL\nEducation: B.Tech in CS")

    # Run the processing step against the temporary input and output paths
    entities_dir = tmp_path / "entities"
    process_resume_file(str(source_path), str(entities_dir))

    # The output file must exist under the expected name
    result_path = entities_dir / "resume1_entities.txt"
    assert result_path.exists()

    # Spot-check the written contents
    written = result_path.read_text()
    for fragment in ("Hard Skills:", "Soft Skills:", "[]"):
        assert fragment in written