Spaces:

sameer2026
/

iris_backend

Sleeping

iris_backend / backend /tests /test_extraction.py

Muhammed Sameer

Initial commit - Iris Full (under development)

ea9ca44 4 months ago

2.59 kB

	import os
	import re
	import pytest
	from src.extraction.resume_extractor import extract_resume_information_as_lists, process_resume_file

	# -------- Helpers -------- #

	def is_valid_list_format(block: str) -> bool:
	    """
	    Report whether a block of text is shaped like a Python list literal.

	    Surrounding whitespace is ignored. Only the outer brackets are checked:
	    the text must open with "[" and close with "]" (e.g. ["a", "b"] or []);
	    whatever sits between them, including newlines, is not parsed.
	    """
	    list_shape = re.compile(r'^\[.*\]$', re.DOTALL)
	    candidate = block.strip()
	    return list_shape.match(candidate) is not None

	# -------- Tests -------- #

	def test_extract_from_text(monkeypatch):
	    """
	    Test extracting entities from a small mock resume text.

	    The extractor is replaced with a stub to avoid API usage. The stub is
	    installed on the ``resume_extractor`` module and the call goes through
	    that module attribute: a name imported with ``from ... import ...`` stays
	    bound to the original function, so calling it directly would bypass the
	    patch.
	    """
	    # Local import so the patch target and the call site are the same object.
	    import src.extraction.resume_extractor as resume_extractor

	    mock_resume = "John Doe\nSkills: Python, SQL\nEducation: B.Tech in Computer Science"
	    mock_output = """Hard Skills:
	["Python", "SQL"]

	Soft Skills:
	[]

	Work Experience:
	[]

	Education:
	["B.Tech in Computer Science"]

	Certifications:
	[]

	Projects:
	[]"""

	    # Replace the extractor on the module itself to bypass the API.
	    monkeypatch.setattr(
	        resume_extractor,
	        "extract_resume_information_as_lists",
	        lambda text: mock_output,
	    )

	    # Look the function up on the module at call time so the stub is used.
	    extracted = resume_extractor.extract_resume_information_as_lists(mock_resume)

	    # Ensure all categories exist
	    expected_headings = (
	        "Hard Skills:",
	        "Soft Skills:",
	        "Work Experience:",
	        "Education:",
	        "Certifications:",
	        "Projects:",
	    )
	    for heading in expected_headings:
	        assert heading in extracted

	    # Validate that at least one bracketed block exists and that every one
	    # is list-shaped; without the non-empty check, all() would pass vacuously.
	    matches = re.findall(r'\[.*?\]', extracted, re.DOTALL)
	    assert matches
	    assert all(is_valid_list_format(m) for m in matches)

	def test_process_resume_file(tmp_path, monkeypatch):
	    """
	    End-to-end check of file processing: a resume file goes in, an entity
	    file comes out.

	    The extractor in ``src.extraction.resume_extractor`` is swapped for a stub
	    returning canned text, then ``process_resume_file`` is run against a
	    temporary input file and the resulting ``resume1_entities.txt`` is
	    inspected.
	    """

	    # Canned extractor output returned by the stub
	    mock_output = """Hard Skills:
	["Python", "SQL"]

	Soft Skills:
	[]

	Work Experience:
	[]

	Education:
	["B.Tech in CS"]

	Certifications:
	[]

	Projects:
	[]"""

	    def _stub_extractor(text):
	        # Ignores the resume text and always yields the canned output.
	        return mock_output

	    # Write the fake resume into the pytest-provided temporary directory
	    source_path = tmp_path / "resume1.txt"
	    source_path.write_text("Skills: Python, SQL\nEducation: B.Tech in CS")

	    # Swap the real extractor for the stub
	    monkeypatch.setattr(
	        "src.extraction.resume_extractor.extract_resume_information_as_lists",
	        _stub_extractor,
	    )

	    # Run the file through the pipeline
	    entities_dir = tmp_path / "entities"
	    process_resume_file(str(source_path), str(entities_dir))

	    # The entity file must have been produced
	    result_path = entities_dir / "resume1_entities.txt"
	    assert result_path.exists()

	    # ...and must carry the expected section headings and an empty list
	    written = result_path.read_text()
	    for fragment in ("Hard Skills:", "Soft Skills:", "[]"):
	        assert fragment in written