Refactor job writing agent: Improved code structure by introducing new data loading classes, enhancing logging practices, and ensuring safe environment variable access. Updated workflow logic for better readability and maintainability.
a01026b | # -*- coding: utf-8 -*- | |
| """ | |
| Job Description Loader Module | |
| This module provides the JobDescriptionLoader class responsible for loading and parsing | |
| job description files and URLs, extracting both the job posting text and company name. | |
| """ | |
| import logging | |
| from typing import Callable, Any, Optional, Tuple, Awaitable | |
| from langchain_core.documents import Document | |
| from job_writing_agent.utils.document_processing import get_job_description | |
| from job_writing_agent.utils.logging.logging_decorators import ( | |
| log_async, | |
| log_errors, | |
| ) | |
| logger = logging.getLogger(__name__) | |
class JobDescriptionLoader:
    """
    Load and parse job description documents.

    The parser callable is injected through the constructor, so the loader
    can be exercised with a stub parser in tests or pointed at a different
    parsing backend without modifying this class.

    Example:
        >>> loader = JobDescriptionLoader()
        >>> job_text, company = await loader.parse_job_description("https://example.com/job")
        >>>
        >>> # With custom parser for testing
        >>> async def mock_parser(source):
        ...     return Document(page_content="test", metadata={"company_name": "TestCo"})
        >>> loader = JobDescriptionLoader(parser=mock_parser)
    """

    def __init__(
        self, parser: Optional[Callable[[Any], Awaitable["Document"]]] = None
    ):
        """
        Initialize the loader with an optional injected parser.

        Parameters
        ----------
        parser: Optional[Callable[[Any], Awaitable[Document]]]
            Async callable used to parse a job description source. When it is
            omitted (or falsy) the module-level `get_job_description` is used.

            The parser should:
            - Take one argument (the source, e.g. a URL or file path)
            - Return an awaitable that resolves to a Document-like object
            - Expose `page_content` (str) and `metadata` (dict) on that object
        """
        self._parser = parser or get_job_description

    async def parse_job_description(
        self, job_description_source: Any
    ) -> Tuple[str, str]:
        """
        Parse a job description and return its text and company name.

        The source is handed to the configured parser; the posting text is
        read from the resulting document's `page_content` and the company
        name from its `metadata["company_name"]`. Missing or empty values are
        logged and returned as empty strings rather than raising.

        Parameters
        ----------
        job_description_source: Any
            Source accepted by the parser function (URL, file path, etc.).

        Returns
        -------
        Tuple[str, str]
            A tuple of (job_posting_text, company_name). Either element is an
            empty string when the corresponding value is missing or empty.

        Raises
        ------
        AssertionError
            If job_description_source is None.
        Exception
            Whatever the parser raises is propagated unchanged.
        """
        logger.info("Parsing job description from: %s", job_description_source)

        # Raised explicitly instead of via an `assert` statement so the check
        # is still enforced when Python runs with -O (which strips asserts).
        if job_description_source is None:
            raise AssertionError("Job description source cannot be None")

        document = await self._parser(job_description_source)

        # Extract company name from metadata.
        company_name = ""
        metadata = getattr(document, "metadata", None)
        if isinstance(metadata, dict):
            # `or ""` also covers a key that is present but set to None, so
            # the documented str return type always holds.
            company_name = metadata.get("company_name") or ""
            if not company_name:
                logger.warning("Company name not found in job description metadata.")
        else:
            logger.warning(
                "Metadata attribute missing or not a dict in job description document."
            )

        # Extract job posting text.
        job_posting_text = ""
        if hasattr(document, "page_content"):
            job_posting_text = document.page_content or ""
            if not job_posting_text:
                logger.info("Parsed job posting text is empty.")
        else:
            logger.warning(
                "page_content attribute missing in job description document."
            )

        return job_posting_text, company_name

    async def _load_job_description(self, jd_source: Any) -> Tuple[str, str]:
        """
        Load job description text and company name, raising if the source is missing.

        Thin wrapper around parse_job_description() that rejects any falsy
        source (None, empty string, ...) before parsing.

        Parameters
        ----------
        jd_source: Any
            Source for the job description (URL, file path, etc.).

        Returns
        -------
        Tuple[str, str]
            A tuple of (job_posting_text, company_name).

        Raises
        ------
        ValueError
            If jd_source is None or otherwise falsy.
        """
        if not jd_source:
            raise ValueError("job_description_source is required")
        return await self.parse_job_description(jd_source)

    async def get_application_form_details(self, job_description_source: Any) -> None:
        """
        Placeholder for future extraction of application form details.

        Not implemented yet: the argument is ignored and None is returned.

        Parameters
        ----------
        job_description_source: Any
            Source of the job description or application form.
        """
        # TODO: Implement form field extraction
        return None

    async def _prompt_user_for_job_description(self) -> str:
        """
        Prompt the user on stdin for the job description text.

        The built-in input() blocks, so it is run in the event loop's default
        executor; other tasks keep running while the user is typing.

        Returns
        -------
        str
            The line entered by the user.
        """
        # Function-local import keeps this block self-contained.
        import asyncio

        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(
            None, input, "Please paste the job description in text format: "
        )