import logging
import os
from typing import Optional

from mistralai import Mistral

from model import JobDocument
from .constant import MISTRAL_MODEL

logger = logging.getLogger(__name__)
# NOTE: this configures the root logger as an import-time side effect; it is
# kept so that importing this module behaves exactly as before.
logging.basicConfig(level=logging.INFO)


class JobProcessor:
    """Send job description text to a Mistral chat model for structured extraction.

    The client is created once in ``__init__`` and reused for every call to
    :meth:`get_job_content`.
    """

    def __init__(self, api_key: Optional[str] = None):
        """
        Create the Mistral client.

        Parameters
        ----------
        api_key: str, optional
            The Mistral API key. When ``None``, the value of the
            ``MISTRAL_API_KEY`` environment variable is used instead.

        Raises
        ------
        ValueError
            If no non-empty API key is available (neither passed in nor set
            in the environment).
        """
        if api_key is None:
            # Use the default API key from environment variable
            api_key = os.environ.get("MISTRAL_API_KEY")
        if not api_key:
            raise ValueError("API key for Mistral is not set.")
        self.client = Mistral(api_key=api_key)

    def get_job_content(self, job_text: str) -> dict:
        """
        Process the job description text and return structured content.

        Parameters
        ----------
        job_text: str
            The job description text.

        Returns
        -------
        dict:
            A dictionary of the form
            ``{"job": {"description": <job_text>, "annotation": <content>}}``
            where ``description`` is the input text, unmodified, and
            ``annotation`` is the message content of the first choice
            returned by the model.

        Raises
        ------
        ValueError
            If ``job_text`` is empty or contains only whitespace.
        """
        # Reject whitespace-only input as well as empty/None input: there is
        # nothing to extract from it, so do not spend an API call on it.
        if not job_text or not job_text.strip():
            raise ValueError("Job description text is empty.")

        response = {
            "job": {
                "description": job_text,
            }
        }

        # Ask the chat model for output structured according to JobDocument.
        # (The log message below mentions OCR; the call itself is a chat
        # completion with a structured response format.)
        logger.info("Processing OCR for job description")
        chat_response = self.client.chat.parse(
            model=MISTRAL_MODEL,
            messages=[
                {
                    "role": "system",
                    "content": "Extract the job title, responsibilities, requirements, location, and salary range from the job description.",
                },
                {"role": "user", "content": job_text},
            ],
            response_format=JobDocument,
            # max_tokens=256,
            temperature=0,  # temperature 0 to keep the extraction as repeatable as possible
        )

        response["job"]["annotation"] = chat_response.choices[0].message.content
        return response


if __name__ == "__main__":
    pass