File size: 1,881 Bytes
4db8ed6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import logging
import os
from typing import Optional

from mistralai import Mistral

from model import JobDocument
from .constant import MISTRAL_MODEL

# Set up root logging at INFO level, then obtain the logger named after this
# module for use by the code below.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class JobProcessor:
    """Turn free-text job descriptions into structured job content.

    Holds a ``Mistral`` client and uses its chat parse endpoint, with
    ``JobDocument`` as the response format, to annotate a job description.
    """

    def __init__(self, api_key: Optional[str] = None):
        """
        Create the processor and its Mistral client.

        Parameters
        ----------
        api_key: str, optional
            API key for Mistral. When omitted or empty, the value of the
            ``MISTRAL_API_KEY`` environment variable is used instead.

        Raises
        ------
        ValueError
            If neither the argument nor the environment variable provides a
            non-empty API key.
        """
        # Treat an explicitly passed empty string like "not provided" so a
        # blank credential can never reach the client unvalidated.
        api_key = api_key or os.environ.get("MISTRAL_API_KEY")
        if not api_key:
            raise ValueError("API key for Mistral is not set.")
        self.client = Mistral(api_key=api_key)

    def get_job_content(self, job_text: str) -> dict:
        """
        Process the job description text and return structured content.

        Parameters
        ----------
        job_text: str
            The job description text.

        Returns
        -------
        dict: A dictionary with a single ``"job"`` entry holding the original
            text under ``"description"`` and the content of the model's first
            choice under ``"annotation"``.

        Raises
        ------
        ValueError
            If ``job_text`` is empty or contains only whitespace.
        """
        # Reject blank input up front: there is nothing to extract from it and
        # the request to the model would be wasted.
        if not job_text or not job_text.strip():
            raise ValueError("Job description text is empty.")

        response = {
            "job": {
                "description": job_text,
            }
        }

        # Ask the chat model for a parse constrained to the JobDocument format.
        # temperature=0 is passed to keep the extraction as repeatable as the
        # model allows.
        logger.info("Processing OCR for job description")
        chat_response = self.client.chat.parse(
            model=MISTRAL_MODEL,
            messages=[
                {
                    "role": "system",
                    "content": "Extract the job title, responsibilities, requirements, location, and salary range from the job description.",
                },
                {"role": "user", "content": job_text},
            ],
            response_format=JobDocument,
            temperature=0,
        )
        # NOTE(review): this stores the message's ``content`` as returned by the
        # client; presumably that is the serialized form of the JobDocument --
        # confirm whether callers expect that or a parsed object.
        response["job"]["annotation"] = chat_response.choices[0].message.content
        return response

# Script entry point: intentionally a no-op, so running this module directly
# does nothing beyond the module-level setup above.
if __name__ == "__main__":
    pass