|
|
import os |
|
|
import logging |
|
|
from mistralai import Mistral |
|
|
|
|
|
from model import JobDocument |
|
|
from .constant import MISTRAL_MODEL |
|
|
|
|
|
# Configure the root logger so INFO-level records are emitted when this
# module is imported (a no-op if the root logger already has handlers).
logging.basicConfig(level=logging.INFO)

# Module-scoped logger, named after this module's import path.
logger = logging.getLogger(__name__)
|
|
|
|
|
class JobProcessor:
    """
    Turn raw job-description text into a structured annotation.

    A single Mistral client is created at construction time and reused by
    every call to ``get_job_content``.
    """

    def __init__(self, api_key: "str | None" = None):
        """
        Build the Mistral client used by this processor.

        Parameters
        ----------
        api_key: str, optional
            The Mistral API key. When omitted (``None``), the value of the
            ``MISTRAL_API_KEY`` environment variable is used instead.

        Raises
        ------
        ValueError
            If no non-empty API key is available.
        """
        if api_key is None:
            # Fall back to the environment only when the caller gave nothing.
            api_key = os.environ.get("MISTRAL_API_KEY")
        if not api_key:
            raise ValueError("API key for Mistral is not set.")
        self.client = Mistral(api_key=api_key)

    def get_job_content(self, job_text: str) -> dict:
        """
        Process the job description text and return structured content.

        Parameters
        ----------
        job_text: str
            The job description text.

        Returns
        -------
        dict: A dictionary with a single ``"job"`` entry holding
            ``"description"`` (the input text, unchanged) and
            ``"annotation"`` (the ``content`` of the first choice's message
            returned by the chat parse call).

        Raises
        ------
        ValueError
            If ``job_text`` is empty or contains only whitespace.
        """
        # Whitespace-only text carries nothing to extract; reject it before
        # spending an API call on it.
        if not job_text or not job_text.strip():
            raise ValueError("Job description text is empty.")

        logger.info("Processing OCR for job description")
        chat_response = self.client.chat.parse(
            model=MISTRAL_MODEL,
            messages=[
                {
                    "role": "system",
                    "content": "Extract the job title, responsibilities, requirements, location, and salary range from the job description.",
                },
                {"role": "user", "content": job_text},
            ],
            # Ask the API to shape its answer after the JobDocument model.
            response_format=JobDocument,
            temperature=0,
        )

        # NOTE(review): `content` is presumably the raw serialized form of the
        # JobDocument answer rather than a parsed object — confirm with the
        # client documentation before relying on its type.
        annotation = chat_response.choices[0].message.content
        return {
            "job": {
                "description": job_text,
                "annotation": annotation,
            }
        }
|
|
|
|
|
if __name__ == "__main__":
    # Intentionally empty: running this module as a script does nothing.
    pass
|
|
|