# Cover-letter generator: extracts text from an uploaded PDF/DOCX and asks a
# Hugging Face chat model to draft a cover letter from it.
import os

import docx2txt
from huggingface_hub import InferenceClient
from pypdf import PdfReader
| def ingest_document(stream): | |
| ''' | |
| A function that takes a BytesIO in-memory stream, decodes the content into a string | |
| then returns it. | |
| the stream is either from a pdf or a docx file. | |
| ''' | |
| # get the uploaded document's extension | |
| suffix = stream.name.split(".")[-1] | |
| content_text = "" | |
| match suffix: | |
| # extract text if pdf file with pypdf | |
| case "pdf": | |
| reader = PdfReader(stream) | |
| for i, page in enumerate(reader.pages): | |
| # don't take over 2 pages of the job description (could be too many tokens) | |
| if i > 3: | |
| break | |
| content_text += page.extract_text(extraction_mode="layout", layout_mode_space_vertically=False, layout_mode_scale_weight=1.0) | |
| # extract text if docx file with docx2txt | |
| case "docx": | |
| content_text = docx2txt.process(stream) | |
| return content_text | |
| class CoverLetterMaker(): | |
| def __init__(self, resume, job_desc, model_id="mistralai/Mistral-Nemo-Instruct-2407", language="English", word_count=350): | |
| # model_id is the id of the huggingface repository of the model | |
| self.model_id = model_id | |
| self.language = language | |
| self.word_count = word_count | |
| # prompt messages structure : system role is used in order to supervise the model's behaviour, user role is used to ask for the generation of the cover letter | |
| match self.language: | |
| case "English": | |
| self.messages = [ | |
| { | |
| "role": "system", | |
| "content": "You are a helpful assistant that specializes in helping candidates to write and format their cover letters. Your will use the language they use to speak to you", | |
| }, | |
| { | |
| "role": "user", | |
| "content": "whitout any headers, write in less than " + str(self.word_count) + "words a simple but professional way, a cover letter based on my resume and the description of the job I am applying for below :\n Resume: " + resume + "\n Job description: " + job_desc, | |
| }, | |
| ] | |
| case "Français": | |
| self.messages = [ | |
| { | |
| "role": "system", | |
| "content": "Vous êtes un assistant utile qui se spécialise dans l'aide aux candidats pour rédiger et mettre en forme leurs lettres de motivation.", | |
| }, | |
| { | |
| "role": "user", | |
| "content": "Sans aucune entête, écrivez en moins de " + str(self.word_count*0.75) + "mots et de manière simple mais professionnelle une lettre de motivation basée sur mon CV et l'offre d'emploi du poste auquel je postule ci-dessous :\n CV: " + resume + "\n offre d'emploi: " + job_desc, | |
| }, | |
| ] | |
| # get the token from HF_TOKEN environement variable | |
| hf_token = os.environ.get('HF_TOKEN') | |
| self.api_token = hf_token | |
| self.inference_client = InferenceClient(model=model_id, token=hf_token, timeout=120) | |
| def generate_letter(self): | |
| # convert word count to number of tokens and add a safety margin | |
| token_number = int(self.word_count//0.7) + 150 | |
| # call the inference api and generate answers | |
| data = self.inference_client.chat_completion(self.messages, max_tokens=token_number) | |
| return data.choices[0].message.content |