Spaces:

jaothan
/

letter-generator_FR

Build error

App Files Files Community

letter-generator_FR / utils.py

jaothan

Upload 4 files

9f80f33 verified about 1 year ago

raw

history blame contribute delete

3.61 kB

	from pypdf import PdfReader
	import docx2txt
	from huggingface_hub import InferenceClient
	import os


	def ingest_document(stream, max_pages=4):
	    """Extract the text content of an uploaded PDF or DOCX document.

	    Args:
	        stream: In-memory file-like object (e.g. a BytesIO stream) exposing a
	            ``name`` attribute; the text after the last dot of that name
	            selects the parser.
	        max_pages: Maximum number of leading PDF pages to read. Limits the
	            amount of text (and therefore tokens) taken from long documents.
	            Defaults to 4, the cutoff the original loop applied. Ignored for
	            DOCX files.

	    Returns:
	        The extracted text as a string, or an empty string when the extension
	        is neither ``pdf`` nor ``docx``.
	    """
	    # Compare the extension case-insensitively so "CV.PDF" or "Resume.Docx"
	    # are not silently treated as unsupported files.
	    suffix = stream.name.split(".")[-1].lower()

	    if suffix == "pdf":
	        # extract text with pypdf, one page at a time
	        reader = PdfReader(stream)
	        page_texts = []
	        for index, page in enumerate(reader.pages):
	            # stop once the page budget is used up (could be too many tokens)
	            if index >= max_pages:
	                break
	            page_texts.append(
	                page.extract_text(
	                    extraction_mode="layout",
	                    layout_mode_space_vertically=False,
	                    layout_mode_scale_weight=1.0,
	                )
	            )
	        return "".join(page_texts)

	    if suffix == "docx":
	        # extract text with docx2txt
	        return docx2txt.process(stream)

	    # unsupported extension: keep the original behaviour of returning no text
	    return ""

	class CoverLetterMaker():
	    """Generate a cover letter from a resume and a job description.

	    The constructor builds the chat prompt (a system message that frames the
	    model's behaviour and a user message that asks for the letter) and creates
	    the Hugging Face ``InferenceClient``. ``generate_letter`` sends the prompt
	    and returns the generated text.

	    Args:
	        resume: Text of the candidate's resume.
	        job_desc: Text of the job description.
	        model_id: Id of the Hugging Face repository of the model.
	        language: Prompt language, either ``"English"`` or ``"Français"``.
	        word_count: Target upper bound for the letter length, in words.

	    Raises:
	        ValueError: If ``language`` is not one of the supported languages.
	    """

	    def __init__(self, resume, job_desc, model_id="mistralai/Mistral-Nemo-Instruct-2407", language="English", word_count=350):
	        # model_id is the id of the huggingface repository of the model
	        self.model_id = model_id
	        self.language = language
	        self.word_count = word_count

	        # Build the prompt first so an unsupported language fails immediately,
	        # before any client is created.
	        self.messages = self._build_messages(resume, job_desc)

	        # get the token from the HF_TOKEN environment variable
	        hf_token = os.environ.get('HF_TOKEN')
	        self.api_token = hf_token

	        self.inference_client = InferenceClient(model=model_id, token=hf_token, timeout=120)

	    def _build_messages(self, resume, job_desc):
	        """Return the system/user chat messages for ``self.language``."""
	        if self.language == "English":
	            return [
	                {
	                    "role": "system",
	                    "content": "You are a helpful assistant that specializes in helping candidates to write and format their cover letters. You will use the language they use to speak to you",
	                },
	                {
	                    "role": "user",
	                    "content": (
	                        f"Without any headers, write in less than {self.word_count} words"
	                        " and in a simple but professional way, a cover letter based on my resume"
	                        " and the description of the job I am applying for below :"
	                        f"\n Resume: {resume}\n Job description: {job_desc}"
	                    ),
	                },
	            ]

	        if self.language == "Français":
	            # The French prompt asks for 75% of the requested word count;
	            # truncate to an integer so the prompt never shows e.g. "262.5".
	            french_word_count = int(self.word_count * 0.75)
	            return [
	                {
	                    "role": "system",
	                    "content": "Vous êtes un assistant utile qui se spécialise dans l'aide aux candidats pour rédiger et mettre en forme leurs lettres de motivation.",
	                },
	                {
	                    "role": "user",
	                    "content": (
	                        f"Sans aucune entête, écrivez en moins de {french_word_count} mots"
	                        " et de manière simple mais professionnelle une lettre de motivation basée sur mon CV"
	                        " et l'offre d'emploi du poste auquel je postule ci-dessous :"
	                        f"\n CV: {resume}\n offre d'emploi: {job_desc}"
	                    ),
	                },
	            ]

	        # Without this, self.messages would stay undefined and the failure
	        # would only surface later as an AttributeError in generate_letter().
	        raise ValueError(
	            f"Unsupported language {self.language!r}; expected 'English' or 'Français'"
	        )

	    def generate_letter(self):
	        """Call the inference API with the prepared prompt and return the letter text."""
	        # convert word count to number of tokens and add a safety margin
	        token_number = int(self.word_count // 0.7) + 150
	        # call the inference api and generate answers
	        data = self.inference_client.chat_completion(self.messages, max_tokens=token_number)
	        return data.choices[0].message.content