# Hugging Face Space: CV completeness analysis with docTR OCR + Mistral-7B (llama.cpp)
| import streamlit as st | |
| from doctr.models import ocr_predictor | |
| from doctr.io import DocumentFile | |
| from huggingface_hub import hf_hub_download | |
| from llama_cpp import Llama | |
| import concurrent.futures | |
def load_model():
    """Fetch the quantized Mistral-7B-Instruct weights and wrap them in a
    llama.cpp inference handle.

    The GGUF file is resolved through the local huggingface_hub cache, so the
    actual download only happens the first time this runs on a machine.

    Returns:
        Llama: model with a 32,768-token context window and 2 layers
        offloaded to GPU.
    """
    repo_id = "TheBloke/Mistral-7B-Instruct-v0.2-GGUF"
    gguf_name = "mistral-7b-instruct-v0.2.Q4_K_M.gguf"
    weights_path = hf_hub_download(repo_id, filename=gguf_name)
    return Llama(model_path=weights_path, n_ctx=32768, n_gpu_layers=2)
# Initialize models once per server process.  Streamlit re-executes this
# script top-to-bottom on every widget interaction, so without caching the
# multi-GB LLM weights and the OCR network would be reloaded on each rerun.
# st.cache_resource memoizes the constructed objects across reruns/sessions.
llm = st.cache_resource(load_model)()
ocr_model = st.cache_resource(ocr_predictor)(pretrained=True)
def extract_text(pdf_bytes):
    """Run docTR OCR over a PDF given as raw bytes and return every
    recognized word joined into one space-separated string."""
    document = DocumentFile.from_pdf(pdf_bytes)
    ocr_result = ocr_model(document)
    words = []
    for page in ocr_result.pages:
        for block in page.blocks:
            for line in block.lines:
                words.extend(word.value for word in line.words)
    return " ".join(words)
def check_cv_section(section, text):
    """Ask the LLM whether a given section exists in the CV text.

    Args:
        section: human-readable section title (the prompt tells the model to
            accept synonyms and title variations).
        text: full OCR-extracted CV text.

    Returns:
        tuple[dict, dict]: ({section: bool presence}, {section: str explanation}).

    The previous parsing (`parts[0].split(':')[1]`) raised IndexError whenever
    the model's reply did not contain a colon on the expected line; malformed
    replies are now treated as "not present" with an empty explanation.
    """
    prompt = f"""Analyze the following CV text and determine if the "{section}" section exists.
Respond with 'true' if it exists, or 'false' if it doesn't.
Be aware of synonyms and variations in section titles.
CV text:
{text}
Respond in the format:
{section}: true/false
Explanation: Briefly explain your reasoning, mentioning any relevant keywords or phrases found.
"""
    response = llm(prompt, max_tokens=200)
    result = response['choices'][0]['text'].strip()
    reply_lines = [ln for ln in result.splitlines() if ln.strip()]
    presence = False
    explanation = ""
    if reply_lines:
        # Tolerate a missing colon: fall back to inspecting the whole line.
        head, sep, verdict = reply_lines[0].partition(':')
        verdict_text = verdict if sep else head
        # startswith() also accepts "true." / "True," style replies.
        presence = verdict_text.strip().lower().startswith('true')
    if len(reply_lines) > 1:
        # split(':', 1)[-1] returns the whole line when no colon is present.
        explanation = reply_lines[1].split(':', 1)[-1].strip()
    return {section: presence}, {section: explanation}
def check_cv_sections(text):
    """Query the LLM for each expected CV section, one thread per section.

    Args:
        text: full OCR-extracted CV text.

    Returns:
        tuple[dict, dict]: presence flags and explanations keyed by section
        title (insertion order follows thread completion order).
    """
    section_titles = [
        "Personal Information",
        "Summary and objective (About / profile)",
        "Education",
        "Work Experience",
        "Skills",
        "Languages",
        "Certificates",
        "Interests",
        "References (optional)",
    ]
    presence_by_section = {}
    explanation_by_section = {}
    with concurrent.futures.ThreadPoolExecutor() as pool:
        pending = [pool.submit(check_cv_section, title, text) for title in section_titles]
        for done in concurrent.futures.as_completed(pending):
            found, why = done.result()
            presence_by_section.update(found)
            explanation_by_section.update(why)
    return presence_by_section, explanation_by_section
def calculate_cv_score(sections):
    """Score CV completeness from the section-presence map.

    Each essential entry is a '|'-separated list of synonyms plus a weight;
    the weight is awarded when any *present* section title matches one of
    the synonyms (case-insensitive substring match in either direction, so
    "Skills" matches the synonym "Skill" and "Work Experience" matches
    "Experience").

    The previous implementation compared whole keys such as
    "Skill|Expertise|Competencies" against section titles, so no entry with
    synonyms could ever score; splitting on '|' fixes that.

    Args:
        sections: mapping of section title -> bool (present or not).

    Returns:
        int: sum of the weights of the matched essentials (0 to 22).
    """
    essentials_sections = {
        "Profile|Summary": 1,
        "Skill|Expertise|Competencies": 4,
        "Education": 5,
        "Projects": 5,
        "Experience": 5,
        "Languages": 2,
    }
    present_titles = [title.lower() for title, found in sections.items() if found]
    score = 0
    for synonyms, weight in essentials_sections.items():
        terms = [term.strip().lower() for term in synonyms.split('|')]
        if any(term in title or title in term
               for title in present_titles for term in terms):
            score += weight
    return score
def main():
    """Streamlit entry point: upload a PDF CV, OCR it, check each expected
    section with the LLM, and display a completeness score."""
    st.title('Analyse de CV avec DocTR et Mistral')
    uploaded_file = st.file_uploader("Uploader un fichier PDF", type="pdf")
    if uploaded_file is None:
        return  # nothing to analyse until a file is uploaded
    pdf_bytes = uploaded_file.read()
    text = extract_text(pdf_bytes)
    if st.checkbox("Afficher le texte extrait du CV"):
        st.text_area("Texte extrait du CV", text, height=200)
    sections, explanations = check_cv_sections(text)
    cv_score = calculate_cv_score(sections)
    st.header("CV Completeness")
    for section, present in sections.items():
        st.write(f"{section}: {present}")
        if explanations[section]:
            st.write(f"Explanation: {explanations[section]}")
        st.write("---")
    st.header(f'CV Score: {cv_score}')


if __name__ == '__main__':
    main()