# Hugging Face Space: CV completeness analysis with docTR OCR + Mistral-7B (llama.cpp)
| import streamlit as st | |
| from doctr.models import ocr_predictor | |
| from doctr.io import DocumentFile | |
| from huggingface_hub import hf_hub_download | |
| from llama_cpp import Llama | |
| import concurrent.futures | |
def load_model():
    """Fetch the quantized Mistral-7B-Instruct weights and wrap them in a
    llama.cpp inference handle.

    The GGUF file is resolved through the local huggingface_hub cache, so the
    actual download only happens the first time this runs on a machine.

    Returns:
        Llama: model with a 32,768-token context window and 2 layers
        offloaded to GPU.
    """
    repo_id = "TheBloke/Mistral-7B-Instruct-v0.2-GGUF"
    gguf_name = "mistral-7b-instruct-v0.2.Q4_K_M.gguf"
    weights_path = hf_hub_download(repo_id, filename=gguf_name)
    return Llama(model_path=weights_path, n_ctx=32768, n_gpu_layers=2)
# Initialize models once per server process.  Streamlit re-executes this
# script top-to-bottom on every widget interaction, so without caching the
# multi-GB LLM weights and the OCR network would be reloaded on each rerun.
# st.cache_resource memoizes the constructed objects across reruns/sessions.
llm = st.cache_resource(load_model)()
ocr_model = st.cache_resource(ocr_predictor)(pretrained=True)
def extract_text(pdf_bytes):
    """Run docTR OCR over a PDF given as raw bytes and return every
    recognized word joined into one space-separated string."""
    document = DocumentFile.from_pdf(pdf_bytes)
    ocr_result = ocr_model(document)
    words = []
    for page in ocr_result.pages:
        for block in page.blocks:
            for line in block.lines:
                words.extend(word.value for word in line.words)
    return " ".join(words)
def check_cv_section(section, text):
    """Ask the LLM whether a given section exists in the CV text.

    Args:
        section: human-readable section title (the prompt tells the model to
            accept synonyms and title variations).
        text: full OCR-extracted CV text.

    Returns:
        tuple[dict, dict]: ({section: bool presence}, {section: str explanation}).

    The previous parsing (`parts[0].split(':')[1]`) raised IndexError whenever
    the model's reply did not contain a colon on the expected line; malformed
    replies are now treated as "not present" with an empty explanation.
    """
    prompt = f"""Analyze the following CV text and determine if the "{section}" section exists.
Respond with 'true' if it exists, or 'false' if it doesn't.
Be aware of synonyms and variations in section titles.
CV text:
{text}
Respond in the format:
{section}: true/false
Explanation: Briefly explain your reasoning, mentioning any relevant keywords or phrases found.
"""
    response = llm(prompt, max_tokens=200)
    result = response['choices'][0]['text'].strip()
    reply_lines = [ln for ln in result.splitlines() if ln.strip()]
    presence = False
    explanation = ""
    if reply_lines:
        # Tolerate a missing colon: fall back to inspecting the whole line.
        head, sep, verdict = reply_lines[0].partition(':')
        verdict_text = verdict if sep else head
        # startswith() also accepts "true." / "True," style replies.
        presence = verdict_text.strip().lower().startswith('true')
    if len(reply_lines) > 1:
        # split(':', 1)[-1] returns the whole line when no colon is present.
        explanation = reply_lines[1].split(':', 1)[-1].strip()
    return {section: presence}, {section: explanation}
def check_cv_sections(text):
    """Query the LLM for each expected CV section, one thread per section.

    Args:
        text: full OCR-extracted CV text.

    Returns:
        tuple[dict, dict]: presence flags and explanations keyed by section
        title (insertion order follows thread completion order).
    """
    section_titles = [
        "Personal Information",
        "Summary and objective (About / profile)",
        "Education",
        "Work Experience",
        "Skills",
        "Languages",
        "Certificates",
        "Interests",
        "References (optional)",
    ]
    presence_by_section = {}
    explanation_by_section = {}
    with concurrent.futures.ThreadPoolExecutor() as pool:
        pending = [pool.submit(check_cv_section, title, text) for title in section_titles]
        for done in concurrent.futures.as_completed(pending):
            found, why = done.result()
            presence_by_section.update(found)
            explanation_by_section.update(why)
    return presence_by_section, explanation_by_section
def calculate_cv_score(sections):
    """Score CV completeness from the section-presence map.

    Each essential entry is a '|'-separated list of synonyms plus a weight;
    the weight is awarded when any *present* section title matches one of
    the synonyms (case-insensitive substring match in either direction, so
    "Skills" matches the synonym "Skill" and "Work Experience" matches
    "Experience").

    The previous implementation compared whole keys such as
    "Skill|Expertise|Competencies" against section titles, so no entry with
    synonyms could ever score; splitting on '|' fixes that.

    Args:
        sections: mapping of section title -> bool (present or not).

    Returns:
        int: sum of the weights of the matched essentials (0 to 22).
    """
    essentials_sections = {
        "Profile|Summary": 1,
        "Skill|Expertise|Competencies": 4,
        "Education": 5,
        "Projects": 5,
        "Experience": 5,
        "Languages": 2,
    }
    present_titles = [title.lower() for title, found in sections.items() if found]
    score = 0
    for synonyms, weight in essentials_sections.items():
        terms = [term.strip().lower() for term in synonyms.split('|')]
        if any(term in title or title in term
               for title in present_titles for term in terms):
            score += weight
    return score
def main():
    """Streamlit entry point: upload a PDF CV, OCR it, check each expected
    section with the LLM, and display a completeness score."""
    st.title('Analyse de CV avec DocTR et Mistral')
    uploaded_file = st.file_uploader("Uploader un fichier PDF", type="pdf")
    if uploaded_file is None:
        return  # nothing to analyse until a file is uploaded
    pdf_bytes = uploaded_file.read()
    text = extract_text(pdf_bytes)
    if st.checkbox("Afficher le texte extrait du CV"):
        st.text_area("Texte extrait du CV", text, height=200)
    sections, explanations = check_cv_sections(text)
    cv_score = calculate_cv_score(sections)
    st.header("CV Completeness")
    for section, present in sections.items():
        st.write(f"{section}: {present}")
        if explanations[section]:
            st.write(f"Explanation: {explanations[section]}")
        st.write("---")
    st.header(f'CV Score: {cv_score}')


if __name__ == '__main__':
    main()