Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
from doctr.models import ocr_predictor
|
| 3 |
+
from doctr.io import DocumentFile
|
| 4 |
+
from huggingface_hub import hf_hub_download
|
| 5 |
+
from llama_cpp import Llama
|
| 6 |
+
import concurrent.futures
|
| 7 |
+
|
| 8 |
+
# Download the model (do this only once, outside of any function)
@st.cache_resource
def load_model(repo_id="TheBloke/Mistral-7B-Instruct-v0.2-GGUF",
               filename="mistral-7b-instruct-v0.2.Q4_K_M.gguf"):
    """Download a GGUF checkpoint from the Hub and load it with llama.cpp.

    st.cache_resource makes this a process-wide singleton, so the multi-GB
    download and model load only happen on the first call.

    Args:
        repo_id: Hugging Face Hub repository holding the GGUF file.
        filename: Quantized weight file inside that repository.

    Returns:
        A ready-to-call ``Llama`` instance.
    """
    model_path = hf_hub_download(repo_id, filename=filename)
    # n_ctx=32768 matches Mistral v0.2's context window; n_gpu_layers=2
    # offloads only two layers — presumably sized for the Space's small
    # GPU. NOTE(review): confirm against the deployment hardware.
    return Llama(model_path=model_path, n_ctx=32768, n_gpu_layers=2)
|
| 13 |
+
|
| 14 |
+
# Initialize models at import time so every Streamlit rerun reuses them.
# load_model() is cached via st.cache_resource, so this is cheap after the
# first run; ocr_predictor downloads pretrained DocTR OCR weights.
llm = load_model()
ocr_model = ocr_predictor(pretrained=True)
|
| 17 |
+
|
| 18 |
+
@st.cache_data
def extract_text(pdf_bytes):
    """OCR a PDF given as raw bytes and return every recognized word
    joined into one space-separated string.

    Cached by st.cache_data, keyed on the PDF bytes, so re-uploading the
    same file skips the OCR pass.
    """
    document = DocumentFile.from_pdf(pdf_bytes)
    ocr_result = ocr_model(document)
    # Walk the DocTR hierarchy: page -> block -> line -> word.
    words = []
    for page in ocr_result.pages:
        for block in page.blocks:
            for line in block.lines:
                for word in line.words:
                    words.append(word.value)
    return " ".join(words)
|
| 23 |
+
|
| 24 |
+
def check_cv_section(section, text):
    """Ask the LLM whether *section* appears in the CV *text*.

    Args:
        section: Human-readable section title to look for.
        text: Full OCR-extracted CV text.

    Returns:
        A pair of single-entry dicts:
        ``({section: presence_bool}, {section: explanation_str})``.
    """
    prompt = f"""Analyze the following CV text and determine if the "{section}" section exists.
Respond with 'true' if it exists, or 'false' if it doesn't.
Be aware of synonyms and variations in section titles.
CV text:
{text}
Respond in the format:
{section}: true/false
Explanation: Briefly explain your reasoning, mentioning any relevant keywords or phrases found.
"""
    response = llm(prompt, max_tokens=200)
    result = response['choices'][0]['text'].strip()
    parts = result.split('\n')
    # The model's reply is free-form text; it may ignore the requested
    # "<section>: true/false" format. Guard the parsing instead of raising
    # IndexError on a missing ':' (the original crashed in that case).
    _, sep, verdict = parts[0].partition(':')
    presence = sep != '' and verdict.strip().lower().startswith('true')
    if len(parts) > 1 and ':' in parts[1]:
        explanation = parts[1].split(':', 1)[1].strip()
    else:
        explanation = ""
    return {section: presence}, {section: explanation}
|
| 40 |
+
|
| 41 |
+
def check_cv_sections(text):
    """Query the LLM once per expected CV section, fanning out over threads.

    Args:
        text: Full OCR-extracted CV text.

    Returns:
        ``(results, explanations)`` — two dicts keyed by section title,
        mapping to a presence boolean and an explanation string.
    """
    section_titles = (
        "Personal Information",
        "Summary and objective (About / profile)",
        "Education",
        "Work Experience",
        "Skills",
        "Languages",
        "Certificates",
        "Interests",
        "References (optional)",
    )

    presence_by_section = {}
    explanation_by_section = {}

    # Per-section LLM calls are independent and I/O-bound, so run them in a
    # thread pool; completion order is irrelevant because results land in
    # dicts keyed by section title.
    with concurrent.futures.ThreadPoolExecutor() as pool:
        pending = [pool.submit(check_cv_section, title, text)
                   for title in section_titles]
        for done in concurrent.futures.as_completed(pending):
            found, why = done.result()
            presence_by_section.update(found)
            explanation_by_section.update(why)

    return presence_by_section, explanation_by_section
|
| 65 |
+
|
| 66 |
+
def calculate_cv_score(sections):
    """Score CV completeness (0–22) from detected sections.

    Args:
        sections: Mapping of section title -> presence flag, as produced
            by ``check_cv_sections``.

    Returns:
        Sum of the weights of every essential whose name — or one of its
        '|'-separated synonyms — matches a present section title.

    Bug fixed: the original compared each whole key (e.g.
    "Skill|Expertise|Competencies") against section titles with a
    one-directional substring test, so "Skills", "Work Experience" and
    "Summary and objective (About / profile)" could never earn points.
    Synonyms are now split on '|' and matched case-insensitively in both
    directions.
    """
    # Weight per essential; '|' separates accepted synonyms for a section.
    essentials_sections = {
        "Profile | Summary": 1,
        "Skill|Expertise|Competencies": 4,
        "Education": 5,
        "Projects": 5,
        "Professional experience|Work Experience": 5,
        "Languages": 2
    }

    present = [title.lower() for title, found in sections.items() if found]

    def _matches(essential):
        # A synonym matches when it contains, or is contained in, a present
        # section title ("skill" matches "skills", "profile" matches
        # "summary and objective (about / profile)").
        for synonym in (alt.strip().lower() for alt in essential.split('|')):
            if any(synonym in title or title in synonym for title in present):
                return True
        return False

    return sum(weight for essential, weight in essentials_sections.items()
               if _matches(essential))
|
| 78 |
+
|
| 79 |
+
def main():
    """Streamlit entry point: upload a CV PDF, OCR it, then report which
    sections the LLM detects plus an overall completeness score."""
    st.title('Analyse de CV avec DocTR et Mistral')
    uploaded_file = st.file_uploader("Uploader un fichier PDF", type="pdf")

    # Nothing to do until the user provides a file.
    if uploaded_file is None:
        return

    pdf_bytes = uploaded_file.read()
    text = extract_text(pdf_bytes)

    # Optional debug view of the raw OCR output.
    if st.checkbox("Afficher le texte extrait du CV"):
        st.text_area("Texte extrait du CV", text, height=200)

    sections, explanations = check_cv_sections(text)
    cv_score = calculate_cv_score(sections)

    st.header("CV Completeness")
    for section, present in sections.items():
        st.write(f"{section}: {present}")
        if explanations[section]:
            st.write(f"Explanation: {explanations[section]}")
        st.write("---")

    st.header(f'CV Score: {cv_score}')
|
| 101 |
+
|
| 102 |
+
# Run the Streamlit app when executed as a script.
if __name__ == '__main__':
    main()
|