Nassiraaa commited on
Commit
911e595
·
verified ·
1 Parent(s): 3c519ac

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +103 -0
app.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from doctr.models import ocr_predictor
3
+ from doctr.io import DocumentFile
4
+ from huggingface_hub import hf_hub_download
5
+ from llama_cpp import Llama
6
+ import concurrent.futures
7
+
8
# Fetch the quantized Mistral weights exactly once per Streamlit session.
@st.cache_resource
def load_model():
    """Download the Mistral-7B-Instruct GGUF weights and build the Llama runtime.

    Returns:
        Llama: llama.cpp model with a 32k context window; n_gpu_layers=2
        offloads a couple of layers to the GPU when one is available.
    """
    gguf_path = hf_hub_download(
        "TheBloke/Mistral-7B-Instruct-v0.2-GGUF",
        filename="mistral-7b-instruct-v0.2.Q4_K_M.gguf",
    )
    return Llama(model_path=gguf_path, n_ctx=32768, n_gpu_layers=2)
13
+
14
# Initialize models at import time so every Streamlit rerun reuses them:
# llm is memoized by @st.cache_resource above; the docTR OCR pipeline
# loads pretrained detection/recognition weights.
llm = load_model()
ocr_model = ocr_predictor(pretrained=True)
17
+
18
@st.cache_data
def extract_text(pdf_bytes):
    """OCR a PDF given as raw bytes and return every recognized word, space-joined.

    Cached by content via @st.cache_data so re-displaying the same upload
    does not re-run OCR.
    """
    document = DocumentFile.from_pdf(pdf_bytes)
    ocr_result = ocr_model(document)
    words = []
    # Walk docTR's page -> block -> line -> word hierarchy in reading order.
    for page in ocr_result.pages:
        for block in page.blocks:
            for line in block.lines:
                for word in line.words:
                    words.append(word.value)
    return " ".join(words)
23
+
24
def check_cv_section(section, text):
    """Ask the LLM whether `section` is present in the CV text.

    Args:
        section: Human-readable section name (e.g. "Education").
        text: Full OCR-extracted CV text.

    Returns:
        Tuple of two single-entry dicts: ({section: bool}, {section: explanation}).

    The LLM is prompted to answer in a fixed two-line format; parsing is
    defensive because generated text does not always follow instructions —
    the original `split(':')[1]` indexing raised IndexError on any reply
    line that lacked a colon.
    """
    prompt = f"""Analyze the following CV text and determine if the "{section}" section exists.
Respond with 'true' if it exists, or 'false' if it doesn't.
Be aware of synonyms and variations in section titles.
CV text:
{text}
Respond in the format:
{section}: true/false
Explanation: Briefly explain your reasoning, mentioning any relevant keywords or phrases found.
"""
    response = llm(prompt, max_tokens=200)
    raw = response['choices'][0]['text'].strip()
    lines = raw.split('\n')

    # Line 1 should be "<section>: true/false". Missing colon or missing
    # "true" both count as "not present" rather than crashing.
    first = lines[0].split(':', 1)
    presence = len(first) > 1 and first[1].strip().lower() == 'true'

    # Line 2 should be "Explanation: ..."; tolerate its absence.
    explanation = ""
    if len(lines) > 1 and ':' in lines[1]:
        explanation = lines[1].split(':', 1)[1].strip()

    return {section: presence}, {section: explanation}
40
+
41
def check_cv_sections(text):
    """Probe every expected CV section concurrently via the LLM.

    Returns:
        (results, explanations): two dicts keyed by section name with the
        presence flag and the model's reasoning, respectively.
    """
    sections = [
        "Personal Information",
        "Summary and objective (About / profile)",
        "Education",
        "Work Experience",
        "Skills",
        "Languages",
        "Certificates",
        "Interests",
        "References (optional)",
    ]

    results = {}
    explanations = {}

    # One LLM call per section; threads overlap the I/O-bound inference waits.
    with concurrent.futures.ThreadPoolExecutor() as executor:
        pending = [executor.submit(check_cv_section, name, text) for name in sections]
        for done in concurrent.futures.as_completed(pending):
            presence, reasoning = done.result()
            results.update(presence)
            explanations.update(reasoning)

    return results, explanations
65
+
66
def calculate_cv_score(sections):
    """Score a CV by summing the weights of the essential sections it contains.

    Args:
        sections: Mapping of section name -> bool (present or not), as
            produced by check_cv_sections.

    Returns:
        int: sum of the weights of matched essential sections (0 when
        nothing matches or `sections` is empty).

    Bug fixed: the essential keys encode synonyms with '|'
    ("Skill|Expertise|Competencies"), but the old code tested the raw key
    as one substring, so e.g. "Skills" could never match and scored 0.
    Keys are now split on '|' and each synonym is compared against each
    present section name case-insensitively, in both substring directions.
    """
    essentials_sections = {
        "Profile | Summary": 1,
        "Skill|Expertise|Competencies": 4,
        "Education": 5,
        "Projects": 5,
        "Professional experience": 5,
        "Languages": 2,
    }

    # Only sections flagged as present participate in the matching.
    present = [name.lower() for name, found in sections.items() if found]

    score = 0
    for essential, weight in essentials_sections.items():
        synonyms = [syn.strip().lower() for syn in essential.split("|")]
        # Bidirectional substring match tolerates both "Skill" vs "Skills"
        # and longer detected titles containing a synonym.
        if any(syn in name or name in syn for name in present for syn in synonyms):
            score += weight
    return score
78
+
79
def main():
    """Streamlit entry point: upload a CV PDF, OCR it, and report completeness."""
    st.title('Analyse de CV avec DocTR et Mistral')
    uploaded_file = st.file_uploader("Uploader un fichier PDF", type="pdf")

    # Nothing to do until the user provides a file.
    if uploaded_file is None:
        return

    raw_pdf = uploaded_file.read()
    cv_text = extract_text(raw_pdf)

    if st.checkbox("Afficher le texte extrait du CV"):
        st.text_area("Texte extrait du CV", cv_text, height=200)

    section_flags, section_notes = check_cv_sections(cv_text)
    score = calculate_cv_score(section_flags)

    st.header("CV Completeness")
    for section, present in section_flags.items():
        st.write(f"{section}: {present}")
        if section_notes[section]:
            st.write(f"Explanation: {section_notes[section]}")
        st.write("---")

    st.header(f'CV Score: {score}')


if __name__ == '__main__':
    main()