Spaces:
Runtime error
Runtime error
Update utils/mistral.py
Browse files- utils/mistral.py +39 -34
utils/mistral.py
CHANGED
|
@@ -41,7 +41,7 @@ def Data_Cleaner(text):
|
|
| 41 |
def Model_ProfessionalDetails_Output(resume, client):
|
| 42 |
system_role = {
|
| 43 |
"role": "system",
|
| 44 |
-
"content": "You are a skilled resume parser. Your task is to extract professional details from resumes in a structured JSON format defined by the User. Ensure accuracy and completeness while maintaining the format provided and if field are missing just return
|
| 45 |
}
|
| 46 |
user_prompt = {
|
| 47 |
"role": "user",
|
|
@@ -52,16 +52,16 @@ def Model_ProfessionalDetails_Output(resume, client):
|
|
| 52 |
"technical_skills": ["List all technical skills, programming languages, frameworks, and technologies mentioned in the resume, ensuring they are not mixed with other skill types."],
|
| 53 |
"non_technical_skills": ["Identify and list non-technical skills such as leadership, teamwork, and communication skills, ensuring they are not mixed with technical skills."],
|
| 54 |
"tools": ["Enumerate and extract all software tools, platforms, and applications referenced in the resume, distinctly separate from skills."],
|
|
|
|
| 55 |
"projects": ["Extract all projects names or titles mentioned in the resume."],
|
| 56 |
"projects_experience": ["Summarize overall project experiences, providing a brief description of each project as detailed in the resume."],
|
| 57 |
-
"experience": ["Calculate total professional work experience in years and months based on the resume."],
|
| 58 |
-
"companies_worked_at": ["List the names of all companies where employment is mentioned in the resume."],
|
| 59 |
"certifications": ["Extract and list all certifications obtained as stated in the resume."],
|
| 60 |
"roles": ["Include the names of all job titles or roles held as indicated in the resume."],
|
| 61 |
-
"qualifications": ["List educational qualifications
|
| 62 |
-
"
|
| 63 |
-
"
|
| 64 |
-
"year_of_graduation": ["Extract the year of graduation from the resume. If not found, return
|
| 65 |
}}
|
| 66 |
}}
|
| 67 |
output:
|
|
@@ -84,7 +84,7 @@ def Model_ProfessionalDetails_Output(resume, client):
|
|
| 84 |
def Model_PersonalDetails_Output(resume, client):
|
| 85 |
system_role = {
|
| 86 |
"role": "system",
|
| 87 |
-
"content": "You are a skilled resume parser. Your task is to extract professional details from resumes in a structured JSON format defined by the User. Ensure accuracy and completeness while maintaining the format provided and if field are missing just return
|
| 88 |
}
|
| 89 |
user_prompt = {
|
| 90 |
"role": "user",
|
|
@@ -92,11 +92,11 @@ def Model_PersonalDetails_Output(resume, client):
|
|
| 92 |
Extract the text in the following output JSON string as:
|
| 93 |
{{
|
| 94 |
"personal": {{
|
| 95 |
-
"name": "Extract the full name based on the resume. If not found, return
|
| 96 |
-
"contact_number": "Extract the contact number from the resume. If not found, return
|
| 97 |
-
"email": "Extract the email address from the resume. If not found, return
|
| 98 |
-
"Address": "Extract the Address or address from the resume. If not found, return
|
| 99 |
-
"link": "Extract any relevant links (e.g., portfolio, LinkedIn) from the resume. If not found, return
|
| 100 |
}}
|
| 101 |
}}
|
| 102 |
output:
|
|
@@ -281,11 +281,11 @@ def is_valid_contact(contact):
|
|
| 281 |
|
| 282 |
|
| 283 |
def validate_contact_email(personal_data):
|
| 284 |
-
contact = personal_data.get('contact',
|
| 285 |
-
email = personal_data.get('email',
|
| 286 |
|
| 287 |
-
valid_contact = is_valid_contact(contact) if contact !=
|
| 288 |
-
valid_email = is_valid_email(email) if email !=
|
| 289 |
|
| 290 |
invalid_contact = 'Invalid contact' if not valid_contact else 'Valid contact'
|
| 291 |
invalid_email = 'Invalid email' if not valid_email else 'Valid email'
|
|
@@ -324,39 +324,44 @@ def process_resume_data(file_path):
|
|
| 324 |
# Combine both personal and professional details into a structured output
|
| 325 |
result = {
|
| 326 |
"personal": {
|
| 327 |
-
"name": per_data.get('personal', {}).get('name', 'Not found'),
|
| 328 |
-
"contact": per_data.get('personal', {}).get('contact_number', 'Not found'),
|
| 329 |
-
"email": per_data.get('personal', {}).get('email', 'Not found'),
|
| 330 |
-
"location": per_data.get('personal', {}).get('Address', 'Not found'),
|
| 331 |
"linkedin": linkedin_links,
|
| 332 |
"github": github_links,
|
| 333 |
"other_links": hyperlinks # Store remaining links if needed
|
| 334 |
},
|
| 335 |
"professional": {
|
| 336 |
-
"technical_skills": pro_data.get('professional', {}).get('technical_skills', 'Not found'),
|
| 337 |
-
"non_technical_skills": pro_data.get('professional', {}).get('non_technical_skills', 'Not found'),
|
| 338 |
-
"tools": pro_data.get('professional', {}).get('tools', 'Not found'),
|
| 339 |
"experience": [
|
| 340 |
{
|
| 341 |
-
"company": pro_data.get('professional', {}).get('companies_worked_at', 'Not found'),
|
| 342 |
-
"projects": pro_data.get('professional', {}).get('projects', 'Not found'),
|
| 343 |
-
"role": pro_data.get('professional', {}).get('worked_as', 'Not found'),
|
| 344 |
-
"years": pro_data.get('professional', {}).get('experience', 'Not found'),
|
| 345 |
-
"project_experience": pro_data.get('professional', {}).get('projects_experience', 'Not found')
|
| 346 |
}
|
| 347 |
],
|
| 348 |
"education": [
|
| 349 |
{
|
| 350 |
-
"qualification": pro_data.get('professional', {}).get('qualification', 'Not found'),
|
| 351 |
-
"university": pro_data.get('professional', {}).get('university', 'Not found'),
|
| 352 |
-
"course": pro_data.get('professional', {}).get('course', 'Not found'),
|
| 353 |
-
"certificate": pro_data.get('professional', {}).get('certification', 'Not found')
|
| 354 |
}
|
| 355 |
]
|
| 356 |
}
|
| 357 |
}
|
| 358 |
|
| 359 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 360 |
valid_contact, invalid_contact, valid_email, invalid_email = validate_contact_email(result['personal'])
|
| 361 |
result['personal']['valid_contact'] = valid_contact
|
| 362 |
result['personal']['invalid_contact'] = invalid_contact
|
|
|
|
| 41 |
def Model_ProfessionalDetails_Output(resume, client):
|
| 42 |
system_role = {
|
| 43 |
"role": "system",
|
| 44 |
+
"content": "You are a skilled resume parser. Your task is to extract professional details from resumes in a structured JSON format defined by the User. Ensure accuracy and completeness while maintaining the format provided and if field are missing just return []."
|
| 45 |
}
|
| 46 |
user_prompt = {
|
| 47 |
"role": "user",
|
|
|
|
| 52 |
"technical_skills": ["List all technical skills, programming languages, frameworks, and technologies mentioned in the resume, ensuring they are not mixed with other skill types."],
|
| 53 |
"non_technical_skills": ["Identify and list non-technical skills such as leadership, teamwork, and communication skills, ensuring they are not mixed with technical skills."],
|
| 54 |
"tools": ["Enumerate and extract all software tools, platforms, and applications referenced in the resume, distinctly separate from skills."],
|
| 55 |
+
"companies_worked_at": ["List the names of all companies where employment is mentioned in the resume."],
|
| 56 |
"projects": ["Extract all projects names or titles mentioned in the resume."],
|
| 57 |
"projects_experience": ["Summarize overall project experiences, providing a brief description of each project as detailed in the resume."],
|
| 58 |
+
"experience": ["Calculate total professional work experience in years and months based on the resume."],
|
|
|
|
| 59 |
"certifications": ["Extract and list all certifications obtained as stated in the resume."],
|
| 60 |
"roles": ["Include the names of all job titles or roles held as indicated in the resume."],
|
| 61 |
+
"qualifications": ["List and Extract all educational qualifications, including degrees (e.g., BBA, MBA), their full forms, and associated levels (e.g., undergraduate, postgraduate) from resume. If none are found, return []."],
|
| 62 |
+
"university": ["Identify and Extract the name of the University, College, or Institute attended, based on the resume. If not found, return []."],
|
| 63 |
+
"courses": ["Extract the names of completed courses or based on the resume. If none are found, return []."],
|
| 64 |
+
"year_of_graduation": ["Extract the year of graduation from the resume. If not found, return []."]
|
| 65 |
}}
|
| 66 |
}}
|
| 67 |
output:
|
|
|
|
| 84 |
def Model_PersonalDetails_Output(resume, client):
|
| 85 |
system_role = {
|
| 86 |
"role": "system",
|
| 87 |
+
"content": "You are a skilled resume parser. Your task is to extract professional details from resumes in a structured JSON format defined by the User. Ensure accuracy and completeness while maintaining the format provided and if field are missing just return []."
|
| 88 |
}
|
| 89 |
user_prompt = {
|
| 90 |
"role": "user",
|
|
|
|
| 92 |
Extract the text in the following output JSON string as:
|
| 93 |
{{
|
| 94 |
"personal": {{
|
| 95 |
+
"name": ["Extract the full name based on the resume. If not found, return []."],
|
| 96 |
+
"contact_number": ["Extract the contact number from the resume. If not found, return []."],
|
| 97 |
+
"email": ["Extract the email address from the resume. If not found, return []."],
|
| 98 |
+
"Address": ["Extract the Address or address from the resume. If not found, return []."],
|
| 99 |
+
"link": ["Extract any relevant links (e.g., portfolio, LinkedIn) from the resume. If not found, return []."]
|
| 100 |
}}
|
| 101 |
}}
|
| 102 |
output:
|
|
|
|
| 281 |
|
| 282 |
|
| 283 |
def validate_contact_email(personal_data):
|
| 284 |
+
contact = personal_data.get('contact', [])
|
| 285 |
+
email = personal_data.get('email', [])
|
| 286 |
|
| 287 |
+
valid_contact = is_valid_contact(contact) if contact != [] else False
|
| 288 |
+
valid_email = is_valid_email(email) if email != [] else False
|
| 289 |
|
| 290 |
invalid_contact = 'Invalid contact' if not valid_contact else 'Valid contact'
|
| 291 |
invalid_email = 'Invalid email' if not valid_email else 'Valid email'
|
|
|
|
| 324 |
# Combine both personal and professional details into a structured output
|
| 325 |
# Assemble the combined output from the personal and professional model
# responses (per_data / pro_data).
#
# Every dict.get() lookup key must be a plain string: a list key such as
# ['name'] is unhashable and raises "TypeError: unhashable type: 'list'"
# as soon as this statement runs.
#
# The professional lookups use the key names requested in the prompt schema
# (roles, qualifications, courses, certifications); the previous spellings
# (worked_as, qualification, course, certification) never matched the
# requested schema, so those fields always fell back to 'Not found'.
result = {
    "personal": {
        "name": per_data.get('personal', {}).get('name', ['Not found']),
        "contact": per_data.get('personal', {}).get('contact_number', ['Not found']),
        "email": per_data.get('personal', {}).get('email', ['Not found']),
        "location": per_data.get('personal', {}).get('Address', ['Not found']),
        "linkedin": linkedin_links,
        "github": github_links,
        "other_links": hyperlinks  # Store remaining links if needed
    },
    "professional": {
        "technical_skills": pro_data.get('professional', {}).get('technical_skills', ['Not found']),
        "non_technical_skills": pro_data.get('professional', {}).get('non_technical_skills', ['Not found']),
        "tools": pro_data.get('professional', {}).get('tools', ['Not found']),
        "experience": [
            {
                "company": pro_data.get('professional', {}).get('companies_worked_at', ['Not found']),
                "projects": pro_data.get('professional', {}).get('projects', ['Not found']),
                # Prompt schema key is "roles".
                "role": pro_data.get('professional', {}).get('roles', ['Not found']),
                "years": pro_data.get('professional', {}).get('experience', ['Not found']),
                "project_experience": pro_data.get('professional', {}).get('projects_experience', ['Not found'])
            }
        ],
        "education": [
            {
                # Prompt schema keys are plural: "qualifications", "courses",
                # "certifications".
                "qualification": pro_data.get('professional', {}).get('qualifications', ['Not found']),
                "university": pro_data.get('professional', {}).get('university', ['Not found']),
                "course": pro_data.get('professional', {}).get('courses', ['Not found']),
                "certificate": pro_data.get('professional', {}).get('certifications', ['Not found'])
            }
        ]
    }
}
|
| 358 |
|
| 359 |
+
|
| 360 |
+
|
| 361 |
+
# Append any links the model extracted to the remaining hyperlinks.
# A bare string is wrapped in a list first, because `list += str` would
# extend the list one character at a time instead of adding one entry.
extra_links = per_data.get('personal', {}).get('link', ['Not found'])
if isinstance(extra_links, str):
    extra_links = [extra_links]
result['personal']['other_links'] += extra_links
|
| 363 |
+
|
| 364 |
+
# Validate the extracted contact number and email address
|
| 365 |
valid_contact, invalid_contact, valid_email, invalid_email = validate_contact_email(result['personal'])
|
| 366 |
result['personal']['valid_contact'] = valid_contact
|
| 367 |
result['personal']['invalid_contact'] = invalid_contact
|