Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -381,6 +381,7 @@ def classify_esco_by_hierarchical_level(responsibilities: List[str]) -> dict:
|
|
| 381 |
level5_code = gpt_call("Identify fifth-level occupational group", user_prompt5).strip()
|
| 382 |
# Handle the case where the LLM might return just the code part
|
| 383 |
level5_code = code_sanitize(level5_code, list5_output)
|
|
|
|
| 384 |
result.update(get_level_ESCO_info(level5_df, level5_code, 'Level_5'))
|
| 385 |
|
| 386 |
## Et voila!!
|
|
@@ -909,10 +910,10 @@ def process_pdf(file):
|
|
| 909 |
"qualification": qualification,
|
| 910 |
"ccoq_levels": {f"Level_{i}_CCOG_{field}": occ_group.get(f"Level_{i}_CCOG_{field}")
|
| 911 |
for i in range(1, 5) for field in ["code", "name", "desc"]},
|
| 912 |
-
"interview_questions":
|
| 913 |
"skills": joined_skills,
|
| 914 |
"esco_levels": {f"Level_{i}_ESCO_{field}": esco_occ.get(f"Level_{i}_ESCO_{field}")
|
| 915 |
-
for i in range(1,
|
| 916 |
"esco_skills": esco_skills,
|
| 917 |
"processing_time": time.strftime("%Y-%m-%d %H:%M:%S")
|
| 918 |
}
|
|
@@ -922,6 +923,7 @@ def process_pdf(file):
|
|
| 922 |
json.dump(result_data, f, indent=2)
|
| 923 |
json_path = f.name
|
| 924 |
log_debug(f"Results saved to temporary JSON file: {json_path}")
|
|
|
|
| 925 |
|
| 926 |
# Format outputs for display through html cards
|
| 927 |
formatted_skills = format_skill_cards(joined_skills)
|
|
@@ -1009,7 +1011,7 @@ def generate_word_document(json_path: Optional[str]) -> str:
|
|
| 1009 |
|
| 1010 |
# Default values for all fields
|
| 1011 |
default_values = {
|
| 1012 |
-
"
|
| 1013 |
"responsibilities": "No responsibilities extracted.",
|
| 1014 |
"classified_job_family": "No job family identified.",
|
| 1015 |
"qualification": ["No qualification information available."],
|
|
@@ -1021,7 +1023,7 @@ def generate_word_document(json_path: Optional[str]) -> str:
|
|
| 1021 |
# Safely build the result dictionary with fallbacks
|
| 1022 |
try:
|
| 1023 |
result = {
|
| 1024 |
-
"
|
| 1025 |
"responsibilities": data.get("responsibilities", default_values["responsibilities"]),
|
| 1026 |
"classified_job_family": data.get("job_family", default_values["classified_job_family"]),
|
| 1027 |
"qualification": data.get("qualification", default_values["qualification"]),
|
|
@@ -1050,53 +1052,85 @@ def generate_word_document(json_path: Optional[str]) -> str:
|
|
| 1050 |
doc.add_paragraph(f"Generated on {time.strftime('%Y-%m-%d %H:%M:%S')}")
|
| 1051 |
doc.add_paragraph("International Organization for Migration", style="Intense Quote")
|
| 1052 |
|
|
|
|
|
|
|
|
|
|
| 1053 |
|
| 1054 |
|
| 1055 |
-
doc.add_heading('
|
| 1056 |
doc.add_paragraph(f"File: {result['file']}")
|
| 1057 |
-
doc.add_paragraph(f"Job Family: {result['classified_job_family']}")
|
| 1058 |
|
| 1059 |
-
doc.add_heading('Responsibilities', level=2)
|
| 1060 |
doc.add_paragraph(result['responsibilities'])
|
| 1061 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1062 |
# Skills (Extracted)
|
| 1063 |
doc.add_heading('Skills (Extracted)', level=2)
|
| 1064 |
skills_list = result['skills']
|
| 1065 |
if isinstance(skills_list, dict):
|
| 1066 |
skills_list = skills_list.get("skills", [])
|
| 1067 |
for skill in skills_list:
|
| 1068 |
-
doc.add_paragraph(f"{skill.get('skill_name', 'Unnamed Skill')}
|
| 1069 |
-
|
| 1070 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1071 |
for item in result['qualification']:
|
| 1072 |
doc.add_paragraph(item, style='List Bullet')
|
| 1073 |
|
| 1074 |
-
doc.add_heading('Interview Questions', level=2)
|
| 1075 |
for item in result['interview']:
|
| 1076 |
doc.add_paragraph(item, style='List Bullet')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1077 |
|
| 1078 |
if result["esco_levels"]:
|
| 1079 |
-
doc.add_heading('ESCO
|
| 1080 |
for key, value in result["esco_levels"].items():
|
| 1081 |
-
|
|
|
|
|
|
|
|
|
|
| 1082 |
|
| 1083 |
-
if result["ccog_levels"]:
|
| 1084 |
-
doc.add_heading('C-COG Levels', level=2)
|
| 1085 |
-
for key, value in result["ccog_levels"].items():
|
| 1086 |
-
doc.add_paragraph(f"{key}: {value}")
|
| 1087 |
|
| 1088 |
|
| 1089 |
# Skills (ESCO)
|
| 1090 |
-
doc.add_heading('Skills (ESCO)', level=2)
|
| 1091 |
esco_skills_list = result['skills_esco']
|
| 1092 |
if isinstance(esco_skills_list, dict):
|
| 1093 |
esco_skills_list = esco_skills_list.get("skills", [])
|
| 1094 |
for skill in esco_skills_list:
|
| 1095 |
-
doc.add_paragraph(f"{skill.get('skill_name', 'Unnamed Skill')}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1096 |
|
| 1097 |
-
# Footer
|
| 1098 |
-
doc.add_paragraph()
|
| 1099 |
-
doc.add_paragraph("DISCLAIMER: This document contains material generated by artificial intelligence technology. While efforts have been made to ensure accuracy, please be aware that AI-generated content may not always fully represent the intent or expertise of human-authored material and may contain errors or inaccuracies. An AI model might generate content that sounds plausible but that is either factually incorrect or unrelated to the given context. These unexpected outcomes, also called AI hallucinations, can stem from biases, under-performing information retrieval, lack of real-world understanding, or limitations in training data.", style='Footer')
|
| 1100 |
|
| 1101 |
except Exception as e:
|
| 1102 |
log_debug(f"Error generating document content: {str(e)}")
|
|
@@ -1108,7 +1142,7 @@ def generate_word_document(json_path: Optional[str]) -> str:
|
|
| 1108 |
# FILE SAVING WITH MULTIPLE FALLBACKS
|
| 1109 |
try:
|
| 1110 |
# Generate appropriate filename
|
| 1111 |
-
base_name = os.path.splitext(os.path.basename(result['
|
| 1112 |
if base_name:
|
| 1113 |
clean_name = re.sub(r'[^\w\-]', '_', base_name)[:50] # Sanitize and truncate
|
| 1114 |
output_filename = f"{clean_name}_analysis_{time.strftime('%Y%m%d')}.docx"
|
|
|
|
| 381 |
level5_code = gpt_call("Identify fifth-level occupational group", user_prompt5).strip()
|
| 382 |
# Handle the case where the LLM might return just the code part
|
| 383 |
level5_code = code_sanitize(level5_code, list5_output)
|
| 384 |
+
log_debug(f"Level 5 ESCO code: {level5_code}")
|
| 385 |
result.update(get_level_ESCO_info(level5_df, level5_code, 'Level_5'))
|
| 386 |
|
| 387 |
## Et voila!!
|
|
|
|
| 910 |
"qualification": qualification,
|
| 911 |
"ccoq_levels": {f"Level_{i}_CCOG_{field}": occ_group.get(f"Level_{i}_CCOG_{field}")
|
| 912 |
for i in range(1, 5) for field in ["code", "name", "desc"]},
|
| 913 |
+
"interview_questions": interview,
|
| 914 |
"skills": joined_skills,
|
| 915 |
"esco_levels": {f"Level_{i}_ESCO_{field}": esco_occ.get(f"Level_{i}_ESCO_{field}")
|
| 916 |
+
for i in range(1, 6) for field in ["code", "name", "desc"]},
|
| 917 |
"esco_skills": esco_skills,
|
| 918 |
"processing_time": time.strftime("%Y-%m-%d %H:%M:%S")
|
| 919 |
}
|
|
|
|
| 923 |
json.dump(result_data, f, indent=2)
|
| 924 |
json_path = f.name
|
| 925 |
log_debug(f"Results saved to temporary JSON file: {json_path}")
|
| 926 |
+
log_debug(f"Results data: {result_data}")
|
| 927 |
|
| 928 |
# Format outputs for display through html cards
|
| 929 |
formatted_skills = format_skill_cards(joined_skills)
|
|
|
|
| 1011 |
|
| 1012 |
# Default values for all fields
|
| 1013 |
default_values = {
|
| 1014 |
+
"file_name": "Unknown file",
|
| 1015 |
"responsibilities": "No responsibilities extracted.",
|
| 1016 |
"classified_job_family": "No job family identified.",
|
| 1017 |
"qualification": ["No qualification information available."],
|
|
|
|
| 1023 |
# Safely build the result dictionary with fallbacks
|
| 1024 |
try:
|
| 1025 |
result = {
|
| 1026 |
+
"file_name": data.get("file_name", default_values["file_name"]),
|
| 1027 |
"responsibilities": data.get("responsibilities", default_values["responsibilities"]),
|
| 1028 |
"classified_job_family": data.get("job_family", default_values["classified_job_family"]),
|
| 1029 |
"qualification": data.get("qualification", default_values["qualification"]),
|
|
|
|
| 1052 |
doc.add_paragraph(f"Generated on {time.strftime('%Y-%m-%d %H:%M:%S')}")
|
| 1053 |
doc.add_paragraph("International Organization for Migration", style="Intense Quote")
|
| 1054 |
|
| 1055 |
+
doc.add_heading('AI DISCLAIMER', level=2)
|
| 1056 |
+
doc_para = doc.add_paragraph()
|
| 1057 |
+
doc_para.add_run('This document contains material generated by artificial intelligence technology. While efforts have been made to ensure accuracy, please be aware that AI-generated content may not always fully represent the intent or expertise of human-authored material and may contain errors or inaccuracies. An AI model might generate content that sounds plausible but that is either factually incorrect or unrelated to the given context. These unexpected outcomes, also called AI hallucinations, can stem from biases, under-performing information retrieval, lack of real-world understanding, or limitations in training dat
|
| 1058 |
|
| 1059 |
|
| 1060 |
+
doc.add_heading('Input Information', level=2)
|
| 1061 |
doc.add_paragraph(f"File: {result['file']}")
|
|
|
|
| 1062 |
|
|
|
|
| 1063 |
doc.add_paragraph(result['responsibilities'])
|
| 1064 |
|
| 1065 |
+
|
| 1066 |
+
doc.add_heading('Job Family Classification', level=2)
|
| 1067 |
+
doc.add_paragraph(f" {result['classified_job_family']}")
|
| 1068 |
+
|
| 1069 |
+
# Helper function to add a bold label with regular value
|
| 1070 |
+
def add_skill_detail(paragraph_text, value):
|
| 1071 |
+
para = doc.add_paragraph()
|
| 1072 |
+
para.add_run(paragraph_text).bold = True
|
| 1073 |
+
para.add_run(f" {value}")
|
| 1074 |
+
|
| 1075 |
+
|
| 1076 |
# Skills (Extracted)
|
| 1077 |
doc.add_heading('Skills (Extracted)', level=2)
|
| 1078 |
skills_list = result['skills']
|
| 1079 |
if isinstance(skills_list, dict):
|
| 1080 |
skills_list = skills_list.get("skills", [])
|
| 1081 |
for skill in skills_list:
|
| 1082 |
+
doc.add_paragraph(f"• {skill.get('skill_name', 'Unnamed Skill')}", style='List Bullet')
|
| 1083 |
+
|
| 1084 |
+
add_skill_detail("Importance:", skill.get('importance', 'N/A'))
|
| 1085 |
+
add_skill_detail("Type:", skill.get('type', 'N/A'))
|
| 1086 |
+
add_skill_detail("Proficiency Level:", skill.get('proficiency_level', 'N/A'))
|
| 1087 |
+
add_skill_detail("Distinctive Elements:", skill.get('distinctive_elements', 'N/A'))
|
| 1088 |
+
add_skill_detail("Resume Signals:", skill.get('resume_signals', 'N/A'))
|
| 1089 |
+
add_skill_detail("Assessment Method:", skill.get('assessment_method', 'N/A'))
|
| 1090 |
+
|
| 1091 |
+
doc.add_heading('Suggested Qualifications', level=2)
|
| 1092 |
for item in result['qualification']:
|
| 1093 |
doc.add_paragraph(item, style='List Bullet')
|
| 1094 |
|
| 1095 |
+
doc.add_heading('Suggested Interview Questions', level=2)
|
| 1096 |
for item in result['interview']:
|
| 1097 |
doc.add_paragraph(item, style='List Bullet')
|
| 1098 |
+
|
| 1099 |
+
|
| 1100 |
+
if result["ccog_levels"]:
|
| 1101 |
+
doc.add_heading('UN Common Classification of Occupational Groups', level=2)
|
| 1102 |
+
for key, value in result["ccog_levels"].items():
|
| 1103 |
+
paragraph = doc.add_paragraph()
|
| 1104 |
+
run = paragraph.add_run(f"{key}: ")
|
| 1105 |
+
run.bold = True
|
| 1106 |
+
paragraph.add_run(str(value))
|
| 1107 |
|
| 1108 |
if result["esco_levels"]:
|
| 1109 |
+
doc.add_heading('ESCO Framework Occupational Groups', level=2)
|
| 1110 |
for key, value in result["esco_levels"].items():
|
| 1111 |
+
paragraph = doc.add_paragraph()
|
| 1112 |
+
run = paragraph.add_run(f"{key}: ")
|
| 1113 |
+
run.bold = True
|
| 1114 |
+
paragraph.add_run(str(value))
|
| 1115 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1116 |
|
| 1117 |
|
| 1118 |
# Skills (ESCO)
|
| 1119 |
+
doc.add_heading('Mapped Skills (ESCO)', level=2)
|
| 1120 |
esco_skills_list = result['skills_esco']
|
| 1121 |
if isinstance(esco_skills_list, dict):
|
| 1122 |
esco_skills_list = esco_skills_list.get("skills", [])
|
| 1123 |
for skill in esco_skills_list:
|
| 1124 |
+
doc.add_paragraph(f"• {skill.get('skill_name', 'Unnamed Skill')}", style='List Bullet')
|
| 1125 |
+
|
| 1126 |
+
add_skill_detail("Importance:", skill.get('importance', 'N/A'))
|
| 1127 |
+
add_skill_detail("Type:", skill.get('type', 'N/A'))
|
| 1128 |
+
add_skill_detail("Proficiency Level:", skill.get('proficiency_level', 'N/A'))
|
| 1129 |
+
add_skill_detail("Distinctive Elements:", skill.get('distinctive_elements', 'N/A'))
|
| 1130 |
+
add_skill_detail("Resume Signals:", skill.get('resume_signals', 'N/A'))
|
| 1131 |
+
add_skill_detail("Assessment Method:", skill.get('assessment_method', 'N/A'))
|
| 1132 |
+
|
| 1133 |
|
|
|
|
|
|
|
|
|
|
| 1134 |
|
| 1135 |
except Exception as e:
|
| 1136 |
log_debug(f"Error generating document content: {str(e)}")
|
|
|
|
| 1142 |
# FILE SAVING WITH MULTIPLE FALLBACKS
|
| 1143 |
try:
|
| 1144 |
# Generate appropriate filename
|
| 1145 |
+
base_name = os.path.splitext(os.path.basename(result['file_name']))[0]
|
| 1146 |
if base_name:
|
| 1147 |
clean_name = re.sub(r'[^\w\-]', '_', base_name)[:50] # Sanitize and truncate
|
| 1148 |
output_filename = f"{clean_name}_analysis_{time.strftime('%Y%m%d')}.docx"
|