edouardlgp committed on
Commit
43d46fb
·
verified ·
1 Parent(s): 91721d4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -23
app.py CHANGED
@@ -381,6 +381,7 @@ def classify_esco_by_hierarchical_level(responsibilities: List[str]) -> dict:
381
  level5_code = gpt_call("Identify fifth-level occupational group", user_prompt5).strip()
382
  # Handle the case where the LLM might return just the code part
383
  level5_code = code_sanitize(level5_code, list5_output)
 
384
  result.update(get_level_ESCO_info(level5_df, level5_code, 'Level_5'))
385
 
386
  ## Et voila!!
@@ -909,10 +910,10 @@ def process_pdf(file):
909
  "qualification": qualification,
910
  "ccoq_levels": {f"Level_{i}_CCOG_{field}": occ_group.get(f"Level_{i}_CCOG_{field}")
911
  for i in range(1, 5) for field in ["code", "name", "desc"]},
912
- "interview_questions": build_interview(responsibilities, skills),
913
  "skills": joined_skills,
914
  "esco_levels": {f"Level_{i}_ESCO_{field}": esco_occ.get(f"Level_{i}_ESCO_{field}")
915
- for i in range(1, 5) for field in ["code", "name", "desc"]},
916
  "esco_skills": esco_skills,
917
  "processing_time": time.strftime("%Y-%m-%d %H:%M:%S")
918
  }
@@ -922,6 +923,7 @@ def process_pdf(file):
922
  json.dump(result_data, f, indent=2)
923
  json_path = f.name
924
  log_debug(f"Results saved to temporary JSON file: {json_path}")
 
925
 
926
  # Format outputs for display through html cards
927
  formatted_skills = format_skill_cards(joined_skills)
@@ -1009,7 +1011,7 @@ def generate_word_document(json_path: Optional[str]) -> str:
1009
 
1010
  # Default values for all fields
1011
  default_values = {
1012
- "file": "Unknown file",
1013
  "responsibilities": "No responsibilities extracted.",
1014
  "classified_job_family": "No job family identified.",
1015
  "qualification": ["No qualification information available."],
@@ -1021,7 +1023,7 @@ def generate_word_document(json_path: Optional[str]) -> str:
1021
  # Safely build the result dictionary with fallbacks
1022
  try:
1023
  result = {
1024
- "file": data.get("file", default_values["file"]),
1025
  "responsibilities": data.get("responsibilities", default_values["responsibilities"]),
1026
  "classified_job_family": data.get("job_family", default_values["classified_job_family"]),
1027
  "qualification": data.get("qualification", default_values["qualification"]),
@@ -1050,53 +1052,85 @@ def generate_word_document(json_path: Optional[str]) -> str:
1050
  doc.add_paragraph(f"Generated on {time.strftime('%Y-%m-%d %H:%M:%S')}")
1051
  doc.add_paragraph("International Organization for Migration", style="Intense Quote")
1052
 
 
 
 
1053
 
1054
 
1055
- doc.add_heading('Position Description Analysis Report', level=1)
1056
  doc.add_paragraph(f"File: {result['file']}")
1057
- doc.add_paragraph(f"Job Family: {result['classified_job_family']}")
1058
 
1059
- doc.add_heading('Responsibilities', level=2)
1060
  doc.add_paragraph(result['responsibilities'])
1061
 
 
 
 
 
 
 
 
 
 
 
 
1062
  # Skills (Extracted)
1063
  doc.add_heading('Skills (Extracted)', level=2)
1064
  skills_list = result['skills']
1065
  if isinstance(skills_list, dict):
1066
  skills_list = skills_list.get("skills", [])
1067
  for skill in skills_list:
1068
- doc.add_paragraph(f"{skill.get('skill_name', 'Unnamed Skill')} - {skill.get('description', '')}")
1069
-
1070
- doc.add_heading('Qualifications', level=2)
 
 
 
 
 
 
 
1071
  for item in result['qualification']:
1072
  doc.add_paragraph(item, style='List Bullet')
1073
 
1074
- doc.add_heading('Interview Questions', level=2)
1075
  for item in result['interview']:
1076
  doc.add_paragraph(item, style='List Bullet')
 
 
 
 
 
 
 
 
 
1077
 
1078
  if result["esco_levels"]:
1079
- doc.add_heading('ESCO Levels', level=2)
1080
  for key, value in result["esco_levels"].items():
1081
- doc.add_paragraph(f"{key}: {value}")
 
 
 
1082
 
1083
- if result["ccog_levels"]:
1084
- doc.add_heading('C-COG Levels', level=2)
1085
- for key, value in result["ccog_levels"].items():
1086
- doc.add_paragraph(f"{key}: {value}")
1087
 
1088
 
1089
  # Skills (ESCO)
1090
- doc.add_heading('Skills (ESCO)', level=2)
1091
  esco_skills_list = result['skills_esco']
1092
  if isinstance(esco_skills_list, dict):
1093
  esco_skills_list = esco_skills_list.get("skills", [])
1094
  for skill in esco_skills_list:
1095
- doc.add_paragraph(f"{skill.get('skill_name', 'Unnamed Skill')} - {skill.get('description', '')}")
 
 
 
 
 
 
 
 
1096
 
1097
- # Footer
1098
- doc.add_paragraph()
1099
- doc.add_paragraph("DISCLAIMER: This document contains material generated by artificial intelligence technology. While efforts have been made to ensure accuracy, please be aware that AI-generated content may not always fully represent the intent or expertise of human-authored material and may contain errors or inaccuracies. An AI model might generate content that sounds plausible but that is either factually incorrect or unrelated to the given context. These unexpected outcomes, also called AI hallucinations, can stem from biases, under-performing information retrieval, lack of real-world understanding, or limitations in training data.", style='Footer')
1100
 
1101
  except Exception as e:
1102
  log_debug(f"Error generating document content: {str(e)}")
@@ -1108,7 +1142,7 @@ def generate_word_document(json_path: Optional[str]) -> str:
1108
  # FILE SAVING WITH MULTIPLE FALLBACKS
1109
  try:
1110
  # Generate appropriate filename
1111
- base_name = os.path.splitext(os.path.basename(result['file']))[0]
1112
  if base_name:
1113
  clean_name = re.sub(r'[^\w\-]', '_', base_name)[:50] # Sanitize and truncate
1114
  output_filename = f"{clean_name}_analysis_{time.strftime('%Y%m%d')}.docx"
 
381
  level5_code = gpt_call("Identify fifth-level occupational group", user_prompt5).strip()
382
  # Handle the case where the LLM might return just the code part
383
  level5_code = code_sanitize(level5_code, list5_output)
384
+ log_debug(f"Level 5 ESCO code: {level5_code}")
385
  result.update(get_level_ESCO_info(level5_df, level5_code, 'Level_5'))
386
 
387
  ## Et voila!!
 
910
  "qualification": qualification,
911
  "ccoq_levels": {f"Level_{i}_CCOG_{field}": occ_group.get(f"Level_{i}_CCOG_{field}")
912
  for i in range(1, 5) for field in ["code", "name", "desc"]},
913
+ "interview_questions": interview,
914
  "skills": joined_skills,
915
  "esco_levels": {f"Level_{i}_ESCO_{field}": esco_occ.get(f"Level_{i}_ESCO_{field}")
916
+ for i in range(1, 6) for field in ["code", "name", "desc"]},
917
  "esco_skills": esco_skills,
918
  "processing_time": time.strftime("%Y-%m-%d %H:%M:%S")
919
  }
 
923
  json.dump(result_data, f, indent=2)
924
  json_path = f.name
925
  log_debug(f"Results saved to temporary JSON file: {json_path}")
926
+ log_debug(f"Results data: {result_data}")
927
 
928
  # Format outputs for display through html cards
929
  formatted_skills = format_skill_cards(joined_skills)
 
1011
 
1012
  # Default values for all fields
1013
  default_values = {
1014
+ "file_name": "Unknown file",
1015
  "responsibilities": "No responsibilities extracted.",
1016
  "classified_job_family": "No job family identified.",
1017
  "qualification": ["No qualification information available."],
 
1023
  # Safely build the result dictionary with fallbacks
1024
  try:
1025
  result = {
1026
+ "file_name": data.get("file_name", default_values["file_name"]),
1027
  "responsibilities": data.get("responsibilities", default_values["responsibilities"]),
1028
  "classified_job_family": data.get("job_family", default_values["classified_job_family"]),
1029
  "qualification": data.get("qualification", default_values["qualification"]),
 
1052
  doc.add_paragraph(f"Generated on {time.strftime('%Y-%m-%d %H:%M:%S')}")
1053
  doc.add_paragraph("International Organization for Migration", style="Intense Quote")
1054
 
1055
+ doc.add_heading('AI DISCLAIMER', level=2)
1056
+ doc_para = doc.add_paragraph()
1057
+ doc_para.add_run('This document contains material generated by artificial intelligence technology. While efforts have been made to ensure accuracy, please be aware that AI-generated content may not always fully represent the intent or expertise of human-authored material and may contain errors or inaccuracies. An AI model might generate content that sounds plausible but that is either factually incorrect or unrelated to the given context. These unexpected outcomes, also called AI hallucinations, can stem from biases, under-performing information retrieval, lack of real-world understanding, or limitations in training dat
1058
 
1059
 
1060
+ doc.add_heading('Input Information', level=2)
1061
  doc.add_paragraph(f"File: {result['file']}")
 
1062
 
 
1063
  doc.add_paragraph(result['responsibilities'])
1064
 
1065
+
1066
+ doc.add_heading('Job Family Classification', level=2)
1067
+ doc.add_paragraph(f" {result['classified_job_family']}")
1068
+
1069
+ # Helper function to add a bold label with regular value
1070
+ def add_skill_detail(paragraph_text, value):
1071
+ para = doc.add_paragraph()
1072
+ para.add_run(paragraph_text).bold = True
1073
+ para.add_run(f" {value}")
1074
+
1075
+
1076
  # Skills (Extracted)
1077
  doc.add_heading('Skills (Extracted)', level=2)
1078
  skills_list = result['skills']
1079
  if isinstance(skills_list, dict):
1080
  skills_list = skills_list.get("skills", [])
1081
  for skill in skills_list:
1082
+ doc.add_paragraph(f"{skill.get('skill_name', 'Unnamed Skill')}", style='List Bullet')
1083
+
1084
+ add_skill_detail("Importance:", skill.get('importance', 'N/A'))
1085
+ add_skill_detail("Type:", skill.get('type', 'N/A'))
1086
+ add_skill_detail("Proficiency Level:", skill.get('proficiency_level', 'N/A'))
1087
+ add_skill_detail("Distinctive Elements:", skill.get('distinctive_elements', 'N/A'))
1088
+ add_skill_detail("Resume Signals:", skill.get('resume_signals', 'N/A'))
1089
+ add_skill_detail("Assessment Method:", skill.get('assessment_method', 'N/A'))
1090
+
1091
+ doc.add_heading('Suggested Qualifications', level=2)
1092
  for item in result['qualification']:
1093
  doc.add_paragraph(item, style='List Bullet')
1094
 
1095
+ doc.add_heading('Suggested Interview Questions', level=2)
1096
  for item in result['interview']:
1097
  doc.add_paragraph(item, style='List Bullet')
1098
+
1099
+
1100
+ if result["ccog_levels"]:
1101
+ doc.add_heading('UN Common Classification of Occupational Groups', level=2)
1102
+ for key, value in result["ccog_levels"].items():
1103
+ paragraph = doc.add_paragraph()
1104
+ run = paragraph.add_run(f"{key}: ")
1105
+ run.bold = True
1106
+ paragraph.add_run(str(value))
1107
 
1108
  if result["esco_levels"]:
1109
+ doc.add_heading('ESCO Framework Occupational Groups', level=2)
1110
  for key, value in result["esco_levels"].items():
1111
+ paragraph = doc.add_paragraph()
1112
+ run = paragraph.add_run(f"{key}: ")
1113
+ run.bold = True
1114
+ paragraph.add_run(str(value))
1115
 
 
 
 
 
1116
 
1117
 
1118
  # Skills (ESCO)
1119
+ doc.add_heading('Mapped Skills (ESCO)', level=2)
1120
  esco_skills_list = result['skills_esco']
1121
  if isinstance(esco_skills_list, dict):
1122
  esco_skills_list = esco_skills_list.get("skills", [])
1123
  for skill in esco_skills_list:
1124
+ doc.add_paragraph(f"{skill.get('skill_name', 'Unnamed Skill')}", style='List Bullet')
1125
+
1126
+ add_skill_detail("Importance:", skill.get('importance', 'N/A'))
1127
+ add_skill_detail("Type:", skill.get('type', 'N/A'))
1128
+ add_skill_detail("Proficiency Level:", skill.get('proficiency_level', 'N/A'))
1129
+ add_skill_detail("Distinctive Elements:", skill.get('distinctive_elements', 'N/A'))
1130
+ add_skill_detail("Resume Signals:", skill.get('resume_signals', 'N/A'))
1131
+ add_skill_detail("Assessment Method:", skill.get('assessment_method', 'N/A'))
1132
+
1133
 
 
 
 
1134
 
1135
  except Exception as e:
1136
  log_debug(f"Error generating document content: {str(e)}")
 
1142
  # FILE SAVING WITH MULTIPLE FALLBACKS
1143
  try:
1144
  # Generate appropriate filename
1145
+ base_name = os.path.splitext(os.path.basename(result['file_name']))[0]
1146
  if base_name:
1147
  clean_name = re.sub(r'[^\w\-]', '_', base_name)[:50] # Sanitize and truncate
1148
  output_filename = f"{clean_name}_analysis_{time.strftime('%Y%m%d')}.docx"