edouardlgp committed on
Commit
b950432
·
verified ·
1 Parent(s): ec8a468

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +247 -83
app.py CHANGED
@@ -46,36 +46,42 @@ warnings.filterwarnings("ignore", category=UserWarning, message="CropBox.*")
46
  # ================= DataFrame initializations =================
47
  try:
48
  job_families_df = pd.read_csv("job_families1.csv", on_bad_lines='skip')
 
49
  except Exception as e:
50
- print(f"Error reading job_families1.csv: {e}")
51
  job_families_df = pd.DataFrame() # Fallback to an empty DataFrame or handle the error appropriately
52
 
53
  try:
54
  occupational_groups_df = pd.read_csv("occupational_groups.csv", on_bad_lines='skip')
 
55
  except Exception as e:
56
  log_debug(f"Error reading occupational_groups.csv: {e}")
57
  occupational_groups_df = pd.DataFrame() # Fallback to an empty DataFrame or handle the error appropriately
58
 
59
  try:
60
  esco_df = pd.read_csv("ISCOGroups_en.csv", on_bad_lines='skip', dtype={'code': str} ) # Force 'code' to be read as string
 
61
  except Exception as e:
62
  log_debug(f"Error reading ISCOGroups_en.csv: {e}")
63
  esco_df = pd.DataFrame() # Fallback to an empty DataFrame or handle the error appropriately
64
 
65
  try:
66
  esco_level5_df = pd.read_csv("occupations_en.csv", on_bad_lines='skip', dtype={'code': str, 'iscoGroup': str, } ) # Force 'code' to be read as string
 
67
  except Exception as e:
68
  log_debug(f"Error reading occupations_en.csv: {e}")
69
  esco_level5_df = pd.DataFrame() # Fallback to an empty DataFrame or handle the error appropriately
70
 
71
  try:
72
  esco_skill_df = pd.read_csv("skills_en.csv", on_bad_lines='skip')
 
73
  except Exception as e:
74
  log_debug(f"Error reading skills_en.csv: {e}")
75
  esco_skill_df = pd.DataFrame() # Fallback to an empty DataFrame or handle the error appropriately
76
 
77
  try:
78
  esco_skill_map_df = pd.read_csv("occupationSkillRelations_en.csv", on_bad_lines='skip')
 
79
  except Exception as e:
80
  log_debug(f"Error reading occupationSkillRelations_en.csv: {e}")
81
  esco_skill_map_df = pd.DataFrame() # Fallback to an empty DataFrame or handle the error appropriately
@@ -756,84 +762,231 @@ def process_pdf(file):
756
  )
757
  # ================= Build Word Report =================
758
  from docx import Document
759
-
760
- def generate_word_document(result):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
761
  doc = Document()
 
 
 
 
 
 
 
 
 
 
 
 
 
762
 
763
- # Add a title
764
- doc.add_heading('Job Description Analysis', level=1)
765
-
766
- # Add file name
767
- doc.add_heading('File Name', level=2)
768
- doc.add_paragraph(result["file"])
769
-
770
- # Add responsibilities
771
- doc.add_heading('Responsibilities', level=2)
772
- doc.add_paragraph(result["responsibilities"])
773
-
774
- # Add job family
775
- doc.add_heading('Classified Job Family', level=2)
776
- doc.add_paragraph(result["classified_job_family"])
777
-
778
- # Add qualifications
779
- doc.add_heading('Qualification', level=2)
780
- doc.add_paragraph("\n".join(result["qualification"]))
781
-
782
- # Add CCOG Levels
783
- doc.add_heading('CCOG Levels', level=2)
784
- for i in range(1, 5):
785
- for field in ["code", "name", "desc"]:
786
- key = f"Level_{i}_CCOG_{field}"
787
- if key in result:
788
- doc.add_paragraph(f"{key}: {result[key]}")
789
-
790
- # Add interview questions
791
- doc.add_heading('Interview Questions', level=2)
792
- doc.add_paragraph("\n".join(result["interview"]))
793
-
794
- # Add skills
795
- doc.add_heading('Skills', level=2)
796
- for skill in result["skills"]["skills"]:
797
- doc.add_paragraph(f"Skill Name: {skill['skill_name']}")
798
- doc.add_paragraph(f"Description: {skill['skill_description']}")
799
- doc.add_paragraph(f"Code: {skill['skill_code']}")
800
- doc.add_paragraph(f"Importance: {skill.get('importance', 'N/A')}")
801
- doc.add_paragraph(f"Type: {skill.get('type', 'N/A')}")
802
- doc.add_paragraph(f"Proficiency Level: {skill.get('proficiency_level', 'N/A')}")
803
- doc.add_paragraph(f"Distinctive Elements: {skill.get('distinctive_elements', 'N/A')}")
804
- doc.add_paragraph(f"Resume Signals: {skill.get('resume_signals', 'N/A')}")
805
- doc.add_paragraph(f"Assessment Method: {skill.get('assessment_method', 'N/A')}")
806
- doc.add_paragraph("") # Add an empty line for separation
807
-
808
- # Add ESCO Levels if available
809
- if "skills_esco" in result and result["skills_esco"]:
810
- doc.add_heading('ESCO Levels', level=2)
811
- for i in range(1, 6):
812
- for field in ["code", "name", "desc"]:
813
- key = f"Level_{i}_ESCO_{field}"
814
- if key in result:
815
- doc.add_paragraph(f"{key}: {result[key]}")
816
-
817
- # Add ESCO Skills
818
- doc.add_heading('ESCO Skills', level=2)
819
- for skill in result["skills_esco"]["skills"]:
820
- doc.add_paragraph(f"Skill Name: {skill['skill_name']}")
821
- doc.add_paragraph(f"Description: {skill['skill_description']}")
822
- doc.add_paragraph(f"Code: {skill['skill_code']}")
823
- doc.add_paragraph(f"Importance: {skill.get('importance', 'N/A')}")
824
- doc.add_paragraph(f"Type: {skill.get('type', 'N/A')}")
825
- doc.add_paragraph(f"Proficiency Level: {skill.get('proficiency_level', 'N/A')}")
826
- doc.add_paragraph(f"Distinctive Elements: {skill.get('distinctive_elements', 'N/A')}")
827
- doc.add_paragraph(f"Resume Signals: {skill.get('resume_signals', 'N/A')}")
828
- doc.add_paragraph(f"Assessment Method: {skill.get('assessment_method', 'N/A')}")
829
- doc.add_paragraph("") # Add an empty line for separation
830
-
831
- # Save the document to a temporary file
832
- temp_file_path = "job_description_analysis.docx"
833
- doc.save(temp_file_path)
834
-
835
- return temp_file_path
836
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
837
 
838
 
839
  # ================= GRADIO INTERFACE =================
@@ -1024,7 +1177,9 @@ label {
1024
 
1025
  with gr.Row():
1026
  with gr.Column():
1027
- file_input = gr.File(label="Upload a Post Description PDF file", file_types=[".pdf"])
 
 
1028
  submit_btn = gr.Button(
1029
  value="✨ Analyse Post Description",
1030
  variant="primary",
@@ -1040,19 +1195,19 @@ label {
1040
  with gr.Row():
1041
  with gr.Column():
1042
  gr.Markdown("### CCOG Levels")
1043
- ccoq_levels_output = gr.JSON(label="CCOG Levels")
1044
  with gr.Column():
1045
  gr.Markdown("### Skills")
1046
- skills_output = gr.JSON(label="Skills")
1047
 
1048
 
1049
  with gr.Row():
1050
  with gr.Column():
1051
  gr.Markdown("### ESCO Levels")
1052
- esco_levels_output = gr.JSON(label="ESCO Levels")
1053
  with gr.Column():
1054
  gr.Markdown("### ESCO Skills")
1055
- esco_skills_output = gr.JSON(label="ESCO Skills")
1056
 
1057
  with gr.Row():
1058
  with gr.Column():
@@ -1101,8 +1256,17 @@ label {
1101
 
1102
  download_btn.click(
1103
  fn=generate_word_document,
1104
- inputs=[file_name_output, responsibilities_output, job_family_output, qualification_output,
1105
- ccoq_levels_output, interview_output, skills_output, esco_levels_output, esco_skills_output],
 
 
 
 
 
 
 
 
 
1106
  outputs=gr.File(label="Download Word Document")
1107
  )
1108
 
 
# ================= DataFrame initializations =================
def _load_reference_csv(path, label, **read_csv_kwargs):
    """Read a reference CSV into a DataFrame, falling back to an empty frame.

    Args:
        path: CSV file to read.
        label: Short name used in the row-count log message.
        **read_csv_kwargs: Extra keyword arguments forwarded to
            ``pd.read_csv`` (for example ``dtype``).

    Returns:
        The loaded DataFrame, or an empty ``pd.DataFrame()`` when reading fails.
    """
    try:
        frame = pd.read_csv(path, on_bad_lines='skip', **read_csv_kwargs)
    except Exception as e:
        # Best-effort load: log the failure and fall back to an empty DataFrame.
        log_debug(f"Error reading {path}: {e}")
        return pd.DataFrame()
    # The row count is logged outside the try block on purpose: a mistake in
    # the log statement (such as a misspelled variable name) must never be
    # caught by the handler above and silently discard a frame that loaded fine.
    log_debug(f"Reading {len(frame)} {label}")
    return frame


job_families_df = _load_reference_csv("job_families1.csv", "job_families")

occupational_groups_df = _load_reference_csv("occupational_groups.csv", "occupational_groups")

# 'code' is forced to be read as a string.
esco_df = _load_reference_csv("ISCOGroups_en.csv", "esco groups", dtype={'code': str})

# 'code' and 'iscoGroup' are forced to be read as strings.
esco_level5_df = _load_reference_csv(
    "occupations_en.csv",
    "esco_level5",
    dtype={'code': str, 'iscoGroup': str},
)

esco_skill_df = _load_reference_csv("skills_en.csv", "esco_skill")

esco_skill_map_df = _load_reference_csv("occupationSkillRelations_en.csv", "esco_skill_map")
 
762
  )
763
  # ================= Build Word Report =================
764
  from docx import Document
765
+ import os
766
+ import re
767
+ import time
768
+ import tempfile
769
+ from typing import Dict, List, Union
770
+
771
def _report_parse_structured(value):
    """Return *value* as a dict/list when it is one or encodes one, else None.

    Strings are tried as JSON first and then as a Python literal, so both
    ``'{"a": 1}'`` and ``"{'a': 1}"`` are understood.
    """
    import ast
    import json

    if isinstance(value, (dict, list)):
        return value
    if not isinstance(value, str) or not value.strip():
        return None

    text = value.strip()
    for parser in (json.loads, ast.literal_eval):
        try:
            parsed = parser(text)
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            continue
        if isinstance(parsed, (dict, list)):
            return parsed
    return None


def _report_lines(value):
    """Turn a newline-separated string or a sequence into stripped, non-empty lines."""
    if value is None:
        return []
    if isinstance(value, str):
        items = value.split('\n')
    elif isinstance(value, (list, tuple)):
        items = value
    else:
        items = [value]
    return [str(item).strip() for item in items if item is not None and str(item).strip()]


def _report_skill_rows(value):
    """Extract the list of skill dicts from a list, a ``{"skills": [...]}`` dict, or text."""
    parsed = _report_parse_structured(value)
    if isinstance(parsed, dict):
        parsed = parsed.get("skills")
    if not isinstance(parsed, list):
        return []
    return [row for row in parsed if isinstance(row, dict)]


def _report_level_fields(levels, scheme, index):
    """Collect the code/name/desc values recorded for one classification level.

    The key layout ``Level_<n>_<SCHEME>_<field>`` is tried first; the
    ``Level_<SCHEME>_<n>_<field>`` layout is accepted as a fallback.
    """
    found = {}
    for field in ("code", "name", "desc"):
        for key in (f"Level_{index}_{scheme}_{field}", f"Level_{scheme}_{index}_{field}"):
            candidate = levels.get(key)
            if candidate is not None and str(candidate).strip():
                found[field] = candidate
                break
    return found


def generate_word_document(
    file_name: str,
    responsibilities: str,
    job_family: str,
    qualification: Union[str, List[str]],
    ccoq_levels: Union[Dict, str],
    interview: Union[str, List[str]],
    skills: Union[List[Dict], Dict, str],
    esco_levels: Union[Dict, str],
    esco_skills: Union[Dict, List[Dict], str],
) -> str:
    """
    Generate a Word report from the analysis results and return its path.

    Structured arguments are accepted either as Python objects or as their
    text form (JSON or Python-literal syntax), because the download handler
    is wired to text components and may deliver them as strings.

    Args:
        file_name: Original PDF filename; also used to derive the report name.
        responsibilities: Extracted responsibilities text.
        job_family: Identified job family.
        qualification: Required qualifications (newline-separated text or list).
        ccoq_levels: CCOG classification levels, keyed ``Level_<n>_CCOG_<field>``.
        interview: Generated interview questions (newline-separated text or list).
        skills: Required skills, as a list of dicts or ``{"skills": [...]}``.
        esco_levels: ESCO classification levels, keyed ``Level_<n>_ESCO_<field>``.
        esco_skills: ESCO mapped skills, as a list of dicts or ``{"skills": [...]}``.

    Returns:
        Path to the generated Word document. If the content cannot be built a
        short "partial report" is saved instead; if the report cannot be saved
        at all, the path of a small error document in the system temp
        directory is returned.
    """
    # Initialize document with metadata
    doc = Document()
    doc.core_properties.author = "IOM Talent Management System"
    doc.core_properties.title = "Position Description Analysis Report"

    # Normalise every input on its own, so one malformed field can no longer
    # wipe out all the others.
    file_label = file_name if file_name and isinstance(file_name, str) else "Unknown file"
    responsibilities_content = responsibilities if responsibilities else "No responsibilities extracted"
    job_family_label = job_family if job_family else "No job family identified"
    qualification_lines = _report_lines(qualification) or ["No qualification information available"]
    interview_questions = _report_lines(interview)
    skill_rows = _report_skill_rows(skills)
    esco_skill_rows = _report_skill_rows(esco_skills)

    ccog_map = _report_parse_structured(ccoq_levels)
    if not isinstance(ccog_map, dict):
        ccog_map = {}
    esco_map = _report_parse_structured(esco_levels)
    if not isinstance(esco_map, dict):
        esco_map = {}

    # ================= DOCUMENT CONTENT GENERATION =================
    try:
        def _styled_paragraph(text, style=None):
            # The style is assigned after the paragraph exists, so a style the
            # template lacks (or one of the wrong type) only costs formatting
            # instead of aborting the whole report.
            paragraph = doc.add_paragraph(text)
            if style:
                try:
                    paragraph.style = style
                except (KeyError, ValueError):
                    pass
            return paragraph

        def _add_note(text):
            # 'Subtle Emphasis' is applied to the run rather than the paragraph;
            # NOTE(review): it appears to be a character style in the default
            # python-docx template - confirm. Plain italics are the fallback.
            run = doc.add_paragraph().add_run(text)
            try:
                run.style = 'Subtle Emphasis'
            except (KeyError, ValueError):
                run.italic = True

        def _apply_table_style(table, style_name):
            try:
                table.style = style_name
            except (KeyError, ValueError):
                pass

        # Document header
        doc.add_heading('Job Description Analysis Report', level=0)
        doc.add_paragraph(f"Generated on {time.strftime('%Y-%m-%d %H:%M:%S')}")
        _styled_paragraph("International Organization for Migration", "Intense Quote")

        # Metadata table
        meta_table = doc.add_table(rows=1, cols=2)
        _apply_table_style(meta_table, 'Light Shading Accent 1')
        hdr_cells = meta_table.rows[0].cells
        hdr_cells[0].text = 'Field'
        hdr_cells[1].text = 'Value'

        def _add_table_row(table, field, value):
            row = table.add_row().cells
            row[0].text = field
            row[1].text = str(value or "Not available")

        _add_table_row(meta_table, "File Name", file_label)
        _add_table_row(meta_table, "Job Family", job_family_label)

        def _add_section(heading, content, level=2):
            doc.add_heading(heading, level=level)
            if not content:
                _add_note("No information available")
                return

            if isinstance(content, (list, tuple)):
                for item in content:
                    if item and str(item).strip():
                        _styled_paragraph(str(item).strip(), 'List Bullet' if level > 2 else None)
            elif isinstance(content, dict):
                for k, v in content.items():
                    if v is not None:
                        doc.add_paragraph(f"{k}: {v}")
            else:
                doc.add_paragraph(str(content))

        # Core sections
        _add_section("1. Responsibilities", responsibilities_content)
        _add_section("2. Qualifications", qualification_lines)

        def _add_skills_section(heading, rows):
            doc.add_heading(heading, level=2)
            if not rows:
                _add_note("No skills information available")
                return

            try:
                skills_table = doc.add_table(rows=1, cols=4)
                _apply_table_style(skills_table, 'Medium List 2 Accent 1')
                hdr = skills_table.rows[0].cells
                hdr[0].text = 'Skill'
                hdr[1].text = 'Description'
                hdr[2].text = 'Proficiency'
                hdr[3].text = 'Assessment'

                for skill in rows:
                    description = str(skill.get("skill_description", ""))
                    if len(description) > 100:
                        # Keep table cells compact.
                        description = description[:100] + "..."

                    row = skills_table.add_row().cells
                    row[0].text = str(skill.get("skill_name", "Unnamed skill"))
                    row[1].text = description
                    row[2].text = str(skill.get("proficiency_level", "Not specified"))
                    row[3].text = str(skill.get("assessment_method", "Not specified"))
            except Exception as e:
                # A broken skills table should not cost the rest of the report.
                _add_note(f"Could not display skills table: {str(e)}")

        _add_skills_section("3. Required Skills", skill_rows)
        _add_skills_section("4. ESCO Mapped Skills", esco_skill_rows)

        def _add_classification_section(heading, levels_map, scheme, levels=4):
            doc.add_heading(heading, level=2)
            found = False
            for i in range(1, levels + 1):
                fields = _report_level_fields(levels_map, scheme, i)
                if not fields:
                    continue
                found = True
                doc.add_heading(f"Level {i}", level=3)
                for field, label in (("code", "Code"), ("name", "Name"), ("desc", "Description")):
                    if field in fields:
                        doc.add_paragraph(f"{label}: {fields[field]}")

            if not found:
                _add_note("No classification information available")

        _add_classification_section("5. CCOG Classification", ccog_map, "CCOG")
        _add_classification_section("6. ESCO Classification", esco_map, "ESCO", levels=5)

        # Interview questions
        doc.add_heading("7. Suggested Interview Questions", level=2)
        if interview_questions:
            for number, question in enumerate(interview_questions, 1):
                paragraph = doc.add_paragraph(question)
                try:
                    # The list style numbers the items itself, so no manual
                    # prefix is added when it can be applied.
                    paragraph.style = 'List Number'
                except (KeyError, ValueError):
                    paragraph.text = f"{number}. {question}"
        else:
            _add_note("No interview questions generated")

        # Footer
        doc.add_paragraph()
        _styled_paragraph("Generated by IOM Talent Management AI Tool", 'Footer')

    except Exception as e:
        log_debug(f"Error generating document content: {str(e)}")
        # Fallback to simple error document
        doc = Document()
        doc.add_heading("Partial Report Generated", level=1)
        doc.add_paragraph(f"Some sections could not be generated due to: {str(e)}")

    # ================= FILE SAVING WITH MULTIPLE FALLBACKS =================
    try:
        # Generate appropriate filename
        if file_name and isinstance(file_name, str):
            base_name = os.path.splitext(os.path.basename(file_name))[0]
            clean_name = re.sub(r'[^\w\-]', '_', base_name)[:50]  # Sanitize and truncate
            output_filename = f"{clean_name}_analysis_{time.strftime('%Y%m%d')}.docx"
        else:
            output_filename = f"job_analysis_{time.strftime('%Y%m%d_%H%M%S')}.docx"

        # Try saving to reports directory first
        output_dir = "generated_reports"
        try:
            os.makedirs(output_dir, exist_ok=True)
            output_path = os.path.join(output_dir, output_filename)
            doc.save(output_path)
            return output_path
        except OSError as save_error:
            # Any OS-level failure (not only missing permissions) falls back
            # to the system temp directory.
            log_debug(f"Could not save report in {output_dir}: {save_error}")
            temp_path = os.path.join(tempfile.gettempdir(), output_filename)
            doc.save(temp_path)
            return temp_path

    except Exception as e:
        # Ultimate fallback with error document
        log_debug(f"Error saving report: {str(e)}")
        error_doc = Document()
        error_doc.add_heading("Error Generating Report", level=1)
        error_doc.add_paragraph(f"Could not save report due to: {str(e)}")
        fallback_path = os.path.join(tempfile.gettempdir(), f"error_report_{time.strftime('%Y%m%d_%H%M%S')}.docx")
        error_doc.save(fallback_path)
        return fallback_path
990
 
991
 
992
  # ================= GRADIO INTERFACE =================
 
1177
 
1178
  with gr.Row():
1179
  with gr.Column():
1180
+ file_input = gr.File(
1181
+ label="Upload a Post Description PDF file",
1182
+ file_types=[".pdf"])
1183
  submit_btn = gr.Button(
1184
  value="✨ Analyse Post Description",
1185
  variant="primary",
 
1195
  with gr.Row():
1196
  with gr.Column():
1197
  gr.Markdown("### CCOG Levels")
1198
+ ccoq_levels_output = gr.Textbox(label="CCOG Levels")
1199
  with gr.Column():
1200
  gr.Markdown("### Skills")
1201
+ skills_output = gr.Textbox(label="Skills")
1202
 
1203
 
1204
  with gr.Row():
1205
  with gr.Column():
1206
  gr.Markdown("### ESCO Levels")
1207
+ esco_levels_output = gr.Textbox(label="ESCO Levels")
1208
  with gr.Column():
1209
  gr.Markdown("### ESCO Skills")
1210
+ esco_skills_output = gr.Textbox(label="ESCO Skills")
1211
 
1212
  with gr.Row():
1213
  with gr.Column():
 
1256
 
1257
  download_btn.click(
1258
  fn=generate_word_document,
1259
+ inputs=[
1260
+ file_name_output,
1261
+ responsibilities_output,
1262
+ job_family_output,
1263
+ qualification_output,
1264
+ ccoq_levels_output,
1265
+ interview_output,
1266
+ skills_output,
1267
+ esco_levels_output,
1268
+ esco_skills_output
1269
+ ],
1270
  outputs=gr.File(label="Download Word Document")
1271
  )
1272