edouardlgp committed on
Commit
970a84f
·
verified ·
1 Parent(s): 4c2a32a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -55
app.py CHANGED
@@ -788,6 +788,8 @@ def format_esco_card(esco_data):
788
  return f"<div class='esco-container'>{card}</div>"
789
 
790
  # ================= Process Analysis =================
 
 
791
  from concurrent.futures import ThreadPoolExecutor
792
 
793
  def process_pdf(file):
@@ -802,7 +804,8 @@ def process_pdf(file):
802
  [],
803
  {},
804
  {},
805
- "No file uploaded."
 
806
  )
807
 
808
  try:
@@ -820,19 +823,18 @@ def process_pdf(file):
820
  [],
821
  {},
822
  {},
823
- "No responsibilities section found."
 
824
  )
825
 
826
  # Use ThreadPoolExecutor to parallelize independent tasks
827
  with ThreadPoolExecutor() as executor:
828
- # Submit tasks to the executor
829
  job_family_future = executor.submit(classify_job_family, responsibilities)
830
  occ_group_future = executor.submit(classify_occupational_group_by_level, responsibilities)
831
  esco_occ_future = executor.submit(classify_esco_by_hierarchical_level, responsibilities)
832
  qualification_future = executor.submit(extract_qualification, responsibilities)
833
  skills_future = executor.submit(extract_skills, responsibilities)
834
 
835
- # Retrieve results from futures
836
  job_family = job_family_future.result()
837
  occ_group = occ_group_future.result()
838
  esco_occ = esco_occ_future.result()
@@ -850,8 +852,6 @@ def process_pdf(file):
850
  Level_5_code = esco_occ["Level_5_ESCO_code"]
851
  skill_esco_extract = review_skills(Level_5_code)
852
  skill_esco_map = map_proficiency_and_assessment(skill_esco_extract, responsibilities)
853
- else:
854
- log_debug(f"No Level 5 ESCO code found for {os.path.basename(file.name)}, skipping ESCO skills mapping")
855
 
856
  time.sleep(6)
857
  assessment_lookup = {item['skill_name']: item for item in skill_map}
@@ -870,66 +870,62 @@ def process_pdf(file):
870
  for skill in skills
871
  ]
872
 
873
- # Format skills before returning
874
- formatted_skills = format_skill_cards(joined_skills)
875
-
876
-
877
- joined_skills_esco = []
878
- if has_esco and skill_esco_extract:
879
- assessment_esco_lookup = {item['skill_name']: item for item in skill_esco_map}
880
- joined_skills_esco = [
881
- {
882
- "skill_name": skill["skill_name"],
883
- "skill_description": skill["skill_description"],
884
- "skill_code": skill["skill_code"],
885
- **assessment_esco_lookup.get(skill["skill_name"], {})
886
- }
887
- for skill in skill_esco_extract
888
- ]
889
-
890
- interview = build_interview(responsibilities, skills)
 
 
 
 
 
 
 
891
 
892
- # Prepare the results for each output component
893
- ccoq_levels = {f"Level_{i}_CCOG_{field}": occ_group.get(f"Level_{i}_CCOG_{field}")
894
- for i in range(1, 5) for field in ["code", "name", "desc"]}
895
- formatted_ccog = format_ccog_card(ccoq_levels)
896
-
897
- if has_esco:
898
- esco_levels = {f"Level_{i}_ESCO_{field}": esco_occ.get(f"Level_{i}_ESCO_{field}")
899
- for i in range(1, 6) for field in ["code", "name", "desc"]}
900
- esco_skills = {
901
- "file": os.path.basename(file.name),
902
- "classified_job_family": job_family,
903
- "skills": joined_skills_esco
904
- }
905
- else:
906
- esco_levels = {f"Level_{i}_ESCO_{field}": None
907
- for i in range(1, 6) for field in ["code", "name", "desc"]}
908
- esco_skills = None
909
 
910
- formatted_esco_levels = format_esco_card(esco_levels)
911
- formatted_esco_skills = format_skill_cards(esco_skills)
912
-
913
- debug_message = "Processing completed successfully."
914
  return (
915
  os.path.basename(file.name),
916
  responsibilities,
917
  job_family,
918
  "\n".join(qualification),
919
- #ccoq_levels,
920
  formatted_ccog,
921
- "\n".join(interview),
922
- #joined_skills,
923
  formatted_skills,
924
- # esco_levels,
925
  formatted_esco_levels,
926
- #esco_skills,
927
  formatted_esco_skills,
928
- debug_message if DEBUG else None
 
929
  )
930
 
931
  except Exception as e:
932
  error_message = f"Error processing PDF: {str(e)}"
 
 
933
  return (
934
  error_message,
935
  "",
@@ -940,7 +936,8 @@ def process_pdf(file):
940
  [],
941
  {},
942
  {},
943
- error_message
 
944
  )
945
  # ================= Build Word Report =================
946
  from docx import Document
@@ -1765,10 +1762,10 @@ progress::-webkit-progress-value {
1765
  with gr.Row():
1766
  with gr.Column():
1767
  file_input = gr.File(
1768
- label="Upload a Post Description PDF file",
1769
  file_types=[".pdf"])
1770
  submit_btn = gr.Button(
1771
- value="✨ Analyse Post Description",
1772
  variant="primary",
1773
  elem_classes="btn-primary"
1774
  )
@@ -1858,7 +1855,7 @@ progress::-webkit-progress-value {
1858
  esco_levels_output,
1859
  esco_skills_output
1860
  ],
1861
- outputs=gr.File(label="Download Word Document")
1862
  )
1863
 
1864
  if __name__ == "__main__":
 
788
  return f"<div class='esco-container'>{card}</div>"
789
 
790
  # ================= Process Analysis =================
791
+ import tempfile
792
+ import json
793
  from concurrent.futures import ThreadPoolExecutor
794
 
795
  def process_pdf(file):
 
804
  [],
805
  {},
806
  {},
807
+ "No file uploaded.",
808
+ None # JSON path
809
  )
810
 
811
  try:
 
823
  [],
824
  {},
825
  {},
826
+ "No responsibilities section found.",
827
+ None # JSON path
828
  )
829
 
830
  # Use ThreadPoolExecutor to parallelize independent tasks
831
  with ThreadPoolExecutor() as executor:
 
832
  job_family_future = executor.submit(classify_job_family, responsibilities)
833
  occ_group_future = executor.submit(classify_occupational_group_by_level, responsibilities)
834
  esco_occ_future = executor.submit(classify_esco_by_hierarchical_level, responsibilities)
835
  qualification_future = executor.submit(extract_qualification, responsibilities)
836
  skills_future = executor.submit(extract_skills, responsibilities)
837
 
 
838
  job_family = job_family_future.result()
839
  occ_group = occ_group_future.result()
840
  esco_occ = esco_occ_future.result()
 
852
  Level_5_code = esco_occ["Level_5_ESCO_code"]
853
  skill_esco_extract = review_skills(Level_5_code)
854
  skill_esco_map = map_proficiency_and_assessment(skill_esco_extract, responsibilities)
 
 
855
 
856
  time.sleep(6)
857
  assessment_lookup = {item['skill_name']: item for item in skill_map}
 
870
  for skill in skills
871
  ]
872
 
873
+ # Prepare all data for JSON output
874
+ result_data = {
875
+ "file_name": os.path.basename(file.name),
876
+ "responsibilities": responsibilities,
877
+ "job_family": job_family,
878
+ "qualification": qualification,
879
+ "ccoq_levels": {f"Level_{i}_CCOG_{field}": occ_group.get(f"Level_{i}_CCOG_{field}")
880
+ for i in range(1, 5) for field in ["code", "name", "desc"]},
881
+ "interview_questions": build_interview(responsibilities, skills),
882
+ "skills": joined_skills,
883
+ "esco_levels": {f"Level_{i}_ESCO_{field}": esco_occ.get(f"Level_{i}_ESCO_{field}")
884
+ for i in range(1, 6) for field in ["code", "name", "desc"]},
885
+ "esco_skills": {
886
+ "skills": [
887
+ {
888
+ "skill_name": skill["skill_name"],
889
+ "skill_description": skill["skill_description"],
890
+ "skill_code": skill["skill_code"],
891
+ **assessment_esco_lookup.get(skill["skill_name"], {})
892
+ }
893
+ for skill in (skill_esco_extract if has_esco else [])
894
+ ]
895
+ },
896
+ "processing_time": time.strftime("%Y-%m-%d %H:%M:%S")
897
+ }
898
 
899
+ # Save to temporary JSON file
900
+ with tempfile.NamedTemporaryFile(suffix=".json", delete=False, mode='w') as f:
901
+ json.dump(result_data, f, indent=2)
902
+ json_path = f.name
903
+ log_debug(f"Results saved to temporary JSON file: {json_path}")
904
+
905
+ # Format outputs for display
906
+ formatted_skills = format_skill_cards(joined_skills)
907
+ formatted_ccog = format_ccog_card(result_data['ccoq_levels'])
908
+ formatted_esco_levels = format_esco_card(result_data['esco_levels'])
909
+ formatted_esco_skills = format_skill_cards(result_data['esco_skills'])
 
 
 
 
 
 
910
 
 
 
 
 
911
  return (
912
  os.path.basename(file.name),
913
  responsibilities,
914
  job_family,
915
  "\n".join(qualification),
 
916
  formatted_ccog,
917
+ "\n".join(result_data['interview_questions']),
 
918
  formatted_skills,
 
919
  formatted_esco_levels,
 
920
  formatted_esco_skills,
921
+ "Processing completed successfully." if DEBUG else None,
922
+ json_path # Return path to JSON file
923
  )
924
 
925
  except Exception as e:
926
  error_message = f"Error processing PDF: {str(e)}"
927
+ log_debug(error_message)
928
+ traceback.print_exc()
929
  return (
930
  error_message,
931
  "",
 
936
  [],
937
  {},
938
  {},
939
+ error_message,
940
+ None # No JSON path on error
941
  )
942
  # ================= Build Word Report =================
943
  from docx import Document
 
1762
  with gr.Row():
1763
  with gr.Column():
1764
  file_input = gr.File(
1765
+ label="Upload a Post Description PDF file - not a scanned file!!!",
1766
  file_types=[".pdf"])
1767
  submit_btn = gr.Button(
1768
+ value="✨ Analyse the Post Description - takes about 90 sec...",
1769
  variant="primary",
1770
  elem_classes="btn-primary"
1771
  )
 
1855
  esco_levels_output,
1856
  esco_skills_output
1857
  ],
1858
+ outputs=gr.File(label="Download the corresponding Word report")
1859
  )
1860
 
1861
  if __name__ == "__main__":