Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -788,6 +788,8 @@ def format_esco_card(esco_data):
|
|
| 788 |
return f"<div class='esco-container'>{card}</div>"
|
| 789 |
|
| 790 |
# ================= Process Analysis =================
|
|
|
|
|
|
|
| 791 |
from concurrent.futures import ThreadPoolExecutor
|
| 792 |
|
| 793 |
def process_pdf(file):
|
|
@@ -802,7 +804,8 @@ def process_pdf(file):
|
|
| 802 |
[],
|
| 803 |
{},
|
| 804 |
{},
|
| 805 |
-
"No file uploaded."
|
|
|
|
| 806 |
)
|
| 807 |
|
| 808 |
try:
|
|
@@ -820,19 +823,18 @@ def process_pdf(file):
|
|
| 820 |
[],
|
| 821 |
{},
|
| 822 |
{},
|
| 823 |
-
"No responsibilities section found."
|
|
|
|
| 824 |
)
|
| 825 |
|
| 826 |
# Use ThreadPoolExecutor to parallelize independent tasks
|
| 827 |
with ThreadPoolExecutor() as executor:
|
| 828 |
-
# Submit tasks to the executor
|
| 829 |
job_family_future = executor.submit(classify_job_family, responsibilities)
|
| 830 |
occ_group_future = executor.submit(classify_occupational_group_by_level, responsibilities)
|
| 831 |
esco_occ_future = executor.submit(classify_esco_by_hierarchical_level, responsibilities)
|
| 832 |
qualification_future = executor.submit(extract_qualification, responsibilities)
|
| 833 |
skills_future = executor.submit(extract_skills, responsibilities)
|
| 834 |
|
| 835 |
-
# Retrieve results from futures
|
| 836 |
job_family = job_family_future.result()
|
| 837 |
occ_group = occ_group_future.result()
|
| 838 |
esco_occ = esco_occ_future.result()
|
|
@@ -850,8 +852,6 @@ def process_pdf(file):
|
|
| 850 |
Level_5_code = esco_occ["Level_5_ESCO_code"]
|
| 851 |
skill_esco_extract = review_skills(Level_5_code)
|
| 852 |
skill_esco_map = map_proficiency_and_assessment(skill_esco_extract, responsibilities)
|
| 853 |
-
else:
|
| 854 |
-
log_debug(f"No Level 5 ESCO code found for {os.path.basename(file.name)}, skipping ESCO skills mapping")
|
| 855 |
|
| 856 |
time.sleep(6)
|
| 857 |
assessment_lookup = {item['skill_name']: item for item in skill_map}
|
|
@@ -870,66 +870,62 @@ def process_pdf(file):
|
|
| 870 |
for skill in skills
|
| 871 |
]
|
| 872 |
|
| 873 |
-
#
|
| 874 |
-
|
| 875 |
-
|
| 876 |
-
|
| 877 |
-
|
| 878 |
-
|
| 879 |
-
|
| 880 |
-
|
| 881 |
-
|
| 882 |
-
|
| 883 |
-
|
| 884 |
-
|
| 885 |
-
|
| 886 |
-
|
| 887 |
-
|
| 888 |
-
|
| 889 |
-
|
| 890 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 891 |
|
| 892 |
-
#
|
| 893 |
-
|
| 894 |
-
|
| 895 |
-
|
| 896 |
-
|
| 897 |
-
|
| 898 |
-
|
| 899 |
-
|
| 900 |
-
|
| 901 |
-
|
| 902 |
-
|
| 903 |
-
"skills": joined_skills_esco
|
| 904 |
-
}
|
| 905 |
-
else:
|
| 906 |
-
esco_levels = {f"Level_{i}_ESCO_{field}": None
|
| 907 |
-
for i in range(1, 6) for field in ["code", "name", "desc"]}
|
| 908 |
-
esco_skills = None
|
| 909 |
|
| 910 |
-
formatted_esco_levels = format_esco_card(esco_levels)
|
| 911 |
-
formatted_esco_skills = format_skill_cards(esco_skills)
|
| 912 |
-
|
| 913 |
-
debug_message = "Processing completed successfully."
|
| 914 |
return (
|
| 915 |
os.path.basename(file.name),
|
| 916 |
responsibilities,
|
| 917 |
job_family,
|
| 918 |
"\n".join(qualification),
|
| 919 |
-
#ccoq_levels,
|
| 920 |
formatted_ccog,
|
| 921 |
-
"\n".join(
|
| 922 |
-
#joined_skills,
|
| 923 |
formatted_skills,
|
| 924 |
-
# esco_levels,
|
| 925 |
formatted_esco_levels,
|
| 926 |
-
#esco_skills,
|
| 927 |
formatted_esco_skills,
|
| 928 |
-
|
|
|
|
| 929 |
)
|
| 930 |
|
| 931 |
except Exception as e:
|
| 932 |
error_message = f"Error processing PDF: {str(e)}"
|
|
|
|
|
|
|
| 933 |
return (
|
| 934 |
error_message,
|
| 935 |
"",
|
|
@@ -940,7 +936,8 @@ def process_pdf(file):
|
|
| 940 |
[],
|
| 941 |
{},
|
| 942 |
{},
|
| 943 |
-
error_message
|
|
|
|
| 944 |
)
|
| 945 |
# ================= Build Word Report =================
|
| 946 |
from docx import Document
|
|
@@ -1765,10 +1762,10 @@ progress::-webkit-progress-value {
|
|
| 1765 |
with gr.Row():
|
| 1766 |
with gr.Column():
|
| 1767 |
file_input = gr.File(
|
| 1768 |
-
label="Upload a Post Description PDF file",
|
| 1769 |
file_types=[".pdf"])
|
| 1770 |
submit_btn = gr.Button(
|
| 1771 |
-
value="✨ Analyse Post Description",
|
| 1772 |
variant="primary",
|
| 1773 |
elem_classes="btn-primary"
|
| 1774 |
)
|
|
@@ -1858,7 +1855,7 @@ progress::-webkit-progress-value {
|
|
| 1858 |
esco_levels_output,
|
| 1859 |
esco_skills_output
|
| 1860 |
],
|
| 1861 |
-
outputs=gr.File(label="Download Word
|
| 1862 |
)
|
| 1863 |
|
| 1864 |
if __name__ == "__main__":
|
|
|
|
| 788 |
return f"<div class='esco-container'>{card}</div>"
|
| 789 |
|
| 790 |
# ================= Process Analysis =================
|
| 791 |
+
import tempfile
|
| 792 |
+
import json
|
| 793 |
from concurrent.futures import ThreadPoolExecutor
|
| 794 |
|
| 795 |
def process_pdf(file):
|
|
|
|
| 804 |
[],
|
| 805 |
{},
|
| 806 |
{},
|
| 807 |
+
"No file uploaded.",
|
| 808 |
+
None # JSON path
|
| 809 |
)
|
| 810 |
|
| 811 |
try:
|
|
|
|
| 823 |
[],
|
| 824 |
{},
|
| 825 |
{},
|
| 826 |
+
"No responsibilities section found.",
|
| 827 |
+
None # JSON path
|
| 828 |
)
|
| 829 |
|
| 830 |
# Use ThreadPoolExecutor to parallelize independent tasks
|
| 831 |
with ThreadPoolExecutor() as executor:
|
|
|
|
| 832 |
job_family_future = executor.submit(classify_job_family, responsibilities)
|
| 833 |
occ_group_future = executor.submit(classify_occupational_group_by_level, responsibilities)
|
| 834 |
esco_occ_future = executor.submit(classify_esco_by_hierarchical_level, responsibilities)
|
| 835 |
qualification_future = executor.submit(extract_qualification, responsibilities)
|
| 836 |
skills_future = executor.submit(extract_skills, responsibilities)
|
| 837 |
|
|
|
|
| 838 |
job_family = job_family_future.result()
|
| 839 |
occ_group = occ_group_future.result()
|
| 840 |
esco_occ = esco_occ_future.result()
|
|
|
|
| 852 |
Level_5_code = esco_occ["Level_5_ESCO_code"]
|
| 853 |
skill_esco_extract = review_skills(Level_5_code)
|
| 854 |
skill_esco_map = map_proficiency_and_assessment(skill_esco_extract, responsibilities)
|
|
|
|
|
|
|
| 855 |
|
| 856 |
time.sleep(6)
|
| 857 |
assessment_lookup = {item['skill_name']: item for item in skill_map}
|
|
|
|
| 870 |
for skill in skills
|
| 871 |
]
|
| 872 |
|
| 873 |
+
# Prepare all data for JSON output
|
| 874 |
+
result_data = {
|
| 875 |
+
"file_name": os.path.basename(file.name),
|
| 876 |
+
"responsibilities": responsibilities,
|
| 877 |
+
"job_family": job_family,
|
| 878 |
+
"qualification": qualification,
|
| 879 |
+
"ccoq_levels": {f"Level_{i}_CCOG_{field}": occ_group.get(f"Level_{i}_CCOG_{field}")
|
| 880 |
+
for i in range(1, 5) for field in ["code", "name", "desc"]},
|
| 881 |
+
"interview_questions": build_interview(responsibilities, skills),
|
| 882 |
+
"skills": joined_skills,
|
| 883 |
+
"esco_levels": {f"Level_{i}_ESCO_{field}": esco_occ.get(f"Level_{i}_ESCO_{field}")
|
| 884 |
+
for i in range(1, 6) for field in ["code", "name", "desc"]},
|
| 885 |
+
"esco_skills": {
|
| 886 |
+
"skills": [
|
| 887 |
+
{
|
| 888 |
+
"skill_name": skill["skill_name"],
|
| 889 |
+
"skill_description": skill["skill_description"],
|
| 890 |
+
"skill_code": skill["skill_code"],
|
| 891 |
+
**assessment_esco_lookup.get(skill["skill_name"], {})
|
| 892 |
+
}
|
| 893 |
+
for skill in (skill_esco_extract if has_esco else [])
|
| 894 |
+
]
|
| 895 |
+
},
|
| 896 |
+
"processing_time": time.strftime("%Y-%m-%d %H:%M:%S")
|
| 897 |
+
}
|
| 898 |
|
| 899 |
+
# Save to temporary JSON file
|
| 900 |
+
with tempfile.NamedTemporaryFile(suffix=".json", delete=False, mode='w') as f:
|
| 901 |
+
json.dump(result_data, f, indent=2)
|
| 902 |
+
json_path = f.name
|
| 903 |
+
log_debug(f"Results saved to temporary JSON file: {json_path}")
|
| 904 |
+
|
| 905 |
+
# Format outputs for display
|
| 906 |
+
formatted_skills = format_skill_cards(joined_skills)
|
| 907 |
+
formatted_ccog = format_ccog_card(result_data['ccoq_levels'])
|
| 908 |
+
formatted_esco_levels = format_esco_card(result_data['esco_levels'])
|
| 909 |
+
formatted_esco_skills = format_skill_cards(result_data['esco_skills'])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 910 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 911 |
return (
|
| 912 |
os.path.basename(file.name),
|
| 913 |
responsibilities,
|
| 914 |
job_family,
|
| 915 |
"\n".join(qualification),
|
|
|
|
| 916 |
formatted_ccog,
|
| 917 |
+
"\n".join(result_data['interview_questions']),
|
|
|
|
| 918 |
formatted_skills,
|
|
|
|
| 919 |
formatted_esco_levels,
|
|
|
|
| 920 |
formatted_esco_skills,
|
| 921 |
+
"Processing completed successfully." if DEBUG else None,
|
| 922 |
+
json_path # Return path to JSON file
|
| 923 |
)
|
| 924 |
|
| 925 |
except Exception as e:
|
| 926 |
error_message = f"Error processing PDF: {str(e)}"
|
| 927 |
+
log_debug(error_message)
|
| 928 |
+
traceback.print_exc()
|
| 929 |
return (
|
| 930 |
error_message,
|
| 931 |
"",
|
|
|
|
| 936 |
[],
|
| 937 |
{},
|
| 938 |
{},
|
| 939 |
+
error_message,
|
| 940 |
+
None # No JSON path on error
|
| 941 |
)
|
| 942 |
# ================= Build Word Report =================
|
| 943 |
from docx import Document
|
|
|
|
| 1762 |
with gr.Row():
|
| 1763 |
with gr.Column():
|
| 1764 |
file_input = gr.File(
|
| 1765 |
+
label="Upload a Post Description PDF file - not a scanned file!!!",
|
| 1766 |
file_types=[".pdf"])
|
| 1767 |
submit_btn = gr.Button(
|
| 1768 |
+
value="✨ Analyse the Post Description - takes about 90 sec...",
|
| 1769 |
variant="primary",
|
| 1770 |
elem_classes="btn-primary"
|
| 1771 |
)
|
|
|
|
| 1855 |
esco_levels_output,
|
| 1856 |
esco_skills_output
|
| 1857 |
],
|
| 1858 |
+
outputs=gr.File(label="Download the corresponding Word report")
|
| 1859 |
)
|
| 1860 |
|
| 1861 |
if __name__ == "__main__":
|