Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -33,10 +33,6 @@ def log_debug(message):
|
|
| 33 |
# Initialize debug logging
|
| 34 |
log_debug("Application starting...")
|
| 35 |
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
# Load environment variables
|
| 41 |
load_dotenv()
|
| 42 |
|
|
@@ -46,7 +42,8 @@ logging.getLogger('pdfminer').setLevel(logging.ERROR)
|
|
| 46 |
# Suppress specific warnings
|
| 47 |
warnings.filterwarnings("ignore", category=UserWarning, message="CropBox.*")
|
| 48 |
|
| 49 |
-
|
|
|
|
| 50 |
try:
|
| 51 |
job_families_df = pd.read_csv("job_families1.csv", on_bad_lines='skip')
|
| 52 |
except Exception as e:
|
|
@@ -84,7 +81,7 @@ except Exception as e:
|
|
| 84 |
esco_skill_map_df = pd.DataFrame() # Fallback to an empty DataFrame or handle the error appropriately
|
| 85 |
|
| 86 |
|
| 87 |
-
#
|
| 88 |
def initialize_openai_client():
|
| 89 |
try:
|
| 90 |
client = openai.AzureOpenAI(
|
|
@@ -111,7 +108,8 @@ def gpt_call(system_prompt: str, user_prompt: str) -> str:
|
|
| 111 |
return response.choices[0].message.content.strip()
|
| 112 |
except Exception as e:
|
| 113 |
return f"ERROR: {e}"
|
| 114 |
-
|
|
|
|
| 115 |
def extract_text_from_pdf(pdf_path: str) -> str:
|
| 116 |
text = ""
|
| 117 |
with pdfplumber.open(pdf_path) as pdf:
|
|
@@ -127,6 +125,7 @@ def extract_text_from_pdf(pdf_path: str) -> str:
|
|
| 127 |
text += "\n"
|
| 128 |
return text
|
| 129 |
|
|
|
|
| 130 |
def extract_section_from_pdf(full_text: str, section_title: str) -> str:
|
| 131 |
user_prompt = f"""
|
| 132 |
Carefully evaluate the provided position description (PD) document and extract the content of the section titled "{section_title}" from the following text.
|
|
@@ -142,9 +141,6 @@ def extract_section_from_pdf(full_text: str, section_title: str) -> str:
|
|
| 142 |
|
| 143 |
def classify_job_family(responsibilities: List[str]) -> str:
|
| 144 |
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
job_family_list = "\n".join(f"- {row['Job_family']}: {row['Job_subfamily']}" for _, row in job_families_df.iterrows())
|
| 149 |
user_prompt = f"""
|
| 150 |
Here is a list of job responsibilities:
|
|
@@ -183,7 +179,6 @@ def code_sanitize(input_string, valid_codes):
|
|
| 183 |
|
| 184 |
def classify_occupational_group_by_level(responsibilities: List[str]) -> dict:
|
| 185 |
|
| 186 |
-
|
| 187 |
result = {}
|
| 188 |
try:
|
| 189 |
for level in range(1, 5):
|
|
@@ -616,6 +611,7 @@ def _extract_json(raw: str) -> str:
|
|
| 616 |
json_text = json_text.strip()
|
| 617 |
return json_text
|
| 618 |
|
|
|
|
| 619 |
from concurrent.futures import ThreadPoolExecutor
|
| 620 |
|
| 621 |
def process_pdf(file):
|
|
@@ -758,7 +754,7 @@ def process_pdf(file):
|
|
| 758 |
{},
|
| 759 |
error_message
|
| 760 |
)
|
| 761 |
-
|
| 762 |
from docx import Document
|
| 763 |
|
| 764 |
def generate_word_document(result):
|
|
@@ -840,7 +836,8 @@ def generate_word_document(result):
|
|
| 840 |
|
| 841 |
|
| 842 |
|
| 843 |
-
|
|
|
|
| 844 |
title="AI-powered tool to review Job Position Description",
|
| 845 |
css="""
|
| 846 |
@import url('https://fonts.googleapis.com/css2?family=Lato:wght@400;700&display=swap');
|
|
|
|
| 33 |
# Initialize debug logging
|
| 34 |
log_debug("Application starting...")
|
| 35 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
# Load environment variables
|
| 37 |
load_dotenv()
|
| 38 |
|
|
|
|
| 42 |
# Suppress specific warnings
|
| 43 |
warnings.filterwarnings("ignore", category=UserWarning, message="CropBox.*")
|
| 44 |
|
| 45 |
+
|
| 46 |
+
# ================= DataFrame initializations =================
|
| 47 |
try:
|
| 48 |
job_families_df = pd.read_csv("job_families1.csv", on_bad_lines='skip')
|
| 49 |
except Exception as e:
|
|
|
|
| 81 |
esco_skill_map_df = pd.DataFrame() # Fallback to an empty DataFrame or handle the error appropriately
|
| 82 |
|
| 83 |
|
| 84 |
+
# ================= LLM API =================
|
| 85 |
def initialize_openai_client():
|
| 86 |
try:
|
| 87 |
client = openai.AzureOpenAI(
|
|
|
|
| 108 |
return response.choices[0].message.content.strip()
|
| 109 |
except Exception as e:
|
| 110 |
return f"ERROR: {e}"
|
| 111 |
+
|
| 112 |
+
# ================= Extract text =================
|
| 113 |
def extract_text_from_pdf(pdf_path: str) -> str:
|
| 114 |
text = ""
|
| 115 |
with pdfplumber.open(pdf_path) as pdf:
|
|
|
|
| 125 |
text += "\n"
|
| 126 |
return text
|
| 127 |
|
| 128 |
+
# ================= AI Functions =================
|
| 129 |
def extract_section_from_pdf(full_text: str, section_title: str) -> str:
|
| 130 |
user_prompt = f"""
|
| 131 |
Carefully evaluate the provided position description (PD) document and extract the content of the section titled "{section_title}" from the following text.
|
|
|
|
| 141 |
|
| 142 |
def classify_job_family(responsibilities: List[str]) -> str:
|
| 143 |
|
|
|
|
|
|
|
|
|
|
| 144 |
job_family_list = "\n".join(f"- {row['Job_family']}: {row['Job_subfamily']}" for _, row in job_families_df.iterrows())
|
| 145 |
user_prompt = f"""
|
| 146 |
Here is a list of job responsibilities:
|
|
|
|
| 179 |
|
| 180 |
def classify_occupational_group_by_level(responsibilities: List[str]) -> dict:
|
| 181 |
|
|
|
|
| 182 |
result = {}
|
| 183 |
try:
|
| 184 |
for level in range(1, 5):
|
|
|
|
| 611 |
json_text = json_text.strip()
|
| 612 |
return json_text
|
| 613 |
|
| 614 |
+
# ================= Process Analysis =================
|
| 615 |
from concurrent.futures import ThreadPoolExecutor
|
| 616 |
|
| 617 |
def process_pdf(file):
|
|
|
|
| 754 |
{},
|
| 755 |
error_message
|
| 756 |
)
|
| 757 |
+
# ================= Build Word Report =================
|
| 758 |
from docx import Document
|
| 759 |
|
| 760 |
def generate_word_document(result):
|
|
|
|
| 836 |
|
| 837 |
|
| 838 |
|
| 839 |
+
# ================= GRADIO INTERFACE =================
|
| 840 |
+
with gr.Blocks(
|
| 841 |
title="AI-powered tool to review Job Position Description",
|
| 842 |
css="""
|
| 843 |
@import url('https://fonts.googleapis.com/css2?family=Lato:wght@400;700&display=swap');
|