Spaces:

edouardlgp
/

Job_Classification

Running

App Files Files Community

edouardlgp committed on May 11, 2025

Commit

92adc42

verified ·

1 Parent(s): 89c7d5b

Update app.py

Browse files

Files changed (1) hide show

app.py +10 -13

app.py CHANGED Viewed

@@ -33,10 +33,6 @@ def log_debug(message):
 # Initialize debug logging
 log_debug("Application starting...")
 # Load environment variables
 load_dotenv()
@@ -46,7 +42,8 @@ logging.getLogger('pdfminer').setLevel(logging.ERROR)
 # Suppress specific warnings
 warnings.filterwarnings("ignore", category=UserWarning, message="CropBox.*")
-# Global DataFrame initializations
 try:
     job_families_df = pd.read_csv("job_families1.csv", on_bad_lines='skip')
 except Exception as e:
@@ -84,7 +81,7 @@ except Exception as e:
     esco_skill_map_df = pd.DataFrame()  # Fallback to an empty DataFrame or handle the error appropriately
-# Initialize OpenAI client
 def initialize_openai_client():
     try:
         client = openai.AzureOpenAI(
@@ -111,7 +108,8 @@ def gpt_call(system_prompt: str, user_prompt: str) -> str:
         return response.choices[0].message.content.strip()
     except Exception as e:
         return f"ERROR: {e}"
 def extract_text_from_pdf(pdf_path: str) -> str:
     text = ""
     with pdfplumber.open(pdf_path) as pdf:
@@ -127,6 +125,7 @@ def extract_text_from_pdf(pdf_path: str) -> str:
                     text += "\n"
     return text
 def extract_section_from_pdf(full_text: str, section_title: str) -> str:
     user_prompt = f"""
     Carefully evaluate the provided position description (PD) document and extract the content of the section titled "{section_title}" from the following text.
@@ -142,9 +141,6 @@ def extract_section_from_pdf(full_text: str, section_title: str) -> str:
 def classify_job_family(responsibilities: List[str]) -> str:
     job_family_list = "\n".join(f"- {row['Job_family']}: {row['Job_subfamily']}" for _, row in job_families_df.iterrows())
     user_prompt = f"""
     Here is a list of job responsibilities:
@@ -183,7 +179,6 @@ def code_sanitize(input_string, valid_codes):
 def classify_occupational_group_by_level(responsibilities: List[str]) -> dict:
     result = {}
     try:
         for level in range(1, 5):
@@ -616,6 +611,7 @@ def _extract_json(raw: str) -> str:
     json_text = json_text.strip()
     return json_text
 from concurrent.futures import ThreadPoolExecutor
 def process_pdf(file):
@@ -758,7 +754,7 @@ def process_pdf(file):
             {},
             error_message
         )
 from docx import Document
 def generate_word_document(result):
@@ -840,7 +836,8 @@ def generate_word_document(result):
     title="AI-powered tool to review Job Position Description",
 css="""
     @import url('https://fonts.googleapis.com/css2?family=Lato:wght@400;700&display=swap');

 # Initialize debug logging
 log_debug("Application starting...")
 # Load environment variables
 load_dotenv()
 # Suppress specific warnings
 warnings.filterwarnings("ignore", category=UserWarning, message="CropBox.*")
+# ================= DataFrame initializations =================
 try:
     job_families_df = pd.read_csv("job_families1.csv", on_bad_lines='skip')
 except Exception as e:
     esco_skill_map_df = pd.DataFrame()  # Fallback to an empty DataFrame or handle the error appropriately
+# ================= LLM API =================
 def initialize_openai_client():
     try:
         client = openai.AzureOpenAI(
         return response.choices[0].message.content.strip()
     except Exception as e:
         return f"ERROR: {e}"
+# ================= Extract text =================
 def extract_text_from_pdf(pdf_path: str) -> str:
     text = ""
     with pdfplumber.open(pdf_path) as pdf:
                     text += "\n"
     return text
+# ================= AI Functions =================
 def extract_section_from_pdf(full_text: str, section_title: str) -> str:
     user_prompt = f"""
     Carefully evaluate the provided position description (PD) document and extract the content of the section titled "{section_title}" from the following text.
 def classify_job_family(responsibilities: List[str]) -> str:
     job_family_list = "\n".join(f"- {row['Job_family']}: {row['Job_subfamily']}" for _, row in job_families_df.iterrows())
     user_prompt = f"""
     Here is a list of job responsibilities:
 def classify_occupational_group_by_level(responsibilities: List[str]) -> dict:
     result = {}
     try:
         for level in range(1, 5):
     json_text = json_text.strip()
     return json_text
+# ================= Process Analysis =================
 from concurrent.futures import ThreadPoolExecutor
 def process_pdf(file):
             {},
             error_message
         )
+# ================= Build Word Report =================
 from docx import Document
 def generate_word_document(result):
+# ================= GRADIO INTERFACE =================
+with gr.Blocks(
     title="AI-powered tool to review Job Position Description",
 css="""
     @import url('https://fonts.googleapis.com/css2?family=Lato:wght@400;700&display=swap');