edouardlgp committed on
Commit
92adc42
·
verified ·
1 Parent(s): 89c7d5b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -13
app.py CHANGED
@@ -33,10 +33,6 @@ def log_debug(message):
33
  # Initialize debug logging
34
  log_debug("Application starting...")
35
 
36
-
37
-
38
-
39
-
40
  # Load environment variables
41
  load_dotenv()
42
 
@@ -46,7 +42,8 @@ logging.getLogger('pdfminer').setLevel(logging.ERROR)
46
  # Suppress specific warnings
47
  warnings.filterwarnings("ignore", category=UserWarning, message="CropBox.*")
48
 
49
- # Global DataFrame initializations
 
50
  try:
51
  job_families_df = pd.read_csv("job_families1.csv", on_bad_lines='skip')
52
  except Exception as e:
@@ -84,7 +81,7 @@ except Exception as e:
84
  esco_skill_map_df = pd.DataFrame() # Fallback to an empty DataFrame or handle the error appropriately
85
 
86
 
87
- # Initialize OpenAI client
88
  def initialize_openai_client():
89
  try:
90
  client = openai.AzureOpenAI(
@@ -111,7 +108,8 @@ def gpt_call(system_prompt: str, user_prompt: str) -> str:
111
  return response.choices[0].message.content.strip()
112
  except Exception as e:
113
  return f"ERROR: {e}"
114
-
 
115
  def extract_text_from_pdf(pdf_path: str) -> str:
116
  text = ""
117
  with pdfplumber.open(pdf_path) as pdf:
@@ -127,6 +125,7 @@ def extract_text_from_pdf(pdf_path: str) -> str:
127
  text += "\n"
128
  return text
129
 
 
130
  def extract_section_from_pdf(full_text: str, section_title: str) -> str:
131
  user_prompt = f"""
132
  Carefully evaluate the provided position description (PD) document and extract the content of the section titled "{section_title}" from the following text.
@@ -142,9 +141,6 @@ def extract_section_from_pdf(full_text: str, section_title: str) -> str:
142
 
143
  def classify_job_family(responsibilities: List[str]) -> str:
144
 
145
-
146
-
147
-
148
  job_family_list = "\n".join(f"- {row['Job_family']}: {row['Job_subfamily']}" for _, row in job_families_df.iterrows())
149
  user_prompt = f"""
150
  Here is a list of job responsibilities:
@@ -183,7 +179,6 @@ def code_sanitize(input_string, valid_codes):
183
 
184
  def classify_occupational_group_by_level(responsibilities: List[str]) -> dict:
185
 
186
-
187
  result = {}
188
  try:
189
  for level in range(1, 5):
@@ -616,6 +611,7 @@ def _extract_json(raw: str) -> str:
616
  json_text = json_text.strip()
617
  return json_text
618
 
 
619
  from concurrent.futures import ThreadPoolExecutor
620
 
621
  def process_pdf(file):
@@ -758,7 +754,7 @@ def process_pdf(file):
758
  {},
759
  error_message
760
  )
761
-
762
  from docx import Document
763
 
764
  def generate_word_document(result):
@@ -840,7 +836,8 @@ def generate_word_document(result):
840
 
841
 
842
 
843
-
 
844
  title="AI-powered tool to review Job Position Description",
845
  css="""
846
  @import url('https://fonts.googleapis.com/css2?family=Lato:wght@400;700&display=swap');
 
33
  # Initialize debug logging
34
  log_debug("Application starting...")
35
 
 
 
 
 
36
  # Load environment variables
37
  load_dotenv()
38
 
 
42
  # Suppress specific warnings
43
  warnings.filterwarnings("ignore", category=UserWarning, message="CropBox.*")
44
 
45
+
46
+ # ================= DataFrame initializations =================
47
  try:
48
  job_families_df = pd.read_csv("job_families1.csv", on_bad_lines='skip')
49
  except Exception as e:
 
81
  esco_skill_map_df = pd.DataFrame() # Fallback to an empty DataFrame or handle the error appropriately
82
 
83
 
84
+ # ================= LLM API =================
85
  def initialize_openai_client():
86
  try:
87
  client = openai.AzureOpenAI(
 
108
  return response.choices[0].message.content.strip()
109
  except Exception as e:
110
  return f"ERROR: {e}"
111
+
112
+ # ================= Extract text =================
113
  def extract_text_from_pdf(pdf_path: str) -> str:
114
  text = ""
115
  with pdfplumber.open(pdf_path) as pdf:
 
125
  text += "\n"
126
  return text
127
 
128
+ # ================= AI Functions =================
129
  def extract_section_from_pdf(full_text: str, section_title: str) -> str:
130
  user_prompt = f"""
131
  Carefully evaluate the provided position description (PD) document and extract the content of the section titled "{section_title}" from the following text.
 
141
 
142
  def classify_job_family(responsibilities: List[str]) -> str:
143
 
 
 
 
144
  job_family_list = "\n".join(f"- {row['Job_family']}: {row['Job_subfamily']}" for _, row in job_families_df.iterrows())
145
  user_prompt = f"""
146
  Here is a list of job responsibilities:
 
179
 
180
  def classify_occupational_group_by_level(responsibilities: List[str]) -> dict:
181
 
 
182
  result = {}
183
  try:
184
  for level in range(1, 5):
 
611
  json_text = json_text.strip()
612
  return json_text
613
 
614
+ # ================= Process Analysis =================
615
  from concurrent.futures import ThreadPoolExecutor
616
 
617
  def process_pdf(file):
 
754
  {},
755
  error_message
756
  )
757
+ # ================= Build Word Report =================
758
  from docx import Document
759
 
760
  def generate_word_document(result):
 
836
 
837
 
838
 
839
+ # ================= GRADIO INTERFACE =================
840
+ with gr.Blocks(
841
  title="AI-powered tool to review Job Position Description",
842
  css="""
843
  @import url('https://fonts.googleapis.com/css2?family=Lato:wght@400;700&display=swap');