edouardlgp committed on
Commit
5d87b03
·
verified ·
1 Parent(s): 9ea8425

Update app.py: skip malformed CSV rows and fall back to an empty DataFrame when a reference CSV cannot be read

Browse files
Files changed (1) hide show
  1. app.py +39 -6
app.py CHANGED
@@ -78,8 +78,16 @@ def extract_section_from_pdf(full_text: str, section_title: str) -> str:
78
  """
79
  return gpt_call("You are an HR expert working for IOM.", user_prompt)
80
 
81
- def classify_job_family(responsibilities: List[str]) -> str:
82
- job_families_df = pd.read_csv("job_families1.csv")
 
 
 
 
 
 
 
 
83
  job_family_list = "\n".join(f"- {row['Job_family']}: {row['Job_subfamily']}" for _, row in job_families_df.iterrows())
84
  user_prompt = f"""
85
  Here is a list of job responsibilities:
@@ -117,7 +125,13 @@ def code_sanitize(input_string, valid_codes):
117
  return None
118
 
119
  def classify_occupational_group_by_level(responsibilities: List[str]) -> dict:
120
- occupational_groups_df = pd.read_csv("occupational_groups.csv")
 
 
 
 
 
 
121
  result = {}
122
  try:
123
  for level in range(1, 5):
@@ -147,13 +161,32 @@ def classify_occupational_group_by_level(responsibilities: List[str]) -> dict:
147
  return result
148
 
149
  def get_skills_info_esco(Level_5_code):
150
- esco_level5_df = pd.read_csv("occupations_en.csv", dtype={'code': str, 'iscoGroup': str})
 
 
 
 
 
 
151
  matches = esco_level5_df[esco_level5_df['code'] == Level_5_code]
152
  conceptUris = matches['conceptUri'].values.tolist()
153
- esco_skill_map_df = pd.read_csv("occupationSkillRelations_en.csv")
 
 
 
 
 
 
154
  skills = esco_skill_map_df[esco_skill_map_df['occupationUri'].isin(conceptUris)]
155
  skillUris = skills['skillUri'].values.tolist()
156
- esco_skill_df = pd.read_csv("skills_en.csv")
 
 
 
 
 
 
 
157
  thisskillslist = esco_skill_df[esco_skill_df['conceptUri'].isin(skillUris)]
158
  result = thisskillslist[['preferredLabel', 'conceptUri', 'description']].drop_duplicates()
159
  result = result.rename(columns={'preferredLabel': 'skill_name', 'description': 'skill_description', 'conceptUri': 'skill_code'})
 
78
  """
79
  return gpt_call("You are an HR expert working for IOM.", user_prompt)
80
 
81
+ def classify_job_family(responsibilities: List[str]) -> str:
82
+
83
+ try:
84
+ job_families_df = pd.read_csv("job_families1.csv", on_bad_lines='skip')
85
+ except Exception as e:
86
+ print(f"Error reading job_families1.csv: {e}")
87
+ job_families_df = pd.DataFrame() # Fallback to an empty DataFrame or handle the error appropriately
88
+
89
+
90
+
91
  job_family_list = "\n".join(f"- {row['Job_family']}: {row['Job_subfamily']}" for _, row in job_families_df.iterrows())
92
  user_prompt = f"""
93
  Here is a list of job responsibilities:
 
125
  return None
126
 
127
  def classify_occupational_group_by_level(responsibilities: List[str]) -> dict:
128
+
129
+ try:
130
+ occupational_groups_df = pd.read_csv("occupational_groups.csv", on_bad_lines='skip')
131
+ except Exception as e:
132
+ print(f"Error reading occupational_groups.csv: {e}")
133
+ occupational_groups_df = pd.DataFrame() # Fallback to an empty DataFrame or handle the error appropriately
134
+
135
  result = {}
136
  try:
137
  for level in range(1, 5):
 
161
  return result
162
 
163
  def get_skills_info_esco(Level_5_code):
164
+
165
+ try:
166
+ esco_level5_df = pd.read_csv("occupations_en.csv", on_bad_lines='skip', dtype={'code': str, 'iscoGroup': str})
167
+ except Exception as e:
168
+ print(f"Error reading occupations_en.csv: {e}")
169
+ esco_level5_df = pd.DataFrame() # Fallback to an empty DataFrame or handle the error appropriately
170
+
171
  matches = esco_level5_df[esco_level5_df['code'] == Level_5_code]
172
  conceptUris = matches['conceptUri'].values.tolist()
173
+
174
+ try:
175
+ esco_skill_map_df = pd.read_csv("occupationSkillRelations_en.csv", on_bad_lines='skip')
176
+ except Exception as e:
177
+ print(f"Error reading occupationSkillRelations_en.csv: {e}")
178
+ esco_skill_map_df = pd.DataFrame() # Fallback to an empty DataFrame or handle the error appropriately
179
+
180
  skills = esco_skill_map_df[esco_skill_map_df['occupationUri'].isin(conceptUris)]
181
  skillUris = skills['skillUri'].values.tolist()
182
+
183
+
184
+ try:
185
+ esco_skill_df = pd.read_csv("skills_en.csv", on_bad_lines='skip')
186
+ except Exception as e:
187
+ print(f"Error reading skills_en.csv: {e}")
188
+ esco_skill_df = pd.DataFrame() # Fallback to an empty DataFrame or handle the error appropriately
189
+
190
  thisskillslist = esco_skill_df[esco_skill_df['conceptUri'].isin(skillUris)]
191
  result = thisskillslist[['preferredLabel', 'conceptUri', 'description']].drop_duplicates()
192
  result = result.rename(columns={'preferredLabel': 'skill_name', 'description': 'skill_description', 'conceptUri': 'skill_code'})