Krepselis committed on
Commit
1fda076
·
verified ·
1 Parent(s): 1475c0d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +111 -13
app.py CHANGED
@@ -19,6 +19,8 @@ import json
19
  from st_copy_to_clipboard import st_copy_to_clipboard # For copy to clipboard functionality
20
  from huggingface_hub import HfApi, hf_hub_download, HfFolder # For HF Datasets
21
  from huggingface_hub.utils import RepositoryNotFoundError, EntryNotFoundError # For HF Datasets
 
 
22
 
23
  # Load environment variables
24
  load_dotenv()
@@ -28,6 +30,31 @@ HF_FEEDBACK_DATASET_REPO_ID = os.getenv("HF_FEEDBACK_DATASET_REPO_ID", "Krepseli
28
  HF_FEEDBACK_FILENAME_IN_REPO = os.getenv("HF_FEEDBACK_FILENAME_IN_REPO", "feedback_log.parquet") # Default
29
  HF_TOKEN_FOR_DATASET = os.getenv("HF_FEEDBACK_DATASET_TOKEN") # Needs to be set for writing
30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  # --- Import your custom modules ---
32
  try:
33
  from cv_match import find_similar_jobs, generate_embedding_for_skills
@@ -53,12 +80,21 @@ st.set_page_config(
53
  # --- Constants and Normalization Data ---
54
  APP_DIR = os.path.dirname(os.path.abspath(__file__))
55
 
56
- SIMILARITY_THRESHOLD = 40.0 # Default similarity threshold
57
  MAX_JOBS_TO_DISPLAY_PER_PAGE = 5
58
- TOP_N_RESULTS_FROM_SEARCH = int(os.getenv('TOP_N_RESULTS_FOR_APP_QUERY', '20000')) # How many to fetch initially
59
  CANONICAL_LANGUAGES_FOR_FILTER = ["English", "Danish", "German", "Spanish", "French", "Norwegian", "Swedish"]
60
 
61
- # --- Helper Functions ---
 
 
 
 
 
 
 
 
 
62
  def get_job_languages_from_metadata(job_match_dict): # Now expects the whole job_match dict
63
  extracted_languages_set = set()
64
 
@@ -282,21 +318,38 @@ if not os.getenv("OPENAI_API_KEY"):
282
 
283
  # --- Main App Layout ---
284
  with st.sidebar:
 
 
285
  st.header("🚀 Get Started")
286
  uploaded_file = st.file_uploader("1. Upload Your CV", type=['pdf', 'docx', 'txt', 'md'],
287
  key="cv_uploader_key",
288
  help="Supports PDF, DOCX, Markdown, and TXT files.",
289
  on_change=lambda: st.session_state.update(all_job_matches_cache=None, generated_cover_letters={}, cv_text_cache=None, cv_skills=None, feedback_given_jobs={}))
290
  st.markdown("---")
 
291
  st.header("🔍 Filter Job Matches")
292
- unique_locations_options = []; unique_categories_options = []
 
 
 
 
 
 
 
 
 
 
293
  if st.session_state.all_job_matches_cache:
294
- unique_locations_options = sorted(list(set(job.get('area', 'N/A') for job in st.session_state.all_job_matches_cache if job.get('area'))))
295
  raw_categories_from_cache = [job.get('category') for job in st.session_state.all_job_matches_cache]
296
- unique_categories_options = sorted(list(set(cat for cat in raw_categories_from_cache if isinstance(cat, str) and cat.strip())))
297
- selected_locations = st.multiselect("Job Locations (Area)", options=unique_locations_options, placeholder="Any Location" if unique_locations_options else "Upload CV")
 
 
 
 
298
  selected_categories = st.multiselect("Job Categories", options=unique_categories_options, placeholder="Any Category" if unique_categories_options else "Upload CV")
299
  selected_languages = st.multiselect("Required Languages", options=CANONICAL_LANGUAGES_FOR_FILTER, placeholder="Any Language")
 
300
  st.markdown("---")
301
  st.info("Tip: Upload CV first, then apply filters.")
302
  st.caption(f"Displays top {MAX_JOBS_TO_DISPLAY_PER_PAGE} matches after filters from up to {TOP_N_RESULTS_FROM_SEARCH} initial results.")
@@ -344,9 +397,52 @@ if uploaded_file is not None:
344
 
345
 
346
  if st.session_state.all_job_matches_cache is not None:
347
- current_matches_to_filter = list(st.session_state.all_job_matches_cache)
348
- if selected_locations: current_matches_to_filter = [job for job in current_matches_to_filter if job.get('area') in selected_locations]
349
- if selected_categories: current_matches_to_filter = [job for job in current_matches_to_filter if job.get('category') in selected_categories]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
350
  if selected_languages: current_matches_to_filter = [job for job in current_matches_to_filter if any(lang in selected_languages for lang in get_job_languages_from_metadata(job))]
351
 
352
  final_display_matches = [j for j in current_matches_to_filter if isinstance(j.get('score'), (int, float)) and j.get('score', 0) >= SIMILARITY_THRESHOLD]
@@ -365,7 +461,8 @@ if st.session_state.all_job_matches_cache is not None:
365
  job_title = job_match.get('title', 'N/A')
366
  job_company = job_match.get('company', 'N/A')
367
  job_area_display = job_match.get('area', 'N/A')
368
- job_category_display = job_match.get('category', 'N/A')
 
369
  job_status = job_match.get('status', 'unknown').capitalize()
370
  job_url_primary = job_match.get('application_url', job_match.get('url', '#'))
371
  job_score = job_match.get('score', 0.0)
@@ -387,9 +484,10 @@ if st.session_state.all_job_matches_cache is not None:
387
  st.markdown("**Key CV skills contributing to this match:**")
388
  for skill_text, skill_sim_score in contributing_skills:
389
  st.markdown(f"- `{skill_text}` (Contribution: {skill_sim_score:.2f})")
390
- elif not contributing_skills:
391
  st.caption("Skill contribution analysis not available or no strong contributing skills.")
392
-
 
393
  action_cols = st.columns([1, 1])
394
  with action_cols[0]:
395
  if job_url_primary and job_url_primary != '#': st.link_button("Apply Now 🚀", url=job_url_primary, type="primary", use_container_width=True)
 
19
  from st_copy_to_clipboard import st_copy_to_clipboard # For copy to clipboard functionality
20
  from huggingface_hub import HfApi, hf_hub_download, HfFolder # For HF Datasets
21
  from huggingface_hub.utils import RepositoryNotFoundError, EntryNotFoundError # For HF Datasets
22
+ from category_translation import CATEGORY_TRANSLATIONS, REVERSE_CATEGORY_TRANSLATIONS
23
+
24
 
25
  # Load environment variables
26
  load_dotenv()
 
30
  HF_FEEDBACK_FILENAME_IN_REPO = os.getenv("HF_FEEDBACK_FILENAME_IN_REPO", "feedback_log.parquet") # Default
31
  HF_TOKEN_FOR_DATASET = os.getenv("HF_FEEDBACK_DATASET_TOKEN") # Needs to be set for writing
32
 
33
+ # Mapping of raw location names to standardized city names
34
+ LOCATION_STANDARDIZATION = {
35
+ "kbh": "Copenhagen", "københavn": "Copenhagen", "copenhagen": "Copenhagen",
36
+ "århus": "Aarhus", "aarhus": "Aarhus",
37
+ "odense": "Odense",
38
+ "aalborg": "Aalborg", "alborg": "Aalborg",
39
+ "esbjerg": "Esbjerg",
40
+ "randers": "Randers",
41
+ "horsens": "Horsens",
42
+ "kolding": "Kolding",
43
+ "vejle": "Vejle",
44
+ "roskilde": "Roskilde",
45
+ "silkeborg": "Silkeborg",
46
+ "herning": "Herning",
47
+ "hørsholm": "Hørsholm", "horsholm": "Hørsholm",
48
+ "helsingør": "Helsingør", "helsingor": "Helsingør",
49
+ "næstved": "Næstved", "naestved": "Næstved",
50
+ "viborg": "Viborg",
51
+ "fredericia": "Fredericia",
52
+ "køge": "Køge", "koege": "Køge",
53
+ "taastrup": "Taastrup",
54
+ "holstebro": "Holstebro"
55
+ }
56
+
57
+
58
  # --- Import your custom modules ---
59
  try:
60
  from cv_match import find_similar_jobs, generate_embedding_for_skills
 
80
  # --- Constants and Normalization Data ---
81
  APP_DIR = os.path.dirname(os.path.abspath(__file__))
82
 
83
+ SIMILARITY_THRESHOLD = 20.0 # Default similarity threshold
84
  MAX_JOBS_TO_DISPLAY_PER_PAGE = 5
85
+ TOP_N_RESULTS_FROM_SEARCH = int(os.getenv('TOP_N_RESULTS_FOR_APP_QUERY', '200')) # How many to fetch initially
86
  CANONICAL_LANGUAGES_FOR_FILTER = ["English", "Danish", "German", "Spanish", "French", "Norwegian", "Swedish"]
87
 
88
def standardize_location(location_raw, mapping=None):
    """Map a free-text job location onto a canonical city name.

    The cleaned input is searched for each key of ``mapping`` as a
    substring; the value of the first key found (in the mapping's iteration
    order) is returned.

    Args:
        location_raw: The raw location value from a job record. Anything
            that is not a non-blank string yields ``None``.
        mapping: Optional dict of lowercase search key -> canonical city
            name. Defaults to the module-level ``LOCATION_STANDARDIZATION``.

    Returns:
        The canonical city name if a key matches, the stripped original
        string if nothing matches, or ``None`` for missing/blank/non-string
        input.
    """
    import unicodedata

    if not isinstance(location_raw, str):
        return None

    stripped = location_raw.strip()
    if not stripped:
        # Whitespace-only input carries no location; report it as missing
        # rather than returning an empty string.
        return None

    if mapping is None:
        mapping = LOCATION_STANDARDIZATION

    # Compose characters (NFC) before lowercasing so that decomposed input
    # such as "A" + combining ring still matches keys like "århus".
    haystack = unicodedata.normalize("NFC", stripped).lower()

    for key, city in mapping.items():
        if key in haystack:
            return city

    return stripped  # fallback: unknown location, returned as given (trimmed)
96
+
97
+
98
  def get_job_languages_from_metadata(job_match_dict): # Now expects the whole job_match dict
99
  extracted_languages_set = set()
100
 
 
318
 
319
  # --- Main App Layout ---
320
  with st.sidebar:
321
+ # Sidebar: CV upload followed by the job-match filter widgets
322
+
323
  st.header("🚀 Get Started")
324
  uploaded_file = st.file_uploader("1. Upload Your CV", type=['pdf', 'docx', 'txt', 'md'],
325
  key="cv_uploader_key",
326
  help="Supports PDF, DOCX, Markdown, and TXT files.",
327
  on_change=lambda: st.session_state.update(all_job_matches_cache=None, generated_cover_letters={}, cv_text_cache=None, cv_skills=None, feedback_given_jobs={}))
328
  st.markdown("---")
329
+
330
  st.header("🔍 Filter Job Matches")
331
+
332
+ # ✅ Hardcoded list of 20 cities plus a catch-all "Rest of Denmark" option
333
+ unique_locations_options = [
334
+ "Copenhagen", "Aarhus", "Odense", "Aalborg", "Esbjerg", "Randers", "Horsens", "Kolding",
335
+ "Vejle", "Roskilde", "Silkeborg", "Herning", "Hørsholm", "Helsingør", "Næstved", "Viborg",
336
+ "Fredericia", "Køge", "Taastrup", "Holstebro", "Rest of Denmark"
337
+ ]
338
+
339
+
340
+ # ✅ Categories are still dynamically built
341
+ unique_categories_options = []
342
  if st.session_state.all_job_matches_cache:
 
343
  raw_categories_from_cache = [job.get('category') for job in st.session_state.all_job_matches_cache]
344
+ unique_categories_options = sorted(
345
+ list({CATEGORY_TRANSLATIONS.get(cat.strip(), cat.strip()) for cat in raw_categories_from_cache if isinstance(cat, str) and cat.strip()})
346
+ )
347
+
348
+ # ✅ Multiselect widgets
349
+ selected_locations = st.multiselect("Job Locations (Area)", options=unique_locations_options, placeholder="Any Location")
350
  selected_categories = st.multiselect("Job Categories", options=unique_categories_options, placeholder="Any Category" if unique_categories_options else "Upload CV")
351
  selected_languages = st.multiselect("Required Languages", options=CANONICAL_LANGUAGES_FOR_FILTER, placeholder="Any Language")
352
+
353
  st.markdown("---")
354
  st.info("Tip: Upload CV first, then apply filters.")
355
  st.caption(f"Displays top {MAX_JOBS_TO_DISPLAY_PER_PAGE} matches after filters from up to {TOP_N_RESULTS_FROM_SEARCH} initial results.")
 
397
 
398
 
399
  if st.session_state.all_job_matches_cache is not None:
400
+ current_matches_to_filter = list(st.session_state.all_job_matches_cache)
401
+
402
+ # Location filter
403
if selected_locations:
    # Hoist the per-job invariants: the chosen options, and the set of
    # explicitly listed cities (every option except the catch-all label).
    rest_of_denmark_label = "Rest of Denmark"
    chosen_locations = set(selected_locations)
    listed_cities = {
        city for city in unique_locations_options if city != rest_of_denmark_label
    }
    include_rest = rest_of_denmark_label in chosen_locations

    def location_matches(job_area):
        """Return True if a job's area passes the sidebar location filter.

        A job passes when its standardized area is one of the selected
        options, or when "Rest of Denmark" is selected and the standardized
        area is none of the explicitly listed cities. Jobs whose area cannot
        be standardized (missing or blank) never pass.
        """
        standardized = standardize_location(job_area)
        if not standardized:
            return False
        if standardized in chosen_locations:
            return True
        return include_rest and standardized not in listed_cities

    current_matches_to_filter = [
        job for job in current_matches_to_filter
        if location_matches(job.get('area'))
    ]
423
+
424
+
425
+
426
+
427
+ # ✅ Category filter
428
if selected_categories:
    chosen_categories = set(selected_categories)

    def category_matches(raw_category):
        """Return True if a job's raw category maps to a selected option.

        Uses the same strip-then-translate step that builds the sidebar
        options, so every option offered can actually match, including raw
        values with stray whitespace.
        """
        if not isinstance(raw_category, str):
            return False
        cleaned = raw_category.strip()
        if not cleaned:
            return False
        return CATEGORY_TRANSLATIONS.get(cleaned, cleaned) in chosen_categories

    current_matches_to_filter = [
        job for job in current_matches_to_filter
        if category_matches(job.get('category'))
    ]
436
+
437
+ # ✅ Language filter
438
if selected_languages:
    # Keep jobs that list at least one of the selected languages. This is
    # the only language pass; the older one-line filter that followed it
    # repeated the same work and has been folded in here.
    current_matches_to_filter = [
        job for job in current_matches_to_filter
        if any(lang in selected_languages for lang in get_job_languages_from_metadata(job))
    ]
447
 
448
  final_display_matches = [j for j in current_matches_to_filter if isinstance(j.get('score'), (int, float)) and j.get('score', 0) >= SIMILARITY_THRESHOLD]
 
461
  job_title = job_match.get('title', 'N/A')
462
  job_company = job_match.get('company', 'N/A')
463
  job_area_display = job_match.get('area', 'N/A')
464
+ job_category_raw = job_match.get('category', 'N/A')
465
+ job_category_display = CATEGORY_TRANSLATIONS.get(job_category_raw, job_category_raw)
466
  job_status = job_match.get('status', 'unknown').capitalize()
467
  job_url_primary = job_match.get('application_url', job_match.get('url', '#'))
468
  job_score = job_match.get('score', 0.0)
 
484
  st.markdown("**Key CV skills contributing to this match:**")
485
  for skill_text, skill_sim_score in contributing_skills:
486
  st.markdown(f"- `{skill_text}` (Contribution: {skill_sim_score:.2f})")
487
+ else:
488
  st.caption("Skill contribution analysis not available or no strong contributing skills.")
489
+
490
+
491
  action_cols = st.columns([1, 1])
492
  with action_cols[0]:
493
  if job_url_primary and job_url_primary != '#': st.link_button("Apply Now 🚀", url=job_url_primary, type="primary", use_container_width=True)