Krepselis committed on
Commit
1475c0d
·
verified ·
1 Parent(s): 8ae0cb4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -110
app.py CHANGED
@@ -19,8 +19,6 @@ import json
19
  from st_copy_to_clipboard import st_copy_to_clipboard # For copy to clipboard functionality
20
  from huggingface_hub import HfApi, hf_hub_download, HfFolder # For HF Datasets
21
  from huggingface_hub.utils import RepositoryNotFoundError, EntryNotFoundError # For HF Datasets
22
- from category_translation import CATEGORY_TRANSLATIONS, REVERSE_CATEGORY_TRANSLATIONS
23
-
24
 
25
  # Load environment variables
26
  load_dotenv()
@@ -30,31 +28,6 @@ HF_FEEDBACK_DATASET_REPO_ID = os.getenv("HF_FEEDBACK_DATASET_REPO_ID", "Krepseli
30
  HF_FEEDBACK_FILENAME_IN_REPO = os.getenv("HF_FEEDBACK_FILENAME_IN_REPO", "feedback_log.parquet") # Default
31
  HF_TOKEN_FOR_DATASET = os.getenv("HF_FEEDBACK_DATASET_TOKEN") # Needs to be set for writing
32
 
33
- # Mapping of raw location names to standardized city names
34
- LOCATION_STANDARDIZATION = {
35
- "kbh": "Copenhagen", "københavn": "Copenhagen", "copenhagen": "Copenhagen",
36
- "århus": "Aarhus", "aarhus": "Aarhus",
37
- "odense": "Odense",
38
- "aalborg": "Aalborg", "alborg": "Aalborg",
39
- "esbjerg": "Esbjerg",
40
- "randers": "Randers",
41
- "horsens": "Horsens",
42
- "kolding": "Kolding",
43
- "vejle": "Vejle",
44
- "roskilde": "Roskilde",
45
- "silkeborg": "Silkeborg",
46
- "herning": "Herning",
47
- "hørsholm": "Hørsholm", "horsholm": "Hørsholm",
48
- "helsingør": "Helsingør", "helsingor": "Helsingør",
49
- "næstved": "Næstved", "naestved": "Næstved",
50
- "viborg": "Viborg",
51
- "fredericia": "Fredericia",
52
- "køge": "Køge", "koege": "Køge",
53
- "taastrup": "Taastrup",
54
- "holstebro": "Holstebro"
55
- }
56
-
57
-
58
  # --- Import your custom modules ---
59
  try:
60
  from cv_match import find_similar_jobs, generate_embedding_for_skills
@@ -80,21 +53,12 @@ st.set_page_config(
80
  # --- Constants and Normalization Data ---
81
  APP_DIR = os.path.dirname(os.path.abspath(__file__))
82
 
83
- SIMILARITY_THRESHOLD = 20.0 # Default similarity threshold
84
  MAX_JOBS_TO_DISPLAY_PER_PAGE = 5
85
  TOP_N_RESULTS_FROM_SEARCH = int(os.getenv('TOP_N_RESULTS_FOR_APP_QUERY', '20000')) # How many to fetch initially
86
  CANONICAL_LANGUAGES_FOR_FILTER = ["English", "Danish", "German", "Spanish", "French", "Norwegian", "Swedish"]
87
 
88
- def standardize_location(location_raw):
89
- if not location_raw or not isinstance(location_raw, str):
90
- return None
91
- location_clean = location_raw.strip().lower()
92
- for key, value in LOCATION_STANDARDIZATION.items():
93
- if key in location_clean:
94
- return value
95
- return location_raw.strip() # fallback
96
-
97
-
98
  def get_job_languages_from_metadata(job_match_dict): # Now expects the whole job_match dict
99
  extracted_languages_set = set()
100
 
@@ -318,38 +282,21 @@ if not os.getenv("OPENAI_API_KEY"):
318
 
319
  # --- Main App Layout ---
320
  with st.sidebar:
321
- # Replace this block inside your Streamlit sidebar (under `with st.sidebar:`)
322
-
323
  st.header("🚀 Get Started")
324
  uploaded_file = st.file_uploader("1. Upload Your CV", type=['pdf', 'docx', 'txt', 'md'],
325
  key="cv_uploader_key",
326
  help="Supports PDF, DOCX, Markdown, and TXT files.",
327
  on_change=lambda: st.session_state.update(all_job_matches_cache=None, generated_cover_letters={}, cv_text_cache=None, cv_skills=None, feedback_given_jobs={}))
328
  st.markdown("---")
329
-
330
  st.header("🔍 Filter Job Matches")
331
-
332
- # ✅ Hardcoded top 20 cities
333
- unique_locations_options = [
334
- "Copenhagen", "Aarhus", "Odense", "Aalborg", "Esbjerg", "Randers", "Horsens", "Kolding",
335
- "Vejle", "Roskilde", "Silkeborg", "Herning", "Hørsholm", "Helsingør", "Næstved", "Viborg",
336
- "Fredericia", "Køge", "Taastrup", "Holstebro", "Rest of Denmark"
337
- ]
338
-
339
-
340
- # ✅ Categories are still dynamically built
341
- unique_categories_options = []
342
  if st.session_state.all_job_matches_cache:
 
343
  raw_categories_from_cache = [job.get('category') for job in st.session_state.all_job_matches_cache]
344
- unique_categories_options = sorted(
345
- list({CATEGORY_TRANSLATIONS.get(cat.strip(), cat.strip()) for cat in raw_categories_from_cache if isinstance(cat, str) and cat.strip()})
346
- )
347
-
348
- # ✅ Multiselect widgets
349
- selected_locations = st.multiselect("Job Locations (Area)", options=unique_locations_options, placeholder="Any Location")
350
  selected_categories = st.multiselect("Job Categories", options=unique_categories_options, placeholder="Any Category" if unique_categories_options else "Upload CV")
351
  selected_languages = st.multiselect("Required Languages", options=CANONICAL_LANGUAGES_FOR_FILTER, placeholder="Any Language")
352
-
353
  st.markdown("---")
354
  st.info("Tip: Upload CV first, then apply filters.")
355
  st.caption(f"Displays top {MAX_JOBS_TO_DISPLAY_PER_PAGE} matches after filters from up to {TOP_N_RESULTS_FROM_SEARCH} initial results.")
@@ -397,52 +344,9 @@ if uploaded_file is not None:
397
 
398
 
399
  if st.session_state.all_job_matches_cache is not None:
400
- current_matches_to_filter = list(st.session_state.all_job_matches_cache)
401
-
402
- # Location filter
403
- if selected_locations:
404
- def location_matches(job_area):
405
- standardized = standardize_location(job_area)
406
- if not standardized:
407
- return False
408
-
409
- # Match selected cities
410
- if standardized in selected_locations:
411
- return True
412
-
413
- # Match Rest of Denmark
414
- if "Rest of Denmark" in selected_locations and standardized not in unique_locations_options[:-1]:
415
- return True
416
-
417
- return False
418
-
419
- current_matches_to_filter = [
420
- job for job in current_matches_to_filter
421
- if location_matches(job.get('area'))
422
- ]
423
-
424
-
425
-
426
-
427
- # ✅ Category filter
428
- if selected_categories:
429
- selected_categories_danish = [
430
- REVERSE_CATEGORY_TRANSLATIONS.get(cat, cat) for cat in selected_categories
431
- ]
432
- current_matches_to_filter = [
433
- job for job in current_matches_to_filter
434
- if job.get('category') in selected_categories_danish
435
- ]
436
-
437
- # ✅ Language filter
438
- if selected_languages:
439
- current_matches_to_filter = [
440
- job for job in current_matches_to_filter
441
- if any(lang in selected_languages for lang in get_job_languages_from_metadata(job))
442
- ]
443
-
444
-
445
-
446
  if selected_languages: current_matches_to_filter = [job for job in current_matches_to_filter if any(lang in selected_languages for lang in get_job_languages_from_metadata(job))]
447
 
448
  final_display_matches = [j for j in current_matches_to_filter if isinstance(j.get('score'), (int, float)) and j.get('score', 0) >= SIMILARITY_THRESHOLD]
@@ -461,8 +365,7 @@ if st.session_state.all_job_matches_cache is not None:
461
  job_title = job_match.get('title', 'N/A')
462
  job_company = job_match.get('company', 'N/A')
463
  job_area_display = job_match.get('area', 'N/A')
464
- job_category_raw = job_match.get('category', 'N/A')
465
- job_category_display = CATEGORY_TRANSLATIONS.get(job_category_raw, job_category_raw)
466
  job_status = job_match.get('status', 'unknown').capitalize()
467
  job_url_primary = job_match.get('application_url', job_match.get('url', '#'))
468
  job_score = job_match.get('score', 0.0)
@@ -484,10 +387,9 @@ if st.session_state.all_job_matches_cache is not None:
484
  st.markdown("**Key CV skills contributing to this match:**")
485
  for skill_text, skill_sim_score in contributing_skills:
486
  st.markdown(f"- `{skill_text}` (Contribution: {skill_sim_score:.2f})")
487
- else:
488
  st.caption("Skill contribution analysis not available or no strong contributing skills.")
489
-
490
-
491
  action_cols = st.columns([1, 1])
492
  with action_cols[0]:
493
  if job_url_primary and job_url_primary != '#': st.link_button("Apply Now 🚀", url=job_url_primary, type="primary", use_container_width=True)
 
19
  from st_copy_to_clipboard import st_copy_to_clipboard # For copy to clipboard functionality
20
  from huggingface_hub import HfApi, hf_hub_download, HfFolder # For HF Datasets
21
  from huggingface_hub.utils import RepositoryNotFoundError, EntryNotFoundError # For HF Datasets
 
 
22
 
23
  # Load environment variables
24
  load_dotenv()
 
28
  HF_FEEDBACK_FILENAME_IN_REPO = os.getenv("HF_FEEDBACK_FILENAME_IN_REPO", "feedback_log.parquet") # Default
29
  HF_TOKEN_FOR_DATASET = os.getenv("HF_FEEDBACK_DATASET_TOKEN") # Needs to be set for writing
30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  # --- Import your custom modules ---
32
  try:
33
  from cv_match import find_similar_jobs, generate_embedding_for_skills
 
53
  # --- Constants and Normalization Data ---
54
  APP_DIR = os.path.dirname(os.path.abspath(__file__))
55
 
56
+ SIMILARITY_THRESHOLD = 40.0 # Default similarity threshold
57
  MAX_JOBS_TO_DISPLAY_PER_PAGE = 5
58
  TOP_N_RESULTS_FROM_SEARCH = int(os.getenv('TOP_N_RESULTS_FOR_APP_QUERY', '20000')) # How many to fetch initially
59
  CANONICAL_LANGUAGES_FOR_FILTER = ["English", "Danish", "German", "Spanish", "French", "Norwegian", "Swedish"]
60
 
61
+ # --- Helper Functions ---
 
 
 
 
 
 
 
 
 
62
  def get_job_languages_from_metadata(job_match_dict): # Now expects the whole job_match dict
63
  extracted_languages_set = set()
64
 
 
282
 
283
  # --- Main App Layout ---
284
  with st.sidebar:
 
 
285
  st.header("🚀 Get Started")
286
  uploaded_file = st.file_uploader("1. Upload Your CV", type=['pdf', 'docx', 'txt', 'md'],
287
  key="cv_uploader_key",
288
  help="Supports PDF, DOCX, Markdown, and TXT files.",
289
  on_change=lambda: st.session_state.update(all_job_matches_cache=None, generated_cover_letters={}, cv_text_cache=None, cv_skills=None, feedback_given_jobs={}))
290
  st.markdown("---")
 
291
  st.header("🔍 Filter Job Matches")
292
+ unique_locations_options = []; unique_categories_options = []
 
 
 
 
 
 
 
 
 
 
293
  if st.session_state.all_job_matches_cache:
294
+ unique_locations_options = sorted(list(set(job.get('area', 'N/A') for job in st.session_state.all_job_matches_cache if job.get('area'))))
295
  raw_categories_from_cache = [job.get('category') for job in st.session_state.all_job_matches_cache]
296
+ unique_categories_options = sorted(list(set(cat for cat in raw_categories_from_cache if isinstance(cat, str) and cat.strip())))
297
+ selected_locations = st.multiselect("Job Locations (Area)", options=unique_locations_options, placeholder="Any Location" if unique_locations_options else "Upload CV")
 
 
 
 
298
  selected_categories = st.multiselect("Job Categories", options=unique_categories_options, placeholder="Any Category" if unique_categories_options else "Upload CV")
299
  selected_languages = st.multiselect("Required Languages", options=CANONICAL_LANGUAGES_FOR_FILTER, placeholder="Any Language")
 
300
  st.markdown("---")
301
  st.info("Tip: Upload CV first, then apply filters.")
302
  st.caption(f"Displays top {MAX_JOBS_TO_DISPLAY_PER_PAGE} matches after filters from up to {TOP_N_RESULTS_FROM_SEARCH} initial results.")
 
344
 
345
 
346
  if st.session_state.all_job_matches_cache is not None:
347
+ current_matches_to_filter = list(st.session_state.all_job_matches_cache)
348
+ if selected_locations: current_matches_to_filter = [job for job in current_matches_to_filter if job.get('area') in selected_locations]
349
+ if selected_categories: current_matches_to_filter = [job for job in current_matches_to_filter if job.get('category') in selected_categories]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
350
  if selected_languages: current_matches_to_filter = [job for job in current_matches_to_filter if any(lang in selected_languages for lang in get_job_languages_from_metadata(job))]
351
 
352
  final_display_matches = [j for j in current_matches_to_filter if isinstance(j.get('score'), (int, float)) and j.get('score', 0) >= SIMILARITY_THRESHOLD]
 
365
  job_title = job_match.get('title', 'N/A')
366
  job_company = job_match.get('company', 'N/A')
367
  job_area_display = job_match.get('area', 'N/A')
368
+ job_category_display = job_match.get('category', 'N/A')
 
369
  job_status = job_match.get('status', 'unknown').capitalize()
370
  job_url_primary = job_match.get('application_url', job_match.get('url', '#'))
371
  job_score = job_match.get('score', 0.0)
 
387
  st.markdown("**Key CV skills contributing to this match:**")
388
  for skill_text, skill_sim_score in contributing_skills:
389
  st.markdown(f"- `{skill_text}` (Contribution: {skill_sim_score:.2f})")
390
+ elif not contributing_skills:
391
  st.caption("Skill contribution analysis not available or no strong contributing skills.")
392
+
 
393
  action_cols = st.columns([1, 1])
394
  with action_cols[0]:
395
  if job_url_primary and job_url_primary != '#': st.link_button("Apply Now 🚀", url=job_url_primary, type="primary", use_container_width=True)