Update app.py
Browse files
app.py
CHANGED
|
@@ -19,6 +19,8 @@ import json
|
|
| 19 |
from st_copy_to_clipboard import st_copy_to_clipboard # For copy to clipboard functionality
|
| 20 |
from huggingface_hub import HfApi, hf_hub_download, HfFolder # For HF Datasets
|
| 21 |
from huggingface_hub.utils import RepositoryNotFoundError, EntryNotFoundError # For HF Datasets
|
|
|
|
|
|
|
| 22 |
|
| 23 |
# Load environment variables
|
| 24 |
load_dotenv()
|
|
@@ -28,6 +30,31 @@ HF_FEEDBACK_DATASET_REPO_ID = os.getenv("HF_FEEDBACK_DATASET_REPO_ID", "Krepseli
|
|
| 28 |
HF_FEEDBACK_FILENAME_IN_REPO = os.getenv("HF_FEEDBACK_FILENAME_IN_REPO", "feedback_log.parquet") # Default
|
| 29 |
HF_TOKEN_FOR_DATASET = os.getenv("HF_FEEDBACK_DATASET_TOKEN") # Needs to be set for writing
|
| 30 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
# --- Import your custom modules ---
|
| 32 |
try:
|
| 33 |
from cv_match import find_similar_jobs, generate_embedding_for_skills
|
|
@@ -53,12 +80,21 @@ st.set_page_config(
|
|
| 53 |
# --- Constants and Normalization Data ---
|
| 54 |
APP_DIR = os.path.dirname(os.path.abspath(__file__))
|
| 55 |
|
| 56 |
-
SIMILARITY_THRESHOLD =
|
| 57 |
MAX_JOBS_TO_DISPLAY_PER_PAGE = 5
|
| 58 |
-
TOP_N_RESULTS_FROM_SEARCH = int(os.getenv('TOP_N_RESULTS_FOR_APP_QUERY', '
|
| 59 |
CANONICAL_LANGUAGES_FOR_FILTER = ["English", "Danish", "German", "Spanish", "French", "Norwegian", "Swedish"]
|
| 60 |
|
| 61 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
def get_job_languages_from_metadata(job_match_dict): # Now expects the whole job_match dict
|
| 63 |
extracted_languages_set = set()
|
| 64 |
|
|
@@ -282,21 +318,38 @@ if not os.getenv("OPENAI_API_KEY"):
|
|
| 282 |
|
| 283 |
# --- Main App Layout ---
|
| 284 |
with st.sidebar:
|
|
|
|
|
|
|
| 285 |
st.header("🚀 Get Started")
|
| 286 |
uploaded_file = st.file_uploader("1. Upload Your CV", type=['pdf', 'docx', 'txt', 'md'],
|
| 287 |
key="cv_uploader_key",
|
| 288 |
help="Supports PDF, DOCX, Markdown, and TXT files.",
|
| 289 |
on_change=lambda: st.session_state.update(all_job_matches_cache=None, generated_cover_letters={}, cv_text_cache=None, cv_skills=None, feedback_given_jobs={}))
|
| 290 |
st.markdown("---")
|
|
|
|
| 291 |
st.header("🔍 Filter Job Matches")
|
| 292 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 293 |
if st.session_state.all_job_matches_cache:
|
| 294 |
-
unique_locations_options = sorted(list(set(job.get('area', 'N/A') for job in st.session_state.all_job_matches_cache if job.get('area'))))
|
| 295 |
raw_categories_from_cache = [job.get('category') for job in st.session_state.all_job_matches_cache]
|
| 296 |
-
unique_categories_options = sorted(
|
| 297 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 298 |
selected_categories = st.multiselect("Job Categories", options=unique_categories_options, placeholder="Any Category" if unique_categories_options else "Upload CV")
|
| 299 |
selected_languages = st.multiselect("Required Languages", options=CANONICAL_LANGUAGES_FOR_FILTER, placeholder="Any Language")
|
|
|
|
| 300 |
st.markdown("---")
|
| 301 |
st.info("Tip: Upload CV first, then apply filters.")
|
| 302 |
st.caption(f"Displays top {MAX_JOBS_TO_DISPLAY_PER_PAGE} matches after filters from up to {TOP_N_RESULTS_FROM_SEARCH} initial results.")
|
|
@@ -344,9 +397,52 @@ if uploaded_file is not None:
|
|
| 344 |
|
| 345 |
|
| 346 |
if st.session_state.all_job_matches_cache is not None:
|
| 347 |
-
current_matches_to_filter = list(st.session_state.all_job_matches_cache)
|
| 348 |
-
|
| 349 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 350 |
if selected_languages: current_matches_to_filter = [job for job in current_matches_to_filter if any(lang in selected_languages for lang in get_job_languages_from_metadata(job))]
|
| 351 |
|
| 352 |
final_display_matches = [j for j in current_matches_to_filter if isinstance(j.get('score'), (int, float)) and j.get('score', 0) >= SIMILARITY_THRESHOLD]
|
|
@@ -365,7 +461,8 @@ if st.session_state.all_job_matches_cache is not None:
|
|
| 365 |
job_title = job_match.get('title', 'N/A')
|
| 366 |
job_company = job_match.get('company', 'N/A')
|
| 367 |
job_area_display = job_match.get('area', 'N/A')
|
| 368 |
-
|
|
|
|
| 369 |
job_status = job_match.get('status', 'unknown').capitalize()
|
| 370 |
job_url_primary = job_match.get('application_url', job_match.get('url', '#'))
|
| 371 |
job_score = job_match.get('score', 0.0)
|
|
@@ -387,9 +484,10 @@ if st.session_state.all_job_matches_cache is not None:
|
|
| 387 |
st.markdown("**Key CV skills contributing to this match:**")
|
| 388 |
for skill_text, skill_sim_score in contributing_skills:
|
| 389 |
st.markdown(f"- `{skill_text}` (Contribution: {skill_sim_score:.2f})")
|
| 390 |
-
|
| 391 |
st.caption("Skill contribution analysis not available or no strong contributing skills.")
|
| 392 |
-
|
|
|
|
| 393 |
action_cols = st.columns([1, 1])
|
| 394 |
with action_cols[0]:
|
| 395 |
if job_url_primary and job_url_primary != '#': st.link_button("Apply Now 🚀", url=job_url_primary, type="primary", use_container_width=True)
|
|
|
|
| 19 |
from st_copy_to_clipboard import st_copy_to_clipboard # For copy to clipboard functionality
|
| 20 |
from huggingface_hub import HfApi, hf_hub_download, HfFolder # For HF Datasets
|
| 21 |
from huggingface_hub.utils import RepositoryNotFoundError, EntryNotFoundError # For HF Datasets
|
| 22 |
+
from category_translation import CATEGORY_TRANSLATIONS, REVERSE_CATEGORY_TRANSLATIONS
|
| 23 |
+
|
| 24 |
|
| 25 |
# Load environment variables
|
| 26 |
load_dotenv()
|
|
|
|
| 30 |
# --- Hugging Face feedback-dataset configuration ---
# Parquet file name inside the dataset repo (overridable via environment).
HF_FEEDBACK_FILENAME_IN_REPO = os.environ.get("HF_FEEDBACK_FILENAME_IN_REPO", "feedback_log.parquet")  # Default
# Write token for the feedback dataset; None means feedback upload is disabled.
HF_TOKEN_FOR_DATASET = os.environ.get("HF_FEEDBACK_DATASET_TOKEN")  # Needs to be set for writing
|
| 32 |
|
| 33 |
+
# Mapping of raw location names to standardized city names.
# Keys are lowercase aliases — including spellings with and without the
# Danish characters æ/ø/å — and values are the canonical display names.
LOCATION_STANDARDIZATION = {
    "kbh": "Copenhagen",
    "københavn": "Copenhagen",
    "copenhagen": "Copenhagen",
    "århus": "Aarhus",
    "aarhus": "Aarhus",
    "odense": "Odense",
    "aalborg": "Aalborg",
    "alborg": "Aalborg",
    "esbjerg": "Esbjerg",
    "randers": "Randers",
    "horsens": "Horsens",
    "kolding": "Kolding",
    "vejle": "Vejle",
    "roskilde": "Roskilde",
    "silkeborg": "Silkeborg",
    "herning": "Herning",
    "hørsholm": "Hørsholm",
    "horsholm": "Hørsholm",
    "helsingør": "Helsingør",
    "helsingor": "Helsingør",
    "næstved": "Næstved",
    "naestved": "Næstved",
    "viborg": "Viborg",
    "fredericia": "Fredericia",
    "køge": "Køge",
    "koege": "Køge",
    "taastrup": "Taastrup",
    "holstebro": "Holstebro",
}
|
| 56 |
+
|
| 57 |
+
|
| 58 |
# --- Import your custom modules ---
|
| 59 |
try:
|
| 60 |
from cv_match import find_similar_jobs, generate_embedding_for_skills
|
|
|
|
| 80 |
# --- Constants and Normalization Data ---
|
| 81 |
APP_DIR = os.path.dirname(os.path.abspath(__file__))
|
| 82 |
|
| 83 |
+
# --- Matching / display tuning constants ---
# Minimum match score a job must reach to appear in the results list.
SIMILARITY_THRESHOLD = 20.0  # Default similarity threshold
# Number of job cards rendered per page after filtering.
MAX_JOBS_TO_DISPLAY_PER_PAGE = 5
# How many candidate matches the initial similarity search fetches;
# overridable through the TOP_N_RESULTS_FOR_APP_QUERY environment variable.
TOP_N_RESULTS_FROM_SEARCH = int(os.environ.get("TOP_N_RESULTS_FOR_APP_QUERY", "200"))  # How many to fetch initially
# Languages offered by the "Required Languages" filter widget.
CANONICAL_LANGUAGES_FOR_FILTER = [
    "English",
    "Danish",
    "German",
    "Spanish",
    "French",
    "Norwegian",
    "Swedish",
]
|
| 87 |
|
| 88 |
+
def standardize_location(location_raw):
    """Map a raw, free-text location string to a standardized city name.

    Performs a case-insensitive substring search of the input against the
    aliases in LOCATION_STANDARDIZATION. Longer aliases are tried first so
    that the result never depends on dict insertion order and a short alias
    cannot shadow a longer, more specific one that also occurs in the text.

    Args:
        location_raw: Raw location text; may be None or a non-string value.

    Returns:
        The standardized city name when an alias matches, the stripped
        original text as a fallback, or None for empty/non-string input.
    """
    if not location_raw or not isinstance(location_raw, str):
        return None
    location_clean = location_raw.strip().lower()
    # Longest-alias-first makes matching deterministic and most-specific.
    for alias in sorted(LOCATION_STANDARDIZATION, key=len, reverse=True):
        if alias in location_clean:
            return LOCATION_STANDARDIZATION[alias]
    return location_raw.strip()  # fallback: keep the (trimmed) original text
|
| 96 |
+
|
| 97 |
+
|
| 98 |
def get_job_languages_from_metadata(job_match_dict): # Now expects the whole job_match dict
|
| 99 |
extracted_languages_set = set()
|
| 100 |
|
|
|
|
| 318 |
|
| 319 |
# --- Main App Layout ---
|
| 320 |
with st.sidebar:
    # --- CV upload ---
    # Changing the uploaded file invalidates every cached, CV-derived value.
    st.header("🚀 Get Started")
    uploaded_file = st.file_uploader(
        "1. Upload Your CV",
        type=['pdf', 'docx', 'txt', 'md'],
        key="cv_uploader_key",
        help="Supports PDF, DOCX, Markdown, and TXT files.",
        on_change=lambda: st.session_state.update(
            all_job_matches_cache=None,
            generated_cover_letters={},
            cv_text_cache=None,
            cv_skills=None,
            feedback_given_jobs={},
        ),
    )
    st.markdown("---")

    # --- Filters ---
    st.header("🔍 Filter Job Matches")

    # Hardcoded top-20 cities plus a catch-all for everywhere else.
    unique_locations_options = [
        "Copenhagen", "Aarhus", "Odense", "Aalborg", "Esbjerg", "Randers", "Horsens", "Kolding",
        "Vejle", "Roskilde", "Silkeborg", "Herning", "Hørsholm", "Helsingør", "Næstved", "Viborg",
        "Fredericia", "Køge", "Taastrup", "Holstebro", "Rest of Denmark"
    ]

    # Category options are derived from the cached matches, translated for
    # display via CATEGORY_TRANSLATIONS (unknown categories pass through).
    unique_categories_options = []
    if st.session_state.all_job_matches_cache:
        raw_categories_from_cache = [job.get('category') for job in st.session_state.all_job_matches_cache]
        translated = {
            CATEGORY_TRANSLATIONS.get(cat.strip(), cat.strip())
            for cat in raw_categories_from_cache
            if isinstance(cat, str) and cat.strip()
        }
        unique_categories_options = sorted(list(translated))

    # Filter widgets (selections are consumed by the main-page filter logic).
    selected_locations = st.multiselect("Job Locations (Area)", options=unique_locations_options, placeholder="Any Location")
    selected_categories = st.multiselect("Job Categories", options=unique_categories_options, placeholder="Any Category" if unique_categories_options else "Upload CV")
    selected_languages = st.multiselect("Required Languages", options=CANONICAL_LANGUAGES_FOR_FILTER, placeholder="Any Language")

    st.markdown("---")
    st.info("Tip: Upload CV first, then apply filters.")
    st.caption(f"Displays top {MAX_JOBS_TO_DISPLAY_PER_PAGE} matches after filters from up to {TOP_N_RESULTS_FROM_SEARCH} initial results.")
|
|
|
|
| 397 |
|
| 398 |
|
| 399 |
if st.session_state.all_job_matches_cache is not None:
    # Start from the full cached match list; each active filter narrows it.
    current_matches_to_filter = list(st.session_state.all_job_matches_cache)

    # --- Location filter ---
    if selected_locations:
        def location_matches(job_area):
            """Return True when the job's area satisfies the location selection."""
            standardized = standardize_location(job_area)
            if not standardized:
                return False
            # Direct hit on an explicitly selected city.
            if standardized in selected_locations:
                return True
            # "Rest of Denmark" matches any area outside the hardcoded city
            # list (unique_locations_options ends with that sentinel entry).
            if "Rest of Denmark" in selected_locations and standardized not in unique_locations_options[:-1]:
                return True
            return False

        current_matches_to_filter = [
            job for job in current_matches_to_filter
            if location_matches(job.get('area'))
        ]

    # --- Category filter ---
    # The UI shows translated category names; job records store the Danish
    # originals, so map selections back before comparing.
    if selected_categories:
        selected_categories_danish = [
            REVERSE_CATEGORY_TRANSLATIONS.get(cat, cat) for cat in selected_categories
        ]
        current_matches_to_filter = [
            job for job in current_matches_to_filter
            if job.get('category') in selected_categories_danish
        ]

    # --- Language filter ---
    # FIX: this filter was previously applied twice (a leftover one-line
    # duplicate followed this block); the redundant second pass is removed.
    if selected_languages:
        current_matches_to_filter = [
            job for job in current_matches_to_filter
            if any(lang in selected_languages for lang in get_job_languages_from_metadata(job))
        ]

    # Keep only matches with a numeric score at or above the threshold.
    final_display_matches = [j for j in current_matches_to_filter if isinstance(j.get('score'), (int, float)) and j.get('score', 0) >= SIMILARITY_THRESHOLD]
|
|
|
|
| 461 |
job_title = job_match.get('title', 'N/A')
|
| 462 |
job_company = job_match.get('company', 'N/A')
|
| 463 |
job_area_display = job_match.get('area', 'N/A')
|
| 464 |
+
job_category_raw = job_match.get('category', 'N/A')
|
| 465 |
+
job_category_display = CATEGORY_TRANSLATIONS.get(job_category_raw, job_category_raw)
|
| 466 |
job_status = job_match.get('status', 'unknown').capitalize()
|
| 467 |
job_url_primary = job_match.get('application_url', job_match.get('url', '#'))
|
| 468 |
job_score = job_match.get('score', 0.0)
|
|
|
|
| 484 |
st.markdown("**Key CV skills contributing to this match:**")
|
| 485 |
for skill_text, skill_sim_score in contributing_skills:
|
| 486 |
st.markdown(f"- `{skill_text}` (Contribution: {skill_sim_score:.2f})")
|
| 487 |
+
else:
|
| 488 |
st.caption("Skill contribution analysis not available or no strong contributing skills.")
|
| 489 |
+
|
| 490 |
+
|
| 491 |
action_cols = st.columns([1, 1])
|
| 492 |
with action_cols[0]:
|
| 493 |
if job_url_primary and job_url_primary != '#': st.link_button("Apply Now 🚀", url=job_url_primary, type="primary", use_container_width=True)
|