Spaces:
Sleeping
Sleeping
| import asyncio | |
| import atexit | |
| import json | |
| from html import escape | |
| from pathlib import Path | |
| from typing import Any, List, Tuple | |
# Silence the "Invalid file descriptor: -1" messages that Python 3.13's asyncio
# emits while event loops are garbage-collected or the interpreter shuts down.
# Those messages come through CPython's "Exception ignored in: <__del__>" path,
# which does not go through the warnings machinery, so the finalizer itself is
# wrapped: BaseEventLoop.__del__ is replaced with a version that swallows any
# exception raised by the original finalizer.
try:
    from asyncio import base_events as _abe

    # Kept at module level on purpose: the wrapper below looks it up at call time.
    _orig_loop_del = _abe.BaseEventLoop.__del__

    def _safe_loop_del(self) -> None:
        """Run the original loop finalizer, discarding any exception it raises."""
        try:
            _orig_loop_del(self)
        except Exception:
            pass

    setattr(_abe.BaseEventLoop, "__del__", _safe_loop_del)
    # Drop the temporary names; the class attribute keeps the wrapper alive.
    del _abe, _safe_loop_del
except Exception:
    # Best-effort patch: if anything above fails, leave asyncio untouched.
    pass
def _close_asyncio_loop() -> None:
    """Best-effort shutdown hook that closes the policy's current event loop.

    Asks the active event-loop policy for its loop and closes it unless it is
    already closed. Every exception is swallowed so that interpreter exit is
    never disturbed by this cleanup.
    """
    # NOTE(review): on newer Python versions ``get_event_loop()`` may warn,
    # create a loop, or raise when no loop exists — confirm against the
    # targeted runtime. Any such error is absorbed by the handler below.
    try:
        leftover = asyncio.get_event_loop_policy().get_event_loop()
        if not leftover or leftover.is_closed():
            return
        leftover.close()
    except Exception:
        return


# Run the cleanup automatically when the process exits.
atexit.register(_close_asyncio_loop)
| import gradio as gr | |
| from dotenv import load_dotenv | |
| from src.jobs.ats_detector import detect_ats | |
| from src.jobs.company_loader import load_companies | |
| from src.jobs.debug_utils import log_debug_header, log_debug_line, save_debug_html | |
| from src.jobs.extractor import extract_jobs_with_diagnostics | |
| from src.jobs.fetcher import fetch_jobs_from_ats_api, resolve_real_jobs_page | |
| from src.models import JobPosting | |
| from src.output.generator import build_talking_points, resume_profile_to_json | |
| from src.resume.pdf_extract import extract_resume_text | |
| from src.resume.profile_builder import build_resume_profile | |
| from src.scoring.matcher import rank_companies, score_job_match | |
# Directory that contains this file; every bundled resource is resolved from it.
BASE_DIR = Path(__file__).resolve().parent

# Load environment variables from a ".env" file located next to this module.
load_dotenv(BASE_DIR.joinpath(".env"))

# Locations probed, in order, for the built-in company CSV.
DEFAULT_COMPANY_CANDIDATES = [
    BASE_DIR.joinpath("NSBE 2026 Baltimore Company_ Schools - Companies.csv"),
    BASE_DIR.joinpath("data", "NSBE 2026 Baltimore Company_ Schools - Companies (1).csv"),
]

# Destination directory handed to save_debug_html for per-company HTML dumps.
DEBUG_HTML_DIR = BASE_DIR.joinpath("debug_html")

# Gradio theme supplied to demo.launch() at the bottom of the file.
APP_THEME = gr.themes.Base(
    font=["Manrope", "ui-sans-serif", "sans-serif"],
    neutral_hue="slate",
    secondary_hue="indigo",
    primary_hue="cyan",
)
# Stylesheet passed to demo.launch(css=...) at the bottom of the file.
# Contents, in order:
#   * a Google Fonts import (Inter, Space Grotesk) and the :root colour/shadow tokens;
#   * the app's own classes used by the layout and the HTML builders
#     (.app-shell, .app-hero, .panel, .subcard, .chip, .status-card,
#     .summary-*, .empty-state, ...);
#   * overrides of Gradio's selectors (.gr-*, .tab-nav, tables), mostly with
#     !important;
#   * responsive grid breakpoints at 980px and 720px.
# NOTE(review): APP_THEME names "Manrope" as its font while this sheet sets
# 'Inter' on body/.gradio-container — confirm which one is intended to win.
CUSTOM_CSS = """
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700;800&family=Space+Grotesk:wght@500;700&display=swap');
:root {
--bg: #f5f7fb;
--surface: #ffffff;
--surface-muted: #f8fafc;
--surface-soft: #f1f5f9;
--border: #e5eaf2;
--border-strong: #d7dfeb;
--text: #102033;
--text-muted: #5e7086;
--text-soft: #7d8ea4;
--accent: #3366ff;
--accent-soft: #eef3ff;
--accent-hover: #2856df;
--success: #1f9d73;
--danger: #d94f45;
--shadow-lg: 0 18px 40px rgba(15, 23, 42, 0.08);
--shadow-md: 0 10px 24px rgba(15, 23, 42, 0.06);
}
html, body, .gradio-container {
min-height: 100%;
}
body, .gradio-container {
background: linear-gradient(180deg, #f7f9fc 0%, #f3f6fb 100%);
color: var(--text);
font-family: 'Inter', sans-serif;
}
.gradio-container {
max-width: 1260px !important;
padding: 20px 20px 30px !important;
}
.gradio-container * {
box-sizing: border-box;
}
.app-shell {
gap: 18px;
}
.app-hero {
padding: 20px 22px;
border-radius: 18px;
border: 1px solid var(--border);
background: var(--surface);
box-shadow: var(--shadow-md);
}
.eyebrow {
display: inline-flex;
align-items: center;
min-height: 28px;
padding: 0 10px;
border-radius: 999px;
background: var(--accent-soft);
color: var(--accent);
font-size: 0.74rem;
font-weight: 700;
letter-spacing: 0.04em;
text-transform: uppercase;
}
.hero-title {
margin: 12px 0 6px;
color: var(--text);
font-family: 'Space Grotesk', sans-serif;
font-size: clamp(1.8rem, 2.5vw, 2.5rem);
letter-spacing: -0.04em;
line-height: 1.02;
}
.hero-copy {
margin: 0;
max-width: 760px;
color: var(--text-muted);
font-size: 0.98rem;
line-height: 1.55;
}
.hero-meta {
display: grid;
grid-template-columns: repeat(3, minmax(0, 1fr));
gap: 10px;
margin-top: 16px;
}
.hero-pill {
padding: 12px 14px;
border-radius: 14px;
border: 1px solid var(--border);
background: var(--surface-muted);
color: var(--text-muted);
font-size: 0.88rem;
line-height: 1.45;
}
.hero-pill strong {
display: block;
margin-bottom: 4px;
color: var(--text);
font-size: 0.92rem;
}
.panel {
border-radius: 18px;
border: 1px solid var(--border);
background: var(--surface);
box-shadow: var(--shadow-lg);
}
.control-panel,
.results-panel {
padding: 18px;
}
.section-title {
margin-bottom: 14px;
}
.section-title h3 {
margin: 0 0 6px;
color: var(--text);
font-size: 1.05rem;
font-weight: 700;
letter-spacing: -0.02em;
}
.section-title p {
margin: 0;
color: var(--text-muted);
line-height: 1.5;
font-size: 0.92rem;
}
.chip-row {
display: flex;
flex-wrap: wrap;
gap: 8px;
margin-top: 10px;
}
.chip {
display: inline-flex;
align-items: center;
min-height: 28px;
padding: 0 10px;
border-radius: 999px;
background: var(--surface-soft);
border: 1px solid var(--border);
color: var(--text-muted);
font-size: 0.8rem;
font-weight: 600;
}
.subcard {
padding: 14px;
margin-bottom: 12px;
border-radius: 14px;
border: 1px solid var(--border);
background: var(--surface);
}
.subcard:last-child {
margin-bottom: 0;
}
.subcard-title {
margin: 0 0 10px;
color: var(--text);
font-size: 0.92rem;
font-weight: 700;
}
.results-note {
margin-top: 12px;
color: var(--text-soft);
font-size: 0.86rem;
line-height: 1.5;
}
.gr-box,
.gr-group,
.gr-form,
.gr-panel,
.gradio-container .block,
.gradio-container .gr-block {
background: transparent !important;
border: none !important;
box-shadow: none !important;
padding: 0 !important;
}
.gradio-container .gr-column {
gap: 0 !important;
}
.gradio-container .gr-form,
.gradio-container .gr-group {
gap: 12px !important;
}
.gradio-container label,
.gradio-container .wrap label,
.gradio-container .prose,
.gradio-container .prose p,
.gradio-container .prose strong {
color: var(--text) !important;
}
.gradio-container .gr-markdown p {
color: var(--text-muted) !important;
}
.gradio-container .wrap label span,
.gradio-container .label-wrap span,
.gradio-container .gr-form label,
.gradio-container .gr-checkbox label {
color: var(--text) !important;
font-weight: 600 !important;
}
.gradio-container input,
.gradio-container textarea,
.gradio-container select,
.gradio-container .gr-textbox,
.gradio-container .cm-editor,
.gradio-container .gr-code,
.gradio-container .gr-dataframe {
border-radius: 10px !important;
border: 1px solid var(--border-strong) !important;
background: #ffffff !important;
color: var(--text) !important;
box-shadow: none !important;
}
.gradio-container input::placeholder,
.gradio-container textarea::placeholder {
color: var(--text-soft) !important;
}
.gradio-container .gr-file,
.gradio-container .upload-card {
min-height: 84px !important;
border-radius: 12px !important;
border: 1px dashed #c7d3e3 !important;
background: var(--surface-muted) !important;
transition: border-color 140ms ease, background 140ms ease, box-shadow 140ms ease;
overflow: hidden !important;
}
.gradio-container .gr-file > div,
.gradio-container .upload-card > div {
min-height: 84px !important;
}
.gradio-container .gr-file:hover,
.gradio-container .upload-card:hover {
border-color: #9fb8ff !important;
background: #f7faff !important;
box-shadow: 0 0 0 4px rgba(51, 102, 255, 0.05) !important;
}
.gradio-container .gr-file .wrap,
.gradio-container .gr-file .or,
.gradio-container .gr-file .hint {
color: var(--text-muted) !important;
}
.gradio-container .gr-button-primary {
min-height: 44px;
border-radius: 10px !important;
border: none !important;
background: var(--accent) !important;
color: #ffffff !important;
font-weight: 700 !important;
letter-spacing: 0.01em;
box-shadow: 0 8px 18px rgba(51, 102, 255, 0.18) !important;
transition: transform 140ms ease, background 140ms ease, box-shadow 140ms ease !important;
}
.gradio-container .gr-button-primary:hover {
background: var(--accent-hover) !important;
transform: translateY(-1px);
box-shadow: 0 10px 20px rgba(51, 102, 255, 0.22) !important;
}
.gradio-container button:disabled,
.gradio-container .gr-button-primary[disabled] {
opacity: 0.6 !important;
cursor: not-allowed !important;
}
.gradio-container .gr-slider,
.gradio-container .gr-slider .wrap,
.gradio-container .gr-slider input {
color: var(--text) !important;
}
.gradio-container input[type='checkbox'] {
accent-color: var(--accent);
}
.gradio-container .tab-nav {
margin-bottom: 12px;
padding: 4px !important;
border-radius: 12px !important;
background: var(--surface-soft) !important;
border: 1px solid var(--border) !important;
}
.gradio-container .tab-nav button {
min-height: 38px;
border-radius: 9px !important;
color: var(--text-muted) !important;
font-weight: 700 !important;
transition: background 120ms ease, color 120ms ease;
}
.gradio-container .tab-nav button.selected {
background: #ffffff !important;
color: var(--text) !important;
box-shadow: 0 1px 2px rgba(15, 23, 42, 0.08);
}
.tab-panel {
padding-top: 4px;
}
.gradio-container .gr-dataframe {
overflow: hidden !important;
}
.gradio-container table {
border-collapse: collapse !important;
}
.gradio-container thead th {
padding: 12px 14px !important;
background: var(--surface-muted) !important;
color: var(--text-muted) !important;
font-size: 0.8rem !important;
font-weight: 700 !important;
border-bottom: 1px solid var(--border) !important;
}
.gradio-container tbody tr:hover {
background: #f8fbff !important;
}
.gradio-container td {
padding: 12px 14px !important;
color: var(--text) !important;
border-bottom: 1px solid #edf2f7 !important;
}
.status-card,
.summary-shell,
.empty-state {
border-radius: 14px;
border: 1px solid var(--border);
background: var(--surface);
}
.status-card {
padding: 14px 16px;
margin-bottom: 12px;
}
.status-card strong {
display: block;
margin-bottom: 4px;
color: var(--text);
font-size: 0.94rem;
}
.status-card p {
margin: 0;
color: var(--text-muted);
line-height: 1.5;
}
.status-card.info {
border-color: #dbe6ff;
background: #f8fbff;
}
.status-card.success {
border-color: #d7f0e6;
background: #f7fcf9;
}
.status-card.error {
border-color: #f1d9d7;
background: #fff8f7;
}
.summary-shell {
padding: 12px;
margin-bottom: 12px;
background: var(--surface-muted);
}
.summary-grid {
display: grid;
grid-template-columns: repeat(4, minmax(0, 1fr));
gap: 10px;
}
.summary-card {
padding: 14px;
border-radius: 12px;
border: 1px solid var(--border);
background: var(--surface);
}
.summary-card span {
display: block;
margin-bottom: 6px;
color: var(--text-soft);
font-size: 0.75rem;
text-transform: uppercase;
letter-spacing: 0.04em;
font-weight: 700;
}
.summary-card strong {
display: block;
color: var(--text);
font-size: 1.25rem;
font-weight: 800;
letter-spacing: -0.03em;
}
.summary-card small {
display: block;
margin-top: 8px;
color: var(--text-muted);
line-height: 1.45;
}
.empty-state {
padding: 22px 18px;
text-align: center;
}
.empty-state strong {
display: block;
margin-bottom: 8px;
color: var(--text);
font-size: 1rem;
}
.empty-state p {
max-width: 520px;
margin: 0 auto;
color: var(--text-muted);
line-height: 1.5;
}
@media (max-width: 980px) {
.hero-meta,
.summary-grid {
grid-template-columns: repeat(2, minmax(0, 1fr));
}
.gradio-container {
padding: 14px 14px 22px !important;
}
}
@media (max-width: 720px) {
.hero-meta,
.summary-grid {
grid-template-columns: 1fr;
}
}
"""
def _resolve_file_path(file_obj: Any) -> str:
    """Return the filesystem path carried by an uploaded-file value.

    Accepted shapes:
      * ``None`` -> ``""``
      * ``str`` -> returned unchanged
      * ``os.PathLike`` (e.g. ``pathlib.Path``) -> its full path
      * an object with a truthy ``name`` attribute -> ``str(obj.name)``
      * a ``dict`` -> its ``"name"`` entry, falling back to ``"path"``

    Anything else, or a value whose path field is missing/empty, yields ``""``.
    """
    import os

    if file_obj is None:
        return ""
    if isinstance(file_obj, str):
        return file_obj
    # Path objects also expose ``.name`` but that is only the final component,
    # so they must be handled before the generic attribute lookup.
    if isinstance(file_obj, os.PathLike):
        return os.fsdecode(file_obj)
    attr_name = getattr(file_obj, "name", None)
    if attr_name:
        return str(attr_name)
    if isinstance(file_obj, dict):
        # "name" keeps priority for backward compatibility; "path" is a fallback.
        for key in ("name", "path"):
            candidate = file_obj.get(key)
            if candidate:
                return str(candidate)
    return ""
def _default_companies_path() -> str:
    """Return the first entry of DEFAULT_COMPANY_CANDIDATES that exists on disk.

    Raises:
        FileNotFoundError: when none of the candidate paths exists.
    """
    first_existing = next(
        (candidate for candidate in DEFAULT_COMPANY_CANDIDATES if candidate.exists()),
        None,
    )
    if first_existing is None:
        raise FileNotFoundError("No default company CSV file is available.")
    return str(first_existing)
def _fallback_job(company_name: str, careers_url: str, ats: str) -> JobPosting:
    """Build the placeholder posting used when a company yields no parsed jobs.

    The posting carries the company name, the careers URL and the ATS label it
    is given; title and description are fixed text, location and department
    are empty strings.
    """
    placeholder_fields = {
        "company": company_name,
        "title": "General Opportunities",
        "location": "",
        "url": careers_url,
        "department": "",
        "description": "Careers page discovered but no structured roles were parsed.",
        "ats": ats,
    }
    return JobPosting(**placeholder_fields)
def _build_status_html(title: str, body: str, tone: str = "info") -> str:
    """Render a status card as an HTML fragment.

    ``tone`` becomes an extra CSS class next to ``status-card``; ``title`` and
    ``body`` become the card's heading and paragraph. All three values are
    HTML-escaped before being inserted.
    """
    safe_tone, safe_title, safe_body = (escape(part) for part in (tone, title, body))
    pieces = (
        f'<div class="status-card {safe_tone}">',
        f"<strong>{safe_title}</strong>",
        f"<p>{safe_body}</p>",
        "</div>",
    )
    return "".join(pieces)
def _build_summary_html(ranked_rows: List[List[Any]], match_rows: List[List[Any]]) -> str:
    """Render the four-card summary panel shown above the result tabs.

    Args:
        ranked_rows: Company rows, best first. Column 0 is read as the company
            name, column 1 as its score and column 3 as its best role.
        match_rows: Job-match rows; only their count is used.

    Returns:
        An "empty state" fragment when ``ranked_rows`` is empty, otherwise a
        ``summary-shell`` fragment. Every interpolated text value is
        HTML-escaped. Missing, blank or non-numeric cells are shown as ``-``
        instead of raising.
    """
    if not ranked_rows:
        return """
<div class="empty-state">
<strong>No ranking data yet</strong>
<p>Upload a resume, run the matcher, and this panel will summarize the strongest companies, match volume, and best-fit roles.</p>
</div>
"""

    def _cell(row: List[Any], index: int) -> Any:
        # Tolerate rows that are shorter than expected.
        return row[index] if len(row) > index else None

    def _text(value: Any) -> str:
        return "-" if value is None or value == "" else str(value)

    def _score(row: List[Any]) -> Any:
        # Returns a float, or None when the score cell is absent/blank/non-numeric.
        raw = _cell(row, 1)
        if raw is None or raw == "":
            return None
        try:
            return float(raw)
        except (TypeError, ValueError):
            return None

    leader = ranked_rows[0]
    top_company = _text(_cell(leader, 0))
    leader_score = _score(leader)
    top_score = "-" if leader_score is None else f"{leader_score:.1f}"

    usable_scores = [value for value in map(_score, ranked_rows) if value is not None]
    avg_score = f"{sum(usable_scores) / len(usable_scores):.1f}" if usable_scores else "-"

    best_role = _text(_cell(leader, 3))
    total_companies = len(ranked_rows)
    total_jobs = len(match_rows or [])

    return f"""
<div class="summary-shell">
<div class="summary-grid">
<div class="summary-card">
<span>Top Company</span>
<strong>{escape(top_company)}</strong>
<small>Best-fit company based on resolved job boards and resume alignment.</small>
</div>
<div class="summary-card">
<span>Top Score</span>
<strong>{escape(top_score)}</strong>
<small>Highest company fit score in the current analysis.</small>
</div>
<div class="summary-card">
<span>Companies / Jobs</span>
<strong>{total_companies} / {total_jobs}</strong>
<small>Ranked companies and extracted job matches returned in this run.</small>
</div>
<div class="summary-card">
<span>Average Fit / Best Role</span>
<strong>{escape(avg_score)}</strong>
<small>{escape(best_role)}</small>
</div>
</div>
</div>
"""
def _save_company_debug_html(company_name: str, resolved_page_html: str, snapshots: dict[str, str], failure_type: str) -> None:
    """Write a company's HTML captures into DEBUG_HTML_DIR via save_debug_html.

    One file is saved per entry of ``snapshots`` (keyed by stage name), then
    the resolved page is saved under the stage ``"resolved"``. When
    ``failure_type`` is non-empty and is neither ``"SUCCESS"`` nor
    ``"UNKNOWN"``, the resolved page is saved once more under the lower-cased
    failure type.
    """
    for stage_name, stage_html in snapshots.items():
        save_debug_html(company_name, stage_html, stage_name, DEBUG_HTML_DIR)
    save_debug_html(company_name, resolved_page_html, "resolved", DEBUG_HTML_DIR)

    if not failure_type or failure_type in {"SUCCESS", "UNKNOWN"}:
        return
    failure_stage = failure_type.lower()
    save_debug_html(company_name, resolved_page_html, failure_stage, DEBUG_HTML_DIR)
def _log_company_diagnostics(
    company_name: str,
    original_url: str,
    resolved_page_url: str,
    fetch_method: str,
    final_url: str,
    html: str,
    ats: str,
    api_jobs: List[JobPosting],
    diagnostics: Any,
    resolution_steps: List[str],
) -> None:
    """Emit the per-company debug report through the debug_utils log helpers.

    A header for ``company_name`` is followed by one labelled line per fact:
    URL resolution details, HTML length, ATS, the extractor counters read from
    ``diagnostics``, the API job count, and a final SUCCESS flag that is true
    when the failure type is ``"SUCCESS"`` or at least one job was found.
    """
    emit = log_debug_line

    log_debug_header(company_name)
    emit("ORIGINAL URL", original_url)
    emit("RESOLVED URL", resolved_page_url)
    emit("FETCH METHOD", fetch_method)
    emit("FINAL URL", final_url)
    emit("RESOLUTION STEPS", resolution_steps)
    emit("HTML LENGTH", len(html))
    emit("ATS", ats)
    if ats not in {"greenhouse", "lever"}:
        emit("ATS NOTE", "No ATS API match detected; using generic HTML/script parsing")

    # Extractor counters, logged in the order the report has always used.
    emit("TOTAL ELEMENTS SCANNED", diagnostics.total_elements_scanned)
    emit("RAW TEXT SAMPLE", diagnostics.raw_text_sample[:20])
    emit("CANDIDATES FOUND", diagnostics.candidates_found)
    emit("TITLE FILTER PASSES", diagnostics.title_filtered_count)
    emit("SCRIPT MATCHES", diagnostics.script_matches)
    emit("SCRIPT JOBS", diagnostics.script_jobs_extracted)

    api_job_count = len(api_jobs)
    emit("API JOBS", api_job_count)
    emit("VALID JOBS", diagnostics.valid_jobs + api_job_count)
    emit("SAMPLE TITLES", diagnostics.sample_titles)
    emit("FAILURE TYPE", diagnostics.failure_type)

    succeeded = diagnostics.failure_type == "SUCCESS" or api_job_count + diagnostics.valid_jobs > 0
    emit("SUCCESS", succeeded)
def _analysis_error(
    title: str,
    status_body: str,
    empty_summary: str,
    *,
    error_text: Any = None,
    **extra_payload: Any,
) -> Tuple[List[List[Any]], List[List[Any]], str, str, str, str]:
    """Build the 6-tuple that analyze_resume returns on any failure path.

    The JSON slot holds ``{"error": ...}`` (``error_text`` if given, otherwise
    ``status_body``) plus any ``extra_payload`` keys; the status slot is an
    "error" status card; both tables and the talking points are empty.
    """
    payload = {"error": status_body if error_text is None else error_text, **extra_payload}
    return (
        [],
        [],
        json.dumps(payload, indent=2),
        "",
        _build_status_html(title, status_body, "error"),
        empty_summary,
    )


def _discover_company_jobs(company: Any) -> List[JobPosting]:
    """Resolve one company's careers page and return the jobs found for it.

    ATS API results come first; HTML-extracted jobs are appended when the API
    returned fewer than three. When nothing at all is found, a single
    placeholder posting is returned. Debug HTML and diagnostics are written as
    a side effect. Exceptions propagate to the caller.
    """
    resolved_page = resolve_real_jobs_page(company.careers_url)
    resolved_url = resolved_page.url or company.careers_url
    resolved_html = resolved_page.html
    ats = detect_ats(resolved_url, resolved_html)
    if resolved_page.fallback_used:
        print(f"[scraper] playwright fallback triggered: {resolved_page.fallback_reason or 'fallback_used'}")

    api_jobs = fetch_jobs_from_ats_api(company, ats, source_url=resolved_url)
    html_jobs, diagnostics = extract_jobs_with_diagnostics(
        company,
        resolved_html,
        ats,
        base_url=resolved_url,
    )
    # Nothing parsed and the resolver never left the original URL: relabel the
    # otherwise unclassified failure as a shell page.
    if diagnostics.valid_jobs == 0 and company.careers_url == resolved_url and diagnostics.failure_type == "UNKNOWN":
        diagnostics.failure_type = "SHELL_PAGE"

    _save_company_debug_html(
        company.company,
        resolved_html,
        resolved_page.html_snapshots,
        diagnostics.failure_type if not api_jobs else "SUCCESS",
    )
    _log_company_diagnostics(
        company.company,
        company.careers_url,
        resolved_url,
        resolved_page.fetch_method,
        resolved_page.final_url or resolved_url,
        resolved_html,
        ats,
        api_jobs,
        diagnostics,
        resolved_page.resolution_steps,
    )

    jobs = api_jobs[:]
    if len(jobs) < 3:
        jobs.extend(html_jobs)
    if not jobs:
        print(f"[scraper] {company.company} failed at parsing step with failure type: {diagnostics.failure_type}")
        jobs = [_fallback_job(company.company, resolved_url, ats)]
    return jobs


def analyze_resume(
    resume_pdf: Any,
    company_source: str,
    optional_company_csv: Any,
    max_companies: int,
    use_ai_parser: bool,
    progress: gr.Progress = gr.Progress(),
) -> Tuple[List[List[Any]], List[List[Any]], str, str, str, str]:
    """Run the full resume-to-company matching pipeline for the UI.

    Args:
        resume_pdf: Uploaded resume value (any shape _resolve_file_path accepts).
        company_source: Radio selection; the custom CSV is only used when this
            equals ``"Custom CSV"``.
        optional_company_csv: Uploaded company CSV value, may be ``None``.
        max_companies: Upper bound on how many companies are analyzed. It is
            applied to companies that have a careers URL, so entries without
            one no longer consume the budget.
        use_ai_parser: Forwarded to build_resume_profile as ``use_ai``.
        progress: Gradio progress reporter.

    Returns:
        ``(ranked_rows, match_rows, profile_json, talking_points, status_html,
        summary_html)``. On any failure the tables are empty, the JSON slot
        contains an ``"error"`` entry and the status card has the error tone;
        exceptions are not propagated to the caller.
    """
    import os
    import traceback

    resume_path = _resolve_file_path(resume_pdf)
    csv_path = _resolve_file_path(optional_company_csv) if company_source == "Custom CSV" else ""
    empty_summary = _build_summary_html([], [])

    if not resume_path:
        return _analysis_error(
            "Resume required",
            "Upload a PDF resume to start the analysis.",
            empty_summary,
            error_text="Please upload a resume PDF.",
        )

    try:
        # --- Debug: log pipeline inputs before anything runs ---
        print("[analyze] company_source:", company_source)
        print("[analyze] csv_path (resolved):", repr(csv_path))
        print("[analyze] resume_path:", repr(resume_path))
        print("[analyze] cwd:", os.getcwd())

        progress(0.05, desc="Extracting resume text")
        resume_text = extract_resume_text(resume_path)
        progress(0.12, desc="Building resume profile")
        profile = build_resume_profile(resume_text, use_ai=use_ai_parser)

        # Resolve the default CSV path and report clearly if it is missing.
        try:
            default_csv_path = _default_companies_path()
            print("[analyze] default_csv_path:", default_csv_path)
        except FileNotFoundError as fnf:
            print("[analyze] CRITICAL: default CSV not found:", fnf)
            return _analysis_error("Company list not found", str(fnf), empty_summary)

        loaded = load_companies(default_csv_path, csv_path if csv_path else None)
        total_loaded = len(loaded)
        analyzable = [entry for entry in loaded if entry.careers_url]
        with_url = len(analyzable)
        print(f"[analyze] Loaded {total_loaded} companies, {with_url} have careers_url")

        # Stop early so the user sees a clear reason rather than "0 companies processed".
        if total_loaded == 0:
            msg = (
                "No companies were loaded. "
                "Check that the CSV has a company-name column and at least one data row."
            )
            return _analysis_error("No companies loaded", msg, empty_summary)

        if with_url == 0:
            # Companies exist but every careers_url is empty — show which columns were seen.
            col_sample = list(loaded[0].meta or {})[:12]
            msg = (
                f"Loaded {total_loaded} companies but none have a usable careers URL. "
                f"CSV columns detected: {col_sample}. "
                "This app now reads only the opening page column (col 4 / 'Direct links to company career/job openings page'). "
                "Add valid https URLs in that column."
            )
            print("[analyze] WARNING:", msg)
            return _analysis_error("No careers URLs found", msg, empty_summary, csv_columns=col_sample)

        # Cap AFTER dropping URL-less entries so the limit counts companies
        # that can actually be analyzed.
        companies = analyzable[: int(max_companies)]
        print(f"[analyze] After max_companies cap: {len(companies)} companies to analyze")
        progress(0.18, desc=f"Analyzing {len(companies)} companies")

        discovered_jobs: List[JobPosting] = []
        processed_companies = 0
        company_total = max(1, len(companies))
        for index, company in enumerate(companies, start=1):
            try:
                progress(0.18 + (0.62 * index / company_total), desc=f"Resolving {company.company}")
                discovered_jobs.extend(_discover_company_jobs(company))
                processed_companies += 1
            except Exception as company_exc:
                # One failing company must not abort the run; log it and move on.
                print("=" * 60)
                print(f"COMPANY: {company.company}")
                print("FAILURE TYPE: PARSING_ERROR")
                print("SUCCESS: False")
                print("STEP BROKE: analyze_resume loop")
                print(f"ERROR: {company_exc}")
                traceback.print_exc()
                continue

        progress(0.86, desc="Scoring matches")
        matches = sorted(
            (score_job_match(job, profile) for job in discovered_jobs),
            key=lambda item: item.score,
            reverse=True,
        )
        rankings = rank_companies(matches)

        ranked_rows = [
            [r.company, r.company_score, r.match_count, r.best_role, r.ats, r.explanation]
            for r in rankings[:50]
        ]
        match_rows = [
            [m.company, m.title, m.location, m.score, m.ats, m.url, m.explanation]
            for m in matches[:250]
        ]
        profile_json = json.dumps(resume_profile_to_json(profile), indent=2)
        talking_points = build_talking_points(rankings, matches)
        status_html = _build_status_html(
            "Analysis complete",
            f"Processed {processed_companies} companies, extracted {len(match_rows)} job matches, and ranked {len(ranked_rows)} companies.",
            "success",
        )
        summary_html = _build_summary_html(ranked_rows, match_rows)
        progress(1.0, desc="Done")
        return ranked_rows, match_rows, profile_json, talking_points, status_html, summary_html
    except Exception as exc:
        # UI boundary: surface the message to the user, keep the traceback in the logs.
        traceback.print_exc()
        return _analysis_error("Analysis failed", str(exc), empty_summary)
# Gradio UI definition: a hero banner, a left "Workspace" column with the
# inputs, a right "Results" column with status/summary cards and four tabs,
# and the click handler that feeds analyze_resume.
# NOTE(review): the original indentation of this section was not available;
# the container nesting below is reconstructed from the call order — confirm
# (in particular whether the Analyze button sits inside the settings group).
with gr.Blocks(title="AI Career Fair Matcher") as demo:
    with gr.Column(elem_classes=["app-shell"]):
        # Static hero banner; classes are styled by CUSTOM_CSS.
        gr.HTML(
            """
<section class="app-hero">
<div class="eyebrow">AI Career Fair Matcher</div>
<h1 class="hero-title">Prioritize the right companies before you ever walk into the fair.</h1>
<p class="hero-copy">
Upload a resume, analyze a built-in or custom company list, and get ranked companies, matching jobs, and recruiter talking points in a clean workflow.
</p>
<div class="hero-meta">
<div class="hero-pill"><strong>Resume Parsing</strong>Uses AI to extract structured information from your resume.</div>
<div class="hero-pill"><strong>Job Discovery</strong>Resolves real jobs pages behind career search shells.</div>
<div class="hero-pill"><strong>Actionable Output</strong>Ranked targets, matching roles, and talking points.</div>
</div>
</section>
"""
        )
        with gr.Row(equal_height=False):
            # ---- Left column: inputs and settings ----
            with gr.Column(scale=5, min_width=360, elem_classes=["panel", "control-panel"]):
                gr.Markdown(
                    """
<div class="section-title">
<h3>Workspace</h3>
<p>Load your resume, choose the company source, and tune how broad the analysis should be.</p>
</div>
<div class="chip-row">
<span class="chip">Dark Mode Default</span>
<span class="chip">AI Resume Parsing</span>
<span class="chip">Built-In NSBE Dataset</span>
</div>
""",
                    elem_classes=["section-title"],
                )
                with gr.Group(elem_classes=["subcard"]):
                    gr.Markdown("<div class='subcard-title'>Resume Upload</div>")
                    resume_input = gr.File(label="Upload resume PDF", file_types=[".pdf"], elem_classes=["upload-card"])
                with gr.Group(elem_classes=["subcard"]):
                    gr.Markdown("<div class='subcard-title'>Company Source</div>")
                    # analyze_resume only reads the uploaded CSV when this is "Custom CSV".
                    company_source_input = gr.Radio(
                        choices=["Built-in NSBE List", "Custom CSV"],
                        value="Built-in NSBE List",
                        label="Choose company source",
                    )
                    company_csv_input = gr.File(label="Optional custom company CSV", file_types=[".csv"], elem_classes=["upload-card"])
                with gr.Group(elem_classes=["subcard"]):
                    gr.Markdown("<div class='subcard-title'>Analysis Settings</div>")
                    use_ai_parser_input = gr.Checkbox(
                        value=True,
                        label="Use AI Resume Parser",
                    )
                    gr.Markdown(
                        "<div class='results-note'>Uses AI to extract structured information from your resume.</div>"
                    )
                    max_companies_input = gr.Slider(
                        minimum=5,
                        maximum=100,
                        step=1,
                        value=30,
                        label="Max companies to analyze",
                    )
                analyze_button = gr.Button("Analyze Career Fair Fit", variant="primary")
                gr.Markdown(
                    """
<div class="results-note">
Designed for quick scanning: inputs stay compact on the left while results, summaries, and tabs stay dense and readable on the right.
</div>
"""
                )
            # ---- Right column: status, summary and result tabs ----
            with gr.Column(scale=7, min_width=420, elem_classes=["panel", "results-panel"]):
                gr.Markdown(
                    """
<div class="section-title">
<h3>Results</h3>
<p>Start with the summary, then inspect ranked companies, matching jobs, resume profile fields, and recruiter talking points.</p>
</div>
""",
                    elem_classes=["section-title"],
                )
                # Initial placeholders; both are overwritten by analyze_resume's outputs.
                status_output = gr.HTML(
                    value=_build_status_html(
                        "Ready to analyze",
                        "Upload a resume, optionally add a custom CSV, and launch the matcher.",
                        "info",
                    )
                )
                summary_output = gr.HTML(value=_build_summary_html([], []))
                with gr.Group(elem_classes=["subcard"]):
                    with gr.Tabs():
                        with gr.TabItem("Ranked Companies", elem_classes=["tab-panel"]):
                            # Headers line up with the ranked_rows columns built in analyze_resume.
                            ranked_output = gr.Dataframe(
                                headers=["Company", "Score", "Matches", "Best Role", "ATS", "Explanation"],
                                label="Ranked Companies",
                                wrap=True,
                            )
                        with gr.TabItem("Matching Jobs", elem_classes=["tab-panel"]):
                            # Headers line up with the match_rows columns built in analyze_resume.
                            jobs_output = gr.Dataframe(
                                headers=["Company", "Job Title", "Location", "Score", "ATS", "URL", "Why It Matches"],
                                label="Matching Jobs",
                                wrap=True,
                            )
                        with gr.TabItem("Resume Profile", elem_classes=["tab-panel"]):
                            profile_output = gr.Code(label="Resume Profile JSON", language="json")
                        with gr.TabItem("Talking Points", elem_classes=["tab-panel"]):
                            talking_points_output = gr.Markdown(label="Talking Points")
    # Input order matches analyze_resume's positional parameters; output order
    # matches the 6-tuple it returns.
    analyze_button.click(
        fn=analyze_resume,
        inputs=[resume_input, company_source_input, company_csv_input, max_companies_input, use_ai_parser_input],
        outputs=[ranked_output, jobs_output, profile_output, talking_points_output, status_output, summary_output],
    )

if __name__ == "__main__":
    # Theme and stylesheet are supplied at launch time rather than to gr.Blocks.
    demo.queue().launch(theme=APP_THEME, css=CUSTOM_CSS, ssr_mode=False)