Spaces:

cherrykiwidd
/

nsbecf

Sleeping

File size: 32,272 Bytes

102d9a5
 
fa6caa6
cf6f0aa
fa6caa6
 
 
102d9a5
95aefa5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102d9a5
 
 
95aefa5
102d9a5
 
 
 
 
 
 
 
 
 
4a339d7
fa6caa6
 
 
 
8199ab0
 
6f6acfa
fa6caa6
 
 
 
 
 
 
 
 
 
 
 
 
8199ab0
b8d2d77
 
 
 
 
 
fa6caa6
2fc1e5d
8ab3794
2fc1e5d
 
8ab3794
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a9d588b
 
 
 
2fc1e5d
 
 
8ab3794
 
 
2fc1e5d
 
 
8ab3794
 
a9d588b
 
 
 
 
 
 
 
2fc1e5d
 
 
8ab3794
 
 
 
 
a9d588b
 
2fc1e5d
 
 
8ab3794
 
2fc1e5d
8ab3794
 
 
a9d588b
8ab3794
 
2fc1e5d
 
 
8ab3794
 
2fc1e5d
8ab3794
 
 
2fc1e5d
 
 
a9d588b
8ab3794
 
a9d588b
8ab3794
2fc1e5d
 
a9d588b
2fc1e5d
 
a9d588b
8ab3794
2fc1e5d
 
a9d588b
 
8ab3794
 
 
 
a9d588b
8ab3794
2fc1e5d
 
a9d588b
2fc1e5d
 
8ab3794
 
2fc1e5d
 
 
8ab3794
 
 
 
cf6f0aa
 
a9d588b
 
 
2fc1e5d
 
a9d588b
 
2fc1e5d
 
a9d588b
 
8ab3794
 
 
 
2fc1e5d
 
 
 
8ab3794
 
 
2fc1e5d
 
 
 
 
a9d588b
8ab3794
2fc1e5d
 
 
a9d588b
 
8ab3794
 
2fc1e5d
8ab3794
 
 
 
a9d588b
 
 
 
 
 
8ab3794
 
 
a9d588b
 
 
 
 
 
 
 
8ab3794
 
a9d588b
 
 
 
 
8ab3794
 
a9d588b
2fc1e5d
 
a9d588b
 
 
 
 
 
2fc1e5d
 
 
a9d588b
2fc1e5d
 
a9d588b
 
2fc1e5d
 
8ab3794
 
 
 
 
2fc1e5d
a9d588b
2fc1e5d
 
 
8ab3794
a9d588b
 
 
8ab3794
a9d588b
 
 
 
 
 
8ab3794
a9d588b
2fc1e5d
 
 
 
a9d588b
2fc1e5d
 
cf6f0aa
a9d588b
8ab3794
 
 
 
a9d588b
 
 
 
 
8ab3794
2fc1e5d
 
cf6f0aa
 
8ab3794
 
 
 
 
a9d588b
 
 
 
 
8ab3794
cf6f0aa
 
 
 
8ab3794
 
 
a9d588b
 
 
 
 
8ab3794
cf6f0aa
 
2fc1e5d
8ab3794
 
2fc1e5d
8ab3794
 
 
2fc1e5d
8ab3794
 
2fc1e5d
 
 
8ab3794
cf6f0aa
8ab3794
cf6f0aa
 
 
 
8ab3794
cf6f0aa
2fc1e5d
 
8ab3794
a9d588b
8ab3794
 
a9d588b
 
 
8ab3794
a9d588b
 
2fc1e5d
8ab3794
 
 
 
 
2fc1e5d
 
 
8ab3794
 
 
a9d588b
8ab3794
2fc1e5d
 
 
8ab3794
 
 
2fc1e5d
 
a9d588b
8ab3794
a9d588b
 
 
 
 
 
 
 
cf6f0aa
 
 
a9d588b
8ab3794
 
 
a9d588b
8ab3794
cf6f0aa
 
 
8ab3794
cf6f0aa
 
 
a9d588b
8ab3794
 
cf6f0aa
 
 
 
 
8ab3794
 
 
cf6f0aa
 
 
 
8ab3794
cf6f0aa
 
 
 
8ab3794
 
 
cf6f0aa
 
 
 
8ab3794
 
cf6f0aa
 
 
8ab3794
 
cf6f0aa
 
 
8ab3794
 
cf6f0aa
 
 
8ab3794
 
cf6f0aa
 
 
8ab3794
 
 
cf6f0aa
 
 
 
 
a9d588b
cf6f0aa
 
 
 
8ab3794
 
 
cf6f0aa
 
 
 
a9d588b
8ab3794
 
a9d588b
 
 
cf6f0aa
 
 
 
8ab3794
 
 
a9d588b
cf6f0aa
 
 
 
 
8ab3794
cf6f0aa
 
 
 
8ab3794
cf6f0aa
 
 
 
 
 
8ab3794
 
cf6f0aa
 
 
 
a9d588b
8ab3794
 
2fc1e5d
 
a9d588b
 
cf6f0aa
 
 
 
2fc1e5d
8ab3794
2fc1e5d
a9d588b
2fc1e5d
a9d588b
 
 
 
 
2fc1e5d
 
 
fa6caa6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cf6f0aa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8199ab0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fa6caa6
 
8ab3794
fa6caa6
 
 
cf6f0aa
 
fa6caa6
8ab3794
cf6f0aa
fa6caa6
 
cf6f0aa
 
 
 
 
 
 
 
fa6caa6
 
c500ead
 
 
 
 
 
 
cf6f0aa
fa6caa6
cf6f0aa
fa6caa6
 
c500ead
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70f34e2
 
c500ead
 
 
 
 
 
 
 
 
 
 
fa6caa6
c500ead
cf6f0aa
fa6caa6
 
cf6f0aa
 
fa6caa6
 
 
6f6acfa
cf6f0aa
6f6acfa
 
 
 
fa6caa6
8199ab0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6f6acfa
8199ab0
6f6acfa
 
8199ab0
6f6acfa
 
 
cf6f0aa
6f6acfa
8199ab0
 
 
 
 
 
6f6acfa
fa6caa6
cf6f0aa
fa6caa6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cf6f0aa
 
 
 
 
 
 
 
 
fa6caa6
cf6f0aa
 
 
 
 
 
 
 
fa6caa6
 
b8d2d77
a9d588b
 
 
 
8ab3794
 
 
 
 
a9d588b
ba6c9ff
8ab3794
 
a9d588b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ba6c9ff
a9d588b
 
 
 
2fc1e5d
 
a9d588b
8ab3794
a9d588b
2fc1e5d
a9d588b
 
8ab3794
 
 
 
 
a9d588b
2fc1e5d
a9d588b
 
 
 
ba6c9ff
 
 
 
a9d588b
 
 
 
 
 
 
2fc1e5d
a9d588b
 
 
 
 
 
 
 
 
2fc1e5d
a9d588b
 
 
 
 
 
 
 
 
 
2fc1e5d
a9d588b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fa6caa6
 
 
8ab3794
cf6f0aa
fa6caa6
4a339d7
 
fa6caa6
b8d2d77

import asyncio
import atexit
import json
from html import escape
from pathlib import Path
from typing import Any, List, Tuple

# Suppress Python 3.13 asyncio "Invalid file descriptor: -1" noise at GC/shutdown.
# CPython 3.13 prints these via the "Exception ignored in: <__del__>" path which
# bypasses the warnings system entirely — the only reliable fix is to monkeypatch
# BaseEventLoop.__del__ so the ValueError is swallowed before CPython can print it.
try:
    import asyncio.base_events as _abe
    # Keep a handle on the original finalizer so the wrapper can delegate to it.
    _orig_loop_del = _abe.BaseEventLoop.__del__

    def _safe_loop_del(self) -> None:
        """Run the original ``BaseEventLoop.__del__`` and discard any ``Exception`` it raises."""
        try:
            _orig_loop_del(self)
        except Exception:
            pass

    _abe.BaseEventLoop.__del__ = _safe_loop_del
    # Remove the temporary module-level names. ``_orig_loop_del`` is kept on
    # purpose: the wrapper resolves it as a module global each time it runs.
    del _abe, _safe_loop_del
except Exception:
    # Best effort: if the patch cannot be installed, leave asyncio unpatched.
    pass


def _close_asyncio_loop() -> None:
    """Best-effort shutdown hook: close the policy's event loop if it is still open.

    Any exception raised while looking up or closing the loop is swallowed so
    the hook can never interfere with interpreter exit.
    """
    try:
        leftover = asyncio.get_event_loop_policy().get_event_loop()
        # Nothing to do when there is no loop or it has already been closed.
        if not leftover or leftover.is_closed():
            return
        leftover.close()
    except Exception:
        pass


# Register the cleanup hook so it runs when the interpreter shuts down.
atexit.register(_close_asyncio_loop)

import gradio as gr
from dotenv import load_dotenv

from src.jobs.ats_detector import detect_ats
from src.jobs.company_loader import load_companies
from src.jobs.debug_utils import log_debug_header, log_debug_line, save_debug_html
from src.jobs.extractor import extract_jobs_with_diagnostics
from src.jobs.fetcher import fetch_jobs_from_ats_api, resolve_real_jobs_page
from src.models import JobPosting
from src.output.generator import build_talking_points, resume_profile_to_json
from src.resume.pdf_extract import extract_resume_text
from src.resume.profile_builder import build_resume_profile
from src.scoring.matcher import rank_companies, score_job_match

# Directory containing this file; anchors the .env file, the default company
# CSV candidates, and the debug HTML folder below.
BASE_DIR = Path(__file__).resolve().parent
# Load environment variables from the .env file that sits next to this module.
load_dotenv(BASE_DIR / ".env")

# Candidate locations for the default company CSV. _default_companies_path()
# returns the first entry that exists, so the order here is the lookup priority.
DEFAULT_COMPANY_CANDIDATES = [
    BASE_DIR / "NSBE 2026 Baltimore Company_ Schools  - Companies.csv",
    BASE_DIR / "data" / "NSBE 2026 Baltimore Company_ Schools  - Companies (1).csv",
]
# Directory handed to save_debug_html() for per-company HTML snapshots.
DEBUG_HTML_DIR = BASE_DIR / "debug_html"
# Gradio base theme: cyan / indigo / slate hues with a Manrope-first font stack.
# NOTE(review): CUSTOM_CSS imports Inter and Space Grotesk and sets Inter on the
# body, while this stack asks for Manrope — confirm which font is intended.
APP_THEME = gr.themes.Base(
    primary_hue="cyan",
    secondary_hue="indigo",
    neutral_hue="slate",
    font=["Manrope", "ui-sans-serif", "sans-serif"],
)

# Stylesheet for the app UI. It imports the Inter and Space Grotesk web fonts,
# defines the colour and shadow variables under :root, restyles Gradio's own
# widgets (inputs, file upload, primary button, tabs, dataframe/table), and
# styles the classes emitted by _build_status_html (.status-card plus the
# info / success / error tone modifiers) and _build_summary_html
# (.summary-shell, .summary-grid, .summary-card, .empty-state). The hero and
# summary grids collapse at the 980px and 720px breakpoints.
# NOTE(review): presumably passed to Gradio through a css= argument — the
# consumer is outside this view; confirm.
CUSTOM_CSS = """
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700;800&family=Space+Grotesk:wght@500;700&display=swap');

:root {
    --bg: #f5f7fb;
    --surface: #ffffff;
    --surface-muted: #f8fafc;
    --surface-soft: #f1f5f9;
    --border: #e5eaf2;
    --border-strong: #d7dfeb;
    --text: #102033;
    --text-muted: #5e7086;
    --text-soft: #7d8ea4;
    --accent: #3366ff;
    --accent-soft: #eef3ff;
    --accent-hover: #2856df;
    --success: #1f9d73;
    --danger: #d94f45;
    --shadow-lg: 0 18px 40px rgba(15, 23, 42, 0.08);
    --shadow-md: 0 10px 24px rgba(15, 23, 42, 0.06);
}

html, body, .gradio-container {
    min-height: 100%;
}

body, .gradio-container {
    background: linear-gradient(180deg, #f7f9fc 0%, #f3f6fb 100%);
    color: var(--text);
    font-family: 'Inter', sans-serif;
}

.gradio-container {
    max-width: 1260px !important;
    padding: 20px 20px 30px !important;
}

.gradio-container * {
    box-sizing: border-box;
}

.app-shell {
    gap: 18px;
}

.app-hero {
    padding: 20px 22px;
    border-radius: 18px;
    border: 1px solid var(--border);
    background: var(--surface);
    box-shadow: var(--shadow-md);
}

.eyebrow {
    display: inline-flex;
    align-items: center;
    min-height: 28px;
    padding: 0 10px;
    border-radius: 999px;
    background: var(--accent-soft);
    color: var(--accent);
    font-size: 0.74rem;
    font-weight: 700;
    letter-spacing: 0.04em;
    text-transform: uppercase;
}

.hero-title {
    margin: 12px 0 6px;
    color: var(--text);
    font-family: 'Space Grotesk', sans-serif;
    font-size: clamp(1.8rem, 2.5vw, 2.5rem);
    letter-spacing: -0.04em;
    line-height: 1.02;
}

.hero-copy {
    margin: 0;
    max-width: 760px;
    color: var(--text-muted);
    font-size: 0.98rem;
    line-height: 1.55;
}

.hero-meta {
    display: grid;
    grid-template-columns: repeat(3, minmax(0, 1fr));
    gap: 10px;
    margin-top: 16px;
}

.hero-pill {
    padding: 12px 14px;
    border-radius: 14px;
    border: 1px solid var(--border);
    background: var(--surface-muted);
    color: var(--text-muted);
    font-size: 0.88rem;
    line-height: 1.45;
}

.hero-pill strong {
    display: block;
    margin-bottom: 4px;
    color: var(--text);
    font-size: 0.92rem;
}

.panel {
    border-radius: 18px;
    border: 1px solid var(--border);
    background: var(--surface);
    box-shadow: var(--shadow-lg);
}

.control-panel,
.results-panel {
    padding: 18px;
}

.section-title {
    margin-bottom: 14px;
}

.section-title h3 {
    margin: 0 0 6px;
    color: var(--text);
    font-size: 1.05rem;
    font-weight: 700;
    letter-spacing: -0.02em;
}

.section-title p {
    margin: 0;
    color: var(--text-muted);
    line-height: 1.5;
    font-size: 0.92rem;
}

.chip-row {
    display: flex;
    flex-wrap: wrap;
    gap: 8px;
    margin-top: 10px;
}

.chip {
    display: inline-flex;
    align-items: center;
    min-height: 28px;
    padding: 0 10px;
    border-radius: 999px;
    background: var(--surface-soft);
    border: 1px solid var(--border);
    color: var(--text-muted);
    font-size: 0.8rem;
    font-weight: 600;
}

.subcard {
    padding: 14px;
    margin-bottom: 12px;
    border-radius: 14px;
    border: 1px solid var(--border);
    background: var(--surface);
}

.subcard:last-child {
    margin-bottom: 0;
}

.subcard-title {
    margin: 0 0 10px;
    color: var(--text);
    font-size: 0.92rem;
    font-weight: 700;
}

.results-note {
    margin-top: 12px;
    color: var(--text-soft);
    font-size: 0.86rem;
    line-height: 1.5;
}

.gr-box,
.gr-group,
.gr-form,
.gr-panel,
.gradio-container .block,
.gradio-container .gr-block {
    background: transparent !important;
    border: none !important;
    box-shadow: none !important;
    padding: 0 !important;
}

.gradio-container .gr-column {
    gap: 0 !important;
}

.gradio-container .gr-form,
.gradio-container .gr-group {
    gap: 12px !important;
}

.gradio-container label,
.gradio-container .wrap label,
.gradio-container .prose,
.gradio-container .prose p,
.gradio-container .prose strong {
    color: var(--text) !important;
}

.gradio-container .gr-markdown p {
    color: var(--text-muted) !important;
}

.gradio-container .wrap label span,
.gradio-container .label-wrap span,
.gradio-container .gr-form label,
.gradio-container .gr-checkbox label {
    color: var(--text) !important;
    font-weight: 600 !important;
}

.gradio-container input,
.gradio-container textarea,
.gradio-container select,
.gradio-container .gr-textbox,
.gradio-container .cm-editor,
.gradio-container .gr-code,
.gradio-container .gr-dataframe {
    border-radius: 10px !important;
    border: 1px solid var(--border-strong) !important;
    background: #ffffff !important;
    color: var(--text) !important;
    box-shadow: none !important;
}

.gradio-container input::placeholder,
.gradio-container textarea::placeholder {
    color: var(--text-soft) !important;
}

.gradio-container .gr-file,
.gradio-container .upload-card {
    min-height: 84px !important;
    border-radius: 12px !important;
    border: 1px dashed #c7d3e3 !important;
    background: var(--surface-muted) !important;
    transition: border-color 140ms ease, background 140ms ease, box-shadow 140ms ease;
    overflow: hidden !important;
}

.gradio-container .gr-file > div,
.gradio-container .upload-card > div {
    min-height: 84px !important;
}

.gradio-container .gr-file:hover,
.gradio-container .upload-card:hover {
    border-color: #9fb8ff !important;
    background: #f7faff !important;
    box-shadow: 0 0 0 4px rgba(51, 102, 255, 0.05) !important;
}

.gradio-container .gr-file .wrap,
.gradio-container .gr-file .or,
.gradio-container .gr-file .hint {
    color: var(--text-muted) !important;
}

.gradio-container .gr-button-primary {
    min-height: 44px;
    border-radius: 10px !important;
    border: none !important;
    background: var(--accent) !important;
    color: #ffffff !important;
    font-weight: 700 !important;
    letter-spacing: 0.01em;
    box-shadow: 0 8px 18px rgba(51, 102, 255, 0.18) !important;
    transition: transform 140ms ease, background 140ms ease, box-shadow 140ms ease !important;
}

.gradio-container .gr-button-primary:hover {
    background: var(--accent-hover) !important;
    transform: translateY(-1px);
    box-shadow: 0 10px 20px rgba(51, 102, 255, 0.22) !important;
}

.gradio-container button:disabled,
.gradio-container .gr-button-primary[disabled] {
    opacity: 0.6 !important;
    cursor: not-allowed !important;
}

.gradio-container .gr-slider,
.gradio-container .gr-slider .wrap,
.gradio-container .gr-slider input {
    color: var(--text) !important;
}

.gradio-container input[type='checkbox'] {
    accent-color: var(--accent);
}

.gradio-container .tab-nav {
    margin-bottom: 12px;
    padding: 4px !important;
    border-radius: 12px !important;
    background: var(--surface-soft) !important;
    border: 1px solid var(--border) !important;
}

.gradio-container .tab-nav button {
    min-height: 38px;
    border-radius: 9px !important;
    color: var(--text-muted) !important;
    font-weight: 700 !important;
    transition: background 120ms ease, color 120ms ease;
}

.gradio-container .tab-nav button.selected {
    background: #ffffff !important;
    color: var(--text) !important;
    box-shadow: 0 1px 2px rgba(15, 23, 42, 0.08);
}

.tab-panel {
    padding-top: 4px;
}

.gradio-container .gr-dataframe {
    overflow: hidden !important;
}

.gradio-container table {
    border-collapse: collapse !important;
}

.gradio-container thead th {
    padding: 12px 14px !important;
    background: var(--surface-muted) !important;
    color: var(--text-muted) !important;
    font-size: 0.8rem !important;
    font-weight: 700 !important;
    border-bottom: 1px solid var(--border) !important;
}

.gradio-container tbody tr:hover {
    background: #f8fbff !important;
}

.gradio-container td {
    padding: 12px 14px !important;
    color: var(--text) !important;
    border-bottom: 1px solid #edf2f7 !important;
}

.status-card,
.summary-shell,
.empty-state {
    border-radius: 14px;
    border: 1px solid var(--border);
    background: var(--surface);
}

.status-card {
    padding: 14px 16px;
    margin-bottom: 12px;
}

.status-card strong {
    display: block;
    margin-bottom: 4px;
    color: var(--text);
    font-size: 0.94rem;
}

.status-card p {
    margin: 0;
    color: var(--text-muted);
    line-height: 1.5;
}

.status-card.info {
    border-color: #dbe6ff;
    background: #f8fbff;
}

.status-card.success {
    border-color: #d7f0e6;
    background: #f7fcf9;
}

.status-card.error {
    border-color: #f1d9d7;
    background: #fff8f7;
}

.summary-shell {
    padding: 12px;
    margin-bottom: 12px;
    background: var(--surface-muted);
}

.summary-grid {
    display: grid;
    grid-template-columns: repeat(4, minmax(0, 1fr));
    gap: 10px;
}

.summary-card {
    padding: 14px;
    border-radius: 12px;
    border: 1px solid var(--border);
    background: var(--surface);
}

.summary-card span {
    display: block;
    margin-bottom: 6px;
    color: var(--text-soft);
    font-size: 0.75rem;
    text-transform: uppercase;
    letter-spacing: 0.04em;
    font-weight: 700;
}

.summary-card strong {
    display: block;
    color: var(--text);
    font-size: 1.25rem;
    font-weight: 800;
    letter-spacing: -0.03em;
}

.summary-card small {
    display: block;
    margin-top: 8px;
    color: var(--text-muted);
    line-height: 1.45;
}

.empty-state {
    padding: 22px 18px;
    text-align: center;
}

.empty-state strong {
    display: block;
    margin-bottom: 8px;
    color: var(--text);
    font-size: 1rem;
}

.empty-state p {
    max-width: 520px;
    margin: 0 auto;
    color: var(--text-muted);
    line-height: 1.5;
}

@media (max-width: 980px) {
    .hero-meta,
    .summary-grid {
        grid-template-columns: repeat(2, minmax(0, 1fr));
    }

    .gradio-container {
        padding: 14px 14px 22px !important;
    }
}

@media (max-width: 720px) {
    .hero-meta,
    .summary-grid {
        grid-template-columns: 1fr;
    }
}
"""


def _resolve_file_path(file_obj: Any) -> str:
    """Return the filesystem path carried by an upload value, or ``""`` if there is none.

    Accepted shapes:

    * ``None`` -> ``""``
    * ``str`` -> returned unchanged
    * ``pathlib.Path`` -> the full path as a string
    * ``dict`` -> the value stored under ``"name"``
    * any other object -> its ``name`` attribute, when present

    A missing or ``None`` name yields ``""`` rather than the string ``"None"``,
    so callers can rely on a plain truthiness check.
    """
    if file_obj is None:
        return ""
    if isinstance(file_obj, str):
        return file_obj
    if isinstance(file_obj, Path):
        # Path.name is only the final component; callers need the whole path.
        return str(file_obj)

    if isinstance(file_obj, dict):
        name = file_obj.get("name")
    else:
        name = getattr(file_obj, "name", None)

    # str(None) would produce the truthy text "None"; treat it as "no file".
    return "" if name is None else str(name)


def _default_companies_path() -> str:
    """Return the first existing path in ``DEFAULT_COMPANY_CANDIDATES`` as a string.

    Raises:
        FileNotFoundError: if none of the candidate paths exists.
    """
    found = next(
        (candidate for candidate in DEFAULT_COMPANY_CANDIDATES if candidate.exists()),
        None,
    )
    if found is None:
        raise FileNotFoundError("No default company CSV file is available.")
    return str(found)


def _fallback_job(company_name: str, careers_url: str, ats: str) -> JobPosting:
    """Build a placeholder posting for a company whose careers page yielded no parsed roles.

    The posting points at ``careers_url``, carries the detected ``ats`` value,
    and leaves location and department empty.
    """
    placeholder_fields = {
        "company": company_name,
        "title": "General Opportunities",
        "location": "",
        "url": careers_url,
        "department": "",
        "description": "Careers page discovered but no structured roles were parsed.",
        "ats": ats,
    }
    return JobPosting(**placeholder_fields)


def _build_status_html(title: str, body: str, tone: str = "info") -> str:
    """Render a status card as a single-line HTML snippet.

    ``tone`` is appended to the ``status-card`` CSS class; ``title`` goes in a
    ``<strong>`` and ``body`` in a ``<p>``. All three values are HTML-escaped.
    """
    safe_tone = escape(tone)
    safe_title = escape(title)
    safe_body = escape(body)
    pieces = [
        '<div class="status-card ', safe_tone, '">',
        "<strong>", safe_title, "</strong>",
        "<p>", safe_body, "</p>",
        "</div>",
    ]
    return "".join(pieces)


def _summary_cell_text(row: List[Any], index: int) -> str:
    """Return ``row[index]`` as text, or ``"-"`` when the cell is absent, ``None`` or empty."""
    if index >= len(row):
        return "-"
    value = row[index]
    if value is None or value == "":
        return "-"
    return str(value)


def _summary_cell_score(row: List[Any]) -> "float | None":
    """Return ``row[1]`` as a float, or ``None`` when it is missing, blank or non-numeric."""
    if len(row) < 2 or row[1] is None or row[1] == "":
        return None
    try:
        return float(row[1])
    except (TypeError, ValueError):
        return None


def _build_summary_html(ranked_rows: List[List[Any]], match_rows: List[List[Any]]) -> str:
    """Render the summary panel (or an empty-state card) as an HTML snippet.

    Args:
        ranked_rows: Ranked company rows. Column 0 is shown as the company
            name, column 1 is read as the fit score and column 3 as the
            best-role label; the first row is treated as the top company.
        match_rows: Job-match rows; only their count is displayed.

    Returns:
        The empty-state markup when ``ranked_rows`` is empty, otherwise a
        four-card summary. Cells that are missing, blank or (for scores)
        non-numeric are rendered as ``-`` instead of raising, and all text
        values are HTML-escaped.
    """
    if not ranked_rows:
        return """
                <div class="empty-state">
                    <strong>No ranking data yet</strong>
                    <p>Upload a resume, run the matcher, and this panel will summarize the strongest companies, match volume, and best-fit roles.</p>
                </div>
                """

    leader = ranked_rows[0]
    top_company = _summary_cell_text(leader, 0)
    best_role = _summary_cell_text(leader, 3)

    leader_score = _summary_cell_score(leader)
    top_score = "-" if leader_score is None else f"{leader_score:.1f}"

    # Average only over rows that carry a usable score.
    scores = [score for score in map(_summary_cell_score, ranked_rows) if score is not None]
    avg_score = f"{sum(scores) / len(scores):.1f}" if scores else "-"

    total_companies = len(ranked_rows)
    total_jobs = len(match_rows)

    return f"""
        <div class="summary-shell">
            <div class="summary-grid">
                <div class="summary-card">
                    <span>Top Company</span>
                    <strong>{escape(top_company)}</strong>
                    <small>Best-fit company based on resolved job boards and resume alignment.</small>
                </div>
                <div class="summary-card">
                    <span>Top Score</span>
                    <strong>{escape(top_score)}</strong>
                    <small>Highest company fit score in the current analysis.</small>
                </div>
                <div class="summary-card">
                    <span>Companies / Jobs</span>
                    <strong>{total_companies} / {total_jobs}</strong>
                    <small>Ranked companies and extracted job matches returned in this run.</small>
                </div>
                <div class="summary-card">
                    <span>Average Fit / Best Role</span>
                    <strong>{escape(avg_score)}</strong>
                    <small>{escape(best_role)}</small>
                </div>
            </div>
        </div>
        """


def _save_company_debug_html(company_name: str, resolved_page_html: str, snapshots: dict[str, str], failure_type: str) -> None:
    """Save a company's HTML snapshots under ``DEBUG_HTML_DIR``.

    Every entry of ``snapshots`` is saved under its stage name, then the
    resolved page is saved as ``"resolved"``. When ``failure_type`` is set and
    is neither ``"SUCCESS"`` nor ``"UNKNOWN"``, the resolved page is saved once
    more under the lower-cased failure type.
    """

    def _dump(markup: str, label: str) -> None:
        save_debug_html(company_name, markup, label, DEBUG_HTML_DIR)

    for stage_name, stage_markup in snapshots.items():
        _dump(stage_markup, stage_name)
    _dump(resolved_page_html, "resolved")

    if not failure_type or failure_type in {"SUCCESS", "UNKNOWN"}:
        return
    _dump(resolved_page_html, failure_type.lower())


def _log_company_diagnostics(
    company_name: str,
    original_url: str,
    resolved_page_url: str,
    fetch_method: str,
    final_url: str,
    html: str,
    ats: str,
    api_jobs: List[JobPosting],
    diagnostics: Any,
    resolution_steps: List[str],
) -> None:
    """Write one company's fetch and extraction diagnostics to the debug log.

    Emits a header for ``company_name`` followed by one labelled line per
    value: the URLs and fetch details, the HTML length, the detected ATS (with
    a note when it is neither ``"greenhouse"`` nor ``"lever"``), the counters
    and samples read from ``diagnostics``, and a final SUCCESS flag that is
    true when the failure type is ``"SUCCESS"`` or at least one job was found.
    """
    log_debug_header(company_name)
    emit = log_debug_line

    # Plain pass-through values, in the order they should appear in the log.
    for label, value in (
        ("ORIGINAL URL", original_url),
        ("RESOLVED URL", resolved_page_url),
        ("FETCH METHOD", fetch_method),
        ("FINAL URL", final_url),
        ("RESOLUTION STEPS", resolution_steps),
    ):
        emit(label, value)

    emit("HTML LENGTH", len(html))
    emit("ATS", ats)
    if ats not in {"greenhouse", "lever"}:
        emit("ATS NOTE", "No ATS API match detected; using generic HTML/script parsing")

    emit("TOTAL ELEMENTS SCANNED", diagnostics.total_elements_scanned)
    emit("RAW TEXT SAMPLE", diagnostics.raw_text_sample[:20])
    # Attributes are read one at a time, just before each line is written.
    for label, field_name in (
        ("CANDIDATES FOUND", "candidates_found"),
        ("TITLE FILTER PASSES", "title_filtered_count"),
        ("SCRIPT MATCHES", "script_matches"),
        ("SCRIPT JOBS", "script_jobs_extracted"),
    ):
        emit(label, getattr(diagnostics, field_name))

    emit("API JOBS", len(api_jobs))
    emit("VALID JOBS", diagnostics.valid_jobs + len(api_jobs))
    emit("SAMPLE TITLES", diagnostics.sample_titles)
    emit("FAILURE TYPE", diagnostics.failure_type)

    succeeded = diagnostics.failure_type == "SUCCESS" or len(api_jobs) + diagnostics.valid_jobs > 0
    emit("SUCCESS", succeeded)


def _analysis_error_outputs(
    title: str,
    detail: str,
    summary_html: str,
    payload: Any = None,
) -> Tuple[List[List[Any]], List[List[Any]], str, str, str, str]:
    """Build the six UI outputs for a run that ended with an error.

    Args:
        title: Heading passed to ``_build_status_html``.
        detail: Message passed to ``_build_status_html``; also used as the
            ``"error"`` value of the JSON output when ``payload`` is omitted.
        summary_html: Markup returned for the summary output.
        payload: Optional dict serialized as the JSON output instead of the
            default ``{"error": detail}``.

    Returns:
        Empty ranked rows, empty match rows, the JSON payload, empty talking
        points, the error status HTML, and ``summary_html``.
    """
    body = {"error": detail} if payload is None else payload
    return (
        [],
        [],
        json.dumps(body, indent=2),
        "",
        _build_status_html(title, detail, "error"),
        summary_html,
    )


def _discover_company_jobs(company: Any) -> "List[JobPosting]":
    """Resolve one company's jobs page and return the jobs found for it.

    Combines jobs from the ATS API with jobs extracted from the page HTML,
    saves the debug HTML, logs the diagnostics report, and falls back to a
    single placeholder job (``_fallback_job``) when nothing was found.

    Args:
        company: Company record exposing ``company`` and ``careers_url``.

    Returns:
        A non-empty list of jobs for the company.

    Raises:
        Exception: Anything raised by the resolver, ATS API, extractor or the
            debug helpers propagates to the caller.
    """
    resolved_page = resolve_real_jobs_page(company.careers_url)
    resolved_url = resolved_page.url or company.careers_url
    resolved_html = resolved_page.html
    ats = detect_ats(resolved_url, resolved_html)

    if resolved_page.fallback_used:
        print(f"[scraper] playwright fallback triggered: {resolved_page.fallback_reason or 'fallback_used'}")

    api_jobs = fetch_jobs_from_ats_api(company, ats, source_url=resolved_url)
    html_jobs, diagnostics = extract_jobs_with_diagnostics(
        company,
        resolved_html,
        ats,
        base_url=resolved_url,
    )

    # No jobs, no redirect away from the original URL and no classified
    # failure: label the outcome SHELL_PAGE instead of leaving it UNKNOWN.
    if diagnostics.valid_jobs == 0 and company.careers_url == resolved_url and diagnostics.failure_type == "UNKNOWN":
        diagnostics.failure_type = "SHELL_PAGE"

    _save_company_debug_html(
        company.company,
        resolved_html,
        resolved_page.html_snapshots,
        diagnostics.failure_type if not api_jobs else "SUCCESS",
    )
    _log_company_diagnostics(
        company.company,
        company.careers_url,
        resolved_url,
        resolved_page.fetch_method,
        resolved_page.final_url or resolved_url,
        resolved_html,
        ats,
        api_jobs,
        diagnostics,
        resolved_page.resolution_steps,
    )

    # HTML-extracted jobs only supplement the API when it returned fewer than 3.
    jobs = list(api_jobs)
    if len(jobs) < 3:
        jobs.extend(html_jobs)

    if not jobs:
        print(f"[scraper] {company.company} failed at parsing step with failure type: {diagnostics.failure_type}")
        jobs = [_fallback_job(company.company, resolved_url, ats)]

    return jobs


def analyze_resume(
    resume_pdf: Any,
    company_source: str,
    optional_company_csv: Any,
    max_companies: int,
    use_ai_parser: bool,
    progress: gr.Progress = gr.Progress(),
) -> Tuple[List[List[Any]], List[List[Any]], str, str, str, str]:
    """Run the full resume-to-company matching pipeline for the UI.

    Steps: extract the resume text, build the resume profile, load the
    company list, discover jobs for each company that has a careers URL,
    score every job against the profile, and rank the companies.

    Args:
        resume_pdf: Uploaded resume file value; resolved with
            ``_resolve_file_path``.
        company_source: Selected company source; the custom CSV is only used
            when this equals ``"Custom CSV"``.
        optional_company_csv: Uploaded custom CSV file value, if any.
        max_companies: Maximum number of companies to analyze.  The cap is
            applied to companies that have a careers URL, so rows without a
            URL do not consume the budget.
        use_ai_parser: Forwarded to ``build_resume_profile`` as ``use_ai``.
        progress: Gradio progress tracker.  The ``gr.Progress()`` default is
            how Gradio injects the tracker into an event handler.

    Returns:
        A 6-tuple in the order the UI outputs are wired: ranked company rows
        (at most 50), job match rows (at most 250), the resume profile as
        JSON text, talking points, status HTML, and summary HTML.  On any
        failure the rows are empty, the JSON text carries an ``"error"`` key
        and the status HTML describes the problem; no exception escapes.
    """
    import os
    import traceback

    resume_path = _resolve_file_path(resume_pdf)
    csv_path = _resolve_file_path(optional_company_csv) if company_source == "Custom CSV" else ""
    empty_summary = _build_summary_html([], [])

    if not resume_path:
        return _analysis_error_outputs(
            "Resume required",
            "Upload a PDF resume to start the analysis.",
            empty_summary,
            payload={"error": "Please upload a resume PDF."},
        )

    try:
        # --- Debug: log pipeline inputs before anything runs ---
        print("[analyze] company_source:", company_source)
        print("[analyze] csv_path (resolved):", repr(csv_path))
        print("[analyze] resume_path:", repr(resume_path))
        print("[analyze] cwd:", os.getcwd())

        progress(0.05, desc="Extracting resume text")
        resume_text = extract_resume_text(resume_path)
        progress(0.12, desc="Building resume profile")
        profile = build_resume_profile(resume_text, use_ai=use_ai_parser)

        # Try to resolve the default CSV path and log clearly if it's missing.
        try:
            default_csv_path = _default_companies_path()
            print("[analyze] default_csv_path:", default_csv_path)
        except FileNotFoundError as fnf:
            print("[analyze] CRITICAL: default CSV not found:", fnf)
            return _analysis_error_outputs("Company list not found", str(fnf), empty_summary)

        companies = load_companies(default_csv_path, csv_path if csv_path else None)
        total_loaded = len(companies)
        with_url = sum(1 for c in companies if c.careers_url)
        print(f"[analyze] Loaded {total_loaded} companies, {with_url} have careers_url")

        # Hard-stop early so the user sees a clear reason rather than "0 companies processed".
        if total_loaded == 0:
            msg = (
                "No companies were loaded. "
                "Check that the CSV has a company-name column and at least one data row."
            )
            return _analysis_error_outputs("No companies loaded", msg, empty_summary)

        if with_url == 0:
            # All companies exist but every careers_url is empty — display which columns exist.
            col_sample = list((companies[0].meta or {}).keys())[:12] if companies else []
            msg = (
                f"Loaded {total_loaded} companies but none have a usable careers URL. "
                f"CSV columns detected: {col_sample}. "
                "This app now reads only the opening page column (col 4 / 'Direct links to company career/job openings page'). "
                "Add valid https URLs in that column."
            )
            print("[analyze] WARNING:", msg)
            return _analysis_error_outputs(
                "No careers URLs found",
                msg,
                empty_summary,
                payload={"error": msg, "csv_columns": col_sample},
            )

        # Drop URL-less rows BEFORE applying the cap: they can never be
        # analyzed, so letting them count would silently shrink the run below
        # what the user asked for and skew the progress fractions.
        companies = [c for c in companies if c.careers_url][: int(max_companies)]
        print(f"[analyze] After max_companies cap: {len(companies)} companies to analyze")
        progress(0.18, desc=f"Analyzing {len(companies)} companies")

        discovered_jobs: List[JobPosting] = []
        processed_companies = 0
        company_total = max(1, len(companies))
        for index, company in enumerate(companies, start=1):
            try:
                progress(0.18 + (0.62 * index / company_total), desc=f"Resolving {company.company}")
                jobs = _discover_company_jobs(company)
            except Exception as company_exc:
                # Best effort: one broken company must not abort the whole run,
                # but keep the traceback so the failure can be diagnosed.
                print("=" * 60)
                print(f"COMPANY: {company.company}")
                print("FAILURE TYPE: PARSING_ERROR")
                print("SUCCESS: False")
                print("STEP BROKE: analyze_resume loop")
                print(f"ERROR: {company_exc}")
                traceback.print_exc()
                continue

            discovered_jobs.extend(jobs)
            processed_companies += 1

        progress(0.86, desc="Scoring matches")
        matches = [score_job_match(job, profile) for job in discovered_jobs]
        matches = sorted(matches, key=lambda item: item.score, reverse=True)

        rankings = rank_companies(matches)

        # Only the top slices are shown in the tables.
        ranked_rows = [
            [r.company, r.company_score, r.match_count, r.best_role, r.ats, r.explanation]
            for r in rankings[:50]
        ]
        match_rows = [
            [m.company, m.title, m.location, m.score, m.ats, m.url, m.explanation]
            for m in matches[:250]
        ]

        profile_json = json.dumps(resume_profile_to_json(profile), indent=2)
        talking_points = build_talking_points(rankings, matches)
        status_html = _build_status_html(
            "Analysis complete",
            f"Processed {processed_companies} companies, extracted {len(match_rows)} job matches, and ranked {len(ranked_rows)} companies.",
            "success",
        )
        summary_html = _build_summary_html(ranked_rows, match_rows)
        progress(1.0, desc="Done")

        return ranked_rows, match_rows, profile_json, talking_points, status_html, summary_html
    except Exception as exc:
        # Top-level boundary for the UI callback: report the error in the UI
        # and keep the traceback in the logs instead of discarding it.
        traceback.print_exc()
        return _analysis_error_outputs("Analysis failed", str(exc), empty_summary)


# Top-level Gradio layout bound to ``demo``: a hero banner, a controls column
# holding the inputs, a results column holding the outputs, and the click
# handler that connects them to ``analyze_resume``.
with gr.Blocks(title="AI Career Fair Matcher") as demo:
    with gr.Column(elem_classes=["app-shell"]):
        # Static hero banner markup.
        gr.HTML(
            """
            <section class="app-hero">
                            <div class="eyebrow">AI Career Fair Matcher</div>
                            <h1 class="hero-title">Prioritize the right companies before you ever walk into the fair.</h1>
                            <p class="hero-copy">
                                Upload a resume, analyze a built-in or custom company list, and get ranked companies, matching jobs, and recruiter talking points in a clean workflow.
                            </p>
              <div class="hero-meta">
                                <div class="hero-pill"><strong>Resume Parsing</strong>Uses AI to extract structured information from your resume.</div>
                                <div class="hero-pill"><strong>Job Discovery</strong>Resolves real jobs pages behind career search shells.</div>
                                <div class="hero-pill"><strong>Actionable Output</strong>Ranked targets, matching roles, and talking points.</div>
              </div>
            </section>
            """
        )

        # Two-column body: controls (scale 5) followed by results (scale 7).
        with gr.Row(equal_height=False):
            # --- Controls column: resume upload, company source, settings ---
            with gr.Column(scale=5, min_width=360, elem_classes=["panel", "control-panel"]):
                gr.Markdown(
                    """
                    <div class="section-title">
                      <h3>Workspace</h3>
                      <p>Load your resume, choose the company source, and tune how broad the analysis should be.</p>
                    </div>
                    <div class="chip-row">
                      <span class="chip">Dark Mode Default</span>
                      <span class="chip">AI Resume Parsing</span>
                      <span class="chip">Built-In NSBE Dataset</span>
                    </div>
                    """,
                    elem_classes=["section-title"],
                )

                # Resume file input (PDF only).
                with gr.Group(elem_classes=["subcard"]):
                    gr.Markdown("<div class='subcard-title'>Resume Upload</div>")
                    resume_input = gr.File(label="Upload resume PDF", file_types=[".pdf"], elem_classes=["upload-card"])

                # Company source selector; analyze_resume only reads the CSV
                # upload when the "Custom CSV" choice is selected.
                with gr.Group(elem_classes=["subcard"]):
                    gr.Markdown("<div class='subcard-title'>Company Source</div>")
                    company_source_input = gr.Radio(
                        choices=["Built-in NSBE List", "Custom CSV"],
                        value="Built-in NSBE List",
                        label="Choose company source",
                    )
                    company_csv_input = gr.File(label="Optional custom company CSV", file_types=[".csv"], elem_classes=["upload-card"])

                # Analysis settings: AI parser toggle, company cap, run button.
                with gr.Group(elem_classes=["subcard"]):
                    gr.Markdown("<div class='subcard-title'>Analysis Settings</div>")
                    use_ai_parser_input = gr.Checkbox(
                        value=True,
                        label="Use AI Resume Parser",
                    )
                    gr.Markdown(
                        "<div class='results-note'>Uses AI to extract structured information from your resume.</div>"
                    )
                    max_companies_input = gr.Slider(
                        minimum=5,
                        maximum=100,
                        step=1,
                        value=30,
                        label="Max companies to analyze",
                    )
                    analyze_button = gr.Button("Analyze Career Fair Fit", variant="primary")

                gr.Markdown(
                    """
                    <div class="results-note">
                      Designed for quick scanning: inputs stay compact on the left while results, summaries, and tabs stay dense and readable on the right.
                    </div>
                    """
                )

            # --- Results column: status, summary, and tabbed detail views ---
            with gr.Column(scale=7, min_width=420, elem_classes=["panel", "results-panel"]):
                gr.Markdown(
                    """
                    <div class="section-title">
                      <h3>Results</h3>
                      <p>Start with the summary, then inspect ranked companies, matching jobs, resume profile fields, and recruiter talking points.</p>
                    </div>
                    """,
                    elem_classes=["section-title"],
                )

                # Initial status and an empty summary shown before the first run.
                status_output = gr.HTML(
                    value=_build_status_html(
                        "Ready to analyze",
                        "Upload a resume, optionally add a custom CSV, and launch the matcher.",
                        "info",
                    )
                )
                summary_output = gr.HTML(value=_build_summary_html([], []))

                with gr.Group(elem_classes=["subcard"]):
                    with gr.Tabs():
                        # Six headers, matching the six fields per ranked row
                        # that analyze_resume returns.
                        with gr.TabItem("Ranked Companies", elem_classes=["tab-panel"]):
                            ranked_output = gr.Dataframe(
                                headers=["Company", "Score", "Matches", "Best Role", "ATS", "Explanation"],
                                label="Ranked Companies",
                                wrap=True,
                            )

                        # Seven headers, matching the seven fields per match row
                        # that analyze_resume returns.
                        with gr.TabItem("Matching Jobs", elem_classes=["tab-panel"]):
                            jobs_output = gr.Dataframe(
                                headers=["Company", "Job Title", "Location", "Score", "ATS", "URL", "Why It Matches"],
                                label="Matching Jobs",
                                wrap=True,
                            )

                        with gr.TabItem("Resume Profile", elem_classes=["tab-panel"]):
                            profile_output = gr.Code(label="Resume Profile JSON", language="json")

                        with gr.TabItem("Talking Points", elem_classes=["tab-panel"]):
                            talking_points_output = gr.Markdown(label="Talking Points")

    # Wire the button to analyze_resume.  ``inputs`` follows the function's
    # parameter order (``progress`` keeps its default) and ``outputs`` follows
    # the order of the 6-tuple it returns.
    analyze_button.click(
        fn=analyze_resume,
        inputs=[resume_input, company_source_input, company_csv_input, max_companies_input, use_ai_parser_input],
        outputs=[ranked_output, jobs_output, profile_output, talking_points_output, status_output, summary_output],
    )


if __name__ == "__main__":
    # Script entry point: turn on the queue first, then launch the queued app
    # with the module's theme and CSS.
    queued_app = demo.queue()
    queued_app.launch(theme=APP_THEME, css=CUSTOM_CSS, ssr_mode=False)