acarey5 commited on
Commit
fa6caa6
·
1 Parent(s): 4a339d7

build AI Career Fair Matcher

Browse files
README.md CHANGED
@@ -9,4 +9,57 @@ app_file: app.py
9
  pinned: false
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  pinned: false
10
  ---
11
 
12
+ # AI Career Fair Matcher
13
+
14
+ AI Career Fair Matcher helps students prioritize career fair companies by analyzing resume fit against live job postings.
15
+
16
+ ## What It Does
17
+ - Accepts a resume PDF.
18
+ - Uses a built-in company CSV and optionally accepts a user-uploaded CSV.
19
+ - Extracts resume text and builds a structured profile JSON.
20
+ - Supports AI resume parsing through an OpenAI-compatible API with fallback parsing.
21
+ - Detects ATS providers from company careers URLs.
22
+ - Fetches jobs using requests first and Playwright fallback.
23
+ - Scores jobs with explainable rules.
24
+ - Ranks companies by fit.
25
+ - Generates recruiter talking points.
26
+
27
+ ## Project Structure
28
+ - `app.py`
29
+ - `src/resume/`
30
+ - `src/jobs/`
31
+ - `src/scoring/`
32
+ - `src/output/`
33
+ - `NSBE 2026 Baltimore Company_ Schools - Companies.csv` (built-in default)
34
+ - `data/NSBE 2026 Baltimore Company_ Schools - Companies (1).csv` (alternate built-in)
35
+
36
+ ## Resume Profile Schema
37
+ ```json
38
+ {
39
+ "skills": [],
40
+ "languages": [],
41
+ "frameworks": [],
42
+ "tools": [],
43
+ "target_titles": [],
44
+ "locations": [],
45
+ "experience_level": ""
46
+ }
47
+ ```
48
+
49
+ ## Matching Rules
50
+ - Rewards skill overlap.
51
+ - Rewards role match.
52
+ - Rewards entry-level signals.
53
+ - Penalizes senior role signals.
54
+
55
+ ## Local Run
56
+ 1. Install dependencies:
57
+ `pip install -r requirements.txt`
58
+ 2. Optional but recommended for Playwright fallback:
59
+ `playwright install chromium`
60
+
61
+ ## Output
62
+ - Ranked companies
63
+ - Matching jobs
64
+ - Resume profile JSON
65
+ - Recruiter talking points
app.py CHANGED
@@ -1,7 +1,168 @@
 
 
 
 
1
  import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
- def greet(name):
4
- return "Hello " + name + "!!"
5
 
6
- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
7
- demo.launch()
 
1
+ import json
2
+ from pathlib import Path
3
+ from typing import Any, List, Tuple
4
+
5
  import gradio as gr
6
+ from dotenv import load_dotenv
7
+
8
+ from src.jobs.ats_detector import detect_ats
9
+ from src.jobs.company_loader import load_companies
10
+ from src.jobs.extractor import extract_jobs_from_html
11
+ from src.jobs.fetcher import fetch_jobs_from_ats_api, fetch_url_content
12
+ from src.models import JobPosting
13
+ from src.output.generator import build_talking_points, resume_profile_to_json
14
+ from src.resume.pdf_extract import extract_resume_text
15
+ from src.resume.profile_builder import build_resume_profile
16
+ from src.scoring.matcher import rank_companies, score_job_match
17
+
18
+ BASE_DIR = Path(__file__).resolve().parent
19
+ load_dotenv(BASE_DIR / ".env")
20
+
21
+ DEFAULT_COMPANY_CANDIDATES = [
22
+ BASE_DIR / "NSBE 2026 Baltimore Company_ Schools - Companies.csv",
23
+ BASE_DIR / "data" / "NSBE 2026 Baltimore Company_ Schools - Companies (1).csv",
24
+ ]
25
+
26
+
27
+ def _resolve_file_path(file_obj: Any) -> str:
28
+ if file_obj is None:
29
+ return ""
30
+ if isinstance(file_obj, str):
31
+ return file_obj
32
+ if hasattr(file_obj, "name"):
33
+ return str(file_obj.name)
34
+ if isinstance(file_obj, dict):
35
+ return str(file_obj.get("name", ""))
36
+ return ""
37
+
38
+
39
def _default_companies_path() -> str:
    """Return the first bundled company CSV that exists on disk.

    Raises:
        FileNotFoundError: when none of the bundled candidates is present.
    """
    found = next((candidate for candidate in DEFAULT_COMPANY_CANDIDATES if candidate.exists()), None)
    if found is None:
        raise FileNotFoundError("No default company CSV file is available.")
    return str(found)
45
+
46
+
47
def _fallback_job(company_name: str, careers_url: str, ats: str) -> JobPosting:
    """Build a placeholder posting for a careers page that yielded no parseable roles."""
    placeholder = JobPosting(
        company=company_name,
        title="General Opportunities",
        location="",
        url=careers_url,
        department="",
        description="Careers page discovered but no structured roles were parsed.",
        ats=ats,
    )
    return placeholder
57
+
58
+
59
def _discover_company_jobs(company: Any) -> List[JobPosting]:
    """Collect job postings for a single company.

    Strategy: query the ATS API first; if that yields fewer than three
    roles, scrape the careers page HTML (re-detecting the ATS from the
    markup when the URL alone was inconclusive); if nothing at all was
    found, return a single placeholder posting.
    """
    ats = detect_ats(company.careers_url)
    jobs = fetch_jobs_from_ats_api(company, ats)

    if len(jobs) < 3:
        html = fetch_url_content(company.careers_url)
        if ats == "unknown":
            ats = detect_ats(company.careers_url, html)
        jobs.extend(extract_jobs_from_html(company, html, ats))

    if not jobs:
        jobs = [_fallback_job(company.company, company.careers_url, ats)]
    return jobs


def analyze_resume(
    resume_pdf: Any,
    optional_company_csv: Any,
    max_companies: int,
    use_ai_parser: bool,
) -> Tuple[List[List[Any]], List[List[Any]], str, str]:
    """Run the full resume-to-ranking pipeline for the Gradio UI.

    Args:
        resume_pdf: Uploaded resume file (Gradio file object or path).
        optional_company_csv: Optional user-supplied company CSV.
        max_companies: Cap on how many companies to analyze.
        use_ai_parser: Whether to attempt AI-assisted resume parsing.

    Returns:
        (ranked-company rows, matching-job rows, resume-profile JSON,
        recruiter talking points). Failures are reported inside the JSON
        payload instead of raising, so the UI always renders something.
    """
    resume_path = _resolve_file_path(resume_pdf)
    csv_path = _resolve_file_path(optional_company_csv)

    if not resume_path:
        return [], [], json.dumps({"error": "Please upload a resume PDF."}, indent=2), ""

    try:
        resume_text = extract_resume_text(resume_path)
        profile = build_resume_profile(resume_text, use_ai=use_ai_parser)

        companies = load_companies(_default_companies_path(), csv_path)
        companies = companies[: int(max_companies)]

        discovered_jobs: List[JobPosting] = []
        for company in companies:
            if not company.careers_url:
                continue
            discovered_jobs.extend(_discover_company_jobs(company))

        matches = [score_job_match(job, profile) for job in discovered_jobs]
        matches.sort(key=lambda item: item.score, reverse=True)

        rankings = rank_companies(matches)

        # Cap table sizes so the UI stays responsive on large runs.
        ranked_rows = [
            [r.company, r.company_score, r.match_count, r.best_role, r.ats, r.explanation]
            for r in rankings[:50]
        ]
        match_rows = [
            [m.company, m.title, m.location, m.score, m.ats, m.url, m.explanation]
            for m in matches[:250]
        ]

        profile_json = json.dumps(resume_profile_to_json(profile), indent=2)
        talking_points = build_talking_points(rankings, matches)

        return ranked_rows, match_rows, profile_json, talking_points
    except Exception as exc:  # boundary handler: surface any pipeline failure to the UI
        return [], [], json.dumps({"error": str(exc)}, indent=2), ""
117
+
118
+
119
# --- Gradio UI -------------------------------------------------------------
with gr.Blocks(title="AI Career Fair Matcher") as demo:
    gr.Markdown("# AI Career Fair Matcher")
    gr.Markdown(
        "Upload your resume and optionally a company CSV. "
        "The app extracts your profile, fetches jobs, scores fit, and suggests recruiter talking points."
    )

    # Inputs: resume PDF (required) plus an optional company-list override.
    with gr.Row():
        resume_input = gr.File(label="Resume PDF", file_types=[".pdf"])
        company_csv_input = gr.File(label="Optional Company CSV", file_types=[".csv"])

    # Toggle for AI-assisted resume parsing (passed through to build_resume_profile).
    use_ai_parser_input = gr.Checkbox(
        value=True,
        label="Use AI Resume Parser (OPENAI_API_KEY or HF_TOKEN)",
    )

    # Caps how many companies get fetched and scored per run.
    max_companies_input = gr.Slider(
        minimum=5,
        maximum=100,
        step=1,
        value=30,
        label="Max Companies to Analyze",
    )

    analyze_button = gr.Button("Analyze Career Fair Fit", variant="primary")

    # Outputs: ranked companies, individual job matches, the extracted
    # resume profile, and suggested recruiter talking points.
    ranked_output = gr.Dataframe(
        headers=["Company", "Score", "Matches", "Best Role", "ATS", "Explanation"],
        label="Ranked Companies",
        wrap=True,
    )

    jobs_output = gr.Dataframe(
        headers=["Company", "Job Title", "Location", "Score", "ATS", "URL", "Why It Matches"],
        label="Matching Jobs",
        wrap=True,
    )

    profile_output = gr.Code(label="Resume Profile JSON", language="json")
    talking_points_output = gr.Markdown(label="Talking Points")

    # Wire the button to the pipeline; output order matches analyze_resume's return tuple.
    analyze_button.click(
        fn=analyze_resume,
        inputs=[resume_input, company_csv_input, max_companies_input, use_ai_parser_input],
        outputs=[ranked_output, jobs_output, profile_output, talking_points_output],
    )


if __name__ == "__main__":
    # Route requests through Gradio's queue before launching the app.
    demo.queue().launch()
data/NSBE 2026 Baltimore Company_ Schools - Companies (1).csv ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Company List ,Pre-Conference interview or registration links ,Column 4,Direct links to company Career/job openings page ,"Creator Donise Griffin: me on linkedin :)
2
+ https://www.linkedin.com/in/donise-griffin/","idea from lasts years 2025 list follow @mohamedhaithvm
3
+ https://www.linkedin.com/in/mohamed-ahmed-429302228/"
4
+ 3M,,,openings page ,,
5
+ "ABB, Inc",2026 Annual NSBE Convention | ABB,, openings page ,"Download the Mobile App for 2026
6
+ NSBE Annual Convention!",iOS:
7
+ Abbott,https://www.jobs.abbott/us/en/event/690bb1ceaf6b261cdd97d267/National-Society-of-Black-Engineers-Conference-2026,, openings page ,,Android:
8
+ Accenture,,, openings page ,FairControls is the AI intelligence hub for events.,
9
+ AECOM,,, openings page ,Before the career fair you can:,
10
+ Air products,,, openings page ,🤖 Instantly understand every company attending,
11
+ Airbus,,, openings page ,AI summaries + suggested questions to ask recruiters.,
12
+ Amazon,,,openings page ,,
13
+ Apple ,,, openings page ,,
14
+ Arcadis ,https://arcadis.eightfold.ai/events/candidate?plannedEventId=Oa34M69ekv&domain=arcadis.com,, openings page ,,
15
+ Arconic,,, openings page ,,
16
+ Arup US Inc.,,, openings page ,,
17
+ Astrazeneca Phaaceuticals LP,,, openings page ,,
18
+ Barnes Group,,, openings page ,,
19
+ Bechtel,https://bechtel.recsolu.com/app/collect/event/oSDe4PqZi3dHMdvevlBHjw,, openings page ,,
20
+ "Bentley Systems, Inc.", ,, openings page ,,
21
+ Black Blockchain labs,,,openings page ,,
22
+ Blue Origin,,, openings page ,,
23
+ BNY Mellon,https://eofe.fa.us2.oraclecloud.com/hcmUI/CandidateExperience/en/sites/CX_1001/job/74341/?utm_medium=jobshare&utm_source=External+Job+Share,, openings page ,,
24
+ Bohler Engineering,https://bohler.recsolu.com/app/collect/event/J5TOHMJAPIRuH2YeEciT6w,,openings page,,
25
+ Bristol-Myers Squibb,https://app.eightfold.ai/events/candidate?plannedEventId=GRj3OxoJ,, openings page ,,
26
+ Burns & Mcdonnell,https://burnsmcd.recsolu.com/app/collect/event/iF1_1Zs9E3lBPYpjJ1nbjw,, openings page ,,
27
+ "Cadence Design Systems, Inc.",https://cadence.yello.co/app/collect/event/7cdPwuX2_SeybbUJ3LmK-g?utm_source=hsamplify&utm_medium=linkedin&utm_term=8e24b117-2b45-4061-a1a6-82a6ba199b83,, openings page ,,
28
+ "Capital One, Inc.",,, openings page ,,
29
+ "Cargill, Incorporated",,, openings page ,,
30
+ Caterpillar Inc.,https://caterpillar.yello.co/app/collect/event/egBb1GTxd55C3z7aKjbaJw,, openings page ,,
31
+ Cheveron ,https://chevron.wd5.myworkdayjobs.com/University/event/a377a322e71f1001ad4c7bfcbf060001/register,, openings page ,,
32
+ Clorox,https://wd1.myworkdaysite.com/recruiting/clorox/Clorox/page/e10acc474cc31001f01c95bbe6f10000,, openings page ,,
33
+ ConocoPhillips,,, openings page ,,
34
+ Constellation Energy,,, openings page ,,
35
+ "Cook Medical Holdings, Inc.",,, openings page ,,
36
+ "Cummins, Inc.",,, openings page ,,
37
+ Dairy Farmers of America,,, openings page ,,
38
+ Dauch Corporation,,, openings page ,,
39
+ Dell Technologies,,, openings page ,,
40
+ Deloitte,,, openings page ,,
41
+ "Delta Air Lines, Inc",,LOOKING FOR PEOPLE FOR CO-OPS , openings page ,,
42
+ "Dodge Industrial, Inc.",,, openings page ,,
43
+ DPR Construction,,, openings page ,,
44
+ Draper,,, openings page ,,
45
+ Duracell,,, openings page ,,
46
+ Eaton Corporation,Find Eaton at NSBE,, openings page ,,
47
+ "Edwards Lifesciences, LLC",https://flows.beamery.com/httpswwwedwardscomcareershome/edwards-nsbe-2026?utm_source=linkedin&utm_medium=video&utm_content=voe,, openings page ,,
48
+ Ernst & Young LLP,,, openings page ,,
49
+ "ESRI (Environmental Systems Research Institute, Inc.)",,, openings page ,,
50
+ Estee lauder Companies ,,,openings page ,,
51
+ ExxonMobile,,,openings page ,,
52
+ Fish & Richardson P.C.,,, openings page ,,
53
+ FM Global,,, openings page ,,
54
+ Fonteva,,, openings page ,,
55
+ Ford Motor Company,"Ford Events 2 | Instagram, Facebook | Linktree
56
+
57
+ Ford Pre-Registration Link",, openings page ,,
58
+ Freeport-McMoRan Inc,,, openings page ,,
59
+ GE Aerospace,https://careers.geaerospace.com/global/en/event/697cccfc19232421f311f22f/GE-Aerospace-NSBE-2026-National-Convention-and-Career-Fair,, openings page ,,
60
+ GE Healthcare,https://careers.gehealthcare.com/global/en/event/6970f96419232421f311e00c/National-Society-of-Black-Engineers-NSBE-Convention-2026,, openings page ,,
61
+ GE Vernova,https://olivia.paradox.ai/co/GEVernova32/Event/2026NSBEAnnualConvention,, openings page ,,
62
+ "General Dynamics, Inc.",,, openings page ,,
63
+ General Motors,,, openings page ,,
64
+ GHD,,, openings page ,,
65
+ Gilbane Building Company,,, openings page ,,
66
+ Givelify,,, openings page ,,
67
+ Goldman Sachs,https://higher.gs.com/campus?DIVISION=Engineering%20Division&EXPERIENCE_LEVEL=Summer%20Analyst&LOCATION=New%20York%7CDallas%7CSalt%20Lake%20City&page=1&sort=POSTED_DATE,, openings page ,,
68
+ Granite Construction,,, openings page ,,
69
+ Hensel Phelps,,, openings page ,,
70
+ Honeywell,https://app.brazenconnect.com/a/honeywell/e/zq7jm?utm_source=event%20page&utm_medium=flyer&utm_campaign=honeywell%20nsbe%202026,,openings page,,
71
+ Intel Corporation,,,openings page ,,
72
+ IQT,,,openings page ,,
73
+ Jabil Inc.,,, openings page ,,
74
+ Jacobs,https://jacobs.avature.net/eventlisting/EventDetail?eventId=29715,, openings page ,,
75
+ John Deere,,, openings page ,,
76
+ Johnson & Johnson Family of Companies,,, openings page ,,
77
+ Kiewit,https://olivia.paradox.ai/co/Kiewit27/Event/2026NSBEAnnualConvention,, openings page ,,
78
+ KLA,,, openings page ,,
79
+ L'Oreal,,,openings page ,,
80
+ Lam Research Corporation,,, openings page ,,
81
+ Lenovo,,, openings page ,,
82
+ Linde,,, openings page ,,
83
+ Lockheed Martin Corporation,,, openings page ,,
84
+ Los Angeles Department of Water and Power,,, openings page ,,
85
+ Marvell Technology,,, openings page ,,
86
+ MathWorks,,,openings page ,,
87
+ McCarthy Building Companies,,, openings page ,,
88
+ "Merck & Co., Inc.",https://jobs.merck.com/us/en/event/68ac56791218cc52521fd372/2026-NSBE-Annual-Convention,, openings page ,,
89
+ Michigan Department of Transportation,,,openings page ,,
90
+ Micron,,,openings page ,,
91
+ "Microsoft, Inc.",,, openings page ,,
92
+ Morgan Stanley ,,,openings page ,,
93
+ Mortenson Construction,https://mortenson.recsolu.com/app/collect/event/6669UqBJAL4x9TwP_MyomA,, openings page ,,
94
+ "Nintendo of America, Inc.",,, openings page ,,
95
+ Nucor Corporation,,, openings page ,,
96
+ NVIDIA,,, openings page ,,
97
+ Pacific Gas and Electric Company,,, openings page ,,
98
+ PBF Energy,,, openings page ,,
99
+ "PepsiCo, Inc.",,, openings page ,,
100
+ Perdue Farms,,,openings page ,,
101
+ "Pfizer, Inc.",,, openings page ,,
102
+ Ramboll,,, openings page ,,
103
+ 22,,, openings page ,,
104
+ Rockwell Automation,,, openings page ,,
105
+ Sandia National Laboratories,,, openings page ,,
106
+ Schneider Electric,,, openings page ,,
107
+ "Siemens, Inc.",,, openings page ,,
108
+ Skanska,,, openings page ,,
109
+ SMART Scholarship For Service Program,,, openings page ,,
110
+ Smurfit Westrock,,, openings page ,,
111
+ Southern Company,,, openings page ,,
112
+ "Southwire Company, Inc.",,, openings page ,,
113
+ SpaceX,,, openings page ,,
114
+ SSOE Group,,,openings page ,,
115
+ Swinerton,,,openings page ,,
116
+ Target Corporation ,,,openings page ,,
117
+ TE Connectivity,,, openings page ,,
118
+ Teledyne Technologies,https://teledyne.yello.co/app/collect/event/xZobTUiCcDSAQkNmciQr3Q,, openings page ,,
119
+ "Terracon Consultants, Inc.",https://sjobs.brassring.com/TGnewUI/Search/home/HomeWithPreLoad?PageType=JobDetails&partnerid=25664&siteid=5383&jobid=5211397#jobDetails=5211397_5383,, openings page ,,
120
+ Texas Department of Transportation,,,openings page ,,
121
+ Texas Instruments,,, openings page ,,
122
+ The Clorox Company,https://wd1.myworkdaysite.com/recruiting/clorox/Clorox/page/e10acc474cc31001f01c95bbe6f10000,, openings page ,,
123
+ The Whiting-Turner Contracting Company,,, openings page ,,
124
+ Toyota,,, openings page ,,
125
+ "Trane Technologies, Inc.",https://careers.tranetechnologies.com/global/en/event/697bbe1219232421f311f0c9/NSBE-2026-Career-Fair-with-Trane-Technologies-Booth-1681B,, openings page ,,
126
+ "Trimble, Inc.",,, openings page ,,
127
+ Turner Construction,,, openings page ,,
128
+ UL standards and enterprise,,,openings page ,,
129
+ United Airlines,,,openings page ,,
130
+ United Parcel Service (UPS),,, openings page ,,
131
+ Universal Destinations & Experiences,,, openings page ,,
132
+ United States Postal Service (USPS),,, openings page ,,
133
+ USAA,,, openings page ,,
134
+ Vetex pharmaceuticals,,,openings page ,,
135
+ Wabtec Corporation,https://careers.wabtec.com/events,, openings page ,,
136
+ Walter P. Moore,,, openings page ,,
137
+ Wells Fargo,https://talent.wellsfargojobs.com/flows/nsbe-annual-convention-2026-lyofuh0jm,, openings page ,,
138
+ "WSP USA, Inc",,, openings page ,,
139
+ Worley,RSVP,,openings page ,,
src/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """AI Career Fair Matcher package."""
src/jobs/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Job discovery modules."""
src/jobs/ats_detector.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from urllib.parse import urlparse


# Hostname/markup fragments that identify well-known applicant tracking systems.
ATS_PATTERNS = {
    "greenhouse": ["greenhouse.io"],
    "lever": ["lever.co", "jobs.lever.co"],
    "workday": ["myworkdayjobs.com", "workday.com"],
    "smartrecruiters": ["smartrecruiters.com"],
    "icims": ["icims.com"],
    "paradox": ["paradox.ai"],
    "oracle": ["oraclecloud.com"],
    "avature": ["avature.net"],
    "eightfold": ["eightfold.ai"],
    "recsolu": ["recsolu.com"],
}


def detect_ats(url: str, html: str = "") -> str:
    """Guess which ATS vendor serves a careers page.

    Scans both the URL and (optionally) the fetched page markup for known
    vendor fragments. Falls back to "custom" when the hostname looks like
    a jobs/careers site, and "unknown" otherwise.
    """
    haystack = f"{url} {html}".lower()
    for vendor, fragments in ATS_PATTERNS.items():
        for fragment in fragments:
            if fragment in haystack:
                return vendor

    host = urlparse(url).netloc.lower() if url else ""
    if "jobs" in host or "careers" in host:
        return "custom"

    return "unknown"
src/jobs/company_loader.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import csv
2
+ from pathlib import Path
3
+ from typing import Dict, Iterable, List, Optional
4
+
5
+ from src.models import CompanyRecord
6
+
7
+
8
# Candidate (normalized: stripped, lowercased) header names for the company-name column.
COMPANY_KEYS = ["company", "company list", "name", "employer", "organization"]
# Candidate header names for the careers/jobs URL column.
CAREERS_KEYS = ["careers_url", "career url", "jobs_url", "job board", "direct links to company career/job openings page"]
10
+
11
+
12
+ def _normalize_headers(row: Dict[str, str]) -> Dict[str, str]:
13
+ return {k.strip().lower(): (v or "").strip() for k, v in row.items() if k}
14
+
15
+
16
+ def _pick_value(row: Dict[str, str], keys: Iterable[str]) -> str:
17
+ for key in keys:
18
+ if key in row and row[key]:
19
+ return row[key]
20
+ return ""
21
+
22
+
23
def _first_url(row: Dict[str, str]) -> str:
    """Best-effort careers URL for a normalized CSV row.

    Prefers a recognized careers column; otherwise takes the first value
    anywhere in the row that starts with "http". "" when no URL exists.
    """
    preferred = _pick_value(row, CAREERS_KEYS)
    if preferred.startswith("http"):
        return preferred

    return next((value for value in row.values() if value and value.startswith("http")), "")
32
+
33
+
34
def _read_companies(csv_path: Path, source: str) -> List[CompanyRecord]:
    """Parse one CSV file into CompanyRecord entries.

    Rows without a recognizable company name are skipped; each kept row's
    normalized contents are stored on the record's ``meta``.
    """
    records: List[CompanyRecord] = []

    # utf-8-sig tolerates a BOM from spreadsheet exports.
    with csv_path.open("r", encoding="utf-8-sig", newline="") as handle:
        reader = csv.DictReader(handle)
        if not reader.fieldnames:
            return records

        for raw_row in reader:
            normalized = _normalize_headers(raw_row)
            name = _pick_value(normalized, COMPANY_KEYS)
            if not name:
                continue

            records.append(
                CompanyRecord(
                    company=name,
                    careers_url=_first_url(normalized),
                    source=source,
                    meta=normalized,
                )
            )

    return records
59
+
60
+
61
def load_companies(default_csv: str, uploaded_csv: Optional[str] = None) -> List[CompanyRecord]:
    """Load company records, preferring a user upload over the bundled CSV.

    The upload wins only when it exists and parses to at least one row;
    otherwise the default CSV is used.

    Raises:
        FileNotFoundError: the default CSV is missing.
        ValueError: the default CSV parsed to zero companies.
    """
    if uploaded_csv:
        uploaded_path = Path(uploaded_csv)
        if uploaded_path.exists():
            uploaded = _read_companies(uploaded_path, source="upload")
            if uploaded:
                return uploaded

    default_path = Path(default_csv)
    if not default_path.exists():
        raise FileNotFoundError(f"Default company CSV not found: {default_csv}")

    loaded = _read_companies(default_path, source="default")
    if not loaded:
        raise ValueError("No companies were loaded from the CSV source.")

    return loaded
src/jobs/extractor.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import re
from typing import List
from urllib.parse import urljoin

from bs4 import BeautifulSoup

from src.models import CompanyRecord, JobPosting
7
+
8
# Keywords that suggest an anchor's text is a job title rather than navigation.
JOB_TITLE_HINTS = [
    "engineer", "analyst", "developer", "scientist", "intern", "manager",
    "associate", "technician", "specialist", "consultant", "architect",
]
12
+
13
+
14
def extract_jobs_from_html(company: CompanyRecord, html: str, ats: str) -> List[JobPosting]:
    """Heuristically scrape job postings out of a careers page's anchors.

    Keeps anchors whose text contains a job-title keyword, resolves their
    hrefs to absolute URLs, and returns up to 60 deduplicated postings.

    Args:
        company: Company whose careers page was fetched.
        html: Raw page markup ("" yields []).
        ats: Detected ATS key, copied onto each posting.
    """
    if not html:
        return []

    soup = BeautifulSoup(html, "html.parser")
    jobs: List[JobPosting] = []

    for anchor in soup.select("a[href]"):
        title = " ".join(anchor.get_text(" ", strip=True).split())
        if not title or len(title) < 4:
            continue

        title_lower = title.lower()
        if not any(hint in title_lower for hint in JOB_TITLE_HINTS):
            continue

        href = anchor.get("href", "").strip()
        if not href:
            continue

        if href.startswith("http"):
            url = href
        elif href.startswith("/"):
            # Resolve root- and protocol-relative links against the careers
            # URL. urljoin handles a careers_url that carries a path and
            # "//host/..." hrefs correctly, unlike naive concatenation.
            url = urljoin(company.careers_url, href)
        else:
            continue

        nearby_text = anchor.parent.get_text(" ", strip=True)
        location = _extract_location(nearby_text)

        jobs.append(
            JobPosting(
                company=company.company,
                title=title[:120],
                location=location,
                url=url,
                department="",
                description=nearby_text[:500],
                ats=ats,
            )
        )

    deduped = _dedupe_jobs(jobs)
    return deduped[:60]
58
+
59
+
60
+ def _extract_location(text: str) -> str:
61
+ pattern = r"(Remote|[A-Z][a-z]+,\s*[A-Z]{2})"
62
+ match = re.search(pattern, text)
63
+ return match.group(1) if match else ""
64
+
65
+
66
def _dedupe_jobs(jobs: List[JobPosting]) -> List[JobPosting]:
    """Drop duplicate postings keyed by (lowercased title, URL); first occurrence wins."""
    unique: List[JobPosting] = []
    seen_keys = set()
    for posting in jobs:
        identity = (posting.title.lower(), posting.url)
        if identity not in seen_keys:
            seen_keys.add(identity)
            unique.append(posting)
    return unique
src/jobs/fetcher.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import importlib
4
+ import re
5
+ from typing import List, Optional
6
+
7
+ import requests
8
+
9
+ from src.models import CompanyRecord, JobPosting
10
+
11
# Browser-like User-Agent so careers sites are less likely to reject plain HTTP clients.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
}
14
+
15
+
16
def fetch_url_content(url: str, timeout: int = 12) -> str:
    """Fetch a page's HTML, trying plain requests first, then headless rendering.

    Returns "" for a blank URL or when both strategies come up empty.
    """
    if not url:
        return ""

    response = None
    try:
        response = requests.get(url, headers=HEADERS, timeout=timeout)
    except Exception:
        response = None  # best-effort: network failures fall through to Playwright

    if response is not None and response.ok:
        return response.text

    # Non-OK status or request failure: let Playwright render the page.
    return _playwright_fallback(url)
28
+
29
+
30
def _playwright_fallback(url: str) -> str:
    """Render *url* with headless Chromium via Playwright.

    Playwright is imported lazily so the module still works when it is not
    installed. Returns "" when the URL is blank, Playwright is unavailable,
    or rendering fails for any reason.
    """
    if not url:
        return ""

    try:
        sync_api = importlib.import_module("playwright.sync_api")
        sync_playwright = getattr(sync_api, "sync_playwright")

        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            try:
                page = browser.new_page()
                page.goto(url, wait_until="networkidle", timeout=20000)
                return page.content()
            finally:
                # Close the browser even when goto()/content() raises,
                # instead of leaking it until process exit.
                browser.close()
    except Exception:
        return ""
+
48
+
49
+ def _extract_greenhouse_token(url: str) -> Optional[str]:
50
+ match = re.search(r"greenhouse\.io/([^/?#]+)", url)
51
+ return match.group(1) if match else None
52
+
53
+
54
+ def _extract_lever_token(url: str) -> Optional[str]:
55
+ match = re.search(r"lever\.co/([^/?#]+)", url)
56
+ return match.group(1) if match else None
57
+
58
+
59
def _fetch_greenhouse_jobs(company: CompanyRecord) -> List[JobPosting]:
    """Pull postings from the public Greenhouse board API (best-effort: returns whatever parsed before any failure)."""
    jobs: List[JobPosting] = []
    token = _extract_greenhouse_token(company.careers_url)
    if not token:
        return jobs

    api_url = f"https://boards-api.greenhouse.io/v1/boards/{token}/jobs"
    try:
        response = requests.get(api_url, headers=HEADERS, timeout=15)
        if not response.ok:
            return jobs

        for item in response.json().get("jobs", []):
            jobs.append(
                JobPosting(
                    company=company.company,
                    title=item.get("title", "Unknown title"),
                    location=(item.get("location") or {}).get("name", ""),
                    url=item.get("absolute_url", company.careers_url),
                    department=(item.get("department") or ""),
                    description="",
                    ats="greenhouse",
                )
            )
    except Exception:
        pass  # network/JSON failure: keep whatever parsed so far
    return jobs


def _fetch_lever_jobs(company: CompanyRecord) -> List[JobPosting]:
    """Pull postings from the public Lever postings API (best-effort, like the Greenhouse helper)."""
    jobs: List[JobPosting] = []
    token = _extract_lever_token(company.careers_url)
    if not token:
        return jobs

    api_url = f"https://api.lever.co/v0/postings/{token}?mode=json"
    try:
        response = requests.get(api_url, headers=HEADERS, timeout=15)
        if not response.ok:
            return jobs

        for item in response.json():
            jobs.append(
                JobPosting(
                    company=company.company,
                    title=item.get("text", "Unknown title"),
                    location=(item.get("categories") or {}).get("location", ""),
                    url=item.get("hostedUrl", company.careers_url),
                    department=(item.get("categories") or {}).get("team", ""),
                    description=item.get("descriptionPlain", ""),
                    ats="lever",
                )
            )
    except Exception:
        pass  # network/JSON failure: keep whatever parsed so far
    return jobs


def fetch_jobs_from_ats_api(company: CompanyRecord, ats: str) -> List[JobPosting]:
    """Fetch structured postings from a supported ATS's public API.

    Only Greenhouse and Lever expose public JSON endpoints here; every
    other *ats* value returns an empty list so callers fall back to
    scraping the careers page.
    """
    if ats == "greenhouse":
        return _fetch_greenhouse_jobs(company)
    if ats == "lever":
        return _fetch_lever_jobs(company)
    return []
src/models.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dataclasses import dataclass, field
2
+ from typing import Dict, List
3
+
4
+
5
@dataclass
class ResumeProfile:
    """Structured summary of a candidate's resume, consumed by job scoring."""

    skills: List[str] = field(default_factory=list)  # general technical skills
    languages: List[str] = field(default_factory=list)  # programming languages
    frameworks: List[str] = field(default_factory=list)  # libraries/frameworks
    tools: List[str] = field(default_factory=list)  # developer tools/platforms
    target_titles: List[str] = field(default_factory=list)  # roles the candidate targets
    locations: List[str] = field(default_factory=list)  # preferred locations
    experience_level: str = "entry"  # seniority bucket; default assumes early-career
14
+
15
+
16
@dataclass
class CompanyRecord:
    """One company row loaded from a careers CSV."""

    company: str  # company display name
    careers_url: str = ""  # best-guess careers/jobs URL ("" when unknown)
    source: str = "default"  # "default" (bundled CSV) or "upload" (user CSV)
    meta: Dict[str, str] = field(default_factory=dict)  # normalized raw CSV row
22
+
23
+
24
@dataclass
class JobPosting:
    """A single job posting discovered via an ATS API or page scrape."""

    company: str
    title: str
    location: str
    url: str
    department: str = ""
    description: str = ""
    ats: str = "unknown"  # detected ATS vendor key (see src/jobs/ats_detector.py)
33
+
34
+
35
@dataclass
class JobMatch:
    """A job posting paired with its resume-fit score and explanation."""

    company: str
    title: str
    location: str
    url: str
    score: float  # higher = better fit (matches are sorted descending on this)
    explanation: str  # human-readable reasons behind the score
    ats: str
44
+
45
+
46
@dataclass
class CompanyRanking:
    """Aggregated fit ranking for one company across its job matches."""

    company: str
    company_score: float  # aggregate fit score for the company
    match_count: int  # number of matching jobs found
    best_role: str  # title of the strongest matching job
    ats: str
    explanation: str
src/output/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Output formatting and explanation modules."""
src/output/generator.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dataclasses import asdict
2
+ from typing import List
3
+
4
+ from src.models import CompanyRanking, JobMatch, ResumeProfile
5
+
6
+
7
def resume_profile_to_json(profile: ResumeProfile) -> dict:
    """Serialize a ResumeProfile dataclass into a plain, JSON-ready dict."""
    serialized = asdict(profile)
    return serialized
9
+
10
+
11
def build_talking_points(rankings: List[CompanyRanking], matches: List[JobMatch], max_companies: int = 8) -> str:
    """Render markdown recruiter talking points for the top-ranked companies.

    Matches are grouped by company so each section can cite its strongest
    matching role; output is capped at *max_companies* sections.
    """
    if not rankings:
        return "No strong matches found yet. Try a larger company list or resume with more role-specific keywords."

    by_company = {}
    for candidate in matches:
        by_company.setdefault(candidate.company, []).append(candidate)

    sections = ["## Suggested Recruiter Talking Points", ""]
    for ranking in rankings[:max_companies]:
        ranked_matches = sorted(by_company.get(ranking.company, []), key=lambda item: item.score, reverse=True)
        best = ranked_matches[0] if ranked_matches else None

        sections.append(f"### {ranking.company} (Fit Score: {ranking.company_score:.1f})")
        if best is None:
            sections.append("- Your company aligns with my career goals, and I would like to learn which early-career teams are hiring now.")
        else:
            sections.append(
                f"- I noticed your {best.title} role and my resume aligns through {best.explanation}."
            )
            sections.append(
                "- I can contribute quickly in internship/new-grad responsibilities and would love to discuss current hiring priorities."
            )
        sections.append("")

    return "\n".join(sections)
src/resume/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Resume processing modules."""
src/resume/pdf_extract.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pypdf import PdfReader
2
+
3
+
4
def extract_resume_text(pdf_path: str) -> str:
    """Extract and concatenate the text of every page in a resume PDF.

    Pages with no extractable text contribute an empty line; surrounding
    whitespace is trimmed from the combined result.
    """
    reader = PdfReader(pdf_path)
    page_texts = [(page.extract_text() or "") for page in reader.pages]
    return "\n".join(page_texts).strip()
src/resume/profile_builder.py ADDED
@@ -0,0 +1,223 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ import re
4
+ from typing import Any, List, Optional
5
+
6
+ from src.models import ResumeProfile
7
+
8
+
9
# Keyword vocabularies used by the fallback (non-AI) resume parser in this
# module. Lookups are case-insensitive.
SKILLS = {
    "python", "java", "c++", "c", "sql", "javascript", "typescript", "go", "rust",
    "machine learning", "data analysis", "data structures", "algorithms", "api", "testing",
    "cloud", "aws", "azure", "gcp", "devops", "microservices", "cybersecurity", "etl",
}

# Programming languages; intentionally overlaps with SKILLS.
LANGUAGES = {
    "python", "java", "c++", "c", "sql", "javascript", "typescript", "go", "rust",
    "matlab", "r", "swift", "kotlin",
}

# Libraries and frameworks.
FRAMEWORKS = {
    "django", "flask", "fastapi", "react", "angular", "vue", "spring", "pytorch", "tensorflow",
    "scikit-learn", "spark", "pandas", "numpy",
}

# Developer tooling, databases, and productivity software.
TOOLS = {
    "git", "docker", "kubernetes", "jira", "figma", "tableau", "power bi", "linux", "excel",
    "postgresql", "mysql", "mongodb",
}

# Job titles recognized as career targets.
TARGET_TITLES = {
    "software engineer", "data analyst", "data scientist", "machine learning engineer",
    "cybersecurity analyst", "product manager", "systems engineer", "electrical engineer",
    "mechanical engineer", "civil engineer", "cloud engineer", "backend engineer", "frontend engineer",
}

# Phrases that hint at a candidate's experience level in resume text.
ENTRY_LEVEL_SIGNALS = ["intern", "internship", "new grad", "entry", "junior", "student"]
MID_LEVEL_SIGNALS = ["mid", "ii", "2+ years", "3+ years"]
SENIOR_SIGNALS = ["senior", "staff", "principal", "lead", "manager", "director", "10+ years"]

# Exact JSON shape the AI resume parser is instructed to return; also serves
# as the empty/default profile structure.
SCHEMA_TEMPLATE = {
    "skills": [],
    "languages": [],
    "frameworks": [],
    "tools": [],
    "target_titles": [],
    "locations": [],
    "experience_level": "entry",
}
49
+
50
+
51
+ def _find_terms(text: str, candidates: set[str]) -> List[str]:
52
+ found = []
53
+ text_lower = text.lower()
54
+ for item in sorted(candidates):
55
+ pattern = r"\\b" + re.escape(item) + r"\\b"
56
+ if re.search(pattern, text_lower):
57
+ found.append(item)
58
+ return found
59
+
60
+
61
+ def _extract_locations(text: str) -> List[str]:
62
+ common_locations = [
63
+ "baltimore", "washington", "dc", "new york", "atlanta", "charlotte", "chicago",
64
+ "dallas", "houston", "austin", "seattle", "san francisco", "los angeles", "remote",
65
+ ]
66
+ return [loc for loc in common_locations if loc in text.lower()]
67
+
68
+
69
def _infer_experience_level(text: str) -> str:
    """Classify resume text as "senior", "mid", or "entry" (the default).

    Senior signals take precedence over mid, which take precedence over entry.
    Signals are matched as whole words/phrases; the original substring test
    misclassified resumes, e.g. "leadership" triggered the senior signal
    "lead" and "skiing" triggered the mid signal "ii".
    """
    text_lower = text.lower()

    def has_signal(signals: List[str]) -> bool:
        # (?<!\w)/(?!\w) behave like \b but also work for signals whose
        # edges are not word characters (e.g. "2+ years").
        return any(
            re.search(r"(?<!\w)" + re.escape(signal) + r"(?!\w)", text_lower)
            for signal in signals
        )

    if has_signal(SENIOR_SIGNALS):
        return "senior"
    if has_signal(MID_LEVEL_SIGNALS):
        return "mid"
    if has_signal(ENTRY_LEVEL_SIGNALS):
        return "entry"
    return "entry"
78
+
79
+
80
+ def _normalize_string_list(value: Any) -> List[str]:
81
+ if not isinstance(value, list):
82
+ return []
83
+
84
+ normalized = []
85
+ seen = set()
86
+ for item in value:
87
+ if not isinstance(item, str):
88
+ continue
89
+ cleaned = item.strip()
90
+ if not cleaned:
91
+ continue
92
+ key = cleaned.lower()
93
+ if key in seen:
94
+ continue
95
+ seen.add(key)
96
+ normalized.append(cleaned)
97
+ return normalized
98
+
99
+
100
+ def _normalize_experience_level(value: Any) -> str:
101
+ if not isinstance(value, str):
102
+ return "entry"
103
+ lowered = value.strip().lower()
104
+ if lowered in {"entry", "junior", "new grad", "intern"}:
105
+ return "entry"
106
+ if lowered in {"mid", "mid-level", "intermediate"}:
107
+ return "mid"
108
+ if lowered in {"senior", "lead", "staff", "principal"}:
109
+ return "senior"
110
+ return "entry"
111
+
112
+
113
def _coerce_profile_json(raw_profile: dict) -> ResumeProfile:
    """Build a ResumeProfile from untrusted parser output, normalizing fields."""
    list_fields = ("skills", "languages", "frameworks", "tools", "target_titles", "locations")
    normalized = {
        field: _normalize_string_list(raw_profile.get(field, []))
        for field in list_fields
    }
    normalized["experience_level"] = _normalize_experience_level(
        raw_profile.get("experience_level", "entry")
    )
    return ResumeProfile(**normalized)
123
+
124
+
125
+ def _extract_json_object(text: str) -> Optional[dict]:
126
+ text = text.strip()
127
+ if not text:
128
+ return None
129
+
130
+ try:
131
+ parsed = json.loads(text)
132
+ return parsed if isinstance(parsed, dict) else None
133
+ except json.JSONDecodeError:
134
+ pass
135
+
136
+ match = re.search(r"\{[\s\S]*\}", text)
137
+ if not match:
138
+ return None
139
+
140
+ try:
141
+ parsed = json.loads(match.group(0))
142
+ return parsed if isinstance(parsed, dict) else None
143
+ except json.JSONDecodeError:
144
+ return None
145
+
146
+
147
def _build_fallback_profile(resume_text: str) -> ResumeProfile:
    """Build a ResumeProfile by keyword matching when AI parsing is unavailable."""
    return ResumeProfile(
        skills=_find_terms(resume_text, SKILLS),
        languages=_find_terms(resume_text, LANGUAGES),
        frameworks=_find_terms(resume_text, FRAMEWORKS),
        tools=_find_terms(resume_text, TOOLS),
        target_titles=_find_terms(resume_text, TARGET_TITLES),
        locations=_extract_locations(resume_text),
        experience_level=_infer_experience_level(resume_text),
    )
165
+
166
+
167
def _parse_resume_with_ai(resume_text: str) -> Optional[ResumeProfile]:
    """Parse resume text into a ResumeProfile via a hosted chat model.

    Returns None when no Hugging Face token is configured or when the call
    fails for any reason, so the caller can fall back to keyword parsing.
    """
    # Accept any of the common env-var spellings for a Hugging Face token.
    token = (
        os.getenv("HF_TOKEN", "").strip()
        or os.getenv("HUGGINGFACEHUB_API_TOKEN", "").strip()
        or os.getenv("HUGGINGFACE_API_TOKEN", "").strip()
        or os.getenv("HUGGINGFACEHUB_TOKEN", "").strip()
    )
    if not token:
        return None

    model = os.getenv("HF_MODEL", "meta-llama/Llama-3.1-8B-Instruct").strip()

    schema_str = json.dumps(SCHEMA_TEMPLATE, indent=2)
    system_prompt = (
        "You extract resume data into a strict JSON object. "
        "Return JSON only with this exact schema and no extra keys."
    )
    # Resume text is truncated to 20k chars to stay within model context limits.
    user_prompt = (
        "Extract the resume profile from the text below."
        "\nRules:"
        "\n- Use concise normalized terms."
        "\n- Include roles under target_titles."
        "\n- Set experience_level to one of: entry, mid, senior."
        "\n- If unknown, use empty arrays and experience_level entry."
        f"\nSchema:\n{schema_str}"
        f"\nResume Text:\n{resume_text[:20000]}"
    )

    try:
        # Imported lazily so the module loads even without huggingface_hub installed.
        from huggingface_hub import InferenceClient

        client = InferenceClient(token=token)
        completion = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            temperature=0,  # deterministic extraction
            max_tokens=1024,
        )
        content = (completion.choices[0].message.content or "").strip()
        # Model output may wrap the JSON in prose; salvage the object if possible.
        parsed = _extract_json_object(content)
        if not parsed:
            return None
        return _coerce_profile_json(parsed)
    except Exception:
        # Best-effort by design: any network/auth/parsing failure means
        # "no AI profile", and the caller falls back to keyword parsing.
        return None
215
+
216
+
217
def build_resume_profile(resume_text: str, use_ai: bool = True) -> ResumeProfile:
    """Build a profile from resume text, preferring AI parsing when enabled.

    Falls back to keyword-based extraction when AI parsing is disabled or
    returns nothing.
    """
    ai_profile = _parse_resume_with_ai(resume_text) if use_ai else None
    if ai_profile is None:
        return _build_fallback_profile(resume_text)
    return ai_profile
src/scoring/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Matching and ranking modules."""
src/scoring/matcher.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ from collections import defaultdict
3
+ from typing import Dict, List
4
+
5
+ from src.models import CompanyRanking, JobMatch, JobPosting, ResumeProfile
6
+
7
# Job-title keywords used by score_job_match to reward early-career roles
# (+20) and penalize senior-level roles (-25).
ENTRY_LEVEL_TERMS = {"intern", "internship", "entry", "junior", "new grad", "associate", "graduate"}
SENIOR_TERMS = {"senior", "staff", "principal", "lead", "manager", "director", "architect"}
9
+
10
+
11
+
12
+ def _tokenize(text: str) -> set[str]:
13
+ return set(re.findall(r"[a-zA-Z0-9\+#\.]+", text.lower()))
14
+
15
+
16
+ def _contains_phrase(text: str, phrases: List[str]) -> bool:
17
+ lowered = text.lower()
18
+ return any(phrase.lower() in lowered for phrase in phrases)
19
+
20
+
21
def score_job_match(job: JobPosting, profile: ResumeProfile) -> JobMatch:
    """Score how well one job posting fits a resume profile (0-100) with reasons.

    Additive, explainable scoring:
    - up to 40 points for skill/language/framework/tool overlap (8 each),
    - 25 points for a title match against the profile's target titles
      (12 for a generic technical title when no targets are stated),
    - +20 for entry-level title terms, -25 for senior title terms,
    - +10 when the posting mentions one of the profile's locations.

    Fix over the original: skills were tested as raw substrings of the job
    text, so one-letter skills like "c" or "r" matched virtually every
    posting. Single-word terms now require a whole-token match; substring
    matching is kept only for multi-word or hyphenated terms (e.g.
    "machine learning", "scikit-learn") which can never be single tokens.
    """
    blob = " ".join([job.title, job.department, job.description, job.location]).lower()
    tokens = _tokenize(blob)

    skill_pool = set(profile.skills + profile.languages + profile.frameworks + profile.tools)
    overlap = set()
    for item in skill_pool:
        term = item.lower()
        if term in tokens:
            overlap.add(item)
        elif (" " in term or "-" in term) and term in blob:
            overlap.add(item)

    skill_score = min(40.0, 8.0 * len(overlap))

    role_score = 0.0
    if profile.target_titles and _contains_phrase(job.title, profile.target_titles):
        role_score = 25.0
    elif not profile.target_titles and _contains_phrase(job.title, ["engineer", "analyst", "developer", "scientist"]):
        # No stated targets: partial credit for broadly technical titles.
        role_score = 12.0

    entry_score = 0.0
    title_lower = job.title.lower()
    if any(term in title_lower for term in ENTRY_LEVEL_TERMS):
        entry_score += 20.0
    if any(term in title_lower for term in SENIOR_TERMS):
        entry_score -= 25.0

    location_score = 0.0
    if profile.locations and any(loc.lower() in blob for loc in profile.locations):
        location_score = 10.0

    # Clamp so a heavy senior penalty cannot push the total below zero.
    total = max(0.0, min(100.0, skill_score + role_score + entry_score + location_score))

    reasons = []
    if overlap:
        reasons.append(f"skill overlap ({', '.join(sorted(overlap)[:4])})")
    if role_score > 0:
        reasons.append("role alignment")
    if entry_score > 0:
        reasons.append("entry-level title")
    if entry_score < 0:
        reasons.append("senior-level penalty")
    if location_score > 0:
        reasons.append("location fit")
    if not reasons:
        reasons.append("limited overlap but still relevant board")

    return JobMatch(
        company=job.company,
        title=job.title,
        location=job.location,
        url=job.url,
        score=round(total, 2),
        explanation="; ".join(reasons),
        ats=job.ats,
    )
72
+
73
+
74
def rank_companies(matches: List[JobMatch]) -> List[CompanyRanking]:
    """Aggregate per-job matches into one ranked row per company.

    A company's score is the mean of its top five job scores; match_count
    counts only jobs scoring at least 20. Results are sorted best-first.
    """
    grouped: Dict[str, List[JobMatch]] = defaultdict(list)
    for job_match in matches:
        grouped[job_match.company].append(job_match)

    rankings: List[CompanyRanking] = []
    for company, company_matches in grouped.items():
        best_first = sorted(company_matches, key=lambda m: m.score, reverse=True)
        top_five = best_first[:5]
        average = sum(m.score for m in top_five) / len(top_five)
        strong_matches = [m for m in company_matches if m.score >= 20]
        leader = top_five[0] if top_five else None

        rankings.append(
            CompanyRanking(
                company=company,
                company_score=round(average, 2),
                match_count=len(strong_matches),
                best_role=leader.title if leader else "",
                ats=leader.ats if leader else "unknown",
                explanation=leader.explanation if leader else "",
            )
        )

    rankings.sort(key=lambda ranking: ranking.company_score, reverse=True)
    return rankings