acarey5 commited on
Commit
fa6caa6
·
1 Parent(s): 4a339d7

build AI Career Fair Matcher

Browse files
README.md CHANGED
@@ -9,4 +9,57 @@ app_file: app.py
9
  pinned: false
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  pinned: false
10
  ---
11
 
12
+ # AI Career Fair Matcher
13
+
14
+ AI Career Fair Matcher helps students prioritize career fair companies by analyzing resume fit against live job postings.
15
+
16
+ ## What It Does
17
+ - Accepts a resume PDF.
18
+ - Uses a built-in company CSV and optionally accepts a user-uploaded CSV.
19
+ - Extracts resume text and builds a structured profile JSON.
20
+ - Supports AI resume parsing through an OpenAI-compatible API with fallback parsing.
21
+ - Detects ATS providers from company careers URLs.
22
+ - Fetches jobs using requests first and Playwright fallback.
23
+ - Scores jobs with explainable rules.
24
+ - Ranks companies by fit.
25
+ - Generates recruiter talking points.
26
+
27
+ ## Project Structure
28
+ - `app.py`
29
+ - `src/resume/`
30
+ - `src/jobs/`
31
+ - `src/scoring/`
32
+ - `src/output/`
33
+ - `NSBE 2026 Baltimore Company_ Schools - Companies.csv` (built-in default)
34
+ - `data/NSBE 2026 Baltimore Company_ Schools - Companies (1).csv` (alternate built-in)
35
+
36
+ ## Resume Profile Schema
37
+ ```json
38
+ {
39
+ "skills": [],
40
+ "languages": [],
41
+ "frameworks": [],
42
+ "tools": [],
43
+ "target_titles": [],
44
+ "locations": [],
45
+ "experience_level": ""
46
+ }
47
+ ```
48
+
49
+ ## Matching Rules
50
+ - Rewards skill overlap.
51
+ - Rewards role match.
52
+ - Rewards entry-level signals.
53
+ - Penalizes senior role signals.
54
+
55
+ ## Local Run
56
+ 1. Install dependencies:
57
+ `pip install -r requirements.txt`
58
+ 2. Optional but recommended for Playwright fallback:
59
+ `playwright install chromium`
60
+
61
+ ## Output
62
+ - Ranked companies
63
+ - Matching jobs
64
+ - Resume profile JSON
65
+ - Recruiter talking points
app.py CHANGED
@@ -1,7 +1,168 @@
 
 
 
 
1
  import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
- def greet(name):
4
- return "Hello " + name + "!!"
5
 
6
- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
7
- demo.launch()
 
1
+ import json
2
+ from pathlib import Path
3
+ from typing import Any, List, Tuple
4
+
5
  import gradio as gr
6
+ from dotenv import load_dotenv
7
+
8
+ from src.jobs.ats_detector import detect_ats
9
+ from src.jobs.company_loader import load_companies
10
+ from src.jobs.extractor import extract_jobs_from_html
11
+ from src.jobs.fetcher import fetch_jobs_from_ats_api, fetch_url_content
12
+ from src.models import JobPosting
13
+ from src.output.generator import build_talking_points, resume_profile_to_json
14
+ from src.resume.pdf_extract import extract_resume_text
15
+ from src.resume.profile_builder import build_resume_profile
16
+ from src.scoring.matcher import rank_companies, score_job_match
17
+
18
+ BASE_DIR = Path(__file__).resolve().parent
19
+ load_dotenv(BASE_DIR / ".env")
20
+
21
+ DEFAULT_COMPANY_CANDIDATES = [
22
+ BASE_DIR / "NSBE 2026 Baltimore Company_ Schools - Companies.csv",
23
+ BASE_DIR / "data" / "NSBE 2026 Baltimore Company_ Schools - Companies (1).csv",
24
+ ]
25
+
26
+
27
+ def _resolve_file_path(file_obj: Any) -> str:
28
+ if file_obj is None:
29
+ return ""
30
+ if isinstance(file_obj, str):
31
+ return file_obj
32
+ if hasattr(file_obj, "name"):
33
+ return str(file_obj.name)
34
+ if isinstance(file_obj, dict):
35
+ return str(file_obj.get("name", ""))
36
+ return ""
37
+
38
+
39
def _default_companies_path() -> str:
    """Return the first bundled company CSV that exists on disk.

    Raises:
        FileNotFoundError: when none of the bundled candidates is present.
    """
    found = next((candidate for candidate in DEFAULT_COMPANY_CANDIDATES if candidate.exists()), None)
    if found is None:
        raise FileNotFoundError("No default company CSV file is available.")
    return str(found)
45
+
46
+
47
def _fallback_job(company_name: str, careers_url: str, ats: str) -> JobPosting:
    """Build a placeholder posting for a careers page that yielded no parseable roles."""
    placeholder = JobPosting(
        company=company_name,
        title="General Opportunities",
        location="",
        url=careers_url,
        department="",
        description="Careers page discovered but no structured roles were parsed.",
        ats=ats,
    )
    return placeholder
57
+
58
+
59
def _discover_company_jobs(company: Any) -> List[JobPosting]:
    """Collect job postings for a single company.

    Strategy: query the ATS API first; if that yields fewer than three
    roles, scrape the careers page HTML (re-detecting the ATS from the
    markup when the URL alone was inconclusive); if nothing at all was
    found, return a single placeholder posting.
    """
    ats = detect_ats(company.careers_url)
    jobs = fetch_jobs_from_ats_api(company, ats)

    if len(jobs) < 3:
        html = fetch_url_content(company.careers_url)
        if ats == "unknown":
            ats = detect_ats(company.careers_url, html)
        jobs.extend(extract_jobs_from_html(company, html, ats))

    if not jobs:
        jobs = [_fallback_job(company.company, company.careers_url, ats)]
    return jobs


def analyze_resume(
    resume_pdf: Any,
    optional_company_csv: Any,
    max_companies: int,
    use_ai_parser: bool,
) -> Tuple[List[List[Any]], List[List[Any]], str, str]:
    """Run the full resume-to-ranking pipeline for the Gradio UI.

    Args:
        resume_pdf: Uploaded resume file (Gradio file object or path).
        optional_company_csv: Optional user-supplied company CSV.
        max_companies: Cap on how many companies to analyze.
        use_ai_parser: Whether to attempt AI-assisted resume parsing.

    Returns:
        (ranked-company rows, matching-job rows, resume-profile JSON,
        recruiter talking points). Failures are reported inside the JSON
        payload instead of raising, so the UI always renders something.
    """
    resume_path = _resolve_file_path(resume_pdf)
    csv_path = _resolve_file_path(optional_company_csv)

    if not resume_path:
        return [], [], json.dumps({"error": "Please upload a resume PDF."}, indent=2), ""

    try:
        resume_text = extract_resume_text(resume_path)
        profile = build_resume_profile(resume_text, use_ai=use_ai_parser)

        companies = load_companies(_default_companies_path(), csv_path)
        companies = companies[: int(max_companies)]

        discovered_jobs: List[JobPosting] = []
        for company in companies:
            if not company.careers_url:
                continue
            discovered_jobs.extend(_discover_company_jobs(company))

        matches = [score_job_match(job, profile) for job in discovered_jobs]
        matches.sort(key=lambda item: item.score, reverse=True)

        rankings = rank_companies(matches)

        # Cap table sizes so the UI stays responsive on large runs.
        ranked_rows = [
            [r.company, r.company_score, r.match_count, r.best_role, r.ats, r.explanation]
            for r in rankings[:50]
        ]
        match_rows = [
            [m.company, m.title, m.location, m.score, m.ats, m.url, m.explanation]
            for m in matches[:250]
        ]

        profile_json = json.dumps(resume_profile_to_json(profile), indent=2)
        talking_points = build_talking_points(rankings, matches)

        return ranked_rows, match_rows, profile_json, talking_points
    except Exception as exc:  # boundary handler: surface any pipeline failure to the UI
        return [], [], json.dumps({"error": str(exc)}, indent=2), ""
117
+
118
+
119
# --- Gradio UI -------------------------------------------------------------
with gr.Blocks(title="AI Career Fair Matcher") as demo:
    gr.Markdown("# AI Career Fair Matcher")
    gr.Markdown(
        "Upload your resume and optionally a company CSV. "
        "The app extracts your profile, fetches jobs, scores fit, and suggests recruiter talking points."
    )

    # Inputs: resume PDF (required) plus an optional company-list override.
    with gr.Row():
        resume_input = gr.File(label="Resume PDF", file_types=[".pdf"])
        company_csv_input = gr.File(label="Optional Company CSV", file_types=[".csv"])

    # Toggle for AI-assisted resume parsing (passed through to build_resume_profile).
    use_ai_parser_input = gr.Checkbox(
        value=True,
        label="Use AI Resume Parser (OPENAI_API_KEY or HF_TOKEN)",
    )

    # Caps how many companies get fetched and scored per run.
    max_companies_input = gr.Slider(
        minimum=5,
        maximum=100,
        step=1,
        value=30,
        label="Max Companies to Analyze",
    )

    analyze_button = gr.Button("Analyze Career Fair Fit", variant="primary")

    # Outputs: ranked companies, individual job matches, the extracted
    # resume profile, and suggested recruiter talking points.
    ranked_output = gr.Dataframe(
        headers=["Company", "Score", "Matches", "Best Role", "ATS", "Explanation"],
        label="Ranked Companies",
        wrap=True,
    )

    jobs_output = gr.Dataframe(
        headers=["Company", "Job Title", "Location", "Score", "ATS", "URL", "Why It Matches"],
        label="Matching Jobs",
        wrap=True,
    )

    profile_output = gr.Code(label="Resume Profile JSON", language="json")
    talking_points_output = gr.Markdown(label="Talking Points")

    # Wire the button to the pipeline; output order matches analyze_resume's return tuple.
    analyze_button.click(
        fn=analyze_resume,
        inputs=[resume_input, company_csv_input, max_companies_input, use_ai_parser_input],
        outputs=[ranked_output, jobs_output, profile_output, talking_points_output],
    )


if __name__ == "__main__":
    # Route requests through Gradio's queue before launching the app.
    demo.queue().launch()
data/NSBE 2026 Baltimore Company_ Schools - Companies (1).csv ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Company List ,Pre-Conference interview or registration links ,Column 4,Direct links to company Career/job openings page ,"Creator Donise Griffin: me on linkedin :)
2
+ https://www.linkedin.com/in/donise-griffin/","idea from lasts years 2025 list follow @mohamedhaithvm
3
+ https://www.linkedin.com/in/mohamed-ahmed-429302228/"
4
+ 3M,,,openings page ,,
5
+ "ABB, Inc",2026 Annual NSBE Convention | ABB,, openings page ,"Download the Mobile App for 2026
6
+ NSBE Annual Convention!",iOS:
7
+ Abbott,https://www.jobs.abbott/us/en/event/690bb1ceaf6b261cdd97d267/National-Society-of-Black-Engineers-Conference-2026,, openings page ,,Android:
8
+ Accenture,,, openings page ,FairControls is the AI intelligence hub for events.,
9
+ AECOM,,, openings page ,Before the career fair you can:,
10
+ Air products,,, openings page ,🤖 Instantly understand every company attending,
11
+ Airbus,,, openings page ,AI summaries + suggested questions to ask recruiters.,
12
+ Amazon,,,openings page ,,
13
+ Apple ,,, openings page ,,
14
+ Arcadis ,https://arcadis.eightfold.ai/events/candidate?plannedEventId=Oa34M69ekv&domain=arcadis.com,, openings page ,,
15
+ Arconic,,, openings page ,,
16
+ Arup US Inc.,,, openings page ,,
17
+ Astrazeneca Phaaceuticals LP,,, openings page ,,
18
+ Barnes Group,,, openings page ,,
19
+ Bechtel,https://bechtel.recsolu.com/app/collect/event/oSDe4PqZi3dHMdvevlBHjw,, openings page ,,
20
+ "Bentley Systems, Inc.", ,, openings page ,,
21
+ Black Blockchain labs,,,openings page ,,
22
+ Blue Origin,,, openings page ,,
23
+ BNY Mellon,https://eofe.fa.us2.oraclecloud.com/hcmUI/CandidateExperience/en/sites/CX_1001/job/74341/?utm_medium=jobshare&utm_source=External+Job+Share,, openings page ,,
24
+ Bohler Engineering,https://bohler.recsolu.com/app/collect/event/J5TOHMJAPIRuH2YeEciT6w,,openings page,,
25
+ Bristol-Myers Squibb,https://app.eightfold.ai/events/candidate?plannedEventId=GRj3OxoJ,, openings page ,,
26
+ Burns & Mcdonnell,https://burnsmcd.recsolu.com/app/collect/event/iF1_1Zs9E3lBPYpjJ1nbjw,, openings page ,,
27
+ "Cadence Design Systems, Inc.",https://cadence.yello.co/app/collect/event/7cdPwuX2_SeybbUJ3LmK-g?utm_source=hsamplify&utm_medium=linkedin&utm_term=8e24b117-2b45-4061-a1a6-82a6ba199b83,, openings page ,,
28
+ "Capital One, Inc.",,, openings page ,,
29
+ "Cargill, Incorporated",,, openings page ,,
30
+ Caterpillar Inc.,https://caterpillar.yello.co/app/collect/event/egBb1GTxd55C3z7aKjbaJw,, openings page ,,
31
+ Cheveron ,https://chevron.wd5.myworkdayjobs.com/University/event/a377a322e71f1001ad4c7bfcbf060001/register,, openings page ,,
32
+ Clorox,https://wd1.myworkdaysite.com/recruiting/clorox/Clorox/page/e10acc474cc31001f01c95bbe6f10000,, openings page ,,
33
+ ConocoPhillips,,, openings page ,,
34
+ Constellation Energy,,, openings page ,,
35
+ "Cook Medical Holdings, Inc.",,, openings page ,,
36
+ "Cummins, Inc.",,, openings page ,,
37
+ Dairy Farmers of America,,, openings page ,,
38
+ Dauch Corporation,,, openings page ,,
39
+ Dell Technologies,,, openings page ,,
40
+ Deloitte,,, openings page ,,
41
+ "Delta Air Lines, Inc",,LOOKING FOR PEOPLE FOR CO-OPS , openings page ,,
42
+ "Dodge Industrial, Inc.",,, openings page ,,
43
+ DPR Construction,,, openings page ,,
44
+ Draper,,, openings page ,,
45
+ Duracell,,, openings page ,,
46
+ Eaton Corporation,Find Eaton at NSBE,, openings page ,,
47
+ "Edwards Lifesciences, LLC",https://flows.beamery.com/httpswwwedwardscomcareershome/edwards-nsbe-2026?utm_source=linkedin&utm_medium=video&utm_content=voe,, openings page ,,
48
+ Ernst & Young LLP,,, openings page ,,
49
+ "ESRI (Environmental Systems Research Institute, Inc.)",,, openings page ,,
50
+ Estee lauder Companies ,,,openings page ,,
51
+ ExxonMobile,,,openings page ,,
52
+ Fish & Richardson P.C.,,, openings page ,,
53
+ FM Global,,, openings page ,,
54
+ Fonteva,,, openings page ,,
55
+ Ford Motor Company,"Ford Events 2 | Instagram, Facebook | Linktree
56
+
57
+ Ford Pre-Registration Link",, openings page ,,
58
+ Freeport-McMoRan Inc,,, openings page ,,
59
+ GE Aerospace,https://careers.geaerospace.com/global/en/event/697cccfc19232421f311f22f/GE-Aerospace-NSBE-2026-National-Convention-and-Career-Fair,, openings page ,,
60
+ GE Healthcare,https://careers.gehealthcare.com/global/en/event/6970f96419232421f311e00c/National-Society-of-Black-Engineers-NSBE-Convention-2026,, openings page ,,
61
+ GE Vernova,https://olivia.paradox.ai/co/GEVernova32/Event/2026NSBEAnnualConvention,, openings page ,,
62
+ "General Dynamics, Inc.",,, openings page ,,
63
+ General Motors,,, openings page ,,
64
+ GHD,,, openings page ,,
65
+ Gilbane Building Company,,, openings page ,,
66
+ Givelify,,, openings page ,,
67
+ Goldman Sachs,https://higher.gs.com/campus?DIVISION=Engineering%20Division&EXPERIENCE_LEVEL=Summer%20Analyst&LOCATION=New%20York%7CDallas%7CSalt%20Lake%20City&page=1&sort=POSTED_DATE,, openings page ,,
68
+ Granite Construction,,, openings page ,,
69
+ Hensel Phelps,,, openings page ,,
70
+ Honeywell,https://app.brazenconnect.com/a/honeywell/e/zq7jm?utm_source=event%20page&utm_medium=flyer&utm_campaign=honeywell%20nsbe%202026,,openings page,,
71
+ Intel Corporation,,,openings page ,,
72
+ IQT,,,openings page ,,
73
+ Jabil Inc.,,, openings page ,,
74
+ Jacobs,https://jacobs.avature.net/eventlisting/EventDetail?eventId=29715,, openings page ,,
75
+ John Deere,,, openings page ,,
76
+ Johnson & Johnson Family of Companies,,, openings page ,,
77
+ Kiewit,https://olivia.paradox.ai/co/Kiewit27/Event/2026NSBEAnnualConvention,, openings page ,,
78
+ KLA,,, openings page ,,
79
+ L'Oreal,,,openings page ,,
80
+ Lam Research Corporation,,, openings page ,,
81
+ Lenovo,,, openings page ,,
82
+ Linde,,, openings page ,,
83
+ Lockheed Martin Corporation,,, openings page ,,
84
+ Los Angeles Department of Water and Power,,, openings page ,,
85
+ Marvell Technology,,, openings page ,,
86
+ MathWorks,,,openings page ,,
87
+ McCarthy Building Companies,,, openings page ,,
88
+ "Merck & Co., Inc.",https://jobs.merck.com/us/en/event/68ac56791218cc52521fd372/2026-NSBE-Annual-Convention,, openings page ,,
89
+ Michigan Department of Transportation,,,openings page ,,
90
+ Micron,,,openings page ,,
91
+ "Microsoft, Inc.",,, openings page ,,
92
+ Morgan Stanley ,,,openings page ,,
93
+ Mortenson Construction,https://mortenson.recsolu.com/app/collect/event/6669UqBJAL4x9TwP_MyomA,, openings page ,,
94
+ "Nintendo of America, Inc.",,, openings page ,,
95
+ Nucor Corporation,,, openings page ,,
96
+ NVIDIA,,, openings page ,,
97
+ Pacific Gas and Electric Company,,, openings page ,,
98
+ PBF Energy,,, openings page ,,
99
+ "PepsiCo, Inc.",,, openings page ,,
100
+ Perdue Farms,,,openings page ,,
101
+ "Pfizer, Inc.",,, openings page ,,
102
+ Ramboll,,, openings page ,,
103
+ 22,,, openings page ,,
104
+ Rockwell Automation,,, openings page ,,
105
+ Sandia National Laboratories,,, openings page ,,
106
+ Schneider Electric,,, openings page ,,
107
+ "Siemens, Inc.",,, openings page ,,
108
+ Skanska,,, openings page ,,
109
+ SMART Scholarship For Service Program,,, openings page ,,
110
+ Smurfit Westrock,,, openings page ,,
111
+ Southern Company,,, openings page ,,
112
+ "Southwire Company, Inc.",,, openings page ,,
113
+ SpaceX,,, openings page ,,
114
+ SSOE Group,,,openings page ,,
115
+ Swinerton,,,openings page ,,
116
+ Target Corporation ,,,openings page ,,
117
+ TE Connectivity,,, openings page ,,
118
+ Teledyne Technologies,https://teledyne.yello.co/app/collect/event/xZobTUiCcDSAQkNmciQr3Q,, openings page ,,
119
+ "Terracon Consultants, Inc.",https://sjobs.brassring.com/TGnewUI/Search/home/HomeWithPreLoad?PageType=JobDetails&partnerid=25664&siteid=5383&jobid=5211397#jobDetails=5211397_5383,, openings page ,,
120
+ Texas Department of Transportation,,,openings page ,,
121
+ Texas Instruments,,, openings page ,,
122
+ The Clorox Company,https://wd1.myworkdaysite.com/recruiting/clorox/Clorox/page/e10acc474cc31001f01c95bbe6f10000,, openings page ,,
123
+ The Whiting-Turner Contracting Company,,, openings page ,,
124
+ Toyota,,, openings page ,,
125
+ "Trane Technologies, Inc.",https://careers.tranetechnologies.com/global/en/event/697bbe1219232421f311f0c9/NSBE-2026-Career-Fair-with-Trane-Technologies-Booth-1681B,, openings page ,,
126
+ "Trimble, Inc.",,, openings page ,,
127
+ Turner Construction,,, openings page ,,
128
+ UL standards and enterprise,,,openings page ,,
129
+ United Airlines,,,openings page ,,
130
+ United Parcel Service (UPS),,, openings page ,,
131
+ Universal Destinations & Experiences,,, openings page ,,
132
+ United States Postal Service (USPS),,, openings page ,,
133
+ USAA,,, openings page ,,
134
+ Vetex pharmaceuticals,,,openings page ,,
135
+ Wabtec Corporation,https://careers.wabtec.com/events,, openings page ,,
136
+ Walter P. Moore,,, openings page ,,
137
+ Wells Fargo,https://talent.wellsfargojobs.com/flows/nsbe-annual-convention-2026-lyofuh0jm,, openings page ,,
138
+ "WSP USA, Inc",,, openings page ,,
139
+ Worley,RSVP,,openings page ,,
src/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """AI Career Fair Matcher package."""
src/jobs/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Job discovery modules."""
src/jobs/ats_detector.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from urllib.parse import urlparse


# Hostname/markup fragments that identify well-known applicant tracking systems.
ATS_PATTERNS = {
    "greenhouse": ["greenhouse.io"],
    "lever": ["lever.co", "jobs.lever.co"],
    "workday": ["myworkdayjobs.com", "workday.com"],
    "smartrecruiters": ["smartrecruiters.com"],
    "icims": ["icims.com"],
    "paradox": ["paradox.ai"],
    "oracle": ["oraclecloud.com"],
    "avature": ["avature.net"],
    "eightfold": ["eightfold.ai"],
    "recsolu": ["recsolu.com"],
}


def detect_ats(url: str, html: str = "") -> str:
    """Guess which ATS vendor serves a careers page.

    Scans both the URL and (optionally) the fetched page markup for known
    vendor fragments. Falls back to "custom" when the hostname looks like
    a jobs/careers site, and "unknown" otherwise.
    """
    haystack = f"{url} {html}".lower()
    for vendor, fragments in ATS_PATTERNS.items():
        for fragment in fragments:
            if fragment in haystack:
                return vendor

    host = urlparse(url).netloc.lower() if url else ""
    if "jobs" in host or "careers" in host:
        return "custom"

    return "unknown"
src/jobs/company_loader.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import csv
2
+ from pathlib import Path
3
+ from typing import Dict, Iterable, List, Optional
4
+
5
+ from src.models import CompanyRecord
6
+
7
+
8
# Candidate (normalized: stripped, lowercased) header names for the company-name column.
COMPANY_KEYS = ["company", "company list", "name", "employer", "organization"]
# Candidate header names for the careers/jobs URL column.
CAREERS_KEYS = ["careers_url", "career url", "jobs_url", "job board", "direct links to company career/job openings page"]
10
+
11
+
12
+ def _normalize_headers(row: Dict[str, str]) -> Dict[str, str]:
13
+ return {k.strip().lower(): (v or "").strip() for k, v in row.items() if k}
14
+
15
+
16
+ def _pick_value(row: Dict[str, str], keys: Iterable[str]) -> str:
17
+ for key in keys:
18
+ if key in row and row[key]:
19
+ return row[key]
20
+ return ""
21
+
22
+
23
def _first_url(row: Dict[str, str]) -> str:
    """Best-effort careers URL for a normalized CSV row.

    Prefers a recognized careers column; otherwise takes the first value
    anywhere in the row that starts with "http". "" when no URL exists.
    """
    preferred = _pick_value(row, CAREERS_KEYS)
    if preferred.startswith("http"):
        return preferred

    return next((value for value in row.values() if value and value.startswith("http")), "")
32
+
33
+
34
def _read_companies(csv_path: Path, source: str) -> List[CompanyRecord]:
    """Parse one CSV file into CompanyRecord entries.

    Rows without a recognizable company name are skipped; each kept row's
    normalized contents are stored on the record's ``meta``.
    """
    records: List[CompanyRecord] = []

    # utf-8-sig tolerates a BOM from spreadsheet exports.
    with csv_path.open("r", encoding="utf-8-sig", newline="") as handle:
        reader = csv.DictReader(handle)
        if not reader.fieldnames:
            return records

        for raw_row in reader:
            normalized = _normalize_headers(raw_row)
            name = _pick_value(normalized, COMPANY_KEYS)
            if not name:
                continue

            records.append(
                CompanyRecord(
                    company=name,
                    careers_url=_first_url(normalized),
                    source=source,
                    meta=normalized,
                )
            )

    return records
59
+
60
+
61
def load_companies(default_csv: str, uploaded_csv: Optional[str] = None) -> List[CompanyRecord]:
    """Load company records, preferring a user upload over the bundled CSV.

    The upload wins only when it exists and parses to at least one row;
    otherwise the default CSV is used.

    Raises:
        FileNotFoundError: the default CSV is missing.
        ValueError: the default CSV parsed to zero companies.
    """
    if uploaded_csv:
        uploaded_path = Path(uploaded_csv)
        if uploaded_path.exists():
            uploaded = _read_companies(uploaded_path, source="upload")
            if uploaded:
                return uploaded

    default_path = Path(default_csv)
    if not default_path.exists():
        raise FileNotFoundError(f"Default company CSV not found: {default_csv}")

    loaded = _read_companies(default_path, source="default")
    if not loaded:
        raise ValueError("No companies were loaded from the CSV source.")

    return loaded
src/jobs/extractor.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import re
from typing import List
from urllib.parse import urljoin

from bs4 import BeautifulSoup

from src.models import CompanyRecord, JobPosting
7
+
8
# Keywords that suggest an anchor's text is a job title rather than navigation.
JOB_TITLE_HINTS = [
    "engineer", "analyst", "developer", "scientist", "intern", "manager",
    "associate", "technician", "specialist", "consultant", "architect",
]
12
+
13
+
14
def extract_jobs_from_html(company: CompanyRecord, html: str, ats: str) -> List[JobPosting]:
    """Heuristically scrape job postings out of a careers page's anchors.

    Keeps anchors whose text contains a job-title keyword, resolves their
    hrefs to absolute URLs, and returns up to 60 deduplicated postings.

    Args:
        company: Company whose careers page was fetched.
        html: Raw page markup ("" yields []).
        ats: Detected ATS key, copied onto each posting.
    """
    if not html:
        return []

    soup = BeautifulSoup(html, "html.parser")
    jobs: List[JobPosting] = []

    for anchor in soup.select("a[href]"):
        title = " ".join(anchor.get_text(" ", strip=True).split())
        if not title or len(title) < 4:
            continue

        title_lower = title.lower()
        if not any(hint in title_lower for hint in JOB_TITLE_HINTS):
            continue

        href = anchor.get("href", "").strip()
        if not href:
            continue

        if href.startswith("http"):
            url = href
        elif href.startswith("/"):
            # Resolve root- and protocol-relative links against the careers
            # URL. urljoin handles a careers_url that carries a path and
            # "//host/..." hrefs correctly, unlike naive concatenation.
            url = urljoin(company.careers_url, href)
        else:
            continue

        nearby_text = anchor.parent.get_text(" ", strip=True)
        location = _extract_location(nearby_text)

        jobs.append(
            JobPosting(
                company=company.company,
                title=title[:120],
                location=location,
                url=url,
                department="",
                description=nearby_text[:500],
                ats=ats,
            )
        )

    deduped = _dedupe_jobs(jobs)
    return deduped[:60]
58
+
59
+
60
+ def _extract_location(text: str) -> str:
61
+ pattern = r"(Remote|[A-Z][a-z]+,\s*[A-Z]{2})"
62
+ match = re.search(pattern, text)
63
+ return match.group(1) if match else ""
64
+
65
+
66
def _dedupe_jobs(jobs: List[JobPosting]) -> List[JobPosting]:
    """Drop duplicate postings keyed by (lowercased title, URL); first occurrence wins."""
    unique: List[JobPosting] = []
    seen_keys = set()
    for posting in jobs:
        identity = (posting.title.lower(), posting.url)
        if identity not in seen_keys:
            seen_keys.add(identity)
            unique.append(posting)
    return unique
src/jobs/fetcher.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import importlib
4
+ import re
5
+ from typing import List, Optional
6
+
7
+ import requests
8
+
9
+ from src.models import CompanyRecord, JobPosting
10
+
11
# Browser-like User-Agent so careers sites are less likely to reject plain HTTP clients.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
}
14
+
15
+
16
def fetch_url_content(url: str, timeout: int = 12) -> str:
    """Fetch a page's HTML, trying plain requests first, then headless rendering.

    Returns "" for a blank URL or when both strategies come up empty.
    """
    if not url:
        return ""

    response = None
    try:
        response = requests.get(url, headers=HEADERS, timeout=timeout)
    except Exception:
        response = None  # best-effort: network failures fall through to Playwright

    if response is not None and response.ok:
        return response.text

    # Non-OK status or request failure: let Playwright render the page.
    return _playwright_fallback(url)
28
+
29
+
30
def _playwright_fallback(url: str) -> str:
    """Render *url* with headless Chromium via Playwright.

    Playwright is imported lazily so the module still works when it is not
    installed. Returns "" when the URL is blank, Playwright is unavailable,
    or rendering fails for any reason.
    """
    if not url:
        return ""

    try:
        sync_api = importlib.import_module("playwright.sync_api")
        sync_playwright = getattr(sync_api, "sync_playwright")

        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            try:
                page = browser.new_page()
                page.goto(url, wait_until="networkidle", timeout=20000)
                return page.content()
            finally:
                # Close the browser even when goto()/content() raises,
                # instead of leaking it until process exit.
                browser.close()
    except Exception:
        return ""
+
48
+
49
+ def _extract_greenhouse_token(url: str) -> Optional[str]:
50
+ match = re.search(r"greenhouse\.io/([^/?#]+)", url)
51
+ return match.group(1) if match else None
52
+
53
+
54
+ def _extract_lever_token(url: str) -> Optional[str]:
55
+ match = re.search(r"lever\.co/([^/?#]+)", url)
56
+ return match.group(1) if match else None
57
+
58
+
59
def _fetch_greenhouse_jobs(company: CompanyRecord) -> List[JobPosting]:
    """Pull postings from the public Greenhouse board API (best-effort: returns whatever parsed before any failure)."""
    jobs: List[JobPosting] = []
    token = _extract_greenhouse_token(company.careers_url)
    if not token:
        return jobs

    api_url = f"https://boards-api.greenhouse.io/v1/boards/{token}/jobs"
    try:
        response = requests.get(api_url, headers=HEADERS, timeout=15)
        if not response.ok:
            return jobs

        for item in response.json().get("jobs", []):
            jobs.append(
                JobPosting(
                    company=company.company,
                    title=item.get("title", "Unknown title"),
                    location=(item.get("location") or {}).get("name", ""),
                    url=item.get("absolute_url", company.careers_url),
                    department=(item.get("department") or ""),
                    description="",
                    ats="greenhouse",
                )
            )
    except Exception:
        pass  # network/JSON failure: keep whatever parsed so far
    return jobs


def _fetch_lever_jobs(company: CompanyRecord) -> List[JobPosting]:
    """Pull postings from the public Lever postings API (best-effort, like the Greenhouse helper)."""
    jobs: List[JobPosting] = []
    token = _extract_lever_token(company.careers_url)
    if not token:
        return jobs

    api_url = f"https://api.lever.co/v0/postings/{token}?mode=json"
    try:
        response = requests.get(api_url, headers=HEADERS, timeout=15)
        if not response.ok:
            return jobs

        for item in response.json():
            jobs.append(
                JobPosting(
                    company=company.company,
                    title=item.get("text", "Unknown title"),
                    location=(item.get("categories") or {}).get("location", ""),
                    url=item.get("hostedUrl", company.careers_url),
                    department=(item.get("categories") or {}).get("team", ""),
                    description=item.get("descriptionPlain", ""),
                    ats="lever",
                )
            )
    except Exception:
        pass  # network/JSON failure: keep whatever parsed so far
    return jobs


def fetch_jobs_from_ats_api(company: CompanyRecord, ats: str) -> List[JobPosting]:
    """Fetch structured postings from a supported ATS's public API.

    Only Greenhouse and Lever expose public JSON endpoints here; every
    other *ats* value returns an empty list so callers fall back to
    scraping the careers page.
    """
    if ats == "greenhouse":
        return _fetch_greenhouse_jobs(company)
    if ats == "lever":
        return _fetch_lever_jobs(company)
    return []
src/models.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dataclasses import dataclass, field
2
+ from typing import Dict, List
3
+
4
+
5
@dataclass
class ResumeProfile:
    """Structured summary of a candidate's resume, consumed by job scoring."""

    skills: List[str] = field(default_factory=list)  # general technical skills
    languages: List[str] = field(default_factory=list)  # programming languages
    frameworks: List[str] = field(default_factory=list)  # libraries/frameworks
    tools: List[str] = field(default_factory=list)  # developer tools/platforms
    target_titles: List[str] = field(default_factory=list)  # roles the candidate targets
    locations: List[str] = field(default_factory=list)  # preferred locations
    experience_level: str = "entry"  # seniority bucket; default assumes early-career
14
+
15
+
16
@dataclass
class CompanyRecord:
    """One company row loaded from a careers CSV."""

    company: str  # company display name
    careers_url: str = ""  # best-guess careers/jobs URL ("" when unknown)
    source: str = "default"  # "default" (bundled CSV) or "upload" (user CSV)
    meta: Dict[str, str] = field(default_factory=dict)  # normalized raw CSV row
22
+
23
+
24
@dataclass
class JobPosting:
    """A single job posting discovered via an ATS API or page scrape."""

    company: str
    title: str
    location: str
    url: str
    department: str = ""
    description: str = ""
    ats: str = "unknown"  # detected ATS vendor key (see src/jobs/ats_detector.py)
33
+
34
+
35
@dataclass
class JobMatch:
    """A job posting paired with its resume-fit score and explanation."""

    company: str
    title: str
    location: str
    url: str
    score: float  # higher = better fit (matches are sorted descending on this)
    explanation: str  # human-readable reasons behind the score
    ats: str
44
+
45
+
46
@dataclass
class CompanyRanking:
    """Aggregated fit ranking for one company across its job matches."""

    company: str
    company_score: float  # aggregate fit score for the company
    match_count: int  # number of matching jobs found
    best_role: str  # title of the strongest matching job
    ats: str
    explanation: str
src/output/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Output formatting and explanation modules."""
src/output/generator.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dataclasses import asdict
2
+ from typing import List
3
+
4
+ from src.models import CompanyRanking, JobMatch, ResumeProfile
5
+
6
+
7
def resume_profile_to_json(profile: ResumeProfile) -> dict:
    """Serialize a ResumeProfile dataclass into a plain, JSON-ready dict."""
    serialized = asdict(profile)
    return serialized
9
+
10
+
11
def build_talking_points(rankings: List[CompanyRanking], matches: List[JobMatch], max_companies: int = 8) -> str:
    """Render markdown recruiter talking points for the top-ranked companies.

    Matches are grouped by company so each section can cite its strongest
    matching role; output is capped at *max_companies* sections.
    """
    if not rankings:
        return "No strong matches found yet. Try a larger company list or resume with more role-specific keywords."

    by_company = {}
    for candidate in matches:
        by_company.setdefault(candidate.company, []).append(candidate)

    sections = ["## Suggested Recruiter Talking Points", ""]
    for ranking in rankings[:max_companies]:
        ranked_matches = sorted(by_company.get(ranking.company, []), key=lambda item: item.score, reverse=True)
        best = ranked_matches[0] if ranked_matches else None

        sections.append(f"### {ranking.company} (Fit Score: {ranking.company_score:.1f})")
        if best is None:
            sections.append("- Your company aligns with my career goals, and I would like to learn which early-career teams are hiring now.")
        else:
            sections.append(
                f"- I noticed your {best.title} role and my resume aligns through {best.explanation}."
            )
            sections.append(
                "- I can contribute quickly in internship/new-grad responsibilities and would love to discuss current hiring priorities."
            )
        sections.append("")

    return "\n".join(sections)
src/resume/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Resume processing modules."""
src/resume/pdf_extract.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pypdf import PdfReader
2
+
3
+
4
def extract_resume_text(pdf_path: str) -> str:
    """Extract and concatenate the text of every page in a resume PDF.

    Pages with no extractable text contribute an empty line; surrounding
    whitespace is trimmed from the combined result.
    """
    reader = PdfReader(pdf_path)
    page_texts = [(page.extract_text() or "") for page in reader.pages]
    return "\n".join(page_texts).strip()
src/resume/profile_builder.py ADDED
@@ -0,0 +1,223 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ import re
4
+ from typing import Any, List, Optional
5
+
6
+ from src.models import ResumeProfile
7
+
8
+
9
# Keyword vocabularies used by the fallback (non-AI) resume parser in this
# module. Lookups are case-insensitive.
SKILLS = {
    "python", "java", "c++", "c", "sql", "javascript", "typescript", "go", "rust",
    "machine learning", "data analysis", "data structures", "algorithms", "api", "testing",
    "cloud", "aws", "azure", "gcp", "devops", "microservices", "cybersecurity", "etl",
}

# Programming languages; intentionally overlaps with SKILLS.
LANGUAGES = {
    "python", "java", "c++", "c", "sql", "javascript", "typescript", "go", "rust",
    "matlab", "r", "swift", "kotlin",
}

# Libraries and frameworks.
FRAMEWORKS = {
    "django", "flask", "fastapi", "react", "angular", "vue", "spring", "pytorch", "tensorflow",
    "scikit-learn", "spark", "pandas", "numpy",
}

# Developer tooling, databases, and productivity software.
TOOLS = {
    "git", "docker", "kubernetes", "jira", "figma", "tableau", "power bi", "linux", "excel",
    "postgresql", "mysql", "mongodb",
}

# Job titles recognized as career targets.
TARGET_TITLES = {
    "software engineer", "data analyst", "data scientist", "machine learning engineer",
    "cybersecurity analyst", "product manager", "systems engineer", "electrical engineer",
    "mechanical engineer", "civil engineer", "cloud engineer", "backend engineer", "frontend engineer",
}

# Phrases that hint at a candidate's experience level in resume text.
ENTRY_LEVEL_SIGNALS = ["intern", "internship", "new grad", "entry", "junior", "student"]
MID_LEVEL_SIGNALS = ["mid", "ii", "2+ years", "3+ years"]
SENIOR_SIGNALS = ["senior", "staff", "principal", "lead", "manager", "director", "10+ years"]

# Exact JSON shape the AI resume parser is instructed to return; also serves
# as the empty/default profile structure.
SCHEMA_TEMPLATE = {
    "skills": [],
    "languages": [],
    "frameworks": [],
    "tools": [],
    "target_titles": [],
    "locations": [],
    "experience_level": "entry",
}
49
+
50
+
51
+ def _find_terms(text: str, candidates: set[str]) -> List[str]:
52
+ found = []
53
+ text_lower = text.lower()
54
+ for item in sorted(candidates):
55
+ pattern = r"\\b" + re.escape(item) + r"\\b"
56
+ if re.search(pattern, text_lower):
57
+ found.append(item)
58
+ return found
59
+
60
+
61
+ def _extract_locations(text: str) -> List[str]:
62
+ common_locations = [
63
+ "baltimore", "washington", "dc", "new york", "atlanta", "charlotte", "chicago",
64
+ "dallas", "houston", "austin", "seattle", "san francisco", "los angeles", "remote",
65
+ ]
66
+ return [loc for loc in common_locations if loc in text.lower()]
67
+
68
+
69
def _infer_experience_level(text: str) -> str:
    """Classify resume text as "senior", "mid", or "entry" (the default).

    Senior signals take precedence over mid, which take precedence over entry.
    Signals are matched as whole words/phrases; the original substring test
    misclassified resumes, e.g. "leadership" triggered the senior signal
    "lead" and "skiing" triggered the mid signal "ii".
    """
    text_lower = text.lower()

    def has_signal(signals: List[str]) -> bool:
        # (?<!\w)/(?!\w) behave like \b but also work for signals whose
        # edges are not word characters (e.g. "2+ years").
        return any(
            re.search(r"(?<!\w)" + re.escape(signal) + r"(?!\w)", text_lower)
            for signal in signals
        )

    if has_signal(SENIOR_SIGNALS):
        return "senior"
    if has_signal(MID_LEVEL_SIGNALS):
        return "mid"
    if has_signal(ENTRY_LEVEL_SIGNALS):
        return "entry"
    return "entry"
78
+
79
+
80
+ def _normalize_string_list(value: Any) -> List[str]:
81
+ if not isinstance(value, list):
82
+ return []
83
+
84
+ normalized = []
85
+ seen = set()
86
+ for item in value:
87
+ if not isinstance(item, str):
88
+ continue
89
+ cleaned = item.strip()
90
+ if not cleaned:
91
+ continue
92
+ key = cleaned.lower()
93
+ if key in seen:
94
+ continue
95
+ seen.add(key)
96
+ normalized.append(cleaned)
97
+ return normalized
98
+
99
+
100
+ def _normalize_experience_level(value: Any) -> str:
101
+ if not isinstance(value, str):
102
+ return "entry"
103
+ lowered = value.strip().lower()
104
+ if lowered in {"entry", "junior", "new grad", "intern"}:
105
+ return "entry"
106
+ if lowered in {"mid", "mid-level", "intermediate"}:
107
+ return "mid"
108
+ if lowered in {"senior", "lead", "staff", "principal"}:
109
+ return "senior"
110
+ return "entry"
111
+
112
+
113
def _coerce_profile_json(raw_profile: dict) -> ResumeProfile:
    """Build a ResumeProfile from untrusted parser output, normalizing fields."""
    list_fields = ("skills", "languages", "frameworks", "tools", "target_titles", "locations")
    normalized = {
        field: _normalize_string_list(raw_profile.get(field, []))
        for field in list_fields
    }
    normalized["experience_level"] = _normalize_experience_level(
        raw_profile.get("experience_level", "entry")
    )
    return ResumeProfile(**normalized)
123
+
124
+
125
+ def _extract_json_object(text: str) -> Optional[dict]:
126
+ text = text.strip()
127
+ if not text:
128
+ return None
129
+
130
+ try:
131
+ parsed = json.loads(text)
132
+ return parsed if isinstance(parsed, dict) else None
133
+ except json.JSONDecodeError:
134
+ pass
135
+
136
+ match = re.search(r"\{[\s\S]*\}", text)
137
+ if not match:
138
+ return None
139
+
140
+ try:
141
+ parsed = json.loads(match.group(0))
142
+ return parsed if isinstance(parsed, dict) else None
143
+ except json.JSONDecodeError:
144
+ return None
145
+
146
+
147
def _build_fallback_profile(resume_text: str) -> ResumeProfile:
    """Build a ResumeProfile by keyword matching when AI parsing is unavailable."""
    return ResumeProfile(
        skills=_find_terms(resume_text, SKILLS),
        languages=_find_terms(resume_text, LANGUAGES),
        frameworks=_find_terms(resume_text, FRAMEWORKS),
        tools=_find_terms(resume_text, TOOLS),
        target_titles=_find_terms(resume_text, TARGET_TITLES),
        locations=_extract_locations(resume_text),
        experience_level=_infer_experience_level(resume_text),
    )
165
+
166
+
167
def _parse_resume_with_ai(resume_text: str) -> Optional[ResumeProfile]:
    """Parse resume text into a ResumeProfile via a hosted chat model.

    Returns None when no Hugging Face token is configured or when the call
    fails for any reason, so the caller can fall back to keyword parsing.
    """
    # Accept any of the common env-var spellings for a Hugging Face token.
    token = (
        os.getenv("HF_TOKEN", "").strip()
        or os.getenv("HUGGINGFACEHUB_API_TOKEN", "").strip()
        or os.getenv("HUGGINGFACE_API_TOKEN", "").strip()
        or os.getenv("HUGGINGFACEHUB_TOKEN", "").strip()
    )
    if not token:
        return None

    model = os.getenv("HF_MODEL", "meta-llama/Llama-3.1-8B-Instruct").strip()

    schema_str = json.dumps(SCHEMA_TEMPLATE, indent=2)
    system_prompt = (
        "You extract resume data into a strict JSON object. "
        "Return JSON only with this exact schema and no extra keys."
    )
    # Resume text is truncated to 20k chars to stay within model context limits.
    user_prompt = (
        "Extract the resume profile from the text below."
        "\nRules:"
        "\n- Use concise normalized terms."
        "\n- Include roles under target_titles."
        "\n- Set experience_level to one of: entry, mid, senior."
        "\n- If unknown, use empty arrays and experience_level entry."
        f"\nSchema:\n{schema_str}"
        f"\nResume Text:\n{resume_text[:20000]}"
    )

    try:
        # Imported lazily so the module loads even without huggingface_hub installed.
        from huggingface_hub import InferenceClient

        client = InferenceClient(token=token)
        completion = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            temperature=0,  # deterministic extraction
            max_tokens=1024,
        )
        content = (completion.choices[0].message.content or "").strip()
        # Model output may wrap the JSON in prose; salvage the object if possible.
        parsed = _extract_json_object(content)
        if not parsed:
            return None
        return _coerce_profile_json(parsed)
    except Exception:
        # Best-effort by design: any network/auth/parsing failure means
        # "no AI profile", and the caller falls back to keyword parsing.
        return None
215
+
216
+
217
def build_resume_profile(resume_text: str, use_ai: bool = True) -> ResumeProfile:
    """Build a profile from resume text, preferring AI parsing when enabled.

    Falls back to keyword-based extraction when AI parsing is disabled or
    returns nothing.
    """
    ai_profile = _parse_resume_with_ai(resume_text) if use_ai else None
    if ai_profile is None:
        return _build_fallback_profile(resume_text)
    return ai_profile
src/scoring/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Matching and ranking modules."""
src/scoring/matcher.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ from collections import defaultdict
3
+ from typing import Dict, List
4
+
5
+ from src.models import CompanyRanking, JobMatch, JobPosting, ResumeProfile
6
+
7
# Job-title keywords used by score_job_match to reward early-career roles
# (+20) and penalize senior-level roles (-25).
ENTRY_LEVEL_TERMS = {"intern", "internship", "entry", "junior", "new grad", "associate", "graduate"}
SENIOR_TERMS = {"senior", "staff", "principal", "lead", "manager", "director", "architect"}
9
+
10
+
11
+
12
+ def _tokenize(text: str) -> set[str]:
13
+ return set(re.findall(r"[a-zA-Z0-9\+#\.]+", text.lower()))
14
+
15
+
16
+ def _contains_phrase(text: str, phrases: List[str]) -> bool:
17
+ lowered = text.lower()
18
+ return any(phrase.lower() in lowered for phrase in phrases)
19
+
20
+
21
def score_job_match(job: JobPosting, profile: ResumeProfile) -> JobMatch:
    """Score how well one job posting fits a resume profile (0-100) with reasons.

    Additive, explainable scoring:
    - up to 40 points for skill/language/framework/tool overlap (8 each),
    - 25 points for a title match against the profile's target titles
      (12 for a generic technical title when no targets are stated),
    - +20 for entry-level title terms, -25 for senior title terms,
    - +10 when the posting mentions one of the profile's locations.

    Fix over the original: skills were tested as raw substrings of the job
    text, so one-letter skills like "c" or "r" matched virtually every
    posting. Single-word terms now require a whole-token match; substring
    matching is kept only for multi-word or hyphenated terms (e.g.
    "machine learning", "scikit-learn") which can never be single tokens.
    """
    blob = " ".join([job.title, job.department, job.description, job.location]).lower()
    tokens = _tokenize(blob)

    skill_pool = set(profile.skills + profile.languages + profile.frameworks + profile.tools)
    overlap = set()
    for item in skill_pool:
        term = item.lower()
        if term in tokens:
            overlap.add(item)
        elif (" " in term or "-" in term) and term in blob:
            overlap.add(item)

    skill_score = min(40.0, 8.0 * len(overlap))

    role_score = 0.0
    if profile.target_titles and _contains_phrase(job.title, profile.target_titles):
        role_score = 25.0
    elif not profile.target_titles and _contains_phrase(job.title, ["engineer", "analyst", "developer", "scientist"]):
        # No stated targets: partial credit for broadly technical titles.
        role_score = 12.0

    entry_score = 0.0
    title_lower = job.title.lower()
    if any(term in title_lower for term in ENTRY_LEVEL_TERMS):
        entry_score += 20.0
    if any(term in title_lower for term in SENIOR_TERMS):
        entry_score -= 25.0

    location_score = 0.0
    if profile.locations and any(loc.lower() in blob for loc in profile.locations):
        location_score = 10.0

    # Clamp so a heavy senior penalty cannot push the total below zero.
    total = max(0.0, min(100.0, skill_score + role_score + entry_score + location_score))

    reasons = []
    if overlap:
        reasons.append(f"skill overlap ({', '.join(sorted(overlap)[:4])})")
    if role_score > 0:
        reasons.append("role alignment")
    if entry_score > 0:
        reasons.append("entry-level title")
    if entry_score < 0:
        reasons.append("senior-level penalty")
    if location_score > 0:
        reasons.append("location fit")
    if not reasons:
        reasons.append("limited overlap but still relevant board")

    return JobMatch(
        company=job.company,
        title=job.title,
        location=job.location,
        url=job.url,
        score=round(total, 2),
        explanation="; ".join(reasons),
        ats=job.ats,
    )
72
+
73
+
74
def rank_companies(matches: List[JobMatch]) -> List[CompanyRanking]:
    """Aggregate per-job matches into one ranked row per company.

    A company's score is the mean of its top five job scores; match_count
    counts only jobs scoring at least 20. Results are sorted best-first.
    """
    grouped: Dict[str, List[JobMatch]] = defaultdict(list)
    for job_match in matches:
        grouped[job_match.company].append(job_match)

    rankings: List[CompanyRanking] = []
    for company, company_matches in grouped.items():
        best_first = sorted(company_matches, key=lambda m: m.score, reverse=True)
        top_five = best_first[:5]
        average = sum(m.score for m in top_five) / len(top_five)
        strong_matches = [m for m in company_matches if m.score >= 20]
        leader = top_five[0] if top_five else None

        rankings.append(
            CompanyRanking(
                company=company,
                company_score=round(average, 2),
                match_count=len(strong_matches),
                best_role=leader.title if leader else "",
                ats=leader.ats if leader else "unknown",
                explanation=leader.explanation if leader else "",
            )
        )

    rankings.sort(key=lambda ranking: ranking.company_score, reverse=True)
    return rankings