Siggmoid Cursor committed on
Commit
d2b7a80
·
0 Parent(s):

Deploy ATS Intelligence Engine to Hugging Face Space

Browse files

Co-authored-by: Cursor <cursoragent@cursor.com>

.dockerignore ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __pycache__/
2
+ *.pyc
3
+ *.pyo
4
+ .env
5
+ .git
6
+ .gitignore
7
+ __pycache__
8
+ **/__pycache__
9
+ *.md
10
+ images/
11
+ terminals/
12
+ .vscode/
13
+ .idea/
.gitattributes ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.png filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ *__pycache__
2
+ *.pyc
3
+ *.pyo
4
+ .env
Dockerfile ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Hugging Face Spaces Docker: https://huggingface.co/docs/hub/spaces-sdks-docker
2
+ FROM python:3.11-slim
3
+
4
+ # Required for Hugging Face Spaces Dev Mode
5
+ RUN useradd -m -u 1000 user
6
+
7
+ WORKDIR /app
8
+
9
+ # System deps for PyMuPDF
10
+ RUN apt-get update && apt-get install -y --no-install-recommends \
11
+ libglib2.0-0 \
12
+ libsm6 \
13
+ libxrender1 \
14
+ libxext6 \
15
+ && rm -rf /var/lib/apt/lists/*
16
+
17
+ ENV HF_HOME=/home/user/.cache/huggingface \
18
+ PORT=7860
19
+
20
+ RUN mkdir -p ${HF_HOME} && chown -R user:user /home/user
21
+
22
+ COPY --chown=user requirements.txt requirements.txt
23
+ RUN pip install --no-cache-dir -r requirements.txt
24
+
25
+ # Pre-download embedding model at build time (faster Space cold start)
26
+ RUN su - user -c "python -c \"from sentence_transformers import SentenceTransformer; SentenceTransformer('all-MiniLM-L6-v2')\""
27
+
28
+ COPY --chown=user . /app
29
+
30
+ USER user
31
+ ENV PATH=/home/user/.local/bin:$PATH
32
+
33
+ EXPOSE 7860
34
+
35
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
Frontend/app.js ADDED
@@ -0,0 +1,283 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
// ── State ──────────────────────────────────────────────────────────────────
// Extracted PDF text per input slot; stays null until a PDF has been processed.
const state = { resume: null, jd: null };

// Backend base URL. The empty string targets the page's own origin (nginx setup);
// switch to 'http://localhost:8000' when talking to a local backend directly.
//const API = 'http://localhost:8000'; // for localhost
const API = ''; //for nginx

// Active input mode for each slot: 'pdf' | 'text'.
let resumeMode = 'pdf';
let jdMode = 'pdf';

// Point pdf.js at its worker script (same 3.11.174 release as the pdf.min.js
// loaded by index.html).
pdfjsLib.GlobalWorkerOptions.workerSrc =
  'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.11.174/pdf.worker.min.js';
11
+
12
+ // ── PDF Extraction ─────────────────────────────────────────────────────────
13
/**
 * Extracts the plain text of every page of a PDF using pdf.js.
 *
 * @param {File|Blob} file - PDF to read; only its `arrayBuffer()` method is used.
 * @returns {Promise<string>} Text of all pages in order: the items of a page are
 *   joined with single spaces, pages are separated by a newline, and the result
 *   is trimmed.
 * @throws Rejects with whatever pdf.js raises when the document cannot be opened
 *   or a page cannot be read.
 */
async function extractTextFromPDF(file) {
  const data = await file.arrayBuffer();
  const loadingTask = pdfjsLib.getDocument({ data });
  try {
    const pdf = await loadingTask.promise;
    const pages = [];
    for (let pageNo = 1; pageNo <= pdf.numPages; pageNo++) {
      const page = await pdf.getPage(pageNo);
      const content = await page.getTextContent();
      pages.push(content.items.map(item => item.str).join(' '));
    }
    return pages.join('\n').trim();
  } finally {
    // Release the document and its worker-side resources whether extraction
    // succeeded or failed; otherwise every upload stays resident in memory.
    await loadingTask.destroy();
  }
}
25
+
26
+ // ── File Handling ──────────────────────────────────────────────────────────
27
/**
 * Handles a file chosen or dropped for the given slot: validates it, extracts
 * its text with extractTextFromPDF() and stores the result in `state[type]`.
 *
 * Any previously loaded document for the slot is discarded first, so a rejected
 * or unreadable file can never leave stale text behind for analyze() to use.
 *
 * @param {string} type - Slot id prefix ('resume' or 'jd') used to find the DOM nodes.
 * @param {File} file - Candidate file; must be a PDF of at most 10 MB.
 * @returns {Promise<void>} Resolves once the UI reflects success or failure;
 *   extraction errors are reported through setStatus() rather than thrown.
 */
async function handleFile(type, file) {
  const previewBtn = document.getElementById(type + 'PreviewBtn');
  const previewDiv = document.getElementById(type + 'Preview');
  const chip = document.getElementById(type + 'Chip');
  const dropZone = document.getElementById(type + 'DropZone');

  // Invalidate the previous document before looking at the new one.
  state[type] = null;
  chip.style.display = 'none';
  dropZone.classList.remove('has-file');
  // Going through togglePreview() also restores the button label.
  if (previewDiv.style.display !== 'none') togglePreview(type);
  previewBtn.style.display = 'none';
  previewDiv.textContent = '';

  // Some platforms report an empty MIME type for PDFs; fall back to the extension.
  const isPdf = Boolean(file) && (
    file.type === 'application/pdf' ||
    (!file.type && /\.pdf$/i.test(file.name || ''))
  );
  if (!isPdf) {
    setStatus(type, 'Only PDF files are supported.', true);
    return;
  }
  if (file.size > 10 * 1024 * 1024) {
    setStatus(type, 'File exceeds 10MB limit.', true);
    return;
  }

  document.getElementById(type + 'ChipName').textContent = file.name;
  chip.style.display = 'flex';
  dropZone.classList.add('has-file');

  setStatus(type, '⏳ Extracting text from PDF...');
  try {
    const text = await extractTextFromPDF(file);
    // Fewer than 30 characters is treated as "nothing usable" (e.g. a scanned PDF).
    if (!text || text.length < 30) {
      setStatus(type, '⚠️ Could not extract enough text. Is this a scanned PDF?', true);
      return;
    }
    state[type] = text;
    setStatus(type, `✓ Extracted ${text.length.toLocaleString()} characters from ${file.name}`);

    // The preview shows at most the first 1200 characters.
    previewDiv.textContent = text.slice(0, 1200) + (text.length > 1200 ? '\n\n… (truncated)' : '');
    previewBtn.style.display = 'inline-block';
  } catch (err) {
    const reason = err && err.message ? err.message : String(err);
    setStatus(type, '✕ Failed to read PDF: ' + reason, true);
  }
}
62
+
63
/**
 * Writes a message into the slot's extraction status line.
 *
 * @param {string} type - Slot id prefix ('resume' or 'jd').
 * @param {string} msg - Text to display.
 * @param {boolean} [isError=false] - Adds the `error` class when true.
 */
function setStatus(type, msg, isError = false) {
  const statusEl = document.getElementById(type + 'Status');
  const classNames = ['extract-status'];
  if (isError) classNames.push('error');
  statusEl.textContent = msg;
  statusEl.className = classNames.join(' ');
}
68
+
69
/**
 * Resets a slot's PDF upload: forgets the extracted text, empties the file
 * input, and hides the chip, status line and preview.
 *
 * @param {string} type - Slot id prefix ('resume' or 'jd').
 */
function clearFile(type) {
  state[type] = null;
  // Emptying the input lets the same file be chosen again and fire `change`.
  document.getElementById(type + 'File').value = '';
  document.getElementById(type + 'Chip').style.display = 'none';
  document.getElementById(type + 'DropZone').classList.remove('has-file');
  document.getElementById(type + 'Status').textContent = '';

  const previewBtn = document.getElementById(type + 'PreviewBtn');
  const previewDiv = document.getElementById(type + 'Preview');
  // Close an open preview through togglePreview() so the button label is
  // restored too; hiding the panel directly would leave it on "Hide preview".
  if (previewDiv.style.display !== 'none') togglePreview(type);
  previewBtn.style.display = 'none';
  previewDiv.style.display = 'none';
  // Do not keep the removed document's text in the DOM.
  previewDiv.textContent = '';
}
78
+
79
/**
 * Shows or hides the extracted-text preview of a slot and updates the label of
 * its toggle button to match.
 *
 * @param {string} type - Slot id prefix ('resume' or 'jd').
 */
function togglePreview(type) {
  const panel = document.getElementById(type + 'Preview');
  const button = document.getElementById(type + 'PreviewBtn');

  if (panel.style.display !== 'none') {
    // Currently visible: hide and offer to show it again.
    panel.style.display = 'none';
    button.textContent = type === 'resume' ? '👁 Preview resume text' : '👁 Preview JD text';
  } else {
    panel.style.display = 'block';
    button.textContent = '🙈 Hide preview';
  }
}
88
+
89
+ // ── Drop Zone Setup ────────────────────────────────────────────────────────
90
/**
 * Wires up a slot's drop zone: clicking opens the file picker, and both picking
 * and dropping a file hand it to handleFile().
 *
 * @param {string} type - Slot id prefix ('resume' or 'jd').
 */
function setupDropZone(type) {
  const zone = document.getElementById(type + 'DropZone');
  const input = document.getElementById(type + 'File');
  const markDragOver = active => zone.classList.toggle('drag-over', active);

  zone.addEventListener('click', event => {
    // Clicks on the file chip or on the "browse file" link are not zone clicks.
    const ignored =
      event.target.closest('.file-chip') || event.target.classList.contains('drop-link');
    if (!ignored) input.click();
  });

  input.addEventListener('change', () => {
    const picked = input.files[0];
    if (picked) handleFile(type, picked);
  });

  zone.addEventListener('dragover', event => {
    event.preventDefault();
    markDragOver(true);
  });
  zone.addEventListener('dragleave', () => markDragOver(false));
  zone.addEventListener('drop', event => {
    event.preventDefault();
    markDragOver(false);
    const dropped = event.dataTransfer.files[0];
    if (dropped) handleFile(type, dropped);
  });
}
112
+
113
// Activate the drop zones of both input slots.
for (const slot of ['resume', 'jd']) {
  setupDropZone(slot);
}
115
+
116
+ // ── Mode Toggle (shared for both resume and jd) ────────────────────────────
117
/**
 * Switches a slot between PDF-upload and paste-text input.
 *
 * Records the new mode, shows the matching panel, highlights the matching toggle
 * button, and discards the input of the mode being left: the textarea is emptied
 * when switching to 'pdf', the uploaded file is cleared otherwise.
 *
 * @param {string} type - Slot id prefix ('resume' or 'jd').
 * @param {string} mode - 'pdf' or 'text'.
 */
function switchMode(type, mode) {
  if (type === 'resume') resumeMode = mode;
  else jdMode = mode;

  const isPdf = mode === 'pdf';
  const isText = mode === 'text';

  document.getElementById(type + 'PdfMode').style.display = isPdf ? 'block' : 'none';
  document.getElementById(type + 'TextMode').style.display = isText ? 'block' : 'none';
  document.getElementById(type + 'TogglePdf').classList.toggle('active', isPdf);
  document.getElementById(type + 'ToggleText').classList.toggle('active', isText);

  if (isPdf) {
    document.getElementById(type + 'Textarea').value = '';
    updateCharCount(type);
  } else {
    clearFile(type);
  }
}
135
+
136
+ // ── Get text from whichever mode is active ─────────────────────────────────
137
/**
 * Returns the text to analyze for a slot, taken from whichever input mode is
 * active.
 *
 * @param {string} type - Slot id prefix ('resume' or 'jd').
 * @returns {string|null} The trimmed textarea content in text mode, otherwise
 *   the text stored in `state[type]` (null when nothing has been extracted).
 */
function getText(type) {
  const activeMode = type === 'resume' ? resumeMode : jdMode;
  return activeMode === 'text'
    ? document.getElementById(type + 'Textarea').value.trim()
    : state[type];
}
144
+
145
+ // ── Character Counters ─────────────────────────────────────────────────────
146
/**
 * Refreshes the "N characters" counter below a slot's textarea.
 * A missing textarea counts as 0 characters; a missing counter is a no-op.
 *
 * @param {string} type - Slot id prefix ('resume' or 'jd').
 */
function updateCharCount(type) {
  const textarea = document.getElementById(type + 'Textarea');
  const len = textarea ? textarea.value.length : 0;
  const counter = document.getElementById(type + 'CharCount');
  if (!counter) return;

  const plural = len !== 1 ? 's' : '';
  counter.textContent = `${len.toLocaleString()} character${plural}`;
}
152
+
153
// Keep each slot's character counter in sync while the user types.
for (const slot of ['resume', 'jd']) {
  document
    .getElementById(slot + 'Textarea')
    .addEventListener('input', () => updateCharCount(slot));
}
155
+
156
+ // ── Analyze ────────────────────────────────────────────────────────────────
157
/**
 * Validates both inputs, sends them to the backend and renders the scores.
 *
 * Transport: when at least one slot is in PDF mode and has a file selected in
 * its file input, the request is a multipart POST to `${API}/predict/ats/upload`
 * (the other slot is sent as text); otherwise both texts are sent as JSON to
 * `${API}/predict/ats`.
 *
 * Validation, network and HTTP failures are shown through showError(); the
 * button and spinner are restored once a request has been attempted.
 *
 * @returns {Promise<void>}
 */
async function analyze() {
  const btn = document.getElementById('analyzeBtn');
  const spinner = document.getElementById('spinner');
  const btnLabel = document.getElementById('btnLabel');
  const statusText = document.getElementById('statusText');
  const errorBox = document.getElementById('errorBox');
  const results = document.getElementById('results');

  errorBox.classList.remove('show');
  results.classList.remove('show');

  // Both inputs need at least 30 characters of text before anything is sent.
  const resumeText = getText('resume');
  if (!resumeText || resumeText.length < 30) {
    showError(
      resumeMode === 'pdf'
        ? 'Please upload and process your resume PDF first.'
        : 'Please paste at least a few lines of resume text.'
    );
    return;
  }

  const jdText = getText('jd');
  if (!jdText || jdText.length < 30) {
    showError(
      jdMode === 'pdf'
        ? 'Please upload and process the job description PDF first.'
        : 'Please paste at least a few lines of job description text.'
    );
    return;
  }

  btn.disabled = true;
  spinner.classList.add('active');
  btnLabel.textContent = 'Analyzing...';
  statusText.textContent = 'Sending to backend...';

  // fetch() rejects only when no response was obtained at all, so mapping the
  // rejection right at the call site is more reliable than inspecting message
  // text later (browsers word that error differently). With an empty API the
  // request goes to the page's own origin, so name that in the message.
  const send = (url, init) =>
    fetch(url, init).catch(() => {
      throw new Error(
        `Cannot reach backend at ${API || window.location.origin}. Is your FastAPI server running?`
      );
    });

  try {
    const resumeFile = document.getElementById('resumeFile').files[0];
    const jdFile = document.getElementById('jdFile').files[0];
    const sendResumePdf = resumeMode === 'pdf' && Boolean(resumeFile);
    const sendJdPdf = jdMode === 'pdf' && Boolean(jdFile);

    let res;
    if (sendResumePdf || sendJdPdf) {
      // At least one PDF — send as multipart/form-data
      const form = new FormData();
      if (sendResumePdf) form.append('resume_pdf', resumeFile);
      else form.append('resume_text', resumeText);
      if (sendJdPdf) form.append('jd_pdf', jdFile);
      else form.append('job_description', jdText);
      res = await send(`${API}/predict/ats/upload`, { method: 'POST', body: form });
    } else {
      // Both plain text — send as JSON
      res = await send(`${API}/predict/ats`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ resume_text: resumeText, job_description: jdText })
      });
    }

    if (!res.ok) {
      const err = await res.json().catch(() => ({}));
      const detail = err ? err.detail : undefined;
      // `detail` is not always a string (it can be a list or object);
      // serialize those instead of showing "[object Object]".
      let reason = `HTTP ${res.status}`;
      if (typeof detail === 'string' && detail) reason = detail;
      else if (detail) reason = JSON.stringify(detail);
      throw new Error(reason);
    }

    const data = await res.json();
    render(data);
    statusText.textContent = 'Done ✓';
  } catch (e) {
    showError(e && e.message ? e.message : String(e));
    statusText.textContent = '';
  } finally {
    btn.disabled = false;
    spinner.classList.remove('active');
    btnLabel.textContent = 'Run Analysis';
  }
}
236
+
237
+ // ── Render Results ─────────────────────────────────────────────────────────
238
/**
 * Displays an analysis response: the three scores, the feedback text and, after
 * a short delay, the progress bars.
 *
 * @param {object} data - Backend response; reads `semantic_score`,
 *   `keyword_score`, `final_ats_score` and `summary`. Scores that are missing or
 *   not numeric are shown as 0 instead of raising.
 */
function render(data) {
  const payload = data || {};

  // Coerce first: calling toFixed directly on a numeric string would throw.
  const toScore = raw => {
    const n = Number(raw);
    return Number.isFinite(n) ? +n.toFixed(1) : 0;
  };
  const s = toScore(payload.semantic_score);
  const k = toScore(payload.keyword_score);
  const f = toScore(payload.final_ats_score);

  // s, k and f are guaranteed to be numbers here, so interpolating them into
  // innerHTML cannot inject markup.
  document.getElementById('finalScore').innerHTML = `${f}<span class="score-unit">/100</span>`;
  document.getElementById('semanticScore').innerHTML = `${s}<span class="score-unit">/100</span>`;
  document.getElementById('keywordScore').innerHTML = `${k}<span class="score-unit">/100</span>`;
  document.getElementById('feedbackBody').textContent = payload.summary || 'No feedback returned.';

  document.getElementById('results').classList.add('show');

  // Bars are filled 60 ms after the results section is shown.
  setTimeout(() => {
    setBar('finalBar', 'finalPct', 'finalMiniBar', f);
    setBar('semanticBar', 'semanticPct', 'semanticMiniBar', s);
    setBar('keywordBar', 'keywordPct', 'keywordMiniBar', k);
  }, 60);
}
256
+
257
/**
 * Sets the fill width of a score bar and its mini bar, and writes the
 * percentage label.
 *
 * @param {string} barId - Id of the large bar fill element.
 * @param {string} pctId - Id of the label element.
 * @param {string} miniId - Id of the mini bar fill element.
 * @param {number} val - Score; widths are clamped to 0–100, and a non-numeric
 *   value is treated as 0.
 */
function setBar(barId, pctId, miniId, val) {
  const n = Number(val);
  const shown = Number.isFinite(n) ? n : 0;
  // A negative or NaN width is invalid CSS and would be ignored, leaving the
  // bar at whatever width the previous analysis set.
  const width = Math.min(Math.max(shown, 0), 100) + '%';

  document.getElementById(barId).style.width = width;
  document.getElementById(miniId).style.width = width;
  document.getElementById(pctId).textContent = shown + '%';
}
263
+
264
+ // ── Utilities ──────────────────────────────────────────────────────────────
265
/**
 * Displays a message in the page's error box.
 *
 * @param {string} msg - Text to show.
 */
function showError(msg) {
  const errorBox = document.getElementById('errorBox');
  Object.assign(errorBox, { textContent: msg });
  errorBox.classList.add('show');
}
270
+
271
/**
 * Resets the whole form: clears both uploads and textareas, returns both slots
 * to PDF mode, and hides the results, the error box and the status text.
 */
function clearAll() {
  const slots = ['resume', 'jd'];

  slots.forEach(slot => clearFile(slot));
  slots.forEach(slot => {
    document.getElementById(slot + 'Textarea').value = '';
  });
  slots.forEach(slot => updateCharCount(slot));
  slots.forEach(slot => switchMode(slot, 'pdf'));

  for (const id of ['results', 'errorBox']) {
    document.getElementById(id).classList.remove('show');
  }
  document.getElementById('statusText').textContent = '';
}
Frontend/index.html ADDED
@@ -0,0 +1,191 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8" />
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0"/>
6
+ <title>ATS Resume Scorer</title>
7
+ <link href="https://fonts.googleapis.com/css2?family=Syne:wght@400;500;600;700;800&family=DM+Mono:wght@300;400;500&display=swap" rel="stylesheet"/>
8
+ <link rel="stylesheet" href="style.css"/>
9
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.11.174/pdf.min.js"></script>
10
+ </head>
11
+ <body>
12
+
13
+ <div class="glow-orb a"></div>
14
+ <div class="glow-orb b"></div>
15
+
16
+ <div class="container">
17
+
18
+ <header>
19
+ <div class="logo-mark">
20
+ <div class="logo-icon"></div>
21
+ <span class="logo-text">ATS<span>Score</span></span>
22
+ </div>
23
+ <span class="badge">v2.0 · PDF-Powered</span>
24
+ </header>
25
+
26
+ <section class="hero">
27
+ <h1>Resume ATS <span class="line2">Intelligence Engine</span></h1>
28
+ <p>Upload your resume and job description as PDFs. We extract the text, analyze semantic similarity, and score your ATS compatibility.</p>
29
+ </section>
30
+
31
+ <div class="input-grid">
32
+ <!-- Resume input: PDF upload or pasted text -->
33
+ <div class="field-wrap">
34
+ <label class="field-label">Resume PDF</label>
35
+
36
+ <div class="input-toggle">
37
+ <button class="toggle-btn active" id="resumeTogglePdf" onclick="switchMode('resume', 'pdf')">📄 PDF Upload</button>
38
+ <button class="toggle-btn" id="resumeToggleText" onclick="switchMode('resume', 'text')">✏️ Paste Text</button>
39
+ </div>
40
+
41
+ <!-- PDF mode -->
42
+ <div id="resumePdfMode">
43
+ <div class="drop-zone" id="resumeDropZone">
44
+ <input type="file" id="resumeFile" accept=".pdf" hidden />
45
+ <div class="drop-icon">
46
+ <svg width="28" height="28" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.5">
47
+ <path d="M14 2H6a2 2 0 0 0-2 2v16a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2V8z"/>
48
+ <polyline points="14 2 14 8 20 8"/>
49
+ <line x1="12" y1="18" x2="12" y2="12"/>
50
+ <line x1="9" y1="15" x2="15" y2="15"/>
51
+ </svg>
52
+ </div>
53
+ <p class="drop-title">Drop your resume here</p>
54
+ <p class="drop-sub">or <span class="drop-link" onclick="document.getElementById('resumeFile').click()">browse file</span></p>
55
+ <p class="drop-hint">PDF only · Max 10MB</p>
56
+ <div class="file-chip" id="resumeChip" style="display:none;">
57
+ <span class="chip-icon">📄</span>
58
+ <span class="chip-name" id="resumeChipName"></span>
59
+ <button class="chip-remove" onclick="clearFile('resume')">✕</button>
60
+ </div>
61
+ <div class="extract-status" id="resumeStatus"></div>
62
+ </div>
63
+ </div>
64
+
65
+ <!-- Text mode -->
66
+ <div id="resumeTextMode" style="display:none;">
67
+ <textarea id="resumeTextarea" placeholder="Paste your full resume content here — skills, experience, education, projects..."></textarea>
68
+ <div class="char-count" id="resumeCharCount">0 characters</div>
69
+ </div>
70
+ </div>
71
+
72
+ <div class="field-wrap">
73
+ <label class="field-label jd-label">Job Description</label>
74
+
75
+ <!-- Toggle -->
76
+ <div class="input-toggle">
77
+ <button class="toggle-btn active" id="jdTogglePdf" onclick="switchMode('jd', 'pdf')">📄 PDF Upload</button>
78
+ <button class="toggle-btn" id="jdToggleText" onclick="switchMode('jd', 'text')">✏️ Paste Text</button>
79
+ </div>
80
+
81
+ <!-- PDF mode -->
82
+ <div id="jdPdfMode">
83
+ <div class="drop-zone jd-zone" id="jdDropZone">
84
+ <input type="file" id="jdFile" accept=".pdf" hidden />
85
+ <div class="drop-icon">
86
+ <svg width="28" height="28" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.5">
87
+ <path d="M14 2H6a2 2 0 0 0-2 2v16a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2V8z"/>
88
+ <polyline points="14 2 14 8 20 8"/>
89
+ <line x1="16" y1="13" x2="8" y2="13"/>
90
+ <line x1="16" y1="17" x2="8" y2="17"/>
91
+ <polyline points="10 9 9 9 8 9"/>
92
+ </svg>
93
+ </div>
94
+ <p class="drop-title">Drop job description here</p>
95
+ <p class="drop-sub">or <span class="drop-link" onclick="document.getElementById('jdFile').click()">browse file</span></p>
96
+ <p class="drop-hint">PDF only · Max 10MB</p>
97
+ <div class="file-chip" id="jdChip" style="display:none;">
98
+ <span class="chip-icon">📋</span>
99
+ <span class="chip-name" id="jdChipName"></span>
100
+ <button class="chip-remove" onclick="clearFile('jd')">✕</button>
101
+ </div>
102
+ <div class="extract-status" id="jdStatus"></div>
103
+ </div>
104
+ </div>
105
+
106
+ <!-- Text mode -->
107
+ <div id="jdTextMode" style="display:none;">
108
+ <textarea id="jdTextarea" placeholder="Paste the job description here — requirements, responsibilities, qualifications..."></textarea>
109
+ <div class="char-count" id="jdCharCount">0 characters</div>
110
+ </div>
111
+ </div>
112
+
113
+ </div><!-- /.input-grid -->
114
+
115
+ <div class="preview-section">
116
+ <div class="preview-col">
117
+ <button class="btn-preview" id="resumePreviewBtn" onclick="togglePreview('resume')" style="display:none;">👁 Preview resume text</button>
118
+ <div class="text-preview" id="resumePreview" style="display:none;"></div>
119
+ </div>
120
+ <div class="preview-col">
121
+ <button class="btn-preview" id="jdPreviewBtn" onclick="togglePreview('jd')" style="display:none;">👁 Preview JD text</button>
122
+ <div class="text-preview" id="jdPreview" style="display:none;"></div>
123
+ </div>
124
+ </div>
125
+
126
+ <div class="action-row">
127
+ <button class="btn-analyze" id="analyzeBtn" onclick="analyze()">
128
+ <div class="spinner" id="spinner"></div>
129
+ <span id="btnLabel">Run Analysis</span>
130
+ </button>
131
+ <button class="btn-clear" onclick="clearAll()">Clear All</button>
132
+ <span class="status-text" id="statusText"></span>
133
+ </div>
134
+
135
+ <div class="error-box" id="errorBox"></div>
136
+
137
+ <div id="results">
138
+ <div class="results-header">
139
+ <h2>Analysis Results</h2>
140
+ <div class="divider"></div>
141
+ </div>
142
+ <div class="score-grid">
143
+ <div class="score-card main">
144
+ <div class="score-label">Final ATS Score</div>
145
+ <div class="score-value" id="finalScore">—<span class="score-unit">/100</span></div>
146
+ <div class="score-bar-mini"><div class="score-bar-mini-fill bar-purple" id="finalMiniBar"></div></div>
147
+ </div>
148
+ <div class="score-card">
149
+ <div class="score-label">Semantic Match</div>
150
+ <div class="score-value" id="semanticScore">—<span class="score-unit">/100</span></div>
151
+ <div class="score-bar-mini"><div class="score-bar-mini-fill bar-teal" id="semanticMiniBar"></div></div>
152
+ </div>
153
+ <div class="score-card">
154
+ <div class="score-label">Keyword Match</div>
155
+ <div class="score-value" id="keywordScore">—<span class="score-unit">/100</span></div>
156
+ <div class="score-bar-mini"><div class="score-bar-mini-fill bar-gray" id="keywordMiniBar"></div></div>
157
+ </div>
158
+ </div>
159
+ <div class="bars-card">
160
+ <div class="bars-title">Score Breakdown</div>
161
+ <div class="bar-item">
162
+ <div class="bar-meta"><span class="bar-name">Final ATS Score</span><span class="bar-pct" id="finalPct">0%</span></div>
163
+ <div class="bar-track"><div class="bar-fill bar-purple" id="finalBar"></div></div>
164
+ </div>
165
+ <div class="bar-item">
166
+ <div class="bar-meta"><span class="bar-name">Semantic Similarity</span><span class="bar-pct" id="semanticPct">0%</span></div>
167
+ <div class="bar-track"><div class="bar-fill bar-teal" id="semanticBar"></div></div>
168
+ </div>
169
+ <div class="bar-item">
170
+ <div class="bar-meta"><span class="bar-name">Keyword Match</span><span class="bar-pct" id="keywordPct">0%</span></div>
171
+ <div class="bar-track"><div class="bar-fill bar-gray" id="keywordBar"></div></div>
172
+ </div>
173
+ </div>
174
+ <div class="feedback-card">
175
+ <div class="feedback-header">
176
+ <span class="feedback-title">AI Feedback</span>
177
+ <span class="feedback-model">allenai/OLMo-3-7B</span>
178
+ </div>
179
+ <div class="feedback-body" id="feedbackBody">Generating feedback...</div>
180
+ </div>
181
+ </div>
182
+
183
+ <footer>
184
+ <p>Powered by FastAPI · SentenceTransformers · OLMo-3-7B · PDF.js</p>
185
+ <p>localhost:8000/predict/ats</p>
186
+ </footer>
187
+
188
+ </div>
189
+ <script src="app.js"></script>
190
+ </body>
191
+ </html>
Frontend/style.css ADDED
@@ -0,0 +1,367 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
2
+
3
+ :root {
4
+ --bg: #0a0a0f;
5
+ --surface: #12121a;
6
+ --surface2: #1a1a26;
7
+ --border: rgba(255,255,255,0.07);
8
+ --border-bright: rgba(255,255,255,0.15);
9
+ --accent: #7c6fff;
10
+ --accent2: #3dffc0;
11
+ --accent3: #ff6b6b;
12
+ --text: #f0eefd;
13
+ --muted: #7a7890;
14
+ --font-display: 'Syne', sans-serif;
15
+ --font-mono: 'DM Mono', monospace;
16
+ }
17
+
18
+ html { scroll-behavior: smooth; }
19
+
20
+ body {
21
+ background: var(--bg);
22
+ color: var(--text);
23
+ font-family: var(--font-display);
24
+ min-height: 100vh;
25
+ overflow-x: hidden;
26
+ }
27
+
28
+ body::before {
29
+ content: '';
30
+ position: fixed; inset: 0;
31
+ background-image:
32
+ linear-gradient(rgba(124,111,255,0.04) 1px, transparent 1px),
33
+ linear-gradient(90deg, rgba(124,111,255,0.04) 1px, transparent 1px);
34
+ background-size: 40px 40px;
35
+ pointer-events: none;
36
+ z-index: 0;
37
+ }
38
+
39
+ .glow-orb { position: fixed; border-radius: 50%; filter: blur(120px); pointer-events: none; z-index: 0; }
40
+ .glow-orb.a { width: 500px; height: 500px; background: rgba(124,111,255,0.12); top: -100px; left: -100px; }
41
+ .glow-orb.b { width: 400px; height: 400px; background: rgba(61,255,192,0.07); bottom: -100px; right: -100px; }
42
+
43
+ .container { position: relative; z-index: 1; max-width: 960px; margin: 0 auto; padding: 0 24px 80px; }
44
+
45
+ /* HEADER */
46
+ header {
47
+ padding: 48px 0 40px;
48
+ display: flex; align-items: flex-start; justify-content: space-between;
49
+ border-bottom: 1px solid var(--border);
50
+ margin-bottom: 48px;
51
+ }
52
+ .logo-mark { display: flex; align-items: center; gap: 12px; }
53
+ .logo-icon {
54
+ width: 36px; height: 36px;
55
+ border: 1.5px solid var(--accent);
56
+ border-radius: 8px;
57
+ display: flex; align-items: center; justify-content: center;
58
+ position: relative; overflow: hidden;
59
+ }
60
+ .logo-icon::after {
61
+ content: '';
62
+ position: absolute;
63
+ width: 18px; height: 18px;
64
+ background: var(--accent);
65
+ clip-path: polygon(0 100%, 50% 0, 100% 100%);
66
+ opacity: 0.8;
67
+ }
68
+ .logo-text { font-size: 15px; font-weight: 700; letter-spacing: 0.12em; text-transform: uppercase; color: var(--text); }
69
+ .logo-text span { color: var(--accent); }
70
+ .badge {
71
+ background: rgba(124,111,255,0.12);
72
+ border: 1px solid rgba(124,111,255,0.25);
73
+ color: var(--accent);
74
+ font-family: var(--font-mono);
75
+ font-size: 11px; padding: 5px 12px;
76
+ border-radius: 999px; letter-spacing: 0.05em;
77
+ }
78
+
79
+ /* HERO */
80
+ .hero { margin-bottom: 48px; }
81
+ .hero h1 { font-size: clamp(36px, 5vw, 56px); font-weight: 800; line-height: 1.05; letter-spacing: -0.02em; margin-bottom: 16px; }
82
+ .hero h1 .line2 { display: block; background: linear-gradient(90deg, var(--accent), var(--accent2)); -webkit-background-clip: text; -webkit-text-fill-color: transparent; }
83
+ .hero p { font-family: var(--font-mono); font-size: 14px; color: var(--muted); max-width: 520px; line-height: 1.7; }
84
+
85
+ /* INPUT GRID */
86
+ .input-grid { display: grid; grid-template-columns: 1fr 1fr; gap: 16px; margin-bottom: 16px; }
87
+ .field-wrap { display: flex; flex-direction: column; gap: 8px; }
88
+ .field-label {
89
+ font-family: var(--font-mono); font-size: 11px; font-weight: 500;
90
+ color: var(--muted); text-transform: uppercase; letter-spacing: 0.1em;
91
+ display: flex; align-items: center; gap: 6px;
92
+ }
93
+ .field-label::before { content: ''; display: inline-block; width: 6px; height: 6px; border-radius: 50%; background: var(--accent); }
94
+ .jd-label::before { background: var(--accent2); }
95
+
96
+ /* DROP ZONE */
97
+ .drop-zone {
98
+ background: var(--surface);
99
+ border: 1.5px dashed rgba(124,111,255,0.25);
100
+ border-radius: 14px;
101
+ min-height: 220px;
102
+ display: flex; flex-direction: column; align-items: center; justify-content: center;
103
+ padding: 28px 20px; gap: 6px;
104
+ cursor: pointer;
105
+ transition: border-color 0.2s, background 0.2s;
106
+ text-align: center;
107
+ position: relative;
108
+ }
109
+ .drop-zone:hover { border-color: var(--accent); background: rgba(124,111,255,0.04); }
110
+ .drop-zone.drag-over { border-color: var(--accent); background: rgba(124,111,255,0.08); }
111
+ .drop-zone.has-file { border-style: solid; border-color: rgba(124,111,255,0.4); }
112
+
113
+ .jd-zone { border-color: rgba(61,255,192,0.2); }
114
+ .jd-zone:hover { border-color: var(--accent2); background: rgba(61,255,192,0.04); }
115
+ .jd-zone.drag-over { border-color: var(--accent2); background: rgba(61,255,192,0.06); }
116
+ .jd-zone.has-file { border-color: rgba(61,255,192,0.4); }
117
+
118
+ .drop-icon { color: var(--muted); margin-bottom: 4px; }
119
+ .drop-title { font-size: 14px; font-weight: 600; color: var(--text); }
120
+ .drop-sub { font-family: var(--font-mono); font-size: 12px; color: var(--muted); }
121
+ .drop-link { color: var(--accent); cursor: pointer; text-decoration: underline; }
122
+ .jd-zone .drop-link { color: var(--accent2); }
123
+ .drop-hint { font-family: var(--font-mono); font-size: 11px; color: rgba(122,120,144,0.5); margin-top: 2px; }
124
+
125
+ /* FILE CHIP */
126
+ .file-chip {
127
+ display: flex; align-items: center; gap: 8px;
128
+ background: rgba(124,111,255,0.1);
129
+ border: 1px solid rgba(124,111,255,0.25);
130
+ border-radius: 8px;
131
+ padding: 8px 12px;
132
+ margin-top: 10px;
133
+ width: 100%;
134
+ max-width: 280px;
135
+ }
136
+ .chip-icon { font-size: 16px; }
137
+ .chip-name { font-family: var(--font-mono); font-size: 12px; color: var(--text); flex: 1; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; }
138
+ .chip-remove { background: none; border: none; color: var(--muted); cursor: pointer; font-size: 13px; padding: 0 2px; }
139
+ .chip-remove:hover { color: var(--accent3); }
140
+
141
+ /* EXTRACT STATUS */
142
+ .extract-status { font-family: var(--font-mono); font-size: 11px; color: var(--accent2); margin-top: 6px; min-height: 16px; }
143
+ .extract-status.error { color: var(--accent3); }
144
+
145
+ /* PREVIEW */
146
+ .preview-row { display: flex; gap: 12px; margin-bottom: 12px; flex-wrap: wrap; }
147
+ .btn-preview {
148
+ background: rgba(255,255,255,0.04);
149
+ border: 1px solid var(--border-bright);
150
+ border-radius: 8px;
151
+ padding: 7px 14px;
152
+ font-family: var(--font-mono);
153
+ font-size: 12px;
154
+ color: var(--muted);
155
+ cursor: pointer;
156
+ transition: color 0.2s, border-color 0.2s;
157
+ }
158
+ .btn-preview:hover { color: var(--text); border-color: rgba(255,255,255,0.25); }
159
+
160
+ .text-preview {
161
+ background: var(--surface);
162
+ border: 1px solid var(--border);
163
+ border-radius: 10px;
164
+ padding: 14px 16px;
165
+ font-family: var(--font-mono);
166
+ font-size: 12px;
167
+ color: rgba(240,238,253,0.6);
168
+ line-height: 1.7;
169
+ max-height: 160px;
170
+ overflow-y: auto;
171
+ white-space: pre-wrap;
172
+ margin-bottom: 12px;
173
+ }
174
+
175
+ /* ACTION ROW */
176
+ .action-row { display: flex; align-items: center; gap: 12px; margin-bottom: 40px; margin-top: 8px; }
177
+ .btn-analyze {
178
+ background: var(--accent); color: #fff; border: none; border-radius: 10px;
179
+ padding: 13px 28px; font-family: var(--font-display); font-size: 14px; font-weight: 700;
180
+ letter-spacing: 0.04em; cursor: pointer;
181
+ transition: transform 0.15s, box-shadow 0.15s, opacity 0.15s;
182
+ box-shadow: 0 4px 24px rgba(124,111,255,0.35);
183
+ display: flex; align-items: center; gap: 8px;
184
+ }
185
+ .btn-analyze:hover { transform: translateY(-1px); box-shadow: 0 6px 32px rgba(124,111,255,0.5); }
186
+ .btn-analyze:active { transform: scale(0.98); }
187
+ .btn-analyze:disabled { opacity: 0.45; cursor: not-allowed; transform: none; }
188
+ .btn-clear {
189
+ background: transparent; border: 1px solid var(--border-bright); border-radius: 10px;
190
+ padding: 13px 20px; font-family: var(--font-display); font-size: 14px; font-weight: 500;
191
+ color: var(--muted); cursor: pointer; transition: border-color 0.2s, color 0.2s;
192
+ }
193
+ .btn-clear:hover { border-color: rgba(255,255,255,0.25); color: var(--text); }
194
+ .status-text { font-family: var(--font-mono); font-size: 12px; color: var(--muted); }
195
+
196
+ /* SPINNER */
197
+ .spinner { width: 14px; height: 14px; border: 2px solid rgba(255,255,255,0.3); border-top-color: #fff; border-radius: 50%; animation: spin 0.7s linear infinite; display: none; }
198
+ .spinner.active { display: block; }
199
+ @keyframes spin { to { transform: rotate(360deg); } }
200
+
201
+ /* ERROR */
202
+ .error-box { display: none; background: rgba(255,107,107,0.08); border: 1px solid rgba(255,107,107,0.25); border-radius: 10px; padding: 14px 18px; font-family: var(--font-mono); font-size: 13px; color: var(--accent3); margin-bottom: 24px; }
203
+ .error-box.show { display: block; }
204
+
205
+ /* RESULTS */
206
+ #results { display: none; }
207
+ #results.show { display: block; animation: fadeUp 0.4s ease; }
208
+ @keyframes fadeUp { from { opacity: 0; transform: translateY(16px); } to { opacity: 1; transform: translateY(0); } }
209
+
210
+ .results-header { display: flex; align-items: center; gap: 12px; margin-bottom: 28px; }
211
+ .results-header h2 { font-size: 18px; font-weight: 700; letter-spacing: -0.01em; }
212
+ .results-header .divider { flex: 1; height: 1px; background: var(--border); }
213
+
214
+ /* SCORE CARDS */
215
+ .score-grid { display: grid; grid-template-columns: repeat(3, 1fr); gap: 12px; margin-bottom: 28px; }
216
+ .score-card { background: var(--surface); border: 1px solid var(--border); border-radius: 14px; padding: 20px; position: relative; overflow: hidden; transition: border-color 0.2s; }
217
+ .score-card:hover { border-color: var(--border-bright); }
218
+ .score-card.main { border-color: rgba(124,111,255,0.3); background: linear-gradient(135deg, rgba(124,111,255,0.08), var(--surface)); }
219
+ .score-card.main::before { content: ''; position: absolute; top: 0; right: 0; width: 60px; height: 60px; background: radial-gradient(circle at top right, rgba(124,111,255,0.2), transparent 70%); }
220
+ .score-label { font-family: var(--font-mono); font-size: 10px; font-weight: 500; text-transform: uppercase; letter-spacing: 0.1em; color: var(--muted); margin-bottom: 10px; }
221
+ .score-value { font-size: 42px; font-weight: 800; line-height: 1; letter-spacing: -0.02em; color: var(--text); }
222
+ .score-card.main .score-value { color: var(--accent); }
223
+ .score-unit { font-size: 18px; font-weight: 400; color: var(--muted); margin-left: 2px; }
224
+ .score-bar-mini { margin-top: 14px; height: 3px; background: rgba(255,255,255,0.07); border-radius: 2px; overflow: hidden; }
225
+ .score-bar-mini-fill { height: 100%; border-radius: 2px; width: 0%; transition: width 0.8s cubic-bezier(0.16, 1, 0.3, 1); }
226
+ .bar-purple { background: var(--accent); }
227
+ .bar-teal { background: var(--accent2); }
228
+ .bar-gray { background: rgba(255,255,255,0.4); }
229
+
230
+ /* BARS */
231
+ .bars-card { background: var(--surface); border: 1px solid var(--border); border-radius: 14px; padding: 24px; margin-bottom: 20px; }
232
+ .bars-title { font-family: var(--font-mono); font-size: 11px; font-weight: 500; text-transform: uppercase; letter-spacing: 0.1em; color: var(--muted); margin-bottom: 20px; }
233
+ .bar-item { margin-bottom: 18px; }
234
+ .bar-item:last-child { margin-bottom: 0; }
235
+ .bar-meta { display: flex; justify-content: space-between; align-items: baseline; margin-bottom: 8px; }
236
+ .bar-name { font-size: 13px; font-weight: 600; color: var(--text); }
237
+ .bar-pct { font-family: var(--font-mono); font-size: 13px; color: var(--muted); }
238
+ .bar-track { height: 8px; background: rgba(255,255,255,0.05); border-radius: 4px; overflow: hidden; }
239
+ .bar-fill { height: 100%; border-radius: 4px; width: 0%; transition: width 0.9s cubic-bezier(0.16, 1, 0.3, 1); }
240
+
241
+ /* FEEDBACK */
242
+ .feedback-card { background: var(--surface); border: 1px solid var(--border); border-radius: 14px; padding: 24px; }
243
+ .feedback-header { display: flex; align-items: center; justify-content: space-between; margin-bottom: 20px; padding-bottom: 16px; border-bottom: 1px solid var(--border); }
244
+ .feedback-title { font-size: 14px; font-weight: 700; letter-spacing: 0.02em; display: flex; align-items: center; gap: 8px; }
245
+ .feedback-title::before { content: ''; display: inline-block; width: 8px; height: 8px; background: var(--accent2); border-radius: 50%; animation: pulse 2s ease-in-out infinite; }
246
+ @keyframes pulse { 0%, 100% { opacity: 1; } 50% { opacity: 0.3; } }
247
+ .feedback-model { font-family: var(--font-mono); font-size: 10px; color: var(--muted); background: rgba(255,255,255,0.04); border: 1px solid var(--border); padding: 4px 10px; border-radius: 6px; }
248
+ .feedback-body { font-family: var(--font-mono); font-size: 13px; line-height: 1.85; color: rgba(240,238,253,0.8); white-space: pre-wrap; }
249
+
250
+ /* FOOTER */
251
+ footer { margin-top: 64px; padding-top: 24px; border-top: 1px solid var(--border); display: flex; justify-content: space-between; align-items: center; }
252
+ footer p { font-family: var(--font-mono); font-size: 11px; color: var(--muted); }
253
+
254
+ /* RESPONSIVE */
255
+ @media (max-width: 640px) {
256
+ .input-grid { grid-template-columns: 1fr; }
257
+ .score-grid { grid-template-columns: 1fr; }
258
+ header { flex-direction: column; gap: 16px; }
259
+ }
260
+ /* INPUT MODE TOGGLE */
261
+ .input-toggle {
262
+ display: flex;
263
+ gap: 6px;
264
+ margin-bottom: 10px;
265
+ }
266
+ .toggle-btn {
267
+ background: transparent;
268
+ border: 1px solid var(--border-bright);
269
+ border-radius: 8px;
270
+ padding: 6px 14px;
271
+ font-family: var(--font-mono);
272
+ font-size: 12px;
273
+ color: var(--muted);
274
+ cursor: pointer;
275
+ transition: all 0.2s;
276
+ }
277
+ .toggle-btn:hover { color: var(--text); border-color: rgba(255,255,255,0.25); }
278
+ .toggle-btn.active {
279
+ background: rgba(61,255,192,0.1);
280
+ border-color: rgba(61,255,192,0.35);
281
+ color: var(--accent2);
282
+ }
283
+
284
+ /* JD TEXTAREA (text mode) */
285
+ #jdTextarea {
286
+ background: var(--surface);
287
+ border: 1px solid var(--border);
288
+ border-radius: 12px;
289
+ color: var(--text);
290
+ font-family: var(--font-mono);
291
+ font-size: 13px;
292
+ line-height: 1.7;
293
+ padding: 16px;
294
+ resize: vertical;
295
+ height: 207px;
296
+ width: 100%;
297
+ transition: border-color 0.2s, box-shadow 0.2s;
298
+ outline: none;
299
+ }
300
+ #jdTextarea::placeholder { color: rgba(120,116,145,0.5); }
301
+ #jdTextarea:focus {
302
+ border-color: var(--accent2);
303
+ box-shadow: 0 0 0 3px rgba(61,255,192,0.08);
304
+ }
305
+
306
+ .char-count {
307
+ font-family: var(--font-mono);
308
+ font-size: 11px;
309
+ color: var(--muted);
310
+ text-align: right;
311
+ margin-top: 6px;
312
+ }
313
+ /* RESUME TEXTAREA (text mode) — mirrors JD textarea with purple accent */
314
+ #resumeTextarea {
315
+ background: var(--surface);
316
+ border: 1px solid var(--border);
317
+ border-radius: 12px;
318
+ color: var(--text);
319
+ font-family: var(--font-mono);
320
+ font-size: 13px;
321
+ line-height: 1.7;
322
+ padding: 16px;
323
+ resize: vertical;
324
+ height: 207px;
325
+ width: 100%;
326
+ transition: border-color 0.2s, box-shadow 0.2s;
327
+ outline: none;
328
+ }
329
+ #resumeTextarea::placeholder { color: rgba(120,116,145,0.5); }
330
+ #resumeTextarea:focus {
331
+ border-color: var(--accent);
332
+ box-shadow: 0 0 0 3px rgba(124,111,255,0.08);
333
+ }
334
+
335
+ /* PREVIEW SECTION — side-by-side, aligned to input grid columns */
336
+ .preview-section {
337
+ display: grid;
338
+ grid-template-columns: 1fr 1fr;
339
+ gap: 16px;
340
+ margin-top: 4px;
341
+ margin-bottom: 16px;
342
+ }
343
+ .preview-col {
344
+ display: flex;
345
+ flex-direction: column;
346
+ gap: 8px;
347
+ min-width: 0;
348
+ }
349
+ .preview-col .btn-preview { align-self: flex-start; }
350
+ .preview-col .text-preview { margin-bottom: 0; }
351
+
352
+ @media (max-width: 640px) {
353
+ .preview-section { grid-template-columns: 1fr; }
354
+ }
355
+
356
+ /* FOOTER — centered text, tidy bottom padding */
357
+ footer {
358
+ margin-top: 64px;
359
+ padding: 24px 0 32px;
360
+ border-top: 1px solid var(--border);
361
+ display: flex !important;
362
+ flex-direction: column !important;
363
+ align-items: center !important;
364
+ justify-content: center !important;
365
+ gap: 6px;
366
+ text-align: center;
367
+ }
README.md ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: ATS Resume Intelligence Engine
3
+ emoji: 📄
4
+ colorFrom: blue
5
+ colorTo: purple
6
+ sdk: docker
7
+ app_port: 7860
8
+ pinned: false
9
+ license: mit
10
+ suggested_hardware: cpu-upgrade
11
+ ---
12
+
13
+ # ATS Intelligence Engine
14
+
15
+ <p align="center">
16
+ <b>An AI-powered Applicant Tracking System that scores resumes against job descriptions using semantic similarity.</b><br>
17
+ Built with FastAPI, LangChain, and Hugging Face — deployable as a Docker Space.
18
+ </p>
19
+
20
+ ---
21
+
22
+ ## Features
23
+
24
+ * Upload resume & job description (PDF or text)
25
+ * Extract text using PyMuPDF
26
+ * Semantic similarity via Sentence Transformers
27
+ * Keyword matching for skill overlap
28
+ * Final ATS score with LangChain + Hugging Face LLM feedback
29
+ * REST API built with FastAPI
30
+ * Docker Space ready (port **7860**)
31
+
32
+ ---
33
+
34
+ ## Tech Stack
35
+
36
+ | Layer | Technology |
37
+ | ---------------- | ----------------------------------- |
38
+ | Backend | FastAPI, Uvicorn |
39
+ | ML / Embeddings | Sentence Transformers, Scikit-learn |
40
+ | LLM / Feedback | LangChain, Hugging Face Inference API |
41
+ | PDF Parsing | PyMuPDF |
42
+ | Frontend | HTML / JS (served by FastAPI) |
43
+ | Deployment | Docker, Hugging Face Spaces |
44
+
45
+ ---
46
+
47
+ ## Deploy on Hugging Face Spaces (Docker)
48
+
49
+ ### 1. Create a new Space
50
+
51
+ 1. Go to [huggingface.co/new-space](https://huggingface.co/new-space)
52
+ 2. Choose **Docker** as the SDK
53
+ 3. Pick a name (e.g. `ats-resume-intelligence`)
54
+ 4. Hardware: **CPU upgrade** (recommended for Sentence Transformers)
55
+
56
+ ### 2. Push this repository
57
+
58
+ ```bash
59
+ git remote add space https://huggingface.co/spaces/YOUR_USERNAME/YOUR_SPACE_NAME
60
+ git push space main
61
+ ```
62
+
63
+ Or clone the empty Space repo and copy these files into it, then commit and push.
64
+
65
+ ### 3. Add your Hugging Face token (required for AI feedback)
66
+
67
+ In the Space → **Settings** → **Repository secrets**, add:
68
+
69
+ | Secret name | Value |
70
+ | ----------- | ----- |
71
+ | `HF_TOKEN` | Your Hugging Face access token ([create one](https://huggingface.co/settings/tokens)) with **Inference** permission |
72
+
73
+ The Space will rebuild automatically after you push or add secrets.
74
+
75
+ ### 4. Open your live app
76
+
77
+ Your Space URL will be:
78
+
79
+ `https://huggingface.co/spaces/YOUR_USERNAME/YOUR_SPACE_NAME`
80
+
81
+ The UI and API run on the same origin (port 7860), so no CORS or proxy setup is needed.
82
+
83
+ ---
84
+
85
+ ## Run locally with Docker
86
+
87
+ ### Prerequisites
88
+
89
+ * Docker
90
+ * Hugging Face API token
91
+
92
+ ### Setup
93
+
94
+ Create a `.env` file:
95
+
96
+ ```env
97
+ HF_TOKEN=your_huggingface_token_here
98
+ ```
99
+
100
+ ### Start
101
+
102
+ ```bash
103
+ docker compose up --build
104
+ ```
105
+
106
+ Open **http://localhost:7860**
107
+
108
+ ---
109
+
110
+ ## API Endpoints
111
+
112
+ ### `POST /predict/ats` — JSON
113
+
114
+ ```json
115
+ {
116
+ "resume_text": "...",
117
+ "job_description": "..."
118
+ }
119
+ ```
120
+
121
+ ### `POST /predict/ats/upload` — multipart
122
+
123
+ | Field | Type | Description |
124
+ | --------------- | ------ | -------------------- |
125
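+ | resume_pdf | File | Resume PDF (used instead of `resume_text` when both are sent) |
126
+ | resume_text | string | Resume text (required if no `resume_pdf`) |
127
+ | jd_pdf | File | Job description PDF (used instead of `job_description` when both are sent) |
128
+ | job_description | string | Job description text (required if no `jd_pdf`) |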
129
+
130
+ ### Response
131
+
132
+ ```json
133
+ {
134
+ "semantic_score": 82.0,
134
+ "keyword_score": 74.0,
135
+ "final_ats_score": 78.8,
137
+ "summary": "..."
138
+ }
139
+ ```
140
+
141
+ ### `GET /health` — health check
142
+
143
+ ---
144
+
145
+ ## Development (without Docker)
146
+
147
+ ```bash
148
+ pip install -r requirements.txt
149
+ export HF_TOKEN=your_token_here  # Windows (cmd): set HF_TOKEN=your_token_here
150
+ uvicorn main:app --reload --port 7860
151
+ ```
152
+
153
+ For local dev without the Space URL, in `Frontend/app.js` set:
154
+
155
+ ```js
156
+ const API = 'http://localhost:7860';
157
+ ```
158
+
159
+ ---
160
+
161
+ ## Environment variables
162
+
163
+ | Variable | Description |
164
+ | -------- | ----------- |
165
+ | `HF_TOKEN` | Hugging Face API token (Space secret or `.env`) |
166
+
167
+ ---
168
+
169
+ ## License
170
+
171
+ MIT License
docker-compose.yml ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ # Local run — mirrors Hugging Face Spaces (single container, port 7860)
2
+ services:
3
+ app:
4
+ build: .
5
+ container_name: ats_app
6
+ ports:
7
+ - "7860:7860"
8
+ environment:
9
+ - HF_TOKEN=${HF_TOKEN}
10
+ restart: unless-stopped
main.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+
3
+ from fastapi import FastAPI
4
+ from fastapi.middleware.cors import CORSMiddleware
5
+ from fastapi.staticfiles import StaticFiles
6
+
7
+ from routes import router as predict_router
8
+
9
# Directory holding the static frontend, resolved relative to this file.
FRONTEND_DIR = Path(__file__).resolve().parent / "Frontend"

app = FastAPI(title="ATS Resume Intelligence Engine")
app.include_router(predict_router)
# Wildcard CORS: every origin, method and header is accepted.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)


@app.get("/health")
async def health():
    """Health-check endpoint; always returns ``{"status": "ok"}``."""
    return {"status": "ok"}


# The static mount is skipped entirely when the Frontend directory is absent.
# NOTE(review): it is registered after the API routes, presumably so that
# /predict/* and /health are matched before the catch-all "/" mount — confirm
# against Starlette's route-ordering rules before reordering these statements.
if FRONTEND_DIR.is_dir():
    app.mount("/", StaticFiles(directory=FRONTEND_DIR, html=True), name="frontend")
nginx.conf ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ server {
2
+ listen 80;
3
+ client_max_body_size 10M;
4
+
5
+ # Serve frontend files
6
+ location / {
7
+ root /usr/share/nginx/html;
8
+ index index.html;
9
+ try_files $uri $uri/ /index.html;
10
+ }
11
+
12
+ # Proxy API calls to FastAPI backend
13
+ location /predict/ {
14
+ proxy_pass http://backend:8000;
15
+ proxy_http_version 1.1;
16
+ proxy_set_header Host $host;
17
+ proxy_set_header X-Real-IP $remote_addr;
18
+ proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
19
+ proxy_set_header X-Forwarded-Proto $scheme;
20
+ proxy_read_timeout 300s;
21
+ proxy_connect_timeout 60s;
22
+ }
23
+ }
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn[standard]
3
+ sentence-transformers
4
+ scikit-learn
5
+ PyMuPDF
6
+ huggingface-hub
7
+ pydantic
8
+ python-multipart
9
+ langchain
10
+ langchain-core
11
+ langchain-huggingface
routes.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # routes.py
2
+ from fastapi import APIRouter, UploadFile, File, Form, HTTPException
3
+ from typing import Optional
4
+ from schemas import ScoreResponse, ScoreRequest
5
+ from services.scorer import resume_score
6
+ from utilities.pdf_parser import extract_text_from_pdf
7
+ from utilities.keyword_match import clean_text
8
+
9
+ router = APIRouter(prefix="/predict", tags=["Prediction"])
10
+
11
+
12
+ # ── JSON route (both inputs are plain text) ────────────────────────────────
13
@router.post("/ats", response_model=ScoreResponse)
async def predict_ats_json(payload: ScoreRequest):
    """
    Score a resume against a job description, both sent as plain text in a JSON body.

    Each text is normalised with ``clean_text`` and the pair is handed to
    ``resume_score``, whose result is returned unchanged.
    """
    cleaned_resume, cleaned_jd = (
        clean_text(raw) for raw in (payload.resume_text, payload.job_description)
    )
    return resume_score(cleaned_resume, cleaned_jd)
18
+
19
+
20
+ # ── Multipart route (one or both inputs are PDFs) ─────────────────────────
21
async def _resolve_input_text(
    pdf: Optional[UploadFile],
    text: Optional[str],
    pdf_field: str,
    text_field: str,
) -> str:
    """
    Return the raw text for one input, preferring the uploaded PDF over form text.

    Raises
    ------
    HTTPException(400)  the upload is present but not declared as application/pdf.
    HTTPException(422)  the PDF yields no text, or neither a PDF nor non-blank
                        text was supplied.
    """
    if pdf and pdf.filename:
        if pdf.content_type != "application/pdf":
            raise HTTPException(status_code=400, detail=f"{pdf_field} must be a PDF file.")
        try:
            return extract_text_from_pdf(await pdf.read())
        except ValueError as e:
            raise HTTPException(status_code=422, detail=str(e)) from e

    # Whitespace-only text carries no content, so treat it like a missing field
    # instead of scoring an empty document.
    if text and text.strip():
        return text

    raise HTTPException(
        status_code=422,
        detail=f"Provide either {pdf_field} or {text_field}.",
    )


# ── Multipart route (one or both inputs are PDFs) ─────────────────────────
@router.post("/ats/upload", response_model=ScoreResponse)
async def predict_ats_upload(
    resume_pdf: Optional[UploadFile] = File(None, description="Resume PDF (optional)"),
    jd_pdf: Optional[UploadFile] = File(None, description="JD PDF (optional)"),
    resume_text: Optional[str] = Form(None, description="Resume plain text (optional)"),
    job_description: Optional[str] = Form(None, description="JD plain text (optional)"),
):
    """
    Score a resume against a job description sent as multipart form data.

    Each side may arrive as a PDF upload or as plain text; when both are given
    for the same side, the PDF is used. The resume is resolved first, so its
    validation errors are reported before any job-description errors.
    """
    resume_raw = await _resolve_input_text(
        resume_pdf, resume_text, "resume_pdf", "resume_text"
    )
    jd_raw = await _resolve_input_text(
        jd_pdf, job_description, "jd_pdf", "job_description"
    )

    resume_clean = clean_text(resume_raw)
    jd_clean = clean_text(jd_raw)
    return resume_score(resume_clean, jd_clean)
schemas.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel
2
+
3
class ScoreRequest(BaseModel):
    """JSON request body for plain-text scoring (resume and job description as strings)."""
    # Full resume text.
    resume_text: str
    # Full job-description text.
    job_description: str
6
+
7
class ScoreResponse(BaseModel):
    """Response body returned by the scoring routes."""
    # Embedding-similarity score.
    semantic_score: float
    # Skill/keyword overlap score.
    keyword_score: float
    # Combined score after any seniority penalty.
    final_ats_score: float
    # Free-text feedback for the candidate.
    summary: str
12
+
services/feedback.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_core.messages import HumanMessage, SystemMessage
2
+
3
+ from .llm import get_llm
4
+
5
# System message sent with every feedback request. It restricts the model to the
# supplied scores and gaps and fixes the output to three named sections.
SYSTEM_PROMPT = """You are an ATS resume analyst.

You MUST base your feedback ONLY on:
- The provided ATS scores
- The detected gaps

DO NOT invent missing skills.
DO NOT give generic advice.
DO NOT mention motivation, confidence, or mindset.

Write a concise analysis with exactly 3 sections:
1. Score Explanation
2. Weak Areas
3. Actionable Improvements

Keep it under 500 words."""
21
+
22
+
23
def generate_resume_feedback(scores: dict, gaps: dict) -> str:
    """
    Ask the LLM for a three-section written analysis of the ATS result.

    Parameters
    ----------
    scores  Must contain 'semantic_score', 'keyword_score' and 'final_ats_score'.
    gaps    Must contain 'missing_keywords' and 'skill_overlap_percentage'.

    Returns
    -------
    The stripped model reply, or the fixed string "Feedback generation failed."
    when the call raises or the reply is empty / not a string.

    Raises
    ------
    KeyError if a required key is missing from ``scores`` or ``gaps``.
    """
    import logging

    log = logging.getLogger(__name__)

    user_prompt = f"""ATS Scores:
Semantic: {scores['semantic_score']}
Keyword: {scores['keyword_score']}
Final: {scores['final_ats_score']}

Detected Gaps:
Missing Keywords: {gaps['missing_keywords']}
Skill Overlap: {gaps['skill_overlap_percentage']}%

Provide the 3-section analysis now."""

    try:
        response = get_llm().invoke(
            [
                SystemMessage(content=SYSTEM_PROMPT),
                HumanMessage(content=user_prompt),
            ]
        )
        content = response.content
        if isinstance(content, str) and content.strip():
            return content.strip()
        log.warning("LLM returned an empty or non-text reply; using fallback feedback.")
    except Exception:
        # Feedback is best-effort: scoring must still succeed when the LLM is
        # unavailable. Log the cause (missing token, client misconfiguration,
        # network error) instead of discarding it.
        log.exception("LLM feedback generation failed; using fallback feedback.")

    return "Feedback generation failed."
services/llm.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ from langchain_core.messages import HumanMessage, SystemMessage
4
+ from langchain_huggingface import ChatHuggingFace
5
+
6
+ MODEL_ID = "allenai/Olmo-3-7B-Instruct"
7
+
8
+ _chat_model: ChatHuggingFace | None = None
9
+
10
+
11
def get_llm() -> ChatHuggingFace:
    """
    Return the process-wide ChatHuggingFace client, creating it on first use.

    The token is taken from the first non-empty variable among HF_TOKEN,
    HUGGINGFACEHUB_API_TOKEN and HUGGING_FACE_HUB_TOKEN.

    Raises
    ------
    ValueError if none of those variables holds a value.
    """
    global _chat_model
    if _chat_model is not None:
        return _chat_model

    env_names = ("HF_TOKEN", "HUGGINGFACEHUB_API_TOKEN", "HUGGING_FACE_HUB_TOKEN")
    token = next((value for value in map(os.environ.get, env_names) if value), None)
    if not token:
        raise ValueError(
            "HF_TOKEN is not set. Add it as a Space secret or in your local .env file."
        )

    # NOTE(review): confirm that the installed langchain-huggingface version
    # accepts model=/token= here; its documented examples construct
    # ChatHuggingFace around an `llm` object such as HuggingFaceEndpoint.
    _chat_model = ChatHuggingFace(
        model=MODEL_ID,
        token=token,
        temperature=0.2,
        max_tokens=512,
    )
    return _chat_model
31
+
32
+
33
# Manual check: run this module directly to send one sample prompt through the
# shared client and print the reply.
if __name__ == "__main__":
    llm = get_llm()
    response = llm.invoke(
        [
            SystemMessage(content="You are an ATS resume analyst."),
            HumanMessage(
                content=(
                    "ATS Scores: Semantic 0.45, Keyword 0.70, Final 0.68. "
                    "Missing: api, tensorflow, docker. Skill overlap: 70%. "
                    "Write 3 short sections: Score Explanation, Weak Areas, Actionable Improvements."
                )
            ),
        ]
    )
    print(response.content)
services/scorer.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from utilities.keyword_match import final_ats_score, experience_level_penalty
2
+ from utilities.skills import (
3
+ find_missing_skills,
4
+ calculate_skill_overlap,
5
+ extract_resume_skills,
6
+ extract_required_skills_from_jd,
7
+ clean_text,
8
+ )
9
+ from services.feedback import generate_resume_feedback
10
+
11
+
12
+ # ---------------------------------------------------------------------------
13
+ # Gap analysis
14
+ # ---------------------------------------------------------------------------
15
+
16
def extract_gaps(resume_text: str, jd_text: str) -> dict:
    """
    Build the gap report passed to the LLM feedback prompt.

    Returned keys
    -------------
    missing_keywords           First 10 entries of find_missing_skills().
    skill_overlap_percentage   Result of calculate_skill_overlap().
    matched_skills             Sorted skills found in both the resume and the JD.
    high_priority_missing      Up to 5 missing skills the JD mentions more than once.
    seniority_penalty          Result of experience_level_penalty().
    """
    absent = find_missing_skills(resume_text, jd_text)
    overlap_pct = calculate_skill_overlap(resume_text, jd_text)
    candidate_skills = extract_resume_skills(resume_text)
    jd_frequency = extract_required_skills_from_jd(jd_text)

    in_both = sorted(candidate_skills & set(jd_frequency.keys()))

    # A missing skill counts as high priority when the JD repeats it.
    repeated_and_absent = []
    for skill in absent:
        if jd_frequency.get(skill, 0) > 1:
            repeated_and_absent.append(skill)

    report = {
        "missing_keywords": absent[:10],
        "skill_overlap_percentage": overlap_pct,
        "matched_skills": in_both,
        "high_priority_missing": repeated_and_absent[:5],
    }
    report["seniority_penalty"] = experience_level_penalty(resume_text, jd_text)
    return report
52
+
53
+
54
+ # ---------------------------------------------------------------------------
55
+ # Main scoring entry point
56
+ # ---------------------------------------------------------------------------
57
+
58
def resume_score(resume_text: str, jd_text: str) -> dict:
    """
    Run the full pipeline: clean both texts, score them, analyse gaps, ask for feedback.

    Returns the three score fields from final_ats_score() plus a 'summary'
    key holding the feedback text.
    """
    cleaned_resume = clean_text(resume_text)
    cleaned_jd = clean_text(jd_text)

    result = dict(final_ats_score(cleaned_resume, cleaned_jd))
    gap_report = extract_gaps(cleaned_resume, cleaned_jd)

    # Feedback is generated from the score fields only, before 'summary' is added.
    result["summary"] = generate_resume_feedback(dict(result), gap_report)
    return result
utilities/keyword_match.py ADDED
@@ -0,0 +1,205 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ from sklearn.metrics.pairwise import cosine_similarity
3
+ from sentence_transformers import SentenceTransformer
4
+ from utilities.skills import (
5
+ extract_resume_skills,
6
+ extract_required_skills_from_jd,
7
+ SKILLS_SORTED_BY_LENGTH,
8
+ clean_text,
9
+ )
10
+
11
+ model = SentenceTransformer("all-MiniLM-L6-v2")
12
+
13
+
14
+ # ---------------------------------------------------------------------------
15
+ # Stop-word list — common English words that pollute keyword matching
16
+ # ---------------------------------------------------------------------------
17
# Common English words plus generic job-ad vocabulary that carry no skill signal.
STOP_WORDS: set = set(
    """
    a an the and or but in on at to for
    of with by from as is was are were be
    been being have has had do does did will
    would could should may might shall can need
    that this these those it its we our you
    your they their he she his her i my
    not no so if then than also just only
    about up out over into through during including
    used use using work working works strong good
    experience experiences role team company environment
    ability skills skill looking required requirement
    plus bonus nice preferred knowledge understanding
    familiarity proficiency proficient hands on
    """.split()
)
32
+
33
+
34
+ # ---------------------------------------------------------------------------
35
+ # Text utilities
36
+ # ---------------------------------------------------------------------------
37
+
38
def extract_skill_sentences(text: str) -> str:
    """
    Keep only the sentences / bullet points that mention a known tech skill.

    The text is split on '.', newline and ';'. A segment is kept when, after
    clean_text(), it contains a skill from SKILLS_SORTED_BY_LENGTH as a whole
    token. A plain substring test is not enough: short skill names such as
    "r", "c" or "go" occur inside ordinary words, which would keep nearly
    every segment and defeat the filter.

    Returns the kept segments joined by single spaces, or the cleaned full
    text when nothing matches (prevents a zero-length embedding).
    """
    cleaned_text = clean_text(text)
    if not SKILLS_SORTED_BY_LENGTH:
        return cleaned_text

    # One alternation over every skill; the lookarounds reject matches that are
    # glued to a letter, digit or underscore on either side. re.escape keeps
    # names like "c++" and "c#" literal.
    skill_pattern = re.compile(
        r"(?<!\w)(?:"
        + "|".join(re.escape(skill) for skill in SKILLS_SORTED_BY_LENGTH)
        + r")(?!\w)"
    )

    relevant = []
    for seg in re.split(r'[.\n;]', text):
        seg_clean = clean_text(seg)
        if skill_pattern.search(seg_clean):
            relevant.append(seg_clean)
    return " ".join(relevant) if relevant else cleaned_text
52
+
53
+
54
def remove_stop_words(text: str) -> set:
    """Split text on whitespace and return the unique tokens that are not in STOP_WORDS."""
    return {token for token in text.split() if token not in STOP_WORDS}
58
+
59
+
60
+ # ---------------------------------------------------------------------------
61
+ # Scoring functions
62
+ # ---------------------------------------------------------------------------
63
+
64
def keyword_match_score(resume_text: str, jd_text: str) -> float:
    """
    Frequency-weighted share of JD skills that also appear in the resume.

    Each JD skill weighs as much as its frequency in the JD, so a skill the JD
    repeats costs more when it is missing. The result is a percentage rounded
    to 2 decimals; it is 0.0 when the JD yields no recognised skills.
    """
    required = extract_required_skills_from_jd(jd_text)  # {skill: freq}
    present = extract_resume_skills(resume_text)

    if not required:
        return 0.0

    weight_total = 0
    weight_hit = 0
    for skill, freq in required.items():
        weight_total += freq
        if skill in present:
            weight_hit += freq

    return round(weight_hit / weight_total * 100, 2)
91
+
92
+
93
def semantic_match_score(resume_text: str, jd_text: str) -> float:
    """
    Cosine similarity between the skill-focused parts of the resume and the JD.

    Both texts are first reduced with extract_skill_sentences(), then encoded
    together by the module-level sentence-transformer. The similarity is
    returned as a percentage rounded to 2 decimals.
    """
    focused_pair = [
        extract_skill_sentences(resume_text),
        extract_skill_sentences(jd_text),
    ]
    resume_vec, jd_vec = model.encode(focused_pair)
    similarity = cosine_similarity([resume_vec], [jd_vec])[0][0]
    return round(float(similarity) * 100, 2)
108
+
109
+
110
def experience_level_penalty(resume_text: str, jd_text: str) -> float:
    """
    Detect a seniority mismatch and return the penalty to subtract from the score.

    Returns
    -------
    10.0  the JD signals a senior role and the resume signals a junior profile.
    3.0   the JD signals a junior role and the resume signals a senior profile.
    0.0   otherwise.

    Signals are matched case-insensitively as whole words. Substring matching
    would misread "internal" / "internet" as "intern" and "leading" /
    "staffing" as "lead" / "staff".
    """
    senior_pattern = re.compile(
        r"\b(?:senior|lead|principal|architect|staff|head\s+of)\b"
    )
    # "entry level" and "entry-level" are both accepted.
    junior_pattern = re.compile(
        r"\b(?:junior|entry[\s-]+level|graduate|intern|fresher)\b"
    )

    jd_lower = jd_text.lower()
    resume_lower = resume_text.lower()

    jd_is_senior = senior_pattern.search(jd_lower) is not None
    jd_is_junior = junior_pattern.search(jd_lower) is not None
    resume_is_senior = senior_pattern.search(resume_lower) is not None
    resume_is_junior = junior_pattern.search(resume_lower) is not None

    # JD wants senior but resume signals junior
    if jd_is_senior and resume_is_junior:
        return 10.0
    # JD wants junior but resume is over-qualified (minor penalty)
    if jd_is_junior and resume_is_senior:
        return 3.0
    return 0.0
136
+
137
+
138
+ # ---------------------------------------------------------------------------
139
+ # Final composite score
140
+ # ---------------------------------------------------------------------------
141
+
142
def final_ats_score(resume_text: str, jd_text: str) -> dict:
    """
    Combine the individual scores into the final ATS result.

    Blend: 60% semantic similarity + 40% keyword match, rounded to 2 decimals.
    The seniority penalty is then subtracted and the result floored at 0.0.

    Returns a dict with 'semantic_score', 'keyword_score' and 'final_ats_score'.
    """
    semantic = semantic_match_score(resume_text, jd_text)
    keyword = keyword_match_score(resume_text, jd_text)
    penalty = experience_level_penalty(resume_text, jd_text)

    blended = round(0.6 * semantic + 0.4 * keyword, 2)

    result = {
        "semantic_score": round(semantic, 2),
        "keyword_score": round(keyword, 2),
    }
    result["final_ats_score"] = round(max(0.0, blended - penalty), 2)
    return result
164
+
165
+
166
+ # ---------------------------------------------------------------------------
167
+ # Smoke-test
168
+ # ---------------------------------------------------------------------------
169
if __name__ == "__main__":
    # (resume, job description) pairs covering close matches, keyword stuffing,
    # an unrelated profile and seniority mismatches.
    sample_pairs = (
        (
            "Python developer with FastAPI, SQL, and machine learning experience",
            "Looking for a Python developer with FastAPI, SQL, and ML skills",
        ),
        (
            "Built backend services using Python frameworks and databases",
            "Python developer with FastAPI and SQL",
        ),
        (
            "Python Python Python SQL SQL FastAPI",
            "Python developer with FastAPI and SQL",
        ),
        (
            "Professional photographer specialising in portraits and wildlife",
            "Hiring a machine learning engineer with Python and PyTorch",
        ),
        (
            "Led ML teams, deployed large-scale models, optimised transformers",
            "Junior Python developer with basic ML",
        ),
        (
            "NLP engineer: PyTorch, HuggingFace transformers, LLM fine-tuning, RAG pipelines",
            "Senior ML engineer: LLM, RAG, fine-tuning, Python, AWS SageMaker",
        ),
    )

    print(f"{'#':<3} {'Semantic':>10} {'Keyword':>10} {'Final ATS':>10}")
    print("-" * 38)
    for index, pair in enumerate(sample_pairs):
        outcome = final_ats_score(pair[0], pair[1])
        row = (
            f"{index:<3} {outcome['semantic_score']:>10} "
            f"{outcome['keyword_score']:>10} "
            f"{outcome['final_ats_score']:>10}"
        )
        print(row)
utilities/pdf_parser.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import fitz # PyMuPDF
2
+
3
def extract_text_from_pdf(file_bytes: bytes) -> str:
    """
    Extract plain text from a PDF given its raw bytes.

    The text of every page is joined with newlines into a single string and
    surrounding whitespace is stripped.

    Args:
        file_bytes: Raw content of the PDF file.

    Returns:
        The concatenated plain text of all pages.

    Raises:
        ValueError: if the PDF yields no text (e.g. scanned image-only PDF).
    """
    doc = fitz.open(stream=file_bytes, filetype="pdf")
    try:
        # "text" = plain text extraction mode
        pages_text = [page.get_text("text") for page in doc]
    finally:
        # Always release the document, including when extraction fails,
        # so repeated calls do not accumulate open PDF handles.
        doc.close()

    full_text = "\n".join(pages_text).strip()

    if not full_text:
        raise ValueError(
            "No text could be extracted from the PDF. "
            "It may be a scanned image. Please upload a text-based PDF."
        )

    return full_text
utilities/skills.py ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+
3
+ # ---------------------------------------------------------------------------
4
+ # Tech Skill Taxonomy
5
+ # Multi-word phrases are matched BEFORE single tokens to avoid partial hits.
6
+ # Each category is kept separate so it's easy to extend.
7
+ # ---------------------------------------------------------------------------
8
+
9
# --- Languages ---
LANGUAGES = {
    "python", "java", "javascript", "typescript", "golang", "go",
    "rust", "ruby", "scala", "kotlin", "swift", "php", "r", "matlab",
    "bash", "shell", "c", "cpp", "c++", "csharp", "c#",
}

# --- Web Frameworks ---
WEB_FRAMEWORKS = {
    "fastapi", "flask", "django", "spring boot", "spring",
    "express", "nestjs", "nextjs", "nuxtjs", "rails",
    "laravel", "fiber", "gin",
}

# --- Frontend ---
FRONTEND = {
    "react", "angular", "vue", "svelte", "html", "css",
    "tailwind", "bootstrap", "redux", "webpack", "vite",
}

# --- APIs & Architecture ---
API_ARCH = {
    "rest api", "restful api", "graphql", "grpc", "websocket",
    "microservices", "microservice", "event driven", "message queue",
    "api gateway", "api",
}

# --- Databases ---
DATABASES = {
    "postgresql", "postgres", "mysql", "sqlite", "oracle",
    "mongodb", "mongo", "redis", "cassandra", "dynamodb",
    "elasticsearch", "neo4j", "firebase", "supabase",
    "sql", "nosql", "vector database", "pinecone", "weaviate",
}

# --- ML / AI / Data Science ---
ML_AI = {
    "machine learning", "deep learning", "reinforcement learning",
    "supervised learning", "unsupervised learning",
    "natural language processing", "nlp", "computer vision",
    "large language model", "llm", "generative ai", "gen ai",
    "transformers", "bert", "gpt", "llama", "mistral",
    "scikit-learn", "scikit learn", "sklearn",
    "pytorch", "torch", "tensorflow", "keras", "jax",
    "hugging face", "huggingface", "langchain", "llamaindex",
    "xgboost", "lightgbm", "catboost",
    "pandas", "numpy", "scipy", "matplotlib", "seaborn", "plotly",
    "mlflow", "mlops", "model serving", "model deployment",
    "feature engineering", "hyperparameter tuning",
    "rag", "retrieval augmented generation", "fine tuning", "fine-tuning",
    "diffusion models", "stable diffusion",
    "data analysis", "data analytics", "data science",
    "statistical analysis", "statistics", "probability",
    "a/b testing", "hypothesis testing",
}

# --- Data Engineering ---
DATA_ENGINEERING = {
    "apache spark", "spark", "hadoop", "kafka", "airflow",
    "dbt", "flink", "hive", "presto", "trino",
    "etl", "elt", "data pipeline", "data warehouse",
    "snowflake", "bigquery", "redshift", "databricks",
}

# --- Cloud & Infrastructure ---
CLOUD = {
    "aws", "amazon web services", "azure", "gcp", "google cloud",
    "ec2", "s3", "lambda", "sagemaker", "bedrock",
    "cloudformation", "terraform", "pulumi",
    "serverless", "cloud functions",
}

# --- DevOps & CI/CD ---
DEVOPS = {
    "docker", "kubernetes", "k8s", "helm",
    "ci/cd", "ci cd", "github actions", "gitlab ci", "jenkins",
    "ansible", "chef", "puppet",
    "linux", "unix", "nginx", "apache",
    "monitoring", "observability", "prometheus", "grafana",
    "opentelemetry", "datadog", "new relic",
}

# --- Version Control & Collaboration ---
VCS = {
    "git", "github", "gitlab", "bitbucket", "version control",
}

# --- System Design & Software Engineering ---
ENGINEERING = {
    "system design", "software design", "object oriented", "oop",
    "design patterns", "solid principles", "clean code",
    "distributed systems", "high availability", "scalability",
    "load balancing", "caching", "message broker",
    "unit testing", "integration testing", "tdd", "bdd",
    "pytest", "junit", "jest", "mocha",
    "code review", "agile", "scrum", "kanban",
}

# --- Security ---
SECURITY = {
    "cybersecurity", "penetration testing", "pen testing",
    "oauth", "jwt", "ssl", "tls", "encryption",
    "owasp", "security auditing",
}

# ---------------------------------------------------------------------------
# Master set — union of every category above
# ---------------------------------------------------------------------------
GENERAL_TECH_SKILLS: set = (
    LANGUAGES | WEB_FRAMEWORKS | FRONTEND | API_ARCH |
    DATABASES | ML_AI | DATA_ENGINEERING | CLOUD |
    DEVOPS | VCS | ENGINEERING | SECURITY
)

# Pre-sorted for greedy multi-word matching (longest phrase wins).
# Equal-length skills are ordered alphabetically: iterating a set of strings
# depends on per-process hashing, so sorting by length alone would make the
# matching order (and therefore results on overlapping phrases) vary between
# runs.
SKILLS_SORTED_BY_LENGTH: list = sorted(
    GENERAL_TECH_SKILLS, key=lambda skill: (-len(skill), skill)
)
125
+
126
+
127
+ # ---------------------------------------------------------------------------
128
+ # Helpers
129
+ # ---------------------------------------------------------------------------
130
+
131
def clean_text(text: str) -> str:
    """
    Normalise free text for skill matching.

    Lower-cases the input, replaces every character that is not a letter,
    digit, whitespace, '/', '+' or '#' with a space, then collapses runs of
    whitespace into single spaces and strips the ends.

    '/' is kept for skills such as "ci/cd"; '+' and '#' are kept so that
    "c++" and "c#" survive cleaning instead of collapsing to the unrelated
    skill "c". Hyphens still become spaces, so "scikit-learn" is matched
    through its "scikit learn" spelling.
    """
    text = text.lower()
    text = re.sub(r'[^a-z0-9\s/+#]', ' ', text)
    text = re.sub(r'\s+', ' ', text).strip()
    return text
136
+
137
+
138
def extract_skills(text: str) -> set:
    """
    Find every known skill mentioned in ``text``.

    Skills are tried in the order of SKILLS_SORTED_BY_LENGTH (longest first),
    so a multi-word skill (e.g. 'machine learning') claims its characters
    before any shorter skill contained in it, preventing double-counting.

    A hit only counts when it is a whole word / phrase: it must be delimited
    by the start or end of the text, a space, or a '/'. The '/' is accepted
    because clean_text() preserves it, and slash-joined lists such as
    "Python/Java" would otherwise never match.

    Returns:
        The set of matched skill strings (empty when nothing matches).
    """
    cleaned = clean_text(text)
    found: set = set()
    consumed_positions: set = set()  # char positions already claimed
    boundary_chars = (' ', '/')

    for skill in SKILLS_SORTED_BY_LENGTH:
        start = 0
        while True:
            idx = cleaned.find(skill, start)
            if idx == -1:
                break
            end = idx + len(skill)

            # Boundary check — skill must be a whole word / phrase
            before_ok = idx == 0 or cleaned[idx - 1] in boundary_chars
            after_ok = end == len(cleaned) or cleaned[end] in boundary_chars

            if before_ok and after_ok:
                span = set(range(idx, end))
                if not span & consumed_positions:  # no overlap with claimed chars
                    found.add(skill)
                    consumed_positions |= span
                    break  # one valid hit is enough; move to the next skill

            # Rejected occurrence: keep scanning from the next character.
            start = idx + 1

    return found
170
+
171
+
172
def extract_required_skills_from_jd(jd_text: str) -> dict:
    """
    Return JD skills with mention-frequency as an importance signal.

    The result maps each skill found by extract_skills() to the number of
    times it appears in the cleaned job description as a whole word / phrase
    (delimited by whitespace, '/', or the text edges). Counting whole tokens
    rather than raw substrings keeps short skills honest: "r" is not counted
    once per letter r, and "go" is not counted inside "google".
    """
    skills = extract_skills(jd_text)
    cleaned = clean_text(jd_text)

    def _mentions(skill: str) -> int:
        # Lookarounds require a boundary on both sides without consuming it,
        # so adjacent mentions ("go go") are each counted.
        pattern = rf'(?<![^\s/]){re.escape(skill)}(?![^\s/])'
        return len(re.findall(pattern, cleaned))

    return {skill: _mentions(skill) for skill in skills}
177
+
178
+
179
def extract_resume_skills(resume_text: str) -> set:
    """Return the set of skills that extract_skills() finds in the resume text."""
    resume_skills = extract_skills(resume_text)
    return resume_skills
181
+
182
+
183
def find_missing_skills(resume_text: str, jd_text: str) -> list:
    """
    List the skills mentioned in the job description but absent from the resume.

    Returns:
        The missing skills in alphabetical order. Sorting keeps the output
        stable between runs; plain set iteration order is not.
    """
    jd_skills = set(extract_required_skills_from_jd(jd_text).keys())
    resume_skills = extract_resume_skills(resume_text)
    return sorted(jd_skills - resume_skills)
187
+
188
+
189
def calculate_skill_overlap(resume_text: str, jd_text: str) -> float:
    """
    Percentage of the job description's skills that also appear in the resume.

    Returns a value rounded to two decimals, or 0.0 when the job description
    contains no recognised skills.
    """
    required = set(extract_required_skills_from_jd(jd_text).keys())
    present = extract_resume_skills(resume_text)
    if len(required) == 0:
        return 0.0
    matched = required.intersection(present)
    ratio = len(matched) / len(required)
    return round(ratio * 100, 2)
195
+
196
+
197
+ # ---------------------------------------------------------------------------
198
+ # Quick smoke-test
199
+ # ---------------------------------------------------------------------------
200
if __name__ == "__main__":
    # Sample resume and job-description snippets used to eyeball the helpers.
    resume_text = (
        "Python, NumPy, Pandas, Scikit-learn, PyTorch, TensorFlow, spaCy. "
        "Machine Learning, NLP, Feature Engineering, Model Evaluation. "
        "Flask, FastAPI, Git, GitHub, Linux, MLflow, Docker."
    )
    jd_text = (
        "Machine Learning Engineer. Strong Python. Amazon SageMaker. "
        "ML model deployment. APIs. GenAI / LLM solutions. "
        "MLOps: model monitoring, drift detection, retraining. "
        "Data pipelines. CI/CD. Kubernetes."
    )

    # Both texts are normalised once up front and the cleaned versions are
    # what gets passed to every helper below.
    clean_r, clean_j = clean_text(resume_text), clean_text(jd_text)

    missing = find_missing_skills(clean_r, clean_j)
    print("Missing skills :", missing)

    overlap = calculate_skill_overlap(clean_r, clean_j)
    print("Skill overlap :", overlap, "%")

    resume_skills = extract_resume_skills(clean_r)
    print("Resume skills :", resume_skills)

    jd_skills = set(extract_required_skills_from_jd(clean_j).keys())
    print("JD skills :", jd_skills)