Spaces:
Running
Running
Deploy cv-buddy backend
Browse files. This view is limited to 50 files because it contains too many changes. See the raw diff for the full change set.
- .gitignore +7 -0
- Dockerfile +24 -0
- app/__init__.py +0 -0
- app/api/__init__.py +0 -0
- app/api/dependencies.py +21 -0
- app/api/routes/__init__.py +0 -0
- app/api/routes/analyze.py +129 -0
- app/api/routes/compare.py +65 -0
- app/api/routes/export.py +50 -0
- app/api/routes/progress.py +59 -0
- app/api/routes/result.py +17 -0
- app/api/routes/upload.py +101 -0
- app/core/__init__.py +0 -0
- app/core/config.py +25 -0
- app/core/redis.py +47 -0
- app/llm/__init__.py +0 -0
- app/llm/base.py +17 -0
- app/llm/factory.py +95 -0
- app/llm/fallback_provider.py +142 -0
- app/llm/google_provider.py +29 -0
- app/llm/groq_provider.py +41 -0
- app/llm/openai_provider.py +38 -0
- app/llm/zai_provider.py +49 -0
- app/main.py +94 -0
- app/models/__init__.py +5 -0
- app/models/analysis.py +38 -0
- app/models/customization.py +32 -0
- app/models/job.py +13 -0
- app/models/resume.py +32 -0
- app/models/score.py +16 -0
- app/services/__init__.py +0 -0
- app/services/ats_scorer.py +195 -0
- app/services/bullet_analyzer.py +145 -0
- app/services/job_scraper.py +108 -0
- app/services/layout_scanner.py +196 -0
- app/services/progress.py +83 -0
- app/services/resume_comparator.py +236 -0
- app/services/resume_customizer.py +151 -0
- app/services/resume_generator.py +249 -0
- app/services/resume_parser.py +84 -0
- app/workers/__init__.py +0 -0
- app/workers/celery_app.py +26 -0
- app/workers/tasks.py +124 -0
- prompts/analyze_bullets.txt +27 -0
- prompts/customize_resume.txt +26 -0
- prompts/extract_job.txt +15 -0
- prompts/structure_resume.txt +32 -0
- pyproject.toml +9 -0
- requirements.txt +33 -0
- supervisord.conf +24 -0
.gitignore
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
__pycache__/
|
| 2 |
+
*.pyc
|
| 3 |
+
.env
|
| 4 |
+
.env.*
|
| 5 |
+
fly.toml
|
| 6 |
+
*.egg-info/
|
| 7 |
+
.pytest_cache/
|
Dockerfile
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Backend image: FastAPI app + Celery worker run side-by-side under supervisord.
FROM python:3.11-slim

WORKDIR /app

# Install system dependencies for WeasyPrint + supervisor for running multiple processes
RUN apt-get update && apt-get install -y \
    libpango-1.0-0 \
    libpangocairo-1.0-0 \
    libgdk-pixbuf-2.0-0 \
    libffi-dev \
    shared-mime-info \
    supervisor \
    && rm -rf /var/lib/apt/lists/*

# Install Python deps first so this layer is cached across code-only changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY . .

COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf

# Port expected by the hosting platform (HF Spaces convention).
ENV PORT=7860

# supervisord starts and monitors both the web server and the worker.
CMD ["supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]
|
app/__init__.py
ADDED
|
File without changes
|
app/api/__init__.py
ADDED
|
File without changes
|
app/api/dependencies.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
import uuid
|
| 3 |
+
from typing import Optional
|
| 4 |
+
from fastapi import Header, HTTPException
|
| 5 |
+
from app.core.redis import get_redis
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
async def get_session_id(x_session_id: Optional[str] = Header(default=None)) -> str:
    """Resolve and validate the caller's session from the X-Session-Id header.

    Raises HTTPException(401) when the header is absent, or when Redis holds
    no resume for the session (expired or never created).
    """
    if not x_session_id:
        raise HTTPException(status_code=401, detail="No session found. Please upload a resume first.")

    redis = await get_redis()
    if not await redis.exists(f"session:{x_session_id}:resume"):
        raise HTTPException(status_code=401, detail="Session expired. Please upload your resume again.")

    return x_session_id
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def generate_session_id() -> str:
    """Mint a new random session identifier (UUID4 in canonical string form)."""
    return f"{uuid.uuid4()}"
|
app/api/routes/__init__.py
ADDED
|
File without changes
|
app/api/routes/analyze.py
ADDED
|
@@ -0,0 +1,129 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
import json
|
| 3 |
+
from typing import Optional
|
| 4 |
+
from fastapi import APIRouter, HTTPException, Depends
|
| 5 |
+
from pydantic import BaseModel
|
| 6 |
+
from app.api.dependencies import get_session_id
|
| 7 |
+
from app.core.redis import get_redis
|
| 8 |
+
from app.workers.tasks import analyze_and_customize
|
| 9 |
+
from app.models.customization import Intensity
|
| 10 |
+
from app.models.resume import ResumeData
|
| 11 |
+
from app.services.job_scraper import JobScraper
|
| 12 |
+
from app.services.ats_scorer import ATSScorer
|
| 13 |
+
|
| 14 |
+
router = APIRouter()
|
| 15 |
+
|
| 16 |
+
# ATS total score (percent) below which we ask the user to confirm before
# running the full customization pipeline.
LOW_SCORE_THRESHOLD = 30


class AnalyzeRequest(BaseModel):
    """Payload for /analyze-job: a job posting plus customization options."""
    job_url: Optional[str] = None
    job_text: Optional[str] = None
    intensity: Intensity = Intensity.MODERATE
    confirm_low_score: bool = False  # User confirmation for low scores


class PreviewScoreRequest(BaseModel):
    """Payload for /preview-score: just the job posting."""
    job_url: Optional[str] = None
    job_text: Optional[str] = None


async def _score_against_job(session_id: str, job_url: Optional[str], job_text: Optional[str]):
    """Load the session's resume, parse the job posting, and score the match.

    Shared by both routes below (previously duplicated inline in each).

    Returns:
        (resume, job, score) tuple.

    Raises:
        HTTPException(404): session has no stored resume.
        HTTPException(400): the job posting could not be parsed.
    """
    redis = await get_redis()
    resume_data = await redis.get(f"session:{session_id}:resume")
    if not resume_data:
        raise HTTPException(status_code=404, detail="Session expired or resume not found")

    resume = ResumeData(**json.loads(resume_data))

    scraper = JobScraper()
    try:
        if job_url:
            job = await scraper.scrape(job_url)
        else:
            job = await scraper.parse_text(job_text)
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Failed to parse job posting: {str(e)}")

    scorer = ATSScorer()
    score = await scorer.calculate(resume, job)
    return resume, job, score


@router.post("/preview-score")
async def preview_score(
    request: PreviewScoreRequest,
    session_id: str = Depends(get_session_id),
):
    """Get preliminary ATS score before full customization."""
    if not request.job_url and not request.job_text:
        raise HTTPException(
            status_code=400,
            detail="Please provide either a job URL or job description text."
        )

    _, _, score = await _score_against_job(session_id, request.job_url, request.job_text)

    # Determine if confirmation is needed
    needs_confirmation = score.total < LOW_SCORE_THRESHOLD

    return {
        "score": score.total,
        "matched_keywords": score.matched_keywords,
        "missing_keywords": score.missing_keywords,
        "needs_confirmation": needs_confirmation,
        "message": (
            f"Your resume has a {score.total}% match with this job. "
            f"This is quite low and will require significant changes. "
            f"Do you want to proceed?"
        ) if needs_confirmation else None,
    }


@router.post("/analyze-job")
async def analyze_job(
    request: AnalyzeRequest,
    session_id: str = Depends(get_session_id),
):
    """Queue the analyze-and-customize pipeline for the given job posting.

    Unless confirm_low_score is set, a best-effort pre-check runs first: a
    score below LOW_SCORE_THRESHOLD returns a confirmation prompt instead of
    queueing the task. Any failure in the pre-check is swallowed and the
    pipeline proceeds anyway.
    """
    if not request.job_url and not request.job_text:
        raise HTTPException(
            status_code=400,
            detail="Please provide either a job URL or job description text."
        )

    # If not explicitly confirmed, do a quick score check
    if not request.confirm_low_score:
        try:
            _, _, score = await _score_against_job(session_id, request.job_url, request.job_text)
            if score.total < LOW_SCORE_THRESHOLD:
                return {
                    "needs_confirmation": True,
                    "score": score.total,
                    "message": (
                        f"Your resume has only a {score.total}% match with this job. "
                        f"Major changes will be needed. Do you want to continue?"
                    ),
                }
        except Exception:
            pass  # If score check fails, proceed anyway

    # Queue the task
    task = analyze_and_customize.delay(
        session_id=session_id,
        job_url=request.job_url,
        job_text=request.job_text,
        intensity=request.intensity.value,
    )

    return {"task_id": task.id}
|
app/api/routes/compare.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
import json
|
| 3 |
+
from fastapi import APIRouter, HTTPException, UploadFile, File, Form
|
| 4 |
+
from app.core.redis import get_redis
|
| 5 |
+
from app.services.resume_parser import ResumeParser
|
| 6 |
+
from app.services.resume_comparator import ResumeComparator
|
| 7 |
+
from app.models.resume import ResumeData
|
| 8 |
+
|
| 9 |
+
router = APIRouter()
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
@router.post("/compare/{result_id}")
async def compare_resumes(
    result_id: str,
    original_file: UploadFile = File(...),
    job_description: str = Form(default="")
):
    """
    Compare original (ground truth) resume with LLM-generated version.

    Args:
        result_id: ID of the LLM analysis result
        original_file: Original resume file uploaded by user
        job_description: Job description text (optional)

    Returns:
        Detailed comparison metrics
    """
    # Get LLM result from Redis
    redis = await get_redis()
    data = await redis.get(f"result:{result_id}")

    if not data:
        raise HTTPException(status_code=404, detail="Result not found or expired.")

    result = json.loads(data)
    llm_resume = ResumeData(**result["customized"])

    # Parse original resume
    parser = ResumeParser()

    if not original_file.content_type or not parser.is_supported(original_file.content_type):
        raise HTTPException(
            status_code=400,
            detail="Invalid file type. Please upload a PDF or Word document."
        )

    try:
        contents = await original_file.read()
        original_resume = await parser.parse(contents, original_file.content_type)
    except ValueError as e:
        # Consistent with /upload: a recognizably-malformed document is the
        # client's error, not a server failure.
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to parse original resume: {str(e)}") from e

    # Run comparison
    comparator = ResumeComparator(
        original=original_resume,
        llm_version=llm_resume,
        job_description=job_description
    )

    try:
        return comparator.compare()
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Comparison failed: {str(e)}") from e
|
app/api/routes/export.py
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
import json
|
| 3 |
+
from enum import Enum
|
| 4 |
+
from fastapi import APIRouter, HTTPException, Query
|
| 5 |
+
from fastapi.responses import Response
|
| 6 |
+
from app.core.redis import get_redis
|
| 7 |
+
from app.services.resume_generator import ResumeGenerator
|
| 8 |
+
from app.models.resume import ResumeData
|
| 9 |
+
|
| 10 |
+
router = APIRouter()
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class ExportFormat(str, Enum):
    """Download formats supported by the export endpoint."""

    PDF = "pdf"
    DOCX = "docx"
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
@router.get("/export/{result_id}")
async def export_resume(
    result_id: str,
    format: ExportFormat = Query(default=ExportFormat.PDF),
):
    """Render the customized resume for a result as a PDF or DOCX download.

    Raises HTTPException(404) when the result is missing/expired and 500 when
    document generation fails.
    """
    redis = await get_redis()
    data = await redis.get(f"result:{result_id}")

    if not data:
        raise HTTPException(status_code=404, detail="Result not found or expired.")

    result = json.loads(data)
    resume = ResumeData(**result["customized"])

    generator = ResumeGenerator()

    try:
        if format == ExportFormat.PDF:
            content = generator.to_pdf(resume)
            media_type = "application/pdf"
            filename = "resume.pdf"
        else:
            content = generator.to_docx(resume)
            media_type = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
            filename = "resume.docx"
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Export failed: {str(e)}")

    return Response(
        content=content,
        media_type=media_type,
        # Bug fix: the computed filename was never used — the header carried a
        # literal placeholder string instead of resume.pdf / resume.docx.
        headers={"Content-Disposition": f'attachment; filename="{filename}"'},
    )
|
app/api/routes/progress.py
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
import json
|
| 3 |
+
import asyncio
|
| 4 |
+
from typing import AsyncGenerator
|
| 5 |
+
from fastapi import APIRouter
|
| 6 |
+
from fastapi.responses import StreamingResponse
|
| 7 |
+
from app.core.redis import get_redis
|
| 8 |
+
|
| 9 |
+
router = APIRouter()
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
@router.get("/progress/{task_id}")
async def stream_progress(task_id: str):
    """Stream task progress to the client as Server-Sent Events (SSE).

    Subscribes to the Redis pub/sub channel "progress:{task_id}" and relays
    each published payload as an SSE "data:" frame until a payload whose
    "step" is "complete" or "error" arrives, the 30s poll times out, or the
    client disconnects.
    """
    async def event_generator() -> AsyncGenerator[str, None]:
        redis = await get_redis()
        pubsub = redis.pubsub()
        channel = f"progress:{task_id}"

        await pubsub.subscribe(channel)

        # Send current state if exists (so late subscribers see the last
        # snapshot instead of waiting for the next published event).
        current_state = await redis.get(f"progress_state:{task_id}")
        if current_state:
            yield f"data: {current_state}\n\n"

        try:
            while True:
                message = await asyncio.wait_for(
                    pubsub.get_message(ignore_subscribe_messages=True),
                    timeout=30.0,
                )

                if message and message["type"] == "message":
                    data = message["data"]
                    yield f"data: {data}\n\n"

                    # Check if complete or error
                    parsed = json.loads(data)
                    if parsed.get("step") in ("complete", "error"):
                        break
                else:
                    # Send heartbeat (comment frame) to keep the connection alive.
                    yield ": heartbeat\n\n"

        except asyncio.TimeoutError:
            yield 'data: {"step": "error", "error": {"code": "TIMEOUT", "message": "Connection timeout"}}\n\n'
        finally:
            # Always release the pub/sub subscription, even on client disconnect.
            await pubsub.unsubscribe(channel)
            await pubsub.close()

    return StreamingResponse(
        event_generator(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            # Disable proxy (nginx) response buffering so events flush immediately.
            "X-Accel-Buffering": "no",
        },
    )
|
app/api/routes/result.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
import json
|
| 3 |
+
from fastapi import APIRouter, HTTPException
|
| 4 |
+
from app.core.redis import get_redis
|
| 5 |
+
|
| 6 |
+
router = APIRouter()
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
@router.get("/result/{result_id}")
async def get_result(result_id: str):
    """Return the stored analysis result for *result_id*; 404 when missing/expired."""
    redis = await get_redis()
    raw = await redis.get(f"result:{result_id}")

    if not raw:
        raise HTTPException(status_code=404, detail="Result not found or expired.")

    return json.loads(raw)
|
app/api/routes/upload.py
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
import logging
|
| 3 |
+
from fastapi import APIRouter, UploadFile, File, HTTPException
|
| 4 |
+
from app.api.dependencies import generate_session_id
|
| 5 |
+
from app.core.redis import get_redis
|
| 6 |
+
from app.core.config import settings
|
| 7 |
+
from app.services.resume_parser import ResumeParser
|
| 8 |
+
from app.services.layout_scanner import LayoutScanner
|
| 9 |
+
from app.models.analysis import SafetyScan
|
| 10 |
+
|
| 11 |
+
router = APIRouter()
|
| 12 |
+
logger = logging.getLogger(__name__)
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
@router.post("/upload")
async def upload_resume(file: UploadFile = File(...)):
    """Accept a resume upload, parse it, and create a session in Redis.

    Validates content type and size, scans the document layout for ATS
    compatibility issues, parses it into structured ResumeData, and stores
    the parsed resume, raw file, and content type under session keys with a
    TTL. Returns the session id, a short profile summary, and any
    format/content warnings.
    """
    try:
        parser = ResumeParser()

        # Validate file type
        if not file.content_type or not parser.is_supported(file.content_type):
            raise HTTPException(
                status_code=400,
                detail="Invalid file type. Please upload a PDF or Word document."
            )

        # Validate file size (5MB max)
        contents = await file.read()
        if len(contents) > 5 * 1024 * 1024:
            raise HTTPException(status_code=400, detail="File too large. Maximum size is 5MB.")

        # Scan document format for ATS compatibility issues FIRST
        # (best-effort: a scanner failure must not block the upload).
        format_warnings = []
        try:
            scanner = LayoutScanner()
            safety_scan = scanner.scan(contents, file.content_type)
            if safety_scan.has_issues:
                format_warnings = [
                    {"type": w.type, "message": w.message, "recommendation": w.recommendation}
                    for w in safety_scan.warnings
                ]
        except Exception as e:
            logger.warning(f"Format scan failed: {e}")

        try:
            resume = await parser.parse(contents, file.content_type)
        except ValueError as e:
            # Parser signals a recognizably-bad document via ValueError -> 400.
            raise HTTPException(status_code=400, detail=str(e))
        except Exception as e:
            logger.error(f"Resume parsing failed: {e}", exc_info=True)
            raise HTTPException(status_code=500, detail=f"Failed to parse resume: {str(e)}")

        # Validate resume has minimum required content
        content_issues = []
        if not resume.contact.name:
            content_issues.append("No name detected in resume")
        if not resume.experience:
            content_issues.append("No work experience detected")
        if len(resume.skills) == 0:
            content_issues.append("No skills detected")

        # Store in Redis
        session_id = generate_session_id()
        redis = await get_redis()

        await redis.set(
            f"session:{session_id}:resume",
            resume.model_dump_json(),
            ex=settings.session_ttl_seconds,
        )

        # Also store raw file for potential re-processing
        # NOTE(review): `contents` is raw binary (PDF/DOCX) but this shared
        # client is created with decode_responses=True — a later GET on this
        # key may fail to UTF-8 decode. Confirm readers use a binary-safe
        # client or that the payload is base64-encoded somewhere.
        await redis.set(
            f"session:{session_id}:file",
            contents,
            ex=settings.session_ttl_seconds,
        )

        # Store content type for layout scanning
        await redis.set(
            f"session:{session_id}:content_type",
            file.content_type,
            ex=settings.session_ttl_seconds,
        )

        return {
            "session_id": session_id,
            "profile": {
                "name": resume.contact.name,
                "email": resume.contact.email,
                "skills": resume.skills[:10],
                "experience_count": len(resume.experience),
            },
            "format_warnings": format_warnings,
            "content_issues": content_issues,
        }
    except HTTPException:
        raise
    except Exception as e:
        # Last-resort boundary: log with traceback, surface as 500.
        logger.error(f"Upload failed with unexpected error: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Upload failed: {str(e)}")
|
app/core/__init__.py
ADDED
|
File without changes
|
app/core/config.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pydantic_settings import BaseSettings, SettingsConfigDict
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
class Settings(BaseSettings):
    """Application configuration, loaded from environment variables / .env."""

    model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8")

    # Redis
    redis_url: str = "redis://localhost:6379/0"

    # LLM
    llm_provider: str = "google"  # primary provider name (see app.llm.factory)
    llm_model_fast: str = "gemini-2.0-flash"   # cheap/fast model
    llm_model_smart: str = "gemini-2.0-flash"  # higher-quality model

    # API Keys (empty string means "provider not configured")
    openai_api_key: str = ""
    google_api_key: str = ""
    zai_api_key: str = ""
    groq_api_key: str = ""

    # Session
    session_ttl_seconds: int = 7200  # TTL for session:* Redis keys (2 hours)


# Module-level singleton used throughout the app.
settings = Settings()
|
app/core/redis.py
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
import ssl
|
| 3 |
+
from contextlib import asynccontextmanager
|
| 4 |
+
from typing import Optional
|
| 5 |
+
|
| 6 |
+
import redis.asyncio as redis
|
| 7 |
+
from app.core.config import settings
|
| 8 |
+
|
| 9 |
+
# Global client for FastAPI (long-lived connection)
_fastapi_client: Optional[redis.Redis] = None


def _redis_kwargs() -> dict:
    """Extra kwargs for rediss:// (TLS) connections like Upstash."""
    if not settings.redis_url.startswith("rediss://"):
        return {}
    return {"ssl_cert_reqs": None}


async def get_redis() -> redis.Redis:
    """Return the shared Redis client for FastAPI, creating it on first use."""
    global _fastapi_client
    if _fastapi_client is None:
        _fastapi_client = redis.from_url(
            settings.redis_url, decode_responses=True, **_redis_kwargs()
        )
    return _fastapi_client


async def close_redis():
    """Dispose of the shared FastAPI Redis client, if one was created."""
    global _fastapi_client
    if _fastapi_client:
        await _fastapi_client.close()
        _fastapi_client = None


@asynccontextmanager
async def get_redis_for_worker():
    """Yield a fresh Redis client for a Celery task, closing it when done."""
    client = redis.from_url(
        settings.redis_url, decode_responses=True, **_redis_kwargs()
    )
    try:
        yield client
    finally:
        await client.close()
|
app/llm/__init__.py
ADDED
|
File without changes
|
app/llm/base.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
from typing import Protocol, Any, Dict, Optional
from abc import abstractmethod


class LLMProvider(Protocol):
    """Structural interface every LLM backend must implement.

    Concrete providers (OpenAI, Google, etc.) satisfy this protocol; the
    factory and fallback wrapper depend only on this shape.
    """

    # Identifier of the underlying model, e.g. "gpt-4o-mini".
    model: str

    @abstractmethod
    async def complete(self, prompt: str, system: Optional[str] = None) -> str:
        """Generate text completion."""
        ...

    @abstractmethod
    async def complete_json(self, prompt: str, schema: Dict[str, Any]) -> Dict[str, Any]:
        """Generate structured JSON output."""
        ...
|
app/llm/factory.py
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
import logging
|
| 3 |
+
from typing import Dict, List, Tuple, Type
|
| 4 |
+
|
| 5 |
+
from app.llm.base import LLMProvider
|
| 6 |
+
from app.llm.openai_provider import OpenAIProvider
|
| 7 |
+
from app.llm.google_provider import GoogleProvider
|
| 8 |
+
from app.llm.zai_provider import ZAIProvider
|
| 9 |
+
from app.llm.groq_provider import GroqProvider
|
| 10 |
+
from app.llm.fallback_provider import FallbackLLMProvider
|
| 11 |
+
from app.core.config import settings
|
| 12 |
+
|
| 13 |
+
logger = logging.getLogger(__name__)
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
class LLMFactory:
    """Builds LLM provider instances and wires up automatic fallback."""

    _providers: Dict[str, Type[LLMProvider]] = {
        "openai": OpenAIProvider,
        "google": GoogleProvider,
        "zai": ZAIProvider,
        "groq": GroqProvider,
    }

    # Per-provider (fast_model, smart_model) defaults, used for fallbacks.
    _default_models: Dict[str, Tuple[str, str]] = {
        "openai": ("gpt-4o-mini", "gpt-4o"),
        "google": ("gemini-3-flash-preview", "gemini-3-flash-preview"),
        "zai": ("glm-4.7", "glm-4.7"),
        "groq": ("llama-3.1-8b-instant", "llama-3.1-8b-instant"),
    }

    @classmethod
    def _api_key_map(cls) -> Dict[str, str]:
        """Single source of truth: provider name -> configured API key.

        Previously duplicated in _get_available_providers and _get_api_key.
        """
        return {
            "openai": settings.openai_api_key,
            "google": settings.google_api_key,
            "zai": settings.zai_api_key,
            "groq": settings.groq_api_key,
        }

    @classmethod
    def create(cls, provider: str, model: str, api_key: str) -> LLMProvider:
        """Instantiate one provider by name; raises ValueError when unknown."""
        if provider not in cls._providers:
            raise ValueError(f"Unknown provider: {provider}. Available: {list(cls._providers.keys())}")
        return cls._providers[provider](model=model, api_key=api_key)

    @classmethod
    def _get_available_providers(cls, use_fast: bool = True) -> List[LLMProvider]:
        """Get list of available providers with valid API keys, primary first."""
        providers: List[LLMProvider] = []
        api_keys = cls._api_key_map()

        primary = settings.llm_provider

        # Primary provider first, using the explicitly configured model names.
        if api_keys.get(primary):
            model = settings.llm_model_fast if use_fast else settings.llm_model_smart
            providers.append(cls.create(primary, model, api_keys[primary]))
            logger.info(f"Primary provider: {primary} ({model})")

        # Remaining providers with keys, using their per-provider defaults.
        for name, key in api_keys.items():
            if name != primary and key:
                fast_model, smart_model = cls._default_models[name]
                model = fast_model if use_fast else smart_model
                providers.append(cls.create(name, model, key))
                logger.info(f"Fallback provider: {name} ({model})")

        return providers

    @classmethod
    def get_fast(cls) -> LLMProvider:
        """Get configured fast/cheap model with automatic fallback."""
        providers = cls._get_available_providers(use_fast=True)
        if len(providers) == 1:
            return providers[0]
        return FallbackLLMProvider(providers)

    @classmethod
    def get_smart(cls) -> LLMProvider:
        """Get configured smart model with automatic fallback."""
        providers = cls._get_available_providers(use_fast=False)
        if len(providers) == 1:
            return providers[0]
        return FallbackLLMProvider(providers)

    @classmethod
    def _get_api_key(cls) -> str:
        """Get API key for primary provider (legacy method)."""
        return cls._api_key_map().get(settings.llm_provider, "")
|
app/llm/fallback_provider.py
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
import asyncio
|
| 3 |
+
import logging
|
| 4 |
+
import re
|
| 5 |
+
import time
|
| 6 |
+
from typing import Any, Dict, List, Optional
|
| 7 |
+
|
| 8 |
+
from app.llm.base import LLMProvider
|
| 9 |
+
|
| 10 |
+
logger = logging.getLogger(__name__)
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class QuotaExceededError(Exception):
    """Raised when all LLM providers are exhausted."""
    # Raised by FallbackLLMProvider after every configured provider has
    # failed (including per-provider retries); the message aggregates the
    # individual provider errors.
    pass
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
class FallbackLLMProvider(LLMProvider):
    """LLM provider with automatic fallback and retry with exponential backoff.

    Wraps an ordered list of concrete providers. Each call is first retried
    against a provider when the failure looks like a rate limit; once a
    provider is given up on, the next provider in the list is tried.
    QuotaExceededError is raised only when every provider has failed.
    """

    # Retry configuration
    MAX_RETRIES_PER_PROVIDER = 3
    INITIAL_BACKOFF_SECONDS = 5
    MAX_BACKOFF_SECONDS = 65

    def __init__(self, providers: List[LLMProvider]):
        if not providers:
            raise ValueError("At least one provider is required")
        self.providers = providers
        # Advertise the primary provider's model name to callers.
        self.model = providers[0].model

    def _is_rate_limit_error(self, error: Exception) -> bool:
        """Check if error is a rate limit/quota error that might resolve with retry."""
        error_str = str(error).lower()
        rate_limit_indicators = [
            "rate_limit",
            "rate limit",
            "429",
            "too many requests",
            "retry",
            "quota exceeded",
            "resource_exhausted",
        ]
        return any(indicator in error_str for indicator in rate_limit_indicators)

    def _extract_retry_delay(self, error: Exception) -> Optional[float]:
        """Extract retry delay from error message if present.

        Returns the server-suggested delay (capped at MAX_BACKOFF_SECONDS),
        or None when no delay hint is found in the message.
        """
        error_str = str(error)
        # Look for patterns like "retry in 22.428058397s" or "retry_delay { seconds: 22 }"
        patterns = [
            r'retry in ([\d.]+)s',
            r'retry_delay.*?seconds[:\s]+(\d+)',
            r'(\d+)\s*seconds?',
        ]
        for pattern in patterns:
            match = re.search(pattern, error_str, re.IGNORECASE)
            if match:
                try:
                    return min(float(match.group(1)), self.MAX_BACKOFF_SECONDS)
                except ValueError:
                    pass
        return None

    async def _call_with_retry(
        self,
        provider: LLMProvider,
        call_func,
        *args,
        **kwargs
    ) -> Any:
        """Call provider method with retry on rate limit errors.

        Non-rate-limit errors, and the final rate-limit attempt, propagate
        to the caller so the fallback loop can move to the next provider.
        """
        last_error = None
        backoff = self.INITIAL_BACKOFF_SECONDS

        for attempt in range(self.MAX_RETRIES_PER_PROVIDER):
            try:
                # Track time to first token (TTFT)
                start_time = time.time()
                result = await call_func(*args, **kwargs)
                ttft = (time.time() - start_time) * 1000  # Convert to milliseconds
                logger.info(f"⏱️ {provider.__class__.__name__} TTFT: {ttft:.2f}ms ({ttft/1000:.3f}s)")
                return result
            except Exception as e:
                last_error = e

                if self._is_rate_limit_error(e) and attempt < self.MAX_RETRIES_PER_PROVIDER - 1:
                    # Extract delay from error or use exponential backoff
                    delay = self._extract_retry_delay(e) or backoff
                    logger.warning(
                        f"{provider.__class__.__name__} rate limited, "
                        f"retrying in {delay:.1f}s (attempt {attempt + 1}/{self.MAX_RETRIES_PER_PROVIDER})"
                    )
                    await asyncio.sleep(delay)
                    backoff = min(backoff * 2, self.MAX_BACKOFF_SECONDS)
                else:
                    raise

        raise last_error

    async def _run_with_fallback(self, method_name: str, *args) -> Any:
        """Try the named method on each provider in order.

        Shared implementation for complete()/complete_json(); raises
        QuotaExceededError with a per-provider error summary when every
        provider has failed.
        """
        errors_by_provider: List[str] = []

        for i, provider in enumerate(self.providers):
            try:
                logger.info(f"Trying provider {i + 1}/{len(self.providers)}: {provider.__class__.__name__}")
                return await self._call_with_retry(
                    provider,
                    getattr(provider, method_name),
                    *args
                )
            except Exception as e:
                errors_by_provider.append(f"{provider.__class__.__name__}: {str(e)[:100]}")
                logger.warning(f"Provider {provider.__class__.__name__} failed after retries: {e}")
                continue

        error_summary = "; ".join(errors_by_provider)
        raise QuotaExceededError(f"All providers exhausted. Errors: {error_summary}")

    async def complete(self, prompt: str, system: Optional[str] = None) -> str:
        """Plain-text completion with retry and provider fallback."""
        return await self._run_with_fallback("complete", prompt, system)

    async def complete_json(self, prompt: str, schema: Dict[str, Any]) -> Dict[str, Any]:
        """Schema-guided JSON completion with retry and provider fallback."""
        return await self._run_with_fallback("complete_json", prompt, schema)
|
app/llm/google_provider.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
import json
|
| 3 |
+
from typing import Any, Dict, Optional
|
| 4 |
+
import google.generativeai as genai
|
| 5 |
+
from app.llm.base import LLMProvider
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class GoogleProvider(LLMProvider):
    """LLM provider backed by Google Gemini via google-generativeai."""

    def __init__(self, model: str, api_key: str):
        self.model = model
        genai.configure(api_key=api_key)
        self._model = genai.GenerativeModel(model)

    async def complete(self, prompt: str, system: Optional[str] = None) -> str:
        """Return the model's plain-text reply; *system* is prepended to the prompt."""
        full_prompt = f"{system}\n\n{prompt}" if system else prompt
        response = await self._model.generate_content_async(full_prompt)
        return response.text

    async def complete_json(self, prompt: str, schema: Dict[str, Any]) -> Dict[str, Any]:
        """Return the model's reply parsed as JSON matching *schema*.

        Raises json.JSONDecodeError if the model's output is not valid JSON
        after fence stripping.
        """
        system = f"Respond only with valid JSON matching this schema: {json.dumps(schema)}. No markdown, no explanation, just JSON."
        full_prompt = f"{system}\n\n{prompt}"
        response = await self._model.generate_content_async(full_prompt)
        content = response.text
        # Strip markdown code fences if present. Checking "```" anywhere
        # (not just at the start) also handles replies where the model
        # prefixes prose before the fence, and the case-insensitive tag
        # check handles "JSON" as well as "json".
        if "```" in content:
            content = content.split("```")[1]
            if content[:4].lower() == "json":
                content = content[4:]
        return json.loads(content.strip())
|
app/llm/groq_provider.py
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
import json
|
| 3 |
+
from typing import Any, Dict, List, Optional
|
| 4 |
+
from openai import AsyncOpenAI
|
| 5 |
+
from app.llm.base import LLMProvider
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class GroqProvider(LLMProvider):
    """Chat-completion provider that talks to Groq's OpenAI-compatible API."""

    def __init__(self, model: str, api_key: str):
        self.model = model
        # Groq speaks the OpenAI wire protocol, so the stock async client
        # works once pointed at Groq's base URL.
        self.client = AsyncOpenAI(
            api_key=api_key,
            base_url="https://api.groq.com/openai/v1"
        )

    async def complete(self, prompt: str, system: Optional[str] = None) -> str:
        """Return the model's plain-text reply for *prompt* (optional system message)."""
        conversation: List[Dict[str, str]] = (
            [{"role": "system", "content": system}] if system else []
        )
        conversation.append({"role": "user", "content": prompt})

        reply = await self.client.chat.completions.create(
            model=self.model,
            messages=conversation,
        )
        return reply.choices[0].message.content or ""

    async def complete_json(self, prompt: str, schema: Dict[str, Any]) -> Dict[str, Any]:
        """Return the model's reply parsed as JSON conforming to *schema*."""
        system = f"Respond only with valid JSON matching this schema: {json.dumps(schema)}"
        conversation = [
            {"role": "system", "content": system},
            {"role": "user", "content": prompt},
        ]

        reply = await self.client.chat.completions.create(
            model=self.model,
            messages=conversation,
            response_format={"type": "json_object"},
        )
        return json.loads(reply.choices[0].message.content or "{}")
|
app/llm/openai_provider.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
import json
|
| 3 |
+
from typing import Any, Dict, List, Optional
|
| 4 |
+
from openai import AsyncOpenAI
|
| 5 |
+
from app.llm.base import LLMProvider
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class OpenAIProvider(LLMProvider):
    """Chat-completion provider backed by the official OpenAI API."""

    def __init__(self, model: str, api_key: str):
        self.model = model
        self.client = AsyncOpenAI(api_key=api_key)

    async def complete(self, prompt: str, system: Optional[str] = None) -> str:
        """Return the model's plain-text reply for *prompt* (optional system message)."""
        conversation: List[Dict[str, str]] = (
            [{"role": "system", "content": system}] if system else []
        )
        conversation.append({"role": "user", "content": prompt})

        reply = await self.client.chat.completions.create(
            model=self.model,
            messages=conversation,
        )
        return reply.choices[0].message.content or ""

    async def complete_json(self, prompt: str, schema: Dict[str, Any]) -> Dict[str, Any]:
        """Return the model's reply parsed as JSON conforming to *schema*."""
        system = f"Respond only with valid JSON matching this schema: {json.dumps(schema)}"
        conversation = [
            {"role": "system", "content": system},
            {"role": "user", "content": prompt},
        ]

        reply = await self.client.chat.completions.create(
            model=self.model,
            messages=conversation,
            response_format={"type": "json_object"},
        )
        return json.loads(reply.choices[0].message.content or "{}")
|
app/llm/zai_provider.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
import json
|
| 3 |
+
from typing import Any, Dict, List, Optional
|
| 4 |
+
from openai import AsyncOpenAI
|
| 5 |
+
from app.llm.base import LLMProvider
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class ZAIProvider(LLMProvider):
    """Chat-completion provider for Z.AI's OpenAI-compatible endpoint."""

    def __init__(self, model: str, api_key: str):
        self.model = model
        self.client = AsyncOpenAI(
            api_key=api_key,
            base_url="https://api.z.ai/api/coding/paas/v4",
            timeout=60.0
        )

    async def complete(self, prompt: str, system: Optional[str] = None) -> str:
        """Return the model's plain-text reply for *prompt* (optional system message)."""
        conversation: List[Dict[str, str]] = (
            [{"role": "system", "content": system}] if system else []
        )
        conversation.append({"role": "user", "content": prompt})

        reply = await self.client.chat.completions.create(
            model=self.model,
            messages=conversation,
        )
        return reply.choices[0].message.content or ""

    async def complete_json(self, prompt: str, schema: Dict[str, Any]) -> Dict[str, Any]:
        """Return the model's reply parsed as JSON conforming to *schema*.

        Retries without ``response_format`` when the endpoint rejects it.
        """
        system = f"Respond only with valid JSON matching this schema: {json.dumps(schema)}"
        conversation = [
            {"role": "system", "content": system},
            {"role": "user", "content": prompt},
        ]

        try:
            reply = await self.client.chat.completions.create(
                model=self.model,
                messages=conversation,
                response_format={"type": "json_object"},
            )
        except Exception:
            # Fallback without response_format if not supported
            reply = await self.client.chat.completions.create(
                model=self.model,
                messages=conversation,
            )
        return json.loads(reply.choices[0].message.content or "{}")
|
app/main.py
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from contextlib import asynccontextmanager
import logging
import os

from fastapi import FastAPI, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from app.core.redis import close_redis
from app.api.routes import upload, analyze, progress, result, export, compare

logger = logging.getLogger(__name__)


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan: no startup work; close Redis on shutdown."""
    yield
    await close_redis()


app = FastAPI(
    title="CV-Buddy API",
    description="AI-powered resume customization",
    version="0.1.0",
    lifespan=lifespan,
)

# CORS: local dev frontend, plus the deployed frontend when FRONTEND_URL is set.
allowed_origins = [
    "http://localhost:3000",
]
# Add production Vercel URL if set
vercel_url = os.environ.get("FRONTEND_URL")
if vercel_url:
    allowed_origins.append(vercel_url)

app.add_middleware(
    CORSMiddleware,
    allow_origins=allowed_origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


# Global exception handler - ALWAYS return JSON
@app.exception_handler(Exception)
async def global_exception_handler(request: Request, exc: Exception):
    """Catch all unhandled exceptions and return JSON error response.

    Maps quota/rate-limit failures to 429 and provider exhaustion to 503
    so the frontend can show actionable messages; everything else is 500.
    """
    logger.error(f"Unhandled exception: {exc}", exc_info=True)

    # Extract useful error message
    error_message = str(exc)

    # Check for common error types
    if "quota" in error_message.lower() or "rate" in error_message.lower():
        return JSONResponse(
            status_code=429,
            content={
                "detail": "API rate limit reached. Please wait a moment and try again.",
                "error_type": "rate_limit",
                "original_error": error_message[:500],
            }
        )

    if "exhausted" in error_message.lower():
        return JSONResponse(
            status_code=503,
            content={
                "detail": "AI service temporarily unavailable. Please try again in a few seconds.",
                "error_type": "service_unavailable",
                "original_error": error_message[:500],
            }
        )

    return JSONResponse(
        status_code=500,
        content={
            "detail": f"An error occurred: {error_message[:200]}",
            "error_type": "internal_error",
        }
    )


# Include routers
app.include_router(upload.router, prefix="/api", tags=["upload"])
app.include_router(analyze.router, prefix="/api", tags=["analyze"])
app.include_router(progress.router, prefix="/api", tags=["progress"])
app.include_router(result.router, prefix="/api", tags=["result"])
app.include_router(export.router, prefix="/api", tags=["export"])
app.include_router(compare.router, prefix="/api", tags=["compare"])


@app.get("/health")
async def health_check():
    """Liveness probe for the deployment platform."""
    return {"status": "ok"}
|
app/models/__init__.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from app.models.resume import ResumeData, ContactInfo, Experience, Education
|
| 2 |
+
from app.models.job import JobData
|
| 3 |
+
from app.models.score import ATSScore, ScoreBreakdown
|
| 4 |
+
from app.models.customization import CustomizationResult, Change, Intensity
|
| 5 |
+
from app.models.analysis import BulletAnalysis, LayoutWarning, SafetyScan, KeywordPlacement
|
app/models/analysis.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import List, Optional
|
| 2 |
+
from pydantic import BaseModel
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
class BulletAnalysis(BaseModel):
    """Analysis of a single resume bullet point against job requirements."""
    location: str  # e.g., "experience[0].bullets[2]"
    original_text: str  # the bullet exactly as it appears in the resume
    relevance_score: int  # 0-100
    matched_keywords: List[str] = []  # job keywords the bullet already contains
    missing_keywords: List[str] = []  # job keywords the bullet lacks
    suggestion: str = ""  # human-readable improvement hint

    # After customization (populated if bullet was modified)
    customized_text: Optional[str] = None
    new_relevance_score: Optional[int] = None  # 0-100, score of customized_text
    keywords_added: List[str] = []  # keywords introduced by the customization
| 18 |
+
|
| 19 |
+
|
| 20 |
+
class LayoutWarning(BaseModel):
    """A single layout compatibility warning."""
    type: str  # "multi_column", "complex_table", "graphics"
    message: str  # what was detected
    recommendation: str  # how the user should fix it
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
class SafetyScan(BaseModel):
    """Results of layout safety analysis for ATS compatibility."""
    has_issues: bool = False  # True when any warning was produced
    warnings: List[LayoutWarning] = []
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
class KeywordPlacement(BaseModel):
    """Analysis of where a keyword appears and if it's naturally integrated."""
    keyword: str
    locations: List[str] = []  # Where the keyword appears
    is_natural: bool = True  # False when the placement looks like keyword stuffing
    flag: Optional[str] = None  # Warning message if unnatural
|
app/models/customization.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from enum import Enum
|
| 2 |
+
from typing import List, Optional
|
| 3 |
+
from pydantic import BaseModel
|
| 4 |
+
from app.models.resume import ResumeData
|
| 5 |
+
from app.models.score import ATSScore
|
| 6 |
+
from app.models.analysis import BulletAnalysis, SafetyScan, KeywordPlacement
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class Intensity(str, Enum):
    """How aggressively the customizer may rewrite resume content."""
    CONSERVATIVE = "conservative"
    MODERATE = "moderate"
    AGGRESSIVE = "aggressive"
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class Change(BaseModel):
    """One edit applied to the resume during customization."""
    type: str  # "added", "modified", "reordered"
    location: str  # e.g., "experience[0].bullets[2]"
    before: str = ""  # original text (empty for additions)
    after: str = ""  # resulting text (empty for removals)
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
class CustomizationResult(BaseModel):
    """Full output of a resume customization run: before/after data and scores."""
    original: ResumeData
    customized: ResumeData
    changes: List[Change] = []  # individual edits applied
    original_score: ATSScore
    customized_score: ATSScore

    # Enhanced analysis fields
    bullet_analysis: List[BulletAnalysis] = []
    safety_scan: Optional[SafetyScan] = None
    keyword_quality: List[KeywordPlacement] = []
|
app/models/job.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import List
|
| 2 |
+
from pydantic import BaseModel
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
class JobData(BaseModel):
    """Structured job posting extracted from a description or URL."""
    title: str = ""
    company: str = ""
    location: str = ""
    requirements: List[str] = []  # must-have qualifications
    responsibilities: List[str] = []  # day-to-day duties
    keywords_required: List[str] = []  # keywords an ATS would treat as mandatory
    keywords_preferred: List[str] = []  # nice-to-have keywords
    raw_text: str = ""  # unprocessed posting text
|
app/models/resume.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import List
|
| 2 |
+
from pydantic import BaseModel
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
class ContactInfo(BaseModel):
    """Candidate contact details parsed from the resume header."""
    name: str = ""
    email: str = ""
    phone: str = ""
    linkedin: str = ""
    location: str = ""
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class Experience(BaseModel):
    """One employment entry with its achievement bullets."""
    company: str
    title: str
    dates: str  # free-form date range as written in the resume
    bullets: List[str]
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
class Education(BaseModel):
    """One education entry."""
    school: str
    degree: str
    dates: str  # free-form date range as written in the resume
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
class ResumeData(BaseModel):
    """Structured representation of a parsed resume."""
    contact: ContactInfo
    summary: str = ""
    experience: List[Experience] = []
    education: List[Education] = []
    skills: List[str] = []
    raw_text: str = ""  # full unstructured resume text, used for keyword matching
|
app/models/score.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import List
|
| 2 |
+
from pydantic import BaseModel
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
class ScoreBreakdown(BaseModel):
    """Per-component ATS sub-scores; see ATSScorer for the weights used."""
    keyword_match: float = 0.0  # 0-100
    skills_alignment: float = 0.0  # 0-100
    experience_relevance: float = 0.0  # 0-100
    format_score: float = 100.0  # 0-100 (always 100 for our generated resumes)
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class ATSScore(BaseModel):
    """Aggregate ATS compatibility score for a resume against one job."""
    total: int = 0  # 0-100
    breakdown: ScoreBreakdown = ScoreBreakdown()
    matched_keywords: List[str] = []  # job keywords found in the resume
    missing_keywords: List[str] = []  # job keywords absent from the resume
|
app/services/__init__.py
ADDED
|
File without changes
|
app/services/ats_scorer.py
ADDED
|
@@ -0,0 +1,195 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
import re
|
| 3 |
+
from typing import List, Tuple
|
| 4 |
+
from app.models.resume import ResumeData
|
| 5 |
+
from app.models.job import JobData
|
| 6 |
+
from app.models.score import ATSScore, ScoreBreakdown
|
| 7 |
+
from app.models.analysis import KeywordPlacement
|
| 8 |
+
from app.llm.factory import LLMFactory
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class ATSScorer:
    """Scores a resume against a job posting the way an ATS would.

    The total score is a weighted blend of keyword match, skills alignment,
    LLM-judged experience relevance, and format quality.
    """

    # Weights for score components
    WEIGHT_KEYWORD = 0.4
    WEIGHT_SKILLS = 0.3
    WEIGHT_EXPERIENCE = 0.2
    WEIGHT_FORMAT = 0.1

    def match_keywords(self, text: str, keywords: List[str]) -> Tuple[List[str], List[str]]:
        """Match keywords in text (case-insensitive).

        Returns (matched, missing) in the order the keywords were given;
        matching is whole-word to avoid e.g. "Java" matching "JavaScript".
        """
        text_lower = text.lower()
        matched = []
        missing = []

        for keyword in keywords:
            # Create pattern for word boundary matching
            pattern = r'\b' + re.escape(keyword.lower()) + r'\b'
            if re.search(pattern, text_lower):
                matched.append(keyword)
            else:
                missing.append(keyword)

        return matched, missing

    def calculate_keyword_score(self, resume: ResumeData, job: JobData) -> Tuple[float, List[str], List[str]]:
        """Calculate keyword match percentage.

        Required keywords carry 70 of the 100 points and preferred the
        remaining 30 when both lists are present. When the job lists only
        one kind of keyword, that kind carries the full 100 points —
        previously a required-only job could never score above 70 even on
        a perfect match.
        """
        required = job.keywords_required
        preferred = job.keywords_preferred
        all_keywords = required + preferred
        if not all_keywords:
            return 100.0, [], []

        # Combine all resume text
        resume_text = " ".join([
            resume.raw_text,
            " ".join(resume.skills),
            resume.summary,
        ])

        matched, missing = self.match_keywords(resume_text, all_keywords)

        required_matched = [k for k in matched if k in required]
        preferred_matched = [k for k in matched if k in preferred]

        # Allocate weight: 70/30 when both lists exist, otherwise the
        # present list carries the full 100 points.
        if required and preferred:
            required_weight, preferred_weight = 70.0, 30.0
        elif required:
            required_weight, preferred_weight = 100.0, 0.0
        else:
            required_weight, preferred_weight = 0.0, 100.0

        required_score = (len(required_matched) / len(required)) * required_weight if required else 0.0
        preferred_score = (len(preferred_matched) / len(preferred)) * preferred_weight if preferred else 0.0

        return required_score + preferred_score, matched, missing

    def calculate_skills_score(self, resume: ResumeData, job: JobData) -> float:
        """Calculate skills alignment score (exact, case-insensitive skill matches)."""
        if not job.keywords_required:
            return 100.0

        resume_skills_lower = [s.lower() for s in resume.skills]
        matched = sum(1 for k in job.keywords_required if k.lower() in resume_skills_lower)

        return (matched / len(job.keywords_required)) * 100

    async def calculate_experience_relevance(self, resume: ResumeData, job: JobData) -> float:
        """Use LLM to judge experience relevance (0-100).

        Falls back to a neutral 50.0 when either side has no data or the
        LLM's reply contains no usable number.
        """
        if not resume.experience or not job.responsibilities:
            return 50.0

        # Keep the prompt small: top 3 roles, 3 bullets each, 5 responsibilities.
        experience_text = "\n".join([
            f"{exp.title} at {exp.company}: " + "; ".join(exp.bullets[:3])
            for exp in resume.experience[:3]
        ])

        responsibilities_text = "\n".join(job.responsibilities[:5])

        prompt = f"""Rate how relevant this candidate's experience is to the job responsibilities on a scale of 0-100.

Candidate Experience:
{experience_text}

Job Responsibilities:
{responsibilities_text}

Respond with only a number between 0 and 100."""

        llm = LLMFactory.get_fast()
        response = await llm.complete(prompt)

        try:
            match = re.search(r'\d+', response)
            if match:
                score = int(match.group())
                return min(100, max(0, score))
            return 50.0
        except (AttributeError, ValueError):
            return 50.0

    async def calculate(self, resume: ResumeData, job: JobData) -> ATSScore:
        """Calculate complete ATS score from the weighted sub-scores."""
        keyword_score, matched, missing = self.calculate_keyword_score(resume, job)
        skills_score = self.calculate_skills_score(resume, job)
        experience_score = await self.calculate_experience_relevance(resume, job)
        format_score = 100.0  # Our generated resumes are always ATS-friendly

        total = int(
            keyword_score * self.WEIGHT_KEYWORD +
            skills_score * self.WEIGHT_SKILLS +
            experience_score * self.WEIGHT_EXPERIENCE +
            format_score * self.WEIGHT_FORMAT
        )

        return ATSScore(
            total=total,
            breakdown=ScoreBreakdown(
                keyword_match=keyword_score,
                skills_alignment=skills_score,
                experience_relevance=experience_score,
                format_score=format_score,
            ),
            matched_keywords=matched,
            missing_keywords=missing,
        )

    def check_keyword_quality(
        self,
        resume: ResumeData,
        job: JobData,
        added_keywords: List[str]
    ) -> List[KeywordPlacement]:
        """Check if keywords are naturally placed or potentially stuffed."""
        placements = []

        for keyword in added_keywords:
            locations = self._find_keyword_locations(resume, keyword)
            is_natural, flag = self._evaluate_placement(locations, keyword)

            placements.append(KeywordPlacement(
                keyword=keyword,
                locations=locations,
                is_natural=is_natural,
                flag=flag
            ))

        return placements

    def _find_keyword_locations(self, resume: ResumeData, keyword: str) -> List[str]:
        """Find all locations where a keyword appears in the resume."""
        locations = []
        pattern = r'\b' + re.escape(keyword.lower()) + r'\b'

        # Check summary
        if resume.summary and re.search(pattern, resume.summary.lower()):
            locations.append("summary")

        # Check skills (record the section once, however many skills match)
        for skill in resume.skills:
            if re.search(pattern, skill.lower()):
                locations.append("skills")
                break

        # Check experience bullets
        for exp_idx, exp in enumerate(resume.experience):
            for bullet_idx, bullet in enumerate(exp.bullets):
                if re.search(pattern, bullet.lower()):
                    locations.append(f"experience[{exp_idx}].bullets[{bullet_idx}]")

        return locations

    def _evaluate_placement(self, locations: List[str], keyword: str) -> Tuple[bool, str | None]:
        """Evaluate if a keyword placement is natural or stuffed.

        Returns (is_natural, flag); flag is a user-facing warning only
        when the placement looks suspicious.
        """
        if not locations:
            return True, None  # Not found, no issue

        # Check if keyword ONLY appears in skills (potential stuffing)
        if locations == ["skills"]:
            return False, f"'{keyword}' only appears in Skills section - consider demonstrating it in your experience bullets"

        # Check if keyword appears too many times (over-optimization)
        experience_mentions = [loc for loc in locations if "experience" in loc]
        if len(experience_mentions) > 3:
            return False, f"'{keyword}' appears {len(experience_mentions)} times - this may seem repetitive to reviewers"

        # Natural: appears in experience or summary with context
        return True, None
|
app/services/bullet_analyzer.py
ADDED
|
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
import json
|
| 3 |
+
import logging
|
| 4 |
+
import re
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
from typing import List, Optional
|
| 7 |
+
|
| 8 |
+
from app.models.resume import ResumeData
|
| 9 |
+
from app.models.job import JobData
|
| 10 |
+
from app.models.analysis import BulletAnalysis
|
| 11 |
+
from app.llm.factory import LLMFactory
|
| 12 |
+
|
| 13 |
+
logger = logging.getLogger(__name__)
|
| 14 |
+
|
| 15 |
+
PROMPT_PATH = Path(__file__).parent.parent.parent / "prompts" / "analyze_bullets.txt"
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
class BulletAnalyzer:
    """Analyzes individual resume bullet points against job requirements.

    A cheap whole-word keyword-overlap heuristic provides a baseline relevance
    score; the "fast" LLM then refines it and proposes a rewrite. When the LLM
    call fails or returns unparseable output, the heuristic result is used.
    """

    def __init__(self):
        # Loaded once per instance; the template exposes {bullet_text},
        # {required_keywords}, {preferred_keywords} and {responsibilities}.
        self.prompt_template = PROMPT_PATH.read_text()

    def _get_all_bullets(self, resume: ResumeData) -> List[tuple[str, str]]:
        """Extract all bullets as (location, text) pairs from the resume.

        Locations use the path form "experience[i].bullets[j]".
        """
        bullets = []
        for exp_idx, exp in enumerate(resume.experience):
            for bullet_idx, bullet in enumerate(exp.bullets):
                location = f"experience[{exp_idx}].bullets[{bullet_idx}]"
                bullets.append((location, bullet))
        return bullets

    def _calculate_keyword_overlap(self, text: str, keywords: List[str]) -> List[str]:
        """Return the keywords that appear (whole-word, case-insensitive) in text."""
        text_lower = text.lower()
        matched = []
        for keyword in keywords:
            pattern = r'\b' + re.escape(keyword.lower()) + r'\b'
            if re.search(pattern, text_lower):
                matched.append(keyword)
        return matched

    async def analyze_bullet(
        self,
        bullet_text: str,
        location: str,
        job: JobData
    ) -> BulletAnalysis:
        """Analyze a single bullet point against job requirements."""
        # Quick keyword check first.
        all_keywords = job.keywords_required + job.keywords_preferred
        matched = self._calculate_keyword_overlap(bullet_text, all_keywords)

        # Baseline relevance from keyword overlap (neutral 50 when the job
        # lists no keywords at all).
        if all_keywords:
            base_score = int((len(matched) / len(all_keywords)) * 100)
        else:
            base_score = 50

        # Use LLM for deeper analysis and suggestions.
        prompt = self.prompt_template.format(
            bullet_text=bullet_text,
            required_keywords=", ".join(job.keywords_required[:10]),
            preferred_keywords=", ".join(job.keywords_preferred[:10]),
            responsibilities="\n".join(job.responsibilities[:5])
        )

        try:
            llm = LLMFactory.get_fast()
            response = await llm.complete(prompt)

            # The model may wrap its JSON in prose; grab the outermost braces.
            json_match = re.search(r'\{[\s\S]*\}', response)
            if json_match:
                data = json.loads(json_match.group())
                return BulletAnalysis(
                    location=location,
                    original_text=bullet_text,
                    relevance_score=data.get("relevance_score", base_score),
                    matched_keywords=data.get("matched_keywords", matched),
                    missing_keywords=data.get("missing_keywords", []),
                    suggestion=data.get("suggestion", "")
                )
        except Exception as e:
            # The original caught `(json.JSONDecodeError, Exception)`; the tuple
            # was redundant since Exception already covers JSONDecodeError.
            logger.warning(f"Failed to parse bullet analysis: {e}")

        # Fallback to basic keyword-only analysis.
        missing = [k for k in all_keywords if k not in matched]
        return BulletAnalysis(
            location=location,
            original_text=bullet_text,
            relevance_score=base_score,
            matched_keywords=matched,
            missing_keywords=missing[:5],
            suggestion=f"Consider adding keywords: {', '.join(missing[:3])}" if missing else ""
        )

    async def analyze_all_bullets(
        self,
        resume: ResumeData,
        job: JobData,
        max_bullets: int = 15
    ) -> List[BulletAnalysis]:
        """Analyze up to *max_bullets* non-empty bullet points in the resume."""
        bullets = self._get_all_bullets(resume)
        analyses = []

        # Cap the number of (slow, metered) LLM calls.
        for location, bullet_text in bullets[:max_bullets]:
            if bullet_text.strip():
                analysis = await self.analyze_bullet(bullet_text, location, job)
                analyses.append(analysis)

        return analyses

    def update_with_customized(
        self,
        original_analyses: List[BulletAnalysis],
        customized_resume: ResumeData,
        job: JobData
    ) -> List[BulletAnalysis]:
        """Annotate existing analyses with their customized bullet versions.

        Mutates each BulletAnalysis whose bullet was rewritten, attaching the
        new text, an updated relevance score, and the keywords that were added.
        """
        customized_bullets = dict(self._get_all_bullets(customized_resume))
        all_keywords = job.keywords_required + job.keywords_preferred

        updated = []
        for analysis in original_analyses:
            new_text = customized_bullets.get(analysis.location)

            if new_text and new_text != analysis.original_text:
                # Recompute the keyword-overlap score for the rewritten bullet.
                new_matched = self._calculate_keyword_overlap(new_text, all_keywords)
                new_score = int((len(new_matched) / len(all_keywords)) * 100) if all_keywords else 50

                # Keywords the customization introduced.
                old_matched = set(analysis.matched_keywords)
                keywords_added = [k for k in new_matched if k not in old_matched]

                analysis.customized_text = new_text
                # Guarantee a visible improvement, but clamp to the 0-100 scale
                # (the original `max(new_score, relevance_score + 10)` could
                # report scores above 100).
                analysis.new_relevance_score = min(100, max(new_score, analysis.relevance_score + 10))
                analysis.keywords_added = keywords_added

            updated.append(analysis)

        return updated
|
app/services/job_scraper.py
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
import re
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
from typing import Dict, Any, List
|
| 5 |
+
import httpx
|
| 6 |
+
from bs4 import BeautifulSoup
|
| 7 |
+
from app.models.job import JobData
|
| 8 |
+
from app.llm.factory import LLMFactory
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class JobScraper:
    """Fetches job postings (by URL or pasted text) and extracts structured JobData.

    Heavy lifting is delegated to the "fast" LLM, which fills an empty JSON
    schema from the cleaned posting text.
    """

    def __init__(self):
        self.prompts_dir = Path(__file__).parent.parent.parent / "prompts"

    def validate_url(self, url: str) -> bool:
        """Lightweight sanity check that *url* looks like an http(s) URL."""
        if not url:
            return False
        pattern = r'^https?://[^\s/$.?#].[^\s]*$'
        return bool(re.match(pattern, url))

    async def fetch_page(self, url: str) -> str:
        """Download *url* (following redirects) and return the raw HTML.

        Raises httpx.HTTPStatusError on non-2xx responses.
        """
        async with httpx.AsyncClient(follow_redirects=True, timeout=30.0) as client:
            headers = {
                "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36"
            }
            response = await client.get(url, headers=headers)
            response.raise_for_status()
            return response.text

    def extract_text(self, html: str) -> str:
        """Strip markup and page chrome from HTML, returning cleaned visible text."""
        soup = BeautifulSoup(html, "html.parser")

        # Remove script/style and non-content chrome before extracting text.
        for element in soup(["script", "style", "nav", "footer", "header"]):
            element.decompose()

        text = soup.get_text(separator="\n", strip=True)

        # Collapse blank lines and surrounding whitespace.
        lines = [line.strip() for line in text.splitlines() if line.strip()]
        return "\n".join(lines)

    async def _extract_job(self, prompt_text: str, raw_text: str) -> JobData:
        """Run the LLM extraction shared by scrape() and parse_text().

        *prompt_text* feeds the prompt (truncated); *raw_text* is stored
        verbatim on the resulting JobData.
        """
        prompt_template = (self.prompts_dir / "extract_job.txt").read_text()
        prompt = prompt_template.replace("{job_text}", prompt_text[:8000])  # Limit context

        # Empty skeleton the LLM is asked to fill (fresh list objects per call).
        schema: Dict[str, Any] = {
            "title": "",
            "company": "",
            "location": "",
            "requirements": [],
            "responsibilities": [],
            "keywords_required": [],
            "keywords_preferred": [],
        }

        llm = LLMFactory.get_fast()
        data = await llm.complete_json(prompt, schema)

        return JobData(
            title=data.get("title", ""),
            company=data.get("company", ""),
            location=data.get("location", ""),
            requirements=data.get("requirements", []),
            responsibilities=data.get("responsibilities", []),
            keywords_required=data.get("keywords_required", []),
            keywords_preferred=data.get("keywords_preferred", []),
            raw_text=raw_text,
        )

    async def scrape(self, url: str) -> JobData:
        """Scrape a job posting URL into structured JobData.

        Raises ValueError for an invalid URL or a page with too little content.
        """
        if not self.validate_url(url):
            raise ValueError(f"Invalid URL: {url}")

        html = await self.fetch_page(url)
        raw_text = self.extract_text(html)

        if len(raw_text) < 100:
            raise ValueError("Could not extract sufficient job content from page")

        # scrape() and parse_text() previously duplicated the schema, prompt
        # and JobData construction verbatim; both now share _extract_job().
        return await self._extract_job(raw_text, raw_text)

    async def parse_text(self, job_text: str) -> JobData:
        """Parse job description from raw text (manual paste fallback)."""
        return await self._extract_job(job_text, job_text)
|
app/services/layout_scanner.py
ADDED
|
@@ -0,0 +1,196 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
import io
|
| 3 |
+
import logging
|
| 4 |
+
import re
|
| 5 |
+
from typing import List, Tuple
|
| 6 |
+
|
| 7 |
+
import fitz # PyMuPDF
|
| 8 |
+
from docx import Document
|
| 9 |
+
|
| 10 |
+
from app.models.analysis import SafetyScan, LayoutWarning
|
| 11 |
+
|
| 12 |
+
logger = logging.getLogger(__name__)
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class LayoutScanner:
    """Scans resume layout for ATS compatibility issues.

    Detects multi-column layouts, tables, and graphics in PDF/DOCX files.
    All detection is best-effort: failures are logged and never block the
    pipeline (an unscannable file simply reports no issues).
    """

    def scan(self, file_bytes: bytes, content_type: str) -> SafetyScan:
        """Scan a resume file for layout issues.

        Unknown content types yield an empty (clean) scan result.
        """
        warnings: List[LayoutWarning] = []

        try:
            if "pdf" in content_type:
                warnings = self._scan_pdf(file_bytes)
            elif "wordprocessingml" in content_type or "docx" in content_type:
                warnings = self._scan_docx(file_bytes)
        except Exception as e:
            logger.warning(f"Layout scan failed: {e}")

        return SafetyScan(
            has_issues=len(warnings) > 0,
            warnings=warnings
        )

    def _scan_pdf(self, file_bytes: bytes) -> List[LayoutWarning]:
        """Scan PDF pages for columns, tables and graphics (one warning per type).

        The original version `break`-ed out of the page loop after the FIRST
        warning of any kind, so a resume with both columns and tables only
        reported the columns; each issue type is now checked independently
        (mirroring the DOCX path, which can report multiple warning types).
        """
        warnings = []

        try:
            # Context manager ensures the document is closed even on error
            # (the original's trailing doc.close() leaked on exceptions).
            with fitz.open(stream=file_bytes, filetype="pdf") as doc:
                found_columns = found_tables = found_graphics = False

                for page in doc:
                    if not found_columns and self._detect_columns_pdf(page):
                        found_columns = True
                        warnings.append(LayoutWarning(
                            type="multi_column",
                            message="Resume appears to use a multi-column layout",
                            recommendation="Switch to a single-column layout. Many ATS systems read left-to-right, line-by-line, which can scramble multi-column content."
                        ))

                    if not found_tables and self._detect_tables_pdf(page):
                        found_tables = True
                        warnings.append(LayoutWarning(
                            type="complex_table",
                            message="Tables detected in your resume",
                            recommendation="Replace tables with simple bullet points. ATS systems often cannot parse table cells correctly."
                        ))

                    if not found_graphics and self._detect_graphics_pdf(page):
                        found_graphics = True
                        warnings.append(LayoutWarning(
                            type="graphics",
                            message="Images or graphics detected in your resume",
                            recommendation="Remove decorative graphics, icons, or images. ATS cannot read text in images."
                        ))

                    # Stop early once every issue type has been found.
                    if found_columns and found_tables and found_graphics:
                        break
        except Exception as e:
            logger.warning(f"PDF scan error: {e}")

        return warnings

    def _detect_columns_pdf(self, page: fitz.Page) -> bool:
        """Detect if page has multiple columns by analyzing text block positions."""
        blocks = page.get_text("dict")["blocks"]
        text_blocks = [b for b in blocks if b.get("type") == 0]  # Type 0 = text

        # Too few blocks to infer a layout.
        if len(text_blocks) < 4:
            return False

        # Cluster the left edges of text blocks into ~50pt buckets; several
        # distinct left margins suggest side-by-side columns.
        unique_margins = set()
        for b in text_blocks:
            margin = round(b["bbox"][0] / 50) * 50
            unique_margins.add(margin)

        # Ignore the leftmost margin band; two or more remaining distinct
        # margins (beyond indentation noise) indicates a multi-column page.
        significant_margins = [m for m in unique_margins if m > 50]
        return len(significant_margins) >= 2

    def _detect_tables_pdf(self, page: fitz.Page) -> bool:
        """Detect tables in PDF by looking for grid-like drawn line structures."""
        drawings = page.get_drawings()

        horizontal_lines = 0
        vertical_lines = 0

        for d in drawings:
            if d.get("items"):
                for item in d["items"]:
                    if item[0] == "l":  # Line segment
                        p1, p2 = item[1], item[2]
                        if abs(p1.y - p2.y) < 2:  # Horizontal line
                            horizontal_lines += 1
                        elif abs(p1.x - p2.x) < 2:  # Vertical line
                            vertical_lines += 1

        # Multiple horizontal and vertical lines suggest a table grid.
        return horizontal_lines >= 3 and vertical_lines >= 2

    def _detect_graphics_pdf(self, page: fitz.Page) -> bool:
        """Detect significant (non-icon) images in a PDF page."""
        images = page.get_images()
        # Filter out very small images (likely bullets or icons).
        significant_images = []
        for img in images:
            try:
                xref = img[0]
                base_image = page.parent.extract_image(xref)
                if base_image:
                    width = base_image.get("width", 0)
                    height = base_image.get("height", 0)
                    if width > 100 or height > 100:
                        significant_images.append(img)
            except Exception:
                # Images whose metadata cannot be extracted are ignored
                # (was a bare `except:`, which also swallowed SystemExit etc.).
                pass

        return len(significant_images) > 0

    def _scan_docx(self, file_bytes: bytes) -> List[LayoutWarning]:
        """Scan DOCX for layout issues (tables, column sections, text boxes)."""
        warnings = []

        try:
            doc = Document(io.BytesIO(file_bytes))

            # Tables with more than one column are often used for layout.
            if len(doc.tables) > 0:
                for table in doc.tables:
                    if len(table.columns) > 1:
                        warnings.append(LayoutWarning(
                            type="complex_table",
                            message="Tables detected in your resume",
                            recommendation="Replace tables with simple paragraphs and bullet points. ATS systems often misread table layouts."
                        ))
                        break

            # Multi-column section settings (w:cols with num > 1).
            for section in doc.sections:
                if hasattr(section, '_sectPr'):
                    cols = section._sectPr.find('.//{http://schemas.openxmlformats.org/wordprocessingml/2006/main}cols')
                    if cols is not None:
                        num_cols = cols.get('{http://schemas.openxmlformats.org/wordprocessingml/2006/main}num')
                        if num_cols and int(num_cols) > 1:
                            warnings.append(LayoutWarning(
                                type="multi_column",
                                message="Resume uses a multi-column layout",
                                recommendation="Switch to a single-column format for better ATS compatibility."
                            ))
                            break

            # Text boxes are commonly used to fake sidebar layouts.
            if self._has_text_boxes_docx(doc):
                warnings.append(LayoutWarning(
                    type="multi_column",
                    message="Text boxes detected (possibly a sidebar layout)",
                    recommendation="Remove text boxes and use a linear, single-column layout instead."
                ))

        except Exception as e:
            logger.warning(f"DOCX scan error: {e}")

        return warnings

    def _has_text_boxes_docx(self, doc: Document) -> bool:
        """Check if DOCX contains text boxes (inspects each paragraph's raw XML)."""
        try:
            # Text boxes appear as drawing elements in the paragraph XML.
            for para in doc.paragraphs:
                if para._element.xml and 'textbox' in para._element.xml.lower():
                    return True
                if para._element.xml and 'w:drawing' in para._element.xml:
                    return True
        except Exception:
            # Best-effort XML introspection; treat failures as "no text boxes"
            # (was a bare `except:`).
            pass
        return False
|
app/services/progress.py
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
import json
|
| 3 |
+
from enum import Enum
|
| 4 |
+
from typing import Dict, Any, Optional
|
| 5 |
+
from redis.asyncio import Redis
|
| 6 |
+
from app.core.redis import get_redis
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class ProgressStep(str, Enum):
    """Pipeline stages published to progress subscribers; values are the wire strings."""

    PARSING_RESUME = "parsing_resume"
    SCRAPING_JOB = "scraping_job"
    CALCULATING_ORIGINAL_SCORE = "calculating_original_score"
    CUSTOMIZING = "customizing"
    CALCULATING_NEW_SCORE = "calculating_new_score"
    FINALIZING = "finalizing"
    COMPLETE = "complete"
    ERROR = "error"


# Percent-complete reported for each step; keep in sync with ProgressStep.
# ERROR deliberately has no entry: ProgressService.update falls back to 0
# via STEP_PROGRESS.get(step, 0).
STEP_PROGRESS: Dict[ProgressStep, int] = {
    ProgressStep.PARSING_RESUME: 15,
    ProgressStep.SCRAPING_JOB: 30,
    ProgressStep.CALCULATING_ORIGINAL_SCORE: 40,
    ProgressStep.CUSTOMIZING: 80,
    ProgressStep.CALCULATING_NEW_SCORE: 90,
    ProgressStep.FINALIZING: 95,
    ProgressStep.COMPLETE: 100,
}
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
class ProgressService:
    """Publishes task progress over Redis pub/sub and caches the latest state.

    Each update is published on "progress:<task_id>" for live SSE subscribers
    AND stored under "progress_state:<task_id>" (1h TTL) so late subscribers
    can read the current state.
    """

    def __init__(self, task_id: str, redis_client: Optional[Redis] = None):
        self.task_id = task_id
        self.channel = f"progress:{task_id}"
        # Optional injected client (e.g. for tests); otherwise the shared
        # connection from app.core.redis is fetched lazily.
        self._redis = redis_client

    async def _get_redis(self) -> Redis:
        if self._redis:
            return self._redis
        return await get_redis()

    async def _publish_and_store(self, data: Dict[str, Any]) -> None:
        """Publish *data* on the SSE channel and cache it for late subscribers.

        Shared by update() and error(), which previously duplicated this logic
        (and serialized the payload twice each).
        """
        redis = await self._get_redis()
        payload = json.dumps(data)

        # Publish to channel for SSE subscribers.
        await redis.publish(self.channel, payload)

        # Store current state for late subscribers.
        await redis.set(
            f"progress_state:{self.task_id}",
            payload,
            ex=3600,  # 1 hour TTL
        )

    async def update(self, step: ProgressStep, message: str = "", result_id: str = ""):
        """Report a normal progress step (percent derived from STEP_PROGRESS)."""
        data: Dict[str, Any] = {
            "step": step.value,
            "percent": STEP_PROGRESS.get(step, 0),
            "message": message,
        }

        if result_id:
            data["result_id"] = result_id

        await self._publish_and_store(data)

    async def error(self, code: str, message: str, recoverable: bool = True):
        """Report a terminal error state for the task."""
        data: Dict[str, Any] = {
            "step": ProgressStep.ERROR.value,
            "percent": 0,
            "error": {
                "code": code,
                "message": message,
                "recoverable": recoverable,
            },
        }

        await self._publish_and_store(data)
|
app/services/resume_comparator.py
ADDED
|
@@ -0,0 +1,236 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
import re
|
| 3 |
+
from typing import Dict, List, Any
|
| 4 |
+
from app.models.resume import ResumeData
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class ResumeComparator:
|
| 8 |
+
"""Compare original (ground truth) resume with LLM-generated version."""
|
| 9 |
+
|
| 10 |
+
    # Strong action verbs commonly used in resumes. _count_action_verbs counts
    # a bullet as "strong" when its first word (lowercased) is in this set.
    STRONG_ACTION_VERBS = {
        'achieved', 'accelerated', 'accomplished', 'designed', 'developed', 'directed',
        'established', 'executed', 'generated', 'implemented', 'improved', 'increased',
        'launched', 'led', 'managed', 'optimized', 'orchestrated', 'pioneered',
        'reduced', 'resolved', 'spearheaded', 'streamlined', 'transformed', 'architected',
        'built', 'created', 'delivered', 'drove', 'enhanced', 'expanded', 'founded',
        'initiated', 'maintained', 'organized', 'produced', 'redesigned', 'restructured',
        'revamped', 'scaled', 'strengthened', 'automated', 'collaborated', 'coordinated',
        'facilitated', 'negotiated', 'presented', 'supervised', 'trained'
    }
|
| 21 |
+
|
| 22 |
+
    def __init__(self, original: ResumeData, llm_version: ResumeData, job_description: str = ""):
        """Store the ground-truth resume, the LLM-generated variant, and job text."""
        self.original = original
        self.llm_version = llm_version
        self.job_description = job_description
|
| 26 |
+
|
| 27 |
+
def _extract_text(self, resume: ResumeData) -> str:
|
| 28 |
+
"""Extract all text from resume."""
|
| 29 |
+
text_parts = []
|
| 30 |
+
|
| 31 |
+
# Add experience bullets
|
| 32 |
+
for exp in resume.experience:
|
| 33 |
+
text_parts.extend(exp.bullets)
|
| 34 |
+
|
| 35 |
+
# Add skills
|
| 36 |
+
text_parts.extend(resume.skills)
|
| 37 |
+
|
| 38 |
+
# Add education
|
| 39 |
+
for edu in resume.education:
|
| 40 |
+
if edu.degree:
|
| 41 |
+
text_parts.append(edu.degree)
|
| 42 |
+
if edu.institution:
|
| 43 |
+
text_parts.append(edu.institution)
|
| 44 |
+
|
| 45 |
+
return " ".join(text_parts).lower()
|
| 46 |
+
|
| 47 |
+
def _count_keywords(self, text: str, keywords: List[str]) -> int:
|
| 48 |
+
"""Count how many keywords appear in text."""
|
| 49 |
+
text_lower = text.lower()
|
| 50 |
+
count = 0
|
| 51 |
+
for keyword in keywords:
|
| 52 |
+
if keyword.lower() in text_lower:
|
| 53 |
+
count += 1
|
| 54 |
+
return count
|
| 55 |
+
|
| 56 |
+
def _extract_jd_keywords(self) -> List[str]:
|
| 57 |
+
"""Extract potential keywords from job description."""
|
| 58 |
+
if not self.job_description:
|
| 59 |
+
return []
|
| 60 |
+
|
| 61 |
+
# Extract words longer than 3 characters, excluding common words
|
| 62 |
+
words = re.findall(r'\b[a-zA-Z]{4,}\b', self.job_description.lower())
|
| 63 |
+
|
| 64 |
+
# Common words to exclude
|
| 65 |
+
common_words = {
|
| 66 |
+
'will', 'with', 'have', 'this', 'that', 'from', 'they', 'were', 'been',
|
| 67 |
+
'their', 'what', 'about', 'which', 'when', 'make', 'like', 'time', 'than',
|
| 68 |
+
'into', 'year', 'your', 'some', 'could', 'them', 'other', 'then', 'more',
|
| 69 |
+
'these', 'would', 'such', 'also', 'only', 'must', 'work', 'team', 'role'
|
| 70 |
+
}
|
| 71 |
+
|
| 72 |
+
keywords = [w for w in set(words) if w not in common_words]
|
| 73 |
+
return keywords[:50] # Top 50 keywords
|
| 74 |
+
|
| 75 |
+
def _count_bullets(self, resume: ResumeData) -> int:
|
| 76 |
+
"""Count total number of bullet points."""
|
| 77 |
+
total = 0
|
| 78 |
+
for exp in resume.experience:
|
| 79 |
+
total += len(exp.bullets)
|
| 80 |
+
return total
|
| 81 |
+
|
| 82 |
+
def _count_action_verbs(self, resume: ResumeData) -> int:
|
| 83 |
+
"""Count strong action verbs used."""
|
| 84 |
+
count = 0
|
| 85 |
+
for exp in resume.experience:
|
| 86 |
+
for bullet in exp.bullets:
|
| 87 |
+
words = bullet.lower().split()
|
| 88 |
+
if words and words[0] in self.STRONG_ACTION_VERBS:
|
| 89 |
+
count += 1
|
| 90 |
+
return count
|
| 91 |
+
|
| 92 |
+
def _count_quantifiable_metrics(self, text: str) -> int:
|
| 93 |
+
"""Count numbers, percentages, and quantifiable achievements."""
|
| 94 |
+
# Match patterns like: 50%, $1M, 10+, 2x, 100K, etc.
|
| 95 |
+
patterns = [
|
| 96 |
+
r'\d+%', # Percentages
|
| 97 |
+
r'\$\d+[KMB]?', # Dollar amounts
|
| 98 |
+
r'\d+[KMB]', # Numbers with K/M/B suffix
|
| 99 |
+
r'\d+x', # Multipliers
|
| 100 |
+
r'\d+\+', # Numbers with +
|
| 101 |
+
r'\d{2,}', # Any number with 2+ digits
|
| 102 |
+
]
|
| 103 |
+
|
| 104 |
+
count = 0
|
| 105 |
+
for pattern in patterns:
|
| 106 |
+
count += len(re.findall(pattern, text))
|
| 107 |
+
return count
|
| 108 |
+
|
| 109 |
+
def _count_words(self, text: str) -> int:
|
| 110 |
+
"""Count words in text."""
|
| 111 |
+
return len(re.findall(r'\b\w+\b', text))
|
| 112 |
+
|
| 113 |
+
def _analyze_section_coverage(self, resume: ResumeData) -> Dict[str, int]:
|
| 114 |
+
"""Analyze coverage of different resume sections."""
|
| 115 |
+
return {
|
| 116 |
+
'experience_count': len(resume.experience),
|
| 117 |
+
'education_count': len(resume.education),
|
| 118 |
+
'skills_count': len(resume.skills),
|
| 119 |
+
'certifications_count': len(resume.certifications)
|
| 120 |
+
}
|
| 121 |
+
|
| 122 |
+
def compare(self) -> Dict[str, Any]:
    """Generate comprehensive comparison report.

    Computes per-metric counts for the original and LLM-customized resume,
    the per-metric deltas, a weighted quality score for each version
    (normalized so the larger raw score maps to 100), and a human-readable
    summary of the changes.
    """

    original_text = self._extract_text(self.original)
    llm_text = self._extract_text(self.llm_version)

    jd_keywords = self._extract_jd_keywords()

    # Metric calculations
    original_metrics = {
        'bullet_points': self._count_bullets(self.original),
        'action_verbs': self._count_action_verbs(self.original),
        'quantifiable_metrics': self._count_quantifiable_metrics(original_text),
        'word_count': self._count_words(original_text),
        'character_count': len(original_text),
        'jd_keyword_matches': self._count_keywords(original_text, jd_keywords) if jd_keywords else 0,
        **self._analyze_section_coverage(self.original)
    }

    llm_metrics = {
        'bullet_points': self._count_bullets(self.llm_version),
        'action_verbs': self._count_action_verbs(self.llm_version),
        'quantifiable_metrics': self._count_quantifiable_metrics(llm_text),
        'word_count': self._count_words(llm_text),
        'character_count': len(llm_text),
        'jd_keyword_matches': self._count_keywords(llm_text, jd_keywords) if jd_keywords else 0,
        **self._analyze_section_coverage(self.llm_version)
    }

    # Calculate improvements/changes
    improvements = {}
    for key in original_metrics:
        original_val = original_metrics[key]
        llm_val = llm_metrics[key]
        diff = llm_val - original_val

        if original_val > 0:
            percent_change = (diff / original_val) * 100
        else:
            # Guard the zero baseline: anything added from nothing is "100%".
            percent_change = 100 if llm_val > 0 else 0

        improvements[key] = {
            'original': original_val,
            'llm': llm_val,
            'difference': diff,
            'percent_change': round(percent_change, 2)
        }

    # Overall quality score (simple weighted average)
    weights = {
        'action_verbs': 0.25,
        'quantifiable_metrics': 0.30,
        'jd_keyword_matches': 0.25,
        'bullet_points': 0.20
    }

    original_score = sum(
        original_metrics.get(k, 0) * v
        for k, v in weights.items()
    )
    llm_score = sum(
        llm_metrics.get(k, 0) * v
        for k, v in weights.items()
    )

    # Normalize scores (simple approach). `or 1` keeps the divisions below
    # safe when both raw scores are zero.
    max_score = max(original_score, llm_score) or 1

    return {
        'original_metrics': original_metrics,
        'llm_metrics': llm_metrics,
        'improvements': improvements,
        'scores': {
            'original_score': round((original_score / max_score) * 100, 2),
            'llm_score': round((llm_score / max_score) * 100, 2),
            'improvement': round(((llm_score - original_score) / max_score) * 100, 2)
        },
        'summary': self._generate_summary(improvements),
        'jd_keywords_analyzed': len(jd_keywords)
    }
| 203 |
+
def _generate_summary(self, improvements: Dict[str, Dict]) -> str:
|
| 204 |
+
"""Generate human-readable summary."""
|
| 205 |
+
summary_parts = []
|
| 206 |
+
|
| 207 |
+
# Action verbs
|
| 208 |
+
av_diff = improvements['action_verbs']['difference']
|
| 209 |
+
if av_diff > 0:
|
| 210 |
+
summary_parts.append(f"Added {av_diff} more strong action verbs")
|
| 211 |
+
elif av_diff < 0:
|
| 212 |
+
summary_parts.append(f"Removed {abs(av_diff)} action verbs")
|
| 213 |
+
|
| 214 |
+
# Quantifiable metrics
|
| 215 |
+
qm_diff = improvements['quantifiable_metrics']['difference']
|
| 216 |
+
if qm_diff > 0:
|
| 217 |
+
summary_parts.append(f"Added {qm_diff} more quantifiable metrics")
|
| 218 |
+
elif qm_diff < 0:
|
| 219 |
+
summary_parts.append(f"Removed {abs(qm_diff)} quantifiable metrics")
|
| 220 |
+
|
| 221 |
+
# Keywords
|
| 222 |
+
kw_diff = improvements['jd_keyword_matches']['difference']
|
| 223 |
+
if kw_diff > 0:
|
| 224 |
+
summary_parts.append(f"Matched {kw_diff} more JD keywords")
|
| 225 |
+
elif kw_diff < 0:
|
| 226 |
+
summary_parts.append(f"Matched {abs(kw_diff)} fewer JD keywords")
|
| 227 |
+
|
| 228 |
+
# Word count
|
| 229 |
+
wc_change = improvements['word_count']['percent_change']
|
| 230 |
+
if abs(wc_change) > 10:
|
| 231 |
+
if wc_change > 0:
|
| 232 |
+
summary_parts.append(f"Increased content by {abs(wc_change):.1f}%")
|
| 233 |
+
else:
|
| 234 |
+
summary_parts.append(f"Reduced content by {abs(wc_change):.1f}%")
|
| 235 |
+
|
| 236 |
+
return "; ".join(summary_parts) if summary_parts else "Minimal changes"
|
app/services/resume_customizer.py
ADDED
|
@@ -0,0 +1,151 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
import json
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
from typing import List, Dict, Any
|
| 5 |
+
from app.models.resume import ResumeData, Experience
|
| 6 |
+
from app.models.job import JobData
|
| 7 |
+
from app.models.customization import CustomizationResult, Change, Intensity
|
| 8 |
+
from app.models.score import ATSScore
|
| 9 |
+
from app.models.analysis import BulletAnalysis, KeywordPlacement
|
| 10 |
+
from app.services.ats_scorer import ATSScorer
|
| 11 |
+
from app.services.bullet_analyzer import BulletAnalyzer
|
| 12 |
+
from app.llm.factory import LLMFactory
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class ResumeCustomizer:
    """Tailors a parsed resume to a specific job posting via an LLM.

    Orchestrates: ATS scoring before/after, best-effort bullet analysis,
    the prompt-driven LLM rewrite, and change detection between the two
    versions. Best-effort steps log and continue on failure.
    """

    def __init__(self):
        # Prompt templates live at <repo root>/prompts.
        self.prompts_dir = Path(__file__).parent.parent.parent / "prompts"
        self.scorer = ATSScorer()
        self.bullet_analyzer = BulletAnalyzer()

    def _detect_changes(self, original: ResumeData, customized: ResumeData) -> List[Change]:
        """Compare original and customized to detect changes.

        Reports modified/added experience bullets, newly added skills, and a
        modified summary. NOTE(review): removals are not reported — zip()
        stops at the shorter bullet list, and skills dropped by the LLM are
        ignored; confirm that is intentional.
        """
        changes = []

        # Compare experience bullets (pairwise, positional).
        for i, (orig_exp, cust_exp) in enumerate(zip(original.experience, customized.experience)):
            for j, (orig_bullet, cust_bullet) in enumerate(zip(orig_exp.bullets, cust_exp.bullets)):
                if orig_bullet != cust_bullet:
                    changes.append(Change(
                        type="modified",
                        location=f"experience[{i}].bullets[{j}]",
                        before=orig_bullet,
                        after=cust_bullet,
                    ))

            # Check for added bullets (customized list longer than original).
            if len(cust_exp.bullets) > len(orig_exp.bullets):
                for j in range(len(orig_exp.bullets), len(cust_exp.bullets)):
                    changes.append(Change(
                        type="added",
                        location=f"experience[{i}].bullets[{j}]",
                        before="",
                        after=cust_exp.bullets[j],
                    ))

        # Compare skills as sets; only additions are recorded.
        orig_skills = set(original.skills)
        cust_skills = set(customized.skills)

        for skill in cust_skills - orig_skills:
            changes.append(Change(
                type="added",
                location="skills",
                before="",
                after=skill,
            ))

        # Compare summary
        if original.summary != customized.summary:
            changes.append(Change(
                type="modified",
                location="summary",
                before=original.summary,
                after=customized.summary,
            ))

        return changes

    async def customize(
        self,
        resume: ResumeData,
        job: JobData,
        intensity: Intensity = Intensity.MODERATE,
    ) -> CustomizationResult:
        """Customize resume for the target job.

        Pipeline: score original -> analyze bullets (best-effort) -> build
        prompt -> LLM rewrite -> re-score -> diff -> update bullet analysis
        and keyword quality (both best-effort).

        Raises:
            Whatever the scorer, prompt loading, LLM call, or ResumeData
            validation raise — those steps are deliberately not wrapped.
        """
        import logging
        logger = logging.getLogger(__name__)

        # Calculate original score
        original_score = await self.scorer.calculate(resume, job)

        # Analyze bullets BEFORE customization (optional feature)
        bullet_analysis: List[BulletAnalysis] = []
        try:
            bullet_analysis = await self.bullet_analyzer.analyze_all_bullets(resume, job)
        except Exception as e:
            logger.warning(f"Bullet analysis failed (non-critical): {e}")

        # Prepare prompt
        prompt_template = (self.prompts_dir / "customize_resume.txt").read_text()

        resume_dict = resume.model_dump()
        # Don't include raw text in prompt. NOTE(review): raises KeyError if
        # the dump ever lacks "raw_text"; .pop("raw_text", None) would be safer.
        del resume_dict["raw_text"]

        prompt = prompt_template.format(
            intensity=intensity.value,
            resume_json=json.dumps(resume_dict, indent=2),
            job_title=job.title,
            job_company=job.company,
            keywords_required=", ".join(job.keywords_required),
            keywords_preferred=", ".join(job.keywords_preferred),
            responsibilities="\n".join(f"- {r}" for r in job.responsibilities[:5]),
            missing_keywords=", ".join(original_score.missing_keywords[:10]),
        )

        # Get customized resume from LLM
        llm = LLMFactory.get_smart()
        schema: Dict[str, Any] = resume_dict  # Use original structure as schema
        customized_dict = await llm.complete_json(prompt, schema)

        # Preserve raw_text from original
        customized_dict["raw_text"] = resume.raw_text
        customized = ResumeData(**customized_dict)

        # Calculate new score
        customized_score = await self.scorer.calculate(customized, job)

        # Detect changes
        changes = self._detect_changes(resume, customized)

        # Update bullet analysis with customized versions (optional feature)
        try:
            if bullet_analysis:
                bullet_analysis = self.bullet_analyzer.update_with_customized(
                    bullet_analysis, customized, job
                )
        except Exception as e:
            logger.warning(f"Bullet analysis update failed (non-critical): {e}")

        # Check keyword quality (optional feature)
        keyword_quality: List[KeywordPlacement] = []
        try:
            # Keywords the rewrite newly matched relative to the original.
            added_keywords = [
                kw for kw in customized_score.matched_keywords
                if kw not in original_score.matched_keywords
            ]
            keyword_quality = self.scorer.check_keyword_quality(
                customized, job, added_keywords
            )
        except Exception as e:
            logger.warning(f"Keyword quality check failed (non-critical): {e}")

        return CustomizationResult(
            original=resume,
            customized=customized,
            changes=changes,
            original_score=original_score,
            customized_score=customized_score,
            bullet_analysis=bullet_analysis,
            keyword_quality=keyword_quality,
        )
|
app/services/resume_generator.py
ADDED
|
@@ -0,0 +1,249 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
import io
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
from typing import TYPE_CHECKING
|
| 5 |
+
from jinja2 import Template
|
| 6 |
+
from docx import Document
|
| 7 |
+
from docx.shared import Pt
|
| 8 |
+
from docx.enum.text import WD_ALIGN_PARAGRAPH
|
| 9 |
+
|
| 10 |
+
if TYPE_CHECKING:
|
| 11 |
+
from app.models.resume import ResumeData
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class ResumeGenerator:
    """Renders a ResumeData model to HTML (Jinja2), PDF (fpdf), or DOCX (python-docx)."""

    def __init__(self):
        # Optional Jinja2 templates live at <repo root>/templates.
        self.templates_dir = Path(__file__).parent.parent.parent / "templates"

    def to_html(self, resume: "ResumeData") -> str:
        """Render the resume to an HTML string.

        Falls back to the built-in default template when templates/resume.html
        does not exist.
        """
        template_path = self.templates_dir / "resume.html"

        # Use default template if not exists
        if not template_path.exists():
            template_str = self._default_template()
        else:
            template_str = template_path.read_text()

        template = Template(template_str)
        return template.render(resume=resume)

    def _sanitize_text(self, text: str) -> str:
        """Remove or replace characters not supported by Helvetica.

        Maps common typographic symbols to ASCII equivalents, then forces the
        result into latin-1 ('?' for anything unmappable) because the PDF
        core Helvetica font only covers latin-1.
        NOTE(review): the four smart-quote keys below were garbled in the
        reviewed rendering — confirm against the original file.
        """
        replacements = {
            '★': '*',
            '☆': '*',
            '•': '-',
            '→': '->',
            '←': '<-',
            '✓': '[x]',
            '✗': '[ ]',
            '…': '...',
            '“': '"',
            '”': '"',
            '‘': "'",
            '’': "'",
            '–': '-',
            '—': '-',
        }
        for char, replacement in replacements.items():
            text = text.replace(char, replacement)
        # Remove any remaining non-latin1 characters
        return text.encode('latin-1', errors='replace').decode('latin-1')

    def to_pdf(self, resume: "ResumeData") -> bytes:
        """Render the resume to PDF bytes with fpdf's core Helvetica font.

        Every string is passed through _sanitize_text first (latin-1 only).
        """
        from fpdf import FPDF

        pdf = FPDF()
        pdf.add_page()
        pdf.set_margins(15, 15, 15)
        pdf.set_auto_page_break(auto=True, margin=15)

        sanitize = self._sanitize_text

        # Contact header
        pdf.set_font("Helvetica", "B", 14)
        pdf.cell(0, 8, sanitize(resume.contact.name or "Name"), ln=True, align="C")

        pdf.set_font("Helvetica", "", 9)
        contact_parts = [p for p in [resume.contact.email, resume.contact.phone, resume.contact.location] if p]
        if contact_parts:
            pdf.cell(0, 5, sanitize(" | ".join(contact_parts)), ln=True, align="C")
        pdf.ln(4)

        # Usable width between the left and right margins.
        page_width = pdf.w - pdf.l_margin - pdf.r_margin

        # Summary
        if resume.summary:
            pdf.set_font("Helvetica", "B", 11)
            pdf.cell(0, 7, "SUMMARY", ln=True)
            pdf.set_draw_color(100, 100, 100)
            pdf.line(pdf.l_margin, pdf.get_y(), pdf.l_margin + page_width, pdf.get_y())
            pdf.ln(2)
            pdf.set_font("Helvetica", "", 9)
            pdf.multi_cell(page_width, 4, sanitize(resume.summary))
            pdf.ln(3)

        # Experience
        if resume.experience:
            pdf.set_font("Helvetica", "B", 11)
            pdf.cell(0, 7, "EXPERIENCE", ln=True)
            pdf.line(pdf.l_margin, pdf.get_y(), pdf.l_margin + page_width, pdf.get_y())
            pdf.ln(2)

            for exp in resume.experience:
                pdf.set_font("Helvetica", "B", 10)
                title_company = f"{exp.title} - {exp.company}"
                # Truncated so a very long heading stays on one cell line.
                pdf.cell(0, 5, sanitize(title_company[:80]), ln=True)
                if exp.dates:
                    pdf.set_font("Helvetica", "I", 8)
                    pdf.cell(0, 4, sanitize(exp.dates), ln=True)
                pdf.set_font("Helvetica", "", 9)
                for bullet in exp.bullets:
                    bullet_text = f"* {bullet}"
                    pdf.multi_cell(page_width, 4, sanitize(bullet_text))
                pdf.ln(2)

        # Education
        if resume.education:
            pdf.set_font("Helvetica", "B", 11)
            pdf.cell(0, 7, "EDUCATION", ln=True)
            pdf.line(pdf.l_margin, pdf.get_y(), pdf.l_margin + page_width, pdf.get_y())
            pdf.ln(2)

            for edu in resume.education:
                pdf.set_font("Helvetica", "B", 10)
                pdf.cell(0, 5, sanitize(f"{edu.degree} - {edu.school}"), ln=True)
                if edu.dates:
                    pdf.set_font("Helvetica", "I", 8)
                    pdf.cell(0, 4, sanitize(edu.dates), ln=True)
                pdf.ln(2)

        # Skills
        if resume.skills:
            pdf.set_font("Helvetica", "B", 11)
            pdf.cell(0, 7, "SKILLS", ln=True)
            pdf.line(pdf.l_margin, pdf.get_y(), pdf.l_margin + page_width, pdf.get_y())
            pdf.ln(2)
            pdf.set_font("Helvetica", "", 9)
            skills_text = ", ".join(resume.skills)
            pdf.multi_cell(page_width, 4, sanitize(skills_text))

        return bytes(pdf.output())

    def to_docx(self, resume: "ResumeData") -> bytes:
        """Render the resume to DOCX bytes via python-docx (in-memory save)."""
        doc = Document()

        # Contact info
        name_para = doc.add_paragraph()
        name_run = name_para.add_run(resume.contact.name)
        name_run.bold = True
        name_run.font.size = Pt(16)
        name_para.alignment = WD_ALIGN_PARAGRAPH.CENTER

        contact_para = doc.add_paragraph()
        contact_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
        contact_parts = []
        if resume.contact.email:
            contact_parts.append(resume.contact.email)
        if resume.contact.phone:
            contact_parts.append(resume.contact.phone)
        if resume.contact.location:
            contact_parts.append(resume.contact.location)
        contact_para.add_run(" | ".join(contact_parts))

        # Summary
        if resume.summary:
            doc.add_heading("Summary", level=1)
            doc.add_paragraph(resume.summary)

        # Experience
        if resume.experience:
            doc.add_heading("Experience", level=1)
            for exp in resume.experience:
                exp_para = doc.add_paragraph()
                exp_para.add_run(f"{exp.title}").bold = True
                exp_para.add_run(f" | {exp.company}")
                exp_para.add_run(f" | {exp.dates}").italic = True

                for bullet in exp.bullets:
                    doc.add_paragraph(bullet, style="List Bullet")

        # Education
        if resume.education:
            doc.add_heading("Education", level=1)
            for edu in resume.education:
                edu_para = doc.add_paragraph()
                edu_para.add_run(f"{edu.degree}").bold = True
                edu_para.add_run(f" | {edu.school}")
                edu_para.add_run(f" | {edu.dates}").italic = True

        # Skills
        if resume.skills:
            doc.add_heading("Skills", level=1)
            doc.add_paragraph(", ".join(resume.skills))

        # Serialize to bytes without touching the filesystem.
        buffer = io.BytesIO()
        doc.save(buffer)
        return buffer.getvalue()

    def _default_template(self) -> str:
        # Built-in fallback Jinja2 template used when templates/resume.html
        # is missing.
        return """
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<style>
body { font-family: Arial, sans-serif; margin: 40px; font-size: 11pt; }
h1 { font-size: 18pt; margin-bottom: 5px; }
h2 { font-size: 13pt; border-bottom: 1px solid #333; margin-top: 15px; }
.contact { text-align: center; margin-bottom: 15px; }
.contact h1 { margin: 0; }
.contact p { margin: 5px 0; color: #555; }
.experience-item { margin-bottom: 12px; }
.experience-header { font-weight: bold; }
.experience-meta { color: #555; font-style: italic; }
ul { margin: 5px 0; padding-left: 20px; }
li { margin: 3px 0; }
.skills { margin-top: 10px; }
</style>
</head>
<body>
<div class="contact">
<h1>{{ resume.contact.name }}</h1>
<p>{{ resume.contact.email }} | {{ resume.contact.phone }} | {{ resume.contact.location }}</p>
</div>

{% if resume.summary %}
<h2>Summary</h2>
<p>{{ resume.summary }}</p>
{% endif %}

{% if resume.experience %}
<h2>Experience</h2>
{% for exp in resume.experience %}
<div class="experience-item">
<div class="experience-header">{{ exp.title }} | {{ exp.company }}</div>
<div class="experience-meta">{{ exp.dates }}</div>
<ul>
{% for bullet in exp.bullets %}
<li>{{ bullet }}</li>
{% endfor %}
</ul>
</div>
{% endfor %}
{% endif %}

{% if resume.education %}
<h2>Education</h2>
{% for edu in resume.education %}
<p><strong>{{ edu.degree }}</strong> | {{ edu.school }} | {{ edu.dates }}</p>
{% endfor %}
{% endif %}

{% if resume.skills %}
<h2>Skills</h2>
<p class="skills">{{ resume.skills | join(', ') }}</p>
{% endif %}
</body>
</html>
"""
|
app/services/resume_parser.py
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
import io
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
from typing import Dict, Any
|
| 5 |
+
import fitz # PyMuPDF
|
| 6 |
+
from docx import Document
|
| 7 |
+
from app.models.resume import ResumeData
|
| 8 |
+
from app.llm.factory import LLMFactory
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class ResumeParser:
    """Extracts raw text from uploaded PDF/DOCX bytes and structures it via an LLM."""

    # Accepted MIME types mapped to internal file-type tags.
    SUPPORTED_TYPES: Dict[str, str] = {
        "application/pdf": "pdf",
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document": "docx",
    }

    def __init__(self):
        # Prompt templates live at <repo root>/prompts.
        self.prompts_dir = Path(__file__).parent.parent.parent / "prompts"

    def is_supported(self, content_type: str) -> bool:
        """Return True when *content_type* is a parseable MIME type."""
        return content_type in self.SUPPORTED_TYPES

    def extract_text(self, file_bytes: bytes, content_type: str) -> str:
        """Dispatch to the PDF or DOCX extractor based on MIME type.

        Raises:
            ValueError: for unsupported types, or when extraction fails
                or yields no text.
        """
        file_type = self.SUPPORTED_TYPES.get(content_type)
        if file_type == "pdf":
            return self._extract_pdf(file_bytes)
        elif file_type == "docx":
            return self._extract_docx(file_bytes)
        else:
            raise ValueError(f"Unsupported content type: {content_type}")

    def _extract_pdf(self, file_bytes: bytes) -> str:
        """Extract concatenated page text from PDF bytes via PyMuPDF (fitz)."""
        try:
            doc = fitz.open(stream=file_bytes, filetype="pdf")
            text_parts = []
            for page in doc:
                text_parts.append(page.get_text())
            doc.close()
            text = "\n".join(text_parts).strip()
            if not text:
                raise ValueError("Could not extract text from PDF")
            return text
        except Exception as e:
            # Normalize every failure (including the empty-text case above)
            # to ValueError so callers handle a single error type.
            raise ValueError(f"Could not extract text from PDF: {e}")

    def _extract_docx(self, file_bytes: bytes) -> str:
        """Extract non-empty paragraph text from DOCX bytes via python-docx."""
        try:
            doc = Document(io.BytesIO(file_bytes))
            text_parts = []
            for para in doc.paragraphs:
                if para.text.strip():
                    text_parts.append(para.text)
            text = "\n".join(text_parts).strip()
            if not text:
                raise ValueError("Could not extract text from DOCX")
            return text
        except Exception as e:
            # Same normalization as _extract_pdf.
            raise ValueError(f"Could not extract text from DOCX: {e}")

    async def parse(self, file_bytes: bytes, content_type: str) -> ResumeData:
        """Extract raw text, then have the fast LLM structure it into ResumeData.

        Raises:
            ValueError: propagated from extract_text on unsupported/empty input.
        """
        raw_text = self.extract_text(file_bytes, content_type)

        prompt_template = (self.prompts_dir / "structure_resume.txt").read_text()
        # str.replace, not .format — presumably so stray braces in the resume
        # text cannot break formatting; confirm against the prompt file.
        prompt = prompt_template.replace("{resume_text}", raw_text)

        # Example-shaped schema showing the LLM the expected JSON layout.
        schema: Dict[str, Any] = {
            "contact": {"name": "", "email": "", "phone": "", "linkedin": "", "location": ""},
            "summary": "",
            "experience": [{"company": "", "title": "", "dates": "", "bullets": []}],
            "education": [{"school": "", "degree": "", "dates": ""}],
            "skills": [],
        }

        llm = LLMFactory.get_fast()
        data = await llm.complete_json(prompt, schema)

        return ResumeData(
            contact=data.get("contact", {}),
            summary=data.get("summary", ""),
            experience=data.get("experience", []),
            education=data.get("education", []),
            skills=data.get("skills", []),
            raw_text=raw_text,
        )
|
app/workers/__init__.py
ADDED
|
File without changes
|
app/workers/celery_app.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from celery import Celery
from app.core.config import settings

# Celery requires explicit ssl_cert_reqs param for rediss:// URLs.
# NOTE(review): CERT_NONE disables TLS certificate verification for the
# Redis connection — confirm this is acceptable for the deployment target.
_redis_url = settings.redis_url
if _redis_url.startswith("rediss://") and "ssl_cert_reqs" not in _redis_url:
    # Append with '&' when a query string already exists, otherwise start one.
    sep = "&" if "?" in _redis_url else "?"
    _redis_url = f"{_redis_url}{sep}ssl_cert_reqs=CERT_NONE"

# Single Celery app; Redis serves as both message broker and result backend.
celery_app = Celery(
    "cv_buddy",
    broker=_redis_url,
    backend=_redis_url,
    include=["app.workers.tasks"],
)

celery_app.conf.update(
    task_serializer="json",
    accept_content=["json"],
    result_serializer="json",
    timezone="UTC",
    enable_utc=True,
    task_track_started=True,  # report the STARTED state (off by default)
    task_time_limit=300,  # 5 minute timeout
    task_soft_time_limit=240,  # 4 minute soft timeout
)
|
app/workers/tasks.py
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
import asyncio
|
| 3 |
+
import json
|
| 4 |
+
import uuid
|
| 5 |
+
from typing import Optional, Dict, Any
|
| 6 |
+
from app.workers.celery_app import celery_app
|
| 7 |
+
from app.core.redis import get_redis_for_worker
|
| 8 |
+
from app.services.progress import ProgressService, ProgressStep
|
| 9 |
+
from app.services.resume_parser import ResumeParser
|
| 10 |
+
from app.services.job_scraper import JobScraper
|
| 11 |
+
from app.services.resume_customizer import ResumeCustomizer
|
| 12 |
+
from app.services.layout_scanner import LayoutScanner
|
| 13 |
+
from app.models.customization import Intensity
|
| 14 |
+
from app.models.analysis import SafetyScan
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def run_async(coro):
    """Run an async coroutine to completion from synchronous (Celery) code.

    Fix: delegates to ``asyncio.run`` instead of the manual
    ``new_event_loop``/``run_until_complete``/``close`` sequence. Besides
    closing the loop, ``asyncio.run`` also cancels any tasks still pending
    and shuts down async generators and the default executor, so nothing
    leaks between task invocations.

    Args:
        coro: The coroutine object to execute.

    Returns:
        Whatever the coroutine returns.
    """
    return asyncio.run(coro)
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
@celery_app.task(bind=True, max_retries=3)
def analyze_and_customize(
    self,
    session_id: str,
    job_url: Optional[str],
    job_text: Optional[str],
    intensity: str,
) -> Dict[str, Any]:
    """Main task chain for resume customization.

    Orchestrates the full pipeline: load the parsed resume from the
    Redis session, optionally scan the raw uploaded file for ATS layout
    issues, extract the target job (from URL or pasted text), run the
    LLM-backed customization, and persist the combined result under a
    fresh ``result:{id}`` key. Progress is streamed to the client via
    ``ProgressService`` at each step.

    Args:
        session_id: Upload session whose ``session:{id}:*`` Redis keys
            hold the parsed resume JSON and (optionally) the raw file.
        job_url: Job-posting URL to scrape; takes precedence over
            ``job_text`` when both are given.
        job_text: Raw job-posting text to parse when no URL is supplied.
        intensity: Customization level; must be a valid ``Intensity``
            enum value ("conservative" / "moderate" / "aggressive" per
            the prompt template — confirm against the Intensity enum).

    Returns:
        ``{"result_id": <uuid>}`` identifying the stored result.

    Raises:
        ValueError: If the session expired or neither job input was
            provided. Any exception is reported via ``progress.error``
            and re-raised so Celery can record the failure / retry.
    """
    task_id = self.request.id

    async def _run():
        # One Redis connection scoped to the task; the worker helper
        # manages open/close via the async context manager.
        async with get_redis_for_worker() as redis:
            progress = ProgressService(task_id, redis)

            try:
                # Step 1: Get resume data from session
                await progress.update(ProgressStep.PARSING_RESUME, "Loading your resume...")

                resume_data = await redis.get(f"session:{session_id}:resume")
                if not resume_data:
                    # Session keys carry a TTL (see upload flow) — an
                    # expired session is a recoverable user-facing error.
                    raise ValueError("Session expired or resume not found")

                # Imported lazily; presumably to avoid import cycles at
                # worker startup — TODO confirm.
                from app.models.resume import ResumeData
                resume = ResumeData(**json.loads(resume_data))

                # Scan layout for ATS compatibility issues (optional feature)
                # Defaults to an empty SafetyScan so a scan failure never
                # blocks the main pipeline.
                safety_scan = SafetyScan()
                try:
                    raw_file = await redis.get(f"session:{session_id}:file")
                    content_type_bytes = await redis.get(f"session:{session_id}:content_type")
                    if raw_file and content_type_bytes:
                        # Decode content_type from bytes to string
                        # (redis client may return bytes or str depending
                        # on its decode_responses setting).
                        content_type = content_type_bytes.decode('utf-8') if isinstance(content_type_bytes, bytes) else content_type_bytes
                        scanner = LayoutScanner()
                        safety_scan = scanner.scan(raw_file, content_type)
                except Exception as scan_error:
                    # Best-effort: log and continue with the empty scan.
                    import logging
                    logging.warning(f"Layout scan failed (non-critical): {scan_error}")

                # Step 2: Scrape job posting
                await progress.update(ProgressStep.SCRAPING_JOB, "Analyzing job posting...")

                scraper = JobScraper()
                # URL takes precedence over pasted text when both exist.
                if job_url:
                    job = await scraper.scrape(job_url)
                elif job_text:
                    job = await scraper.parse_text(job_text)
                else:
                    raise ValueError("No job URL or text provided")

                # Step 3-5: Customize resume (includes scoring)
                await progress.update(ProgressStep.CUSTOMIZING, "Customizing your resume...")

                customizer = ResumeCustomizer()
                result = await customizer.customize(
                    resume=resume,
                    job=job,
                    # Raises ValueError for an unknown intensity string,
                    # which the outer handler reports as PROCESSING_ERROR.
                    intensity=Intensity(intensity),
                )

                # Step 6: Store result
                await progress.update(ProgressStep.FINALIZING, "Preparing results...")

                result_id = str(uuid.uuid4())
                # Serialize every pydantic model to plain dicts so the
                # whole payload round-trips through json.dumps.
                result_data = {
                    "original": result.original.model_dump(),
                    "customized": result.customized.model_dump(),
                    "changes": [c.model_dump() for c in result.changes],
                    "original_score": result.original_score.model_dump(),
                    "customized_score": result.customized_score.model_dump(),
                    "job": job.model_dump(),
                    # Enhanced analysis fields
                    "bullet_analysis": [b.model_dump() for b in result.bullet_analysis],
                    "safety_scan": safety_scan.model_dump(),
                    "keyword_quality": [k.model_dump() for k in result.keyword_quality],
                }

                from app.core.config import settings
                # Result shares the session TTL, after which it expires
                # from Redis automatically.
                await redis.set(
                    f"result:{result_id}",
                    json.dumps(result_data),
                    ex=settings.session_ttl_seconds,
                )

                await progress.update(ProgressStep.COMPLETE, "Done!", result_id=result_id)

                return {"result_id": result_id}

            except Exception as e:
                # Surface the failure to the client's progress stream,
                # then re-raise so Celery marks the task failed (and can
                # retry up to max_retries).
                await progress.error(
                    code="PROCESSING_ERROR",
                    message=str(e),
                    recoverable=True,
                )
                raise

    # Celery task bodies are synchronous; bridge into asyncio here.
    return run_async(_run())
|
prompts/analyze_bullets.txt
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Analyze this resume bullet point against the job requirements.
|
| 2 |
+
|
| 3 |
+
BULLET POINT:
|
| 4 |
+
{bullet_text}
|
| 5 |
+
|
| 6 |
+
JOB REQUIREMENTS:
|
| 7 |
+
Required Keywords: {required_keywords}
|
| 8 |
+
Preferred Keywords: {preferred_keywords}
|
| 9 |
+
Key Responsibilities: {responsibilities}
|
| 10 |
+
|
| 11 |
+
Analyze the bullet point and provide:
|
| 12 |
+
1. A relevance score from 0-100 (how well this bullet aligns with the job)
|
| 13 |
+
2. Which keywords from the job are already present in the bullet
|
| 14 |
+
3. Which missing keywords could naturally fit in this bullet
|
| 15 |
+
4. A specific, actionable suggestion to improve this bullet
|
| 16 |
+
|
| 17 |
+
Respond in this exact JSON format:
|
| 18 |
+
{
|
| 19 |
+
"relevance_score": <number 0-100>,
|
| 20 |
+
"matched_keywords": ["keyword1", "keyword2"],
|
| 21 |
+
"missing_keywords": ["keyword3", "keyword4"],
|
| 22 |
+
"suggestion": "Change to: '<improved bullet text>' to include '<keyword>'"
|
| 23 |
+
}
|
| 24 |
+
|
| 25 |
+
Be specific in your suggestion. Don't just say "add keywords" - provide the actual rewritten bullet text.
|
| 26 |
+
If the bullet is already strong (score > 80), the suggestion can acknowledge this.
|
| 27 |
+
Only include missing_keywords that would NATURALLY fit this bullet's context.
|
prompts/customize_resume.txt
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
You are a professional resume writer. Customize this resume for the target job using the Google XYZ format for achievements: "Accomplished [X] as measured by [Y], by doing [Z]"
|
| 2 |
+
|
| 3 |
+
INTENSITY LEVEL: {intensity}
|
| 4 |
+
- conservative: Minor tweaks only. Add missing keywords naturally, keep original phrasing.
|
| 5 |
+
- moderate: Rewrite bullets in XYZ format, add keywords, reorder to highlight relevant experience.
|
| 6 |
+
- aggressive: Significant rewrites for maximum ATS optimization while staying truthful.
|
| 7 |
+
|
| 8 |
+
ORIGINAL RESUME:
|
| 9 |
+
{resume_json}
|
| 10 |
+
|
| 11 |
+
TARGET JOB:
|
| 12 |
+
Title: {job_title}
|
| 13 |
+
Company: {job_company}
|
| 14 |
+
Required Keywords: {keywords_required}
|
| 15 |
+
Preferred Keywords: {keywords_preferred}
|
| 16 |
+
Key Responsibilities: {responsibilities}
|
| 17 |
+
|
| 18 |
+
INSTRUCTIONS:
|
| 19 |
+
1. Rewrite experience bullets using XYZ format where possible
|
| 20 |
+
2. Naturally incorporate missing keywords: {missing_keywords}
|
| 21 |
+
3. Ensure skills section includes all relevant keywords
|
| 22 |
+
4. Keep all facts truthful - only rephrase, don't fabricate
|
| 23 |
+
5. Prioritize recent and relevant experience
|
| 24 |
+
|
| 25 |
+
Return the customized resume as valid JSON matching the original structure exactly.
|
| 26 |
+
Only return the JSON, no explanation.
|
prompts/extract_job.txt
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Extract structured data from this job posting. Return valid JSON matching the schema exactly.
|
| 2 |
+
|
| 3 |
+
Schema:
|
| 4 |
+
{
|
| 5 |
+
"title": "string (job title)",
|
| 6 |
+
"company": "string (company name)",
|
| 7 |
+
"location": "string (job location)",
|
| 8 |
+
"requirements": ["string (required qualification/experience)"],
|
| 9 |
+
"responsibilities": ["string (job duty/responsibility)"],
|
| 10 |
+
"keywords_required": ["string (must-have technical skills, tools, technologies)"],
|
| 11 |
+
"keywords_preferred": ["string (nice-to-have skills, bonus qualifications)"]
|
| 12 |
+
}
|
| 13 |
+
|
| 14 |
+
Job posting text:
|
| 15 |
+
{job_text}
|
prompts/structure_resume.txt
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Extract structured data from this resume text. Return valid JSON matching the schema exactly.
|
| 2 |
+
|
| 3 |
+
Schema:
|
| 4 |
+
{
|
| 5 |
+
"contact": {
|
| 6 |
+
"name": "string",
|
| 7 |
+
"email": "string",
|
| 8 |
+
"phone": "string",
|
| 9 |
+
"linkedin": "string",
|
| 10 |
+
"location": "string"
|
| 11 |
+
},
|
| 12 |
+
"summary": "string (professional summary if present)",
|
| 13 |
+
"experience": [
|
| 14 |
+
{
|
| 15 |
+
"company": "string",
|
| 16 |
+
"title": "string",
|
| 17 |
+
"dates": "string (e.g., 'Jan 2020 - Present')",
|
| 18 |
+
"bullets": ["string (achievement/responsibility)"]
|
| 19 |
+
}
|
| 20 |
+
],
|
| 21 |
+
"education": [
|
| 22 |
+
{
|
| 23 |
+
"school": "string",
|
| 24 |
+
"degree": "string",
|
| 25 |
+
"dates": "string"
|
| 26 |
+
}
|
| 27 |
+
],
|
| 28 |
+
"skills": ["string"]
|
| 29 |
+
}
|
| 30 |
+
|
| 31 |
+
Resume text:
|
| 32 |
+
{resume_text}
|
pyproject.toml
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[project]
|
| 2 |
+
name = "cv-buddy-backend"
|
| 3 |
+
version = "0.1.0"
|
| 4 |
+
description = "AI-powered resume customization backend"
|
| 5 |
+
requires-python = ">=3.11"
|
| 6 |
+
|
| 7 |
+
[tool.pytest.ini_options]
|
| 8 |
+
asyncio_mode = "auto"
|
| 9 |
+
testpaths = ["tests"]
|
requirements.txt
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Core
|
| 2 |
+
fastapi>=0.109.0
|
| 3 |
+
uvicorn[standard]>=0.27.0
|
| 4 |
+
python-multipart>=0.0.6
|
| 5 |
+
|
| 6 |
+
# Background jobs
|
| 7 |
+
celery>=5.3.0
|
| 8 |
+
redis>=5.0.0
|
| 9 |
+
|
| 10 |
+
# Document parsing
|
| 11 |
+
pymupdf>=1.23.0
|
| 12 |
+
python-docx>=1.1.0
|
| 13 |
+
|
| 14 |
+
# Document generation
|
| 15 |
+
weasyprint>=60.0
|
| 16 |
+
jinja2>=3.1.0
|
| 17 |
+
|
| 18 |
+
# Web scraping
|
| 19 |
+
httpx>=0.26.0
|
| 20 |
+
beautifulsoup4>=4.12.0
|
| 21 |
+
|
| 22 |
+
# LLM providers
|
| 23 |
+
openai>=1.10.0
|
| 24 |
+
google-generativeai>=0.4.0
|
| 25 |
+
|
| 26 |
+
# Utilities
|
| 27 |
+
pydantic>=2.5.0
|
| 28 |
+
pydantic-settings>=2.1.0
|
| 29 |
+
|
| 30 |
+
# Testing
|
| 31 |
+
pytest>=8.0.0
|
| 32 |
+
pytest-asyncio>=0.23.0
|
| 33 |
+
pytest-cov>=4.1.0
|
supervisord.conf
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[supervisord]
|
| 2 |
+
nodaemon=true
|
| 3 |
+
logfile=/dev/null
|
| 4 |
+
logfile_maxbytes=0
|
| 5 |
+
|
| 6 |
+
[program:uvicorn]
|
| 7 |
+
command=uvicorn app.main:app --host 0.0.0.0 --port %(ENV_PORT)s
|
| 8 |
+
directory=/app
|
| 9 |
+
autostart=true
|
| 10 |
+
autorestart=true
|
| 11 |
+
stdout_logfile=/dev/fd/1
|
| 12 |
+
stdout_logfile_maxbytes=0
|
| 13 |
+
stderr_logfile=/dev/fd/2
|
| 14 |
+
stderr_logfile_maxbytes=0
|
| 15 |
+
|
| 16 |
+
[program:celery]
|
| 17 |
+
command=celery -A app.workers.celery_app worker --loglevel=info --concurrency=2
|
| 18 |
+
directory=/app
|
| 19 |
+
autostart=true
|
| 20 |
+
autorestart=true
|
| 21 |
+
stdout_logfile=/dev/fd/1
|
| 22 |
+
stdout_logfile_maxbytes=0
|
| 23 |
+
stderr_logfile=/dev/fd/2
|
| 24 |
+
stderr_logfile_maxbytes=0
|