Momal committed on
Commit
366c43e
·
1 Parent(s): 43a78eb

Deploy cv-buddy backend

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitignore +7 -0
  2. Dockerfile +24 -0
  3. app/__init__.py +0 -0
  4. app/api/__init__.py +0 -0
  5. app/api/dependencies.py +21 -0
  6. app/api/routes/__init__.py +0 -0
  7. app/api/routes/analyze.py +129 -0
  8. app/api/routes/compare.py +65 -0
  9. app/api/routes/export.py +50 -0
  10. app/api/routes/progress.py +59 -0
  11. app/api/routes/result.py +17 -0
  12. app/api/routes/upload.py +101 -0
  13. app/core/__init__.py +0 -0
  14. app/core/config.py +25 -0
  15. app/core/redis.py +47 -0
  16. app/llm/__init__.py +0 -0
  17. app/llm/base.py +17 -0
  18. app/llm/factory.py +95 -0
  19. app/llm/fallback_provider.py +142 -0
  20. app/llm/google_provider.py +29 -0
  21. app/llm/groq_provider.py +41 -0
  22. app/llm/openai_provider.py +38 -0
  23. app/llm/zai_provider.py +49 -0
  24. app/main.py +94 -0
  25. app/models/__init__.py +5 -0
  26. app/models/analysis.py +38 -0
  27. app/models/customization.py +32 -0
  28. app/models/job.py +13 -0
  29. app/models/resume.py +32 -0
  30. app/models/score.py +16 -0
  31. app/services/__init__.py +0 -0
  32. app/services/ats_scorer.py +195 -0
  33. app/services/bullet_analyzer.py +145 -0
  34. app/services/job_scraper.py +108 -0
  35. app/services/layout_scanner.py +196 -0
  36. app/services/progress.py +83 -0
  37. app/services/resume_comparator.py +236 -0
  38. app/services/resume_customizer.py +151 -0
  39. app/services/resume_generator.py +249 -0
  40. app/services/resume_parser.py +84 -0
  41. app/workers/__init__.py +0 -0
  42. app/workers/celery_app.py +26 -0
  43. app/workers/tasks.py +124 -0
  44. prompts/analyze_bullets.txt +27 -0
  45. prompts/customize_resume.txt +26 -0
  46. prompts/extract_job.txt +15 -0
  47. prompts/structure_resume.txt +32 -0
  48. pyproject.toml +9 -0
  49. requirements.txt +33 -0
  50. supervisord.conf +24 -0
.gitignore ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ __pycache__/
2
+ *.pyc
3
+ .env
4
+ .env.*
5
+ fly.toml
6
+ *.egg-info/
7
+ .pytest_cache/
Dockerfile ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.11-slim
2
+
3
+ WORKDIR /app
4
+
5
+ # Install system dependencies for WeasyPrint + supervisor for running multiple processes
6
+ RUN apt-get update && apt-get install -y \
7
+ libpango-1.0-0 \
8
+ libpangocairo-1.0-0 \
9
+ libgdk-pixbuf-2.0-0 \
10
+ libffi-dev \
11
+ shared-mime-info \
12
+ supervisor \
13
+ && rm -rf /var/lib/apt/lists/*
14
+
15
+ COPY requirements.txt .
16
+ RUN pip install --no-cache-dir -r requirements.txt
17
+
18
+ COPY . .
19
+
20
+ COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf
21
+
22
+ ENV PORT=7860
23
+
24
+ CMD ["supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]
app/__init__.py ADDED
File without changes
app/api/__init__.py ADDED
File without changes
app/api/dependencies.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+ import uuid
3
+ from typing import Optional
4
+ from fastapi import Header, HTTPException
5
+ from app.core.redis import get_redis
6
+
7
+
8
async def get_session_id(x_session_id: Optional[str] = Header(default=None)) -> str:
    """Resolve and validate the caller's session from the ``X-Session-Id`` header.

    Returns the header value unchanged when a ``session:<id>:resume`` key
    exists in Redis.

    Raises:
        HTTPException: 401 when the header is missing/empty, or when no
            resume is stored under that session id.
    """
    if x_session_id:
        client = await get_redis()
        # The resume key doubles as the "session is alive" marker.
        if await client.exists(f"session:{x_session_id}:resume"):
            return x_session_id
        raise HTTPException(status_code=401, detail="Session expired. Please upload your resume again.")

    raise HTTPException(status_code=401, detail="No session found. Please upload a resume first.")
18
+
19
+
20
def generate_session_id() -> str:
    """Return a fresh random (version 4) UUID in canonical string form."""
    new_id = uuid.uuid4()
    return str(new_id)
app/api/routes/__init__.py ADDED
File without changes
app/api/routes/analyze.py ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+ import json
3
+ from typing import Optional
4
+ from fastapi import APIRouter, HTTPException, Depends
5
+ from pydantic import BaseModel
6
+ from app.api.dependencies import get_session_id
7
+ from app.core.redis import get_redis
8
+ from app.workers.tasks import analyze_and_customize
9
+ from app.models.customization import Intensity
10
+ from app.models.resume import ResumeData
11
+ from app.services.job_scraper import JobScraper
12
+ from app.services.ats_scorer import ATSScorer
13
+
14
+ router = APIRouter()
15
+
16
+ # Threshold for low ATS score warning
17
+ LOW_SCORE_THRESHOLD = 30
18
+
19
+
20
class AnalyzeRequest(BaseModel):
    """Request body for ``POST /analyze-job``.

    The handler rejects the request unless at least one of ``job_url`` or
    ``job_text`` is provided; when both are set the URL is used.
    """

    # URL of a job posting to scrape.
    job_url: Optional[str] = None
    # Raw job description text, parsed when no URL is given.
    job_text: Optional[str] = None
    # Requested customization intensity.
    intensity: Intensity = Intensity.MODERATE
    # True once the user has confirmed proceeding despite a low score;
    # skips the handler's preliminary score check.
    confirm_low_score: bool = False
25
+
26
+
27
class PreviewScoreRequest(BaseModel):
    """Request body for ``POST /preview-score``.

    The handler requires at least one of the two fields; when both are set
    the URL is used.
    """

    # URL of a job posting to scrape.
    job_url: Optional[str] = None
    # Raw job description text, parsed when no URL is given.
    job_text: Optional[str] = None
30
+
31
+
32
@router.post("/preview-score")
async def preview_score(
    request: PreviewScoreRequest,
    session_id: str = Depends(get_session_id),
):
    """Get preliminary ATS score before full customization.

    Loads the session's resume from Redis, parses the job posting (URL takes
    precedence over pasted text), scores the resume against it, and reports
    whether the score is below ``LOW_SCORE_THRESHOLD`` so the client can ask
    the user for confirmation.

    Raises:
        HTTPException: 400 when neither job input is given or the posting
            cannot be parsed; 404 when the session's resume is gone.
    """
    has_job_input = bool(request.job_url or request.job_text)
    if not has_job_input:
        raise HTTPException(
            status_code=400,
            detail="Please provide either a job URL or job description text."
        )

    # Fetch the resume stored for this session.
    redis = await get_redis()
    raw_resume = await redis.get(f"session:{session_id}:resume")
    if not raw_resume:
        raise HTTPException(status_code=404, detail="Session expired or resume not found")
    resume = ResumeData(**json.loads(raw_resume))

    # Turn the job input into a structured posting.
    scraper = JobScraper()
    try:
        if request.job_url:
            job = await scraper.scrape(request.job_url)
        else:
            job = await scraper.parse_text(request.job_text)
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Failed to parse job posting: {str(e)}")

    # Score the resume against the posting.
    score = await ATSScorer().calculate(resume, job)

    needs_confirmation = score.total < LOW_SCORE_THRESHOLD
    if needs_confirmation:
        message = (
            f"Your resume has a {score.total}% match with this job. "
            f"This is quite low and will require significant changes. "
            f"Do you want to proceed?"
        )
    else:
        message = None

    return {
        "score": score.total,
        "matched_keywords": score.matched_keywords,
        "missing_keywords": score.missing_keywords,
        "needs_confirmation": needs_confirmation,
        "message": message,
    }
80
+
81
+
82
@router.post("/analyze-job")
async def analyze_job(
    request: AnalyzeRequest,
    session_id: str = Depends(get_session_id),
):
    """Queue the analyze-and-customize task for the session's resume.

    Unless ``confirm_low_score`` is set, a best-effort preliminary ATS score
    is computed first; when it falls below ``LOW_SCORE_THRESHOLD`` the
    endpoint returns a confirmation prompt instead of queuing the task.

    Returns:
        ``{"task_id": ...}`` when the task was queued, or a dict with
        ``needs_confirmation``/``score``/``message`` when the user must
        confirm first.

    Raises:
        HTTPException: 400 when neither ``job_url`` nor ``job_text`` is given.
    """
    # Local import: this module defines no module-level logger.
    import logging

    if not request.job_url and not request.job_text:
        raise HTTPException(
            status_code=400,
            detail="Please provide either a job URL or job description text."
        )

    # If not explicitly confirmed, do a quick score check
    if not request.confirm_low_score:
        redis = await get_redis()
        resume_data = await redis.get(f"session:{session_id}:resume")
        if resume_data:
            resume = ResumeData(**json.loads(resume_data))
            scraper = JobScraper()
            try:
                if request.job_url:
                    job = await scraper.scrape(request.job_url)
                else:
                    job = await scraper.parse_text(request.job_text)

                scorer = ATSScorer()
                score = await scorer.calculate(resume, job)

                if score.total < LOW_SCORE_THRESHOLD:
                    return {
                        "needs_confirmation": True,
                        "score": score.total,
                        "message": (
                            f"Your resume has only a {score.total}% match with this job. "
                            f"Major changes will be needed. Do you want to continue?"
                        ),
                    }
            except Exception:
                # The pre-check is advisory only: a failure must not block
                # the request, but it should be visible in the logs rather
                # than silently discarded.
                logging.getLogger(__name__).warning(
                    "Preliminary ATS score check failed; queuing task anyway",
                    exc_info=True,
                )

    # Queue the task
    task = analyze_and_customize.delay(
        session_id=session_id,
        job_url=request.job_url,
        job_text=request.job_text,
        intensity=request.intensity.value,
    )

    return {"task_id": task.id}
app/api/routes/compare.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+ import json
3
+ from fastapi import APIRouter, HTTPException, UploadFile, File, Form
4
+ from app.core.redis import get_redis
5
+ from app.services.resume_parser import ResumeParser
6
+ from app.services.resume_comparator import ResumeComparator
7
+ from app.models.resume import ResumeData
8
+
9
+ router = APIRouter()
10
+
11
+
12
@router.post("/compare/{result_id}")
async def compare_resumes(
    result_id: str,
    original_file: UploadFile = File(...),
    job_description: str = Form(default="")
):
    """
    Compare original (ground truth) resume with LLM-generated version.

    Args:
        result_id: ID of the LLM analysis result
        original_file: Original resume file uploaded by user
        job_description: Job description text (optional)

    Returns:
        Detailed comparison metrics

    Raises:
        HTTPException: 404 when the stored result is missing; 400 for an
            unsupported file type, an oversized file, or a resume the parser
            rejects with ``ValueError``; 500 when reading, parsing or the
            comparison itself fails unexpectedly.
    """
    # Same upload cap as the /upload endpoint.
    max_upload_bytes = 5 * 1024 * 1024

    # Get LLM result from Redis
    redis = await get_redis()
    data = await redis.get(f"result:{result_id}")

    if not data:
        raise HTTPException(status_code=404, detail="Result not found or expired.")

    result = json.loads(data)
    llm_resume = ResumeData(**result["customized"])

    # Parse original resume
    parser = ResumeParser()

    if not original_file.content_type or not parser.is_supported(original_file.content_type):
        raise HTTPException(
            status_code=400,
            detail="Invalid file type. Please upload a PDF or Word document."
        )

    try:
        contents = await original_file.read()
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to parse original resume: {str(e)}") from e

    # Checked outside any broad ``except`` so the 400 is not rewritten to 500.
    if len(contents) > max_upload_bytes:
        raise HTTPException(status_code=400, detail="File too large. Maximum size is 5MB.")

    try:
        original_resume = await parser.parse(contents, original_file.content_type)
    except ValueError as e:
        # Mirrors /upload: a ValueError from the parser is a client error.
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to parse original resume: {str(e)}") from e

    # Run comparison
    comparator = ResumeComparator(
        original=original_resume,
        llm_version=llm_resume,
        job_description=job_description
    )

    try:
        return comparator.compare()
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Comparison failed: {str(e)}") from e
app/api/routes/export.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+ import json
3
+ from enum import Enum
4
+ from fastapi import APIRouter, HTTPException, Query
5
+ from fastapi.responses import Response
6
+ from app.core.redis import get_redis
7
+ from app.services.resume_generator import ResumeGenerator
8
+ from app.models.resume import ResumeData
9
+
10
+ router = APIRouter()
11
+
12
+
13
class ExportFormat(str, Enum):
    """Output formats accepted by the export endpoint's ``format`` query parameter."""

    # Members are also plain strings, so they compare equal to their values.
    PDF = "pdf"
    DOCX = "docx"
16
+
17
+
18
@router.get("/export/{result_id}")
async def export_resume(
    result_id: str,
    format: ExportFormat = Query(default=ExportFormat.PDF),
):
    """Render the customized resume of a stored result as a downloadable file.

    Raises:
        HTTPException: 404 when no result is stored under ``result_id``;
            500 when document generation fails.
    """
    redis = await get_redis()
    stored = await redis.get(f"result:{result_id}")
    if not stored:
        raise HTTPException(status_code=404, detail="Result not found or expired.")

    resume = ResumeData(**json.loads(stored)["customized"])
    generator = ResumeGenerator()

    as_pdf = format == ExportFormat.PDF
    try:
        content = generator.to_pdf(resume) if as_pdf else generator.to_docx(resume)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Export failed: {str(e)}")

    if as_pdf:
        media_type = "application/pdf"
        filename = "resume.pdf"
    else:
        media_type = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
        filename = "resume.docx"

    disposition = f'attachment; filename="{filename}"'
    return Response(
        content=content,
        media_type=media_type,
        headers={"Content-Disposition": disposition},
    )
app/api/routes/progress.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+ import json
3
+ import asyncio
4
+ from typing import AsyncGenerator
5
+ from fastapi import APIRouter
6
+ from fastapi.responses import StreamingResponse
7
+ from app.core.redis import get_redis
8
+
9
+ router = APIRouter()
10
+
11
+
12
@router.get("/progress/{task_id}")
async def stream_progress(task_id: str):
    """Stream progress events for a task as Server-Sent Events.

    The generator subscribes to the ``progress:<task_id>`` pub/sub channel,
    replays the last stored state from ``progress_state:<task_id>`` (if any),
    then forwards every published message. The stream ends after an event
    whose ``step`` is ``"complete"`` or ``"error"``. While the channel is
    idle a comment-only heartbeat is emitted so the connection stays open.
    """
    terminal_steps = ("complete", "error")
    # How long a single poll blocks waiting for a message before a heartbeat
    # is sent instead.
    heartbeat_seconds = 15.0
    # Hard upper bound on one poll; only reached if the Redis call hangs.
    poll_guard_seconds = 30.0

    def _is_terminal(payload) -> bool:
        """Return True when *payload* is a JSON object with a terminal step."""
        try:
            parsed = json.loads(payload)
        except (TypeError, ValueError):
            # Malformed payloads are forwarded as-is but never end the stream.
            return False
        return isinstance(parsed, dict) and parsed.get("step") in terminal_steps

    async def event_generator() -> AsyncGenerator[str, None]:
        redis = await get_redis()
        pubsub = redis.pubsub()
        channel = f"progress:{task_id}"

        try:
            # Subscribe before reading the stored state so that no update
            # published in between is missed.
            await pubsub.subscribe(channel)

            # Send current state if exists
            current_state = await redis.get(f"progress_state:{task_id}")
            if current_state:
                yield f"data: {current_state}\n\n"
                # The task already finished before the client connected:
                # nothing more will be published, so stop here.
                if _is_terminal(current_state):
                    return

            while True:
                # ``timeout`` makes get_message block until a message arrives
                # or the interval elapses; with the default it returns
                # immediately and this loop would spin emitting heartbeats.
                message = await asyncio.wait_for(
                    pubsub.get_message(
                        ignore_subscribe_messages=True,
                        timeout=heartbeat_seconds,
                    ),
                    timeout=poll_guard_seconds,
                )

                if message and message["type"] == "message":
                    data = message["data"]
                    yield f"data: {data}\n\n"

                    # Check if complete or error
                    if _is_terminal(data):
                        break
                else:
                    # Send heartbeat
                    yield ": heartbeat\n\n"

        except asyncio.TimeoutError:
            yield 'data: {"step": "error", "error": {"code": "TIMEOUT", "message": "Connection timeout"}}\n\n'
        finally:
            await pubsub.unsubscribe(channel)
            await pubsub.close()

    return StreamingResponse(
        event_generator(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "X-Accel-Buffering": "no",
        },
    )
app/api/routes/result.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+ import json
3
+ from fastapi import APIRouter, HTTPException
4
+ from app.core.redis import get_redis
5
+
6
+ router = APIRouter()
7
+
8
+
9
@router.get("/result/{result_id}")
async def get_result(result_id: str):
    """Return the stored result for ``result_id`` as decoded JSON.

    Raises:
        HTTPException: 404 when nothing is stored under ``result:<result_id>``.
    """
    client = await get_redis()
    payload = await client.get(f"result:{result_id}")
    if payload:
        return json.loads(payload)

    raise HTTPException(status_code=404, detail="Result not found or expired.")
app/api/routes/upload.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+ import logging
3
+ from fastapi import APIRouter, UploadFile, File, HTTPException
4
+ from app.api.dependencies import generate_session_id
5
+ from app.core.redis import get_redis
6
+ from app.core.config import settings
7
+ from app.services.resume_parser import ResumeParser
8
+ from app.services.layout_scanner import LayoutScanner
9
+ from app.models.analysis import SafetyScan
10
+
11
+ router = APIRouter()
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
@router.post("/upload")
async def upload_resume(file: UploadFile = File(...)):
    """Accept a resume upload, parse it, and open a new session for it.

    Steps: validate content type and size, run a best-effort layout scan,
    parse the resume, collect content warnings, then store the parsed resume,
    the raw file and its content type in Redis under a new session id.

    Returns:
        A dict with the new ``session_id``, a short ``profile`` summary,
        ``format_warnings`` from the layout scan and ``content_issues``.

    Raises:
        HTTPException: 400 for an unsupported type, an oversized file, or a
            parser ``ValueError``; 500 for any other failure.
    """
    size_limit = 5 * 1024 * 1024  # 5MB max

    try:
        parser = ResumeParser()

        # Validate file type
        mime = file.content_type
        if not mime or not parser.is_supported(mime):
            raise HTTPException(
                status_code=400,
                detail="Invalid file type. Please upload a PDF or Word document."
            )

        # Validate file size
        contents = await file.read()
        if len(contents) > size_limit:
            raise HTTPException(status_code=400, detail="File too large. Maximum size is 5MB.")

        # Layout scan runs before parsing; a scan failure only loses the
        # warnings, it never fails the upload.
        format_warnings = []
        try:
            scan_result = LayoutScanner().scan(contents, mime)
            if scan_result.has_issues:
                format_warnings = [
                    {"type": item.type, "message": item.message, "recommendation": item.recommendation}
                    for item in scan_result.warnings
                ]
        except Exception as e:
            logger.warning(f"Format scan failed: {e}")

        try:
            resume = await parser.parse(contents, mime)
        except ValueError as e:
            raise HTTPException(status_code=400, detail=str(e))
        except Exception as e:
            logger.error(f"Resume parsing failed: {e}", exc_info=True)
            raise HTTPException(status_code=500, detail=f"Failed to parse resume: {str(e)}")

        # Minimum-content checks: reported to the client, not treated as errors.
        content_checks = (
            (not resume.contact.name, "No name detected in resume"),
            (not resume.experience, "No work experience detected"),
            (len(resume.skills) == 0, "No skills detected"),
        )
        content_issues = [text for missing, text in content_checks if missing]

        # Store in Redis: parsed resume, raw file (for potential
        # re-processing) and content type (for layout scanning).
        session_id = generate_session_id()
        redis = await get_redis()
        session_entries = {
            "resume": resume.model_dump_json(),
            "file": contents,
            "content_type": mime,
        }
        for suffix, value in session_entries.items():
            await redis.set(
                f"session:{session_id}:{suffix}",
                value,
                ex=settings.session_ttl_seconds,
            )

        profile = {
            "name": resume.contact.name,
            "email": resume.contact.email,
            "skills": resume.skills[:10],
            "experience_count": len(resume.experience),
        }
        return {
            "session_id": session_id,
            "profile": profile,
            "format_warnings": format_warnings,
            "content_issues": content_issues,
        }
    except HTTPException:
        # Deliberate HTTP errors pass through untouched.
        raise
    except Exception as e:
        logger.error(f"Upload failed with unexpected error: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Upload failed: {str(e)}")
app/core/__init__.py ADDED
File without changes
app/core/config.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic_settings import BaseSettings, SettingsConfigDict
2
+
3
+
4
class Settings(BaseSettings):
    """Application settings; values may be supplied through a ``.env`` file."""

    model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8")

    # --- Redis ---
    redis_url: str = "redis://localhost:6379/0"

    # --- LLM selection: primary provider and its fast / smart model names ---
    llm_provider: str = "google"
    llm_model_fast: str = "gemini-2.0-flash"
    llm_model_smart: str = "gemini-2.0-flash"

    # --- API keys (empty string means "not configured") ---
    openai_api_key: str = ""
    google_api_key: str = ""
    zai_api_key: str = ""
    groq_api_key: str = ""

    # --- Session lifetime in seconds (7200 = 2 hours) ---
    session_ttl_seconds: int = 7200
23
+
24
+
25
+ settings = Settings()
app/core/redis.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+ import ssl
3
+ from contextlib import asynccontextmanager
4
+ from typing import Optional
5
+
6
+ import redis.asyncio as redis
7
+ from app.core.config import settings
8
+
9
+ # Global client for FastAPI (long-lived connection)
10
+ _fastapi_client: Optional[redis.Redis] = None
11
+
12
+
13
def _redis_kwargs() -> dict:
    """Extra kwargs for rediss:// (TLS) connections like Upstash.

    Returns an empty dict for non-TLS URLs.
    """
    uses_tls = settings.redis_url.startswith("rediss://")
    # NOTE(review): ssl_cert_reqs=None presumably turns off server
    # certificate verification - confirm this is intended for production.
    return {"ssl_cert_reqs": None} if uses_tls else {}
18
+
19
+
20
async def get_redis() -> redis.Redis:
    """Get Redis client for FastAPI (reuses connection).

    The client is created on first use and cached in the module-level
    ``_fastapi_client``; responses are decoded to ``str``.
    """
    global _fastapi_client
    if _fastapi_client is not None:
        return _fastapi_client

    _fastapi_client = redis.from_url(
        settings.redis_url, decode_responses=True, **_redis_kwargs()
    )
    return _fastapi_client
28
+
29
+
30
async def close_redis():
    """Close FastAPI Redis connection and forget the cached client.

    Does nothing when no client has been created.
    """
    global _fastapi_client
    client = _fastapi_client
    if not client:
        return

    await client.close()
    # Only cleared after a successful close, so a failed close keeps the
    # reference available.
    _fastapi_client = None
36
+
37
+
38
@asynccontextmanager
async def get_redis_for_worker():
    """Get fresh Redis client for Celery workers (new connection per task).

    Async context manager: yields a newly created client and closes it when
    the ``async with`` block exits, whether or not it raised.
    """
    worker_client = redis.from_url(
        settings.redis_url,
        decode_responses=True,
        **_redis_kwargs(),
    )
    try:
        yield worker_client
    finally:
        await worker_client.close()
app/llm/__init__.py ADDED
File without changes
app/llm/base.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+ from typing import Protocol, Any, Dict, Optional
3
+ from abc import abstractmethod
4
+
5
+
6
class LLMProvider(Protocol):
    """Interface every LLM backend implements.

    Implementations expose the model name they were configured with and two
    coroutines: free-text completion and JSON completion.
    """

    # Name of the model this provider instance talks to.
    model: str

    @abstractmethod
    async def complete(self, prompt: str, system: Optional[str] = None) -> str:
        """Return the model's text answer to *prompt*, with an optional system message."""
        ...

    @abstractmethod
    async def complete_json(self, prompt: str, schema: Dict[str, Any]) -> Dict[str, Any]:
        """Return the model's answer to *prompt* as a dict, guided by *schema*."""
        ...
app/llm/factory.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+ import logging
3
+ from typing import Dict, List, Tuple, Type
4
+
5
+ from app.llm.base import LLMProvider
6
+ from app.llm.openai_provider import OpenAIProvider
7
+ from app.llm.google_provider import GoogleProvider
8
+ from app.llm.zai_provider import ZAIProvider
9
+ from app.llm.groq_provider import GroqProvider
10
+ from app.llm.fallback_provider import FallbackLLMProvider
11
+ from app.core.config import settings
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
class LLMFactory:
    """Builds LLM providers from application settings.

    ``get_fast`` / ``get_smart`` return the configured primary provider,
    wrapped in a ``FallbackLLMProvider`` together with every other provider
    that has an API key whenever more than one is available.
    """

    # Provider name -> implementation class.
    _providers: Dict[str, Type[LLMProvider]] = {
        "openai": OpenAIProvider,
        "google": GoogleProvider,
        "zai": ZAIProvider,
        "groq": GroqProvider,
    }

    # Provider name -> (fast model, smart model) used when the provider acts
    # as a fallback; the primary provider's models come from settings.
    _default_models: Dict[str, Tuple[str, str]] = {
        "openai": ("gpt-4o-mini", "gpt-4o"),
        "google": ("gemini-3-flash-preview", "gemini-3-flash-preview"),
        "zai": ("glm-4.7", "glm-4.7"),
        "groq": ("llama-3.1-8b-instant", "llama-3.1-8b-instant"),
    }

    @classmethod
    def create(cls, provider: str, model: str, api_key: str) -> LLMProvider:
        """Instantiate the provider registered under *provider*.

        Raises:
            ValueError: if *provider* is not a registered provider name.
        """
        if provider not in cls._providers:
            raise ValueError(f"Unknown provider: {provider}. Available: {list(cls._providers.keys())}")
        return cls._providers[provider](model=model, api_key=api_key)

    @classmethod
    def _api_keys(cls) -> Dict[str, str]:
        """Return the configured API key for each provider name (may be empty)."""
        return {
            "openai": settings.openai_api_key,
            "google": settings.google_api_key,
            "zai": settings.zai_api_key,
            "groq": settings.groq_api_key,
        }

    @classmethod
    def _get_available_providers(cls, use_fast: bool = True) -> List[LLMProvider]:
        """Get list of available providers with valid API keys, primary first."""
        providers: List[LLMProvider] = []
        api_keys = cls._api_keys()
        primary = settings.llm_provider

        # Add primary provider first if it has an API key
        if api_keys.get(primary):
            model = settings.llm_model_fast if use_fast else settings.llm_model_smart
            providers.append(cls.create(primary, model, api_keys[primary]))
            logger.info(f"Primary provider: {primary} ({model})")

        # Add fallback providers
        for name, key in api_keys.items():
            if name != primary and key:
                fast_model, smart_model = cls._default_models[name]
                model = fast_model if use_fast else smart_model
                providers.append(cls.create(name, model, key))
                logger.info(f"Fallback provider: {name} ({model})")

        return providers

    @classmethod
    def _build(cls, use_fast: bool) -> LLMProvider:
        """Return a single provider, or a fallback chain when several are available.

        Raises:
            ValueError: if no provider has an API key configured.
        """
        providers = cls._get_available_providers(use_fast=use_fast)
        if not providers:
            # Fail here with an actionable message instead of relying on the
            # generic error raised for an empty provider list.
            raise ValueError(
                "No LLM provider available: configure an API key for at least one of "
                f"{list(cls._providers.keys())}"
            )
        if len(providers) == 1:
            return providers[0]
        return FallbackLLMProvider(providers)

    @classmethod
    def get_fast(cls) -> LLMProvider:
        """Get configured fast/cheap model with automatic fallback."""
        return cls._build(use_fast=True)

    @classmethod
    def get_smart(cls) -> LLMProvider:
        """Get configured smart model with automatic fallback."""
        return cls._build(use_fast=False)

    @classmethod
    def _get_api_key(cls) -> str:
        """Get API key for primary provider (legacy method)."""
        return cls._api_keys().get(settings.llm_provider, "")
app/llm/fallback_provider.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+ import asyncio
3
+ import logging
4
+ import re
5
+ import time
6
+ from typing import Any, Dict, List, Optional
7
+
8
+ from app.llm.base import LLMProvider
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
class QuotaExceededError(Exception):
    """Signals that every configured LLM provider failed for a request."""
16
+
17
+
18
class FallbackLLMProvider(LLMProvider):
    """LLM provider with automatic fallback and retry with exponential backoff.

    Providers are tried in the order given. Each provider is retried on
    rate-limit style errors (up to ``MAX_RETRIES_PER_PROVIDER`` attempts);
    any other error, or running out of attempts, moves on to the next
    provider. ``QuotaExceededError`` is raised once every provider failed.
    """

    # Retry configuration
    MAX_RETRIES_PER_PROVIDER = 3
    INITIAL_BACKOFF_SECONDS = 5
    MAX_BACKOFF_SECONDS = 65

    # Lower-case substrings that mark an error as a retryable rate limit.
    _RATE_LIMIT_INDICATORS = (
        "rate_limit",
        "rate limit",
        "429",
        "too many requests",
        "retry",
        "quota exceeded",
        "resource_exhausted",
    )

    # Patterns like "retry in 22.428058397s" or "retry_delay { seconds: 22 }",
    # compiled once and tried in order from most to least specific.
    _RETRY_DELAY_PATTERNS = tuple(
        re.compile(pattern, re.IGNORECASE)
        for pattern in (
            r'retry in ([\d.]+)s',
            r'retry_delay.*?seconds[:\s]+(\d+)',
            r'(\d+)\s*seconds?',
        )
    )

    def __init__(self, providers: List[LLMProvider]):
        """Store *providers* and expose the first provider's model name.

        Raises:
            ValueError: if *providers* is empty.
        """
        if not providers:
            raise ValueError("At least one provider is required")
        self.providers = providers
        self.model = providers[0].model

    def _is_rate_limit_error(self, error: Exception) -> bool:
        """Check if error is a rate limit/quota error that might resolve with retry."""
        error_str = str(error).lower()
        return any(indicator in error_str for indicator in self._RATE_LIMIT_INDICATORS)

    def _extract_retry_delay(self, error: Exception) -> Optional[float]:
        """Extract retry delay from error message if present.

        Returns the delay in seconds, capped at ``MAX_BACKOFF_SECONDS``, or
        ``None`` when the message contains no usable delay.
        """
        error_str = str(error)
        for pattern in self._RETRY_DELAY_PATTERNS:
            match = pattern.search(error_str)
            if not match:
                continue
            try:
                return min(float(match.group(1)), self.MAX_BACKOFF_SECONDS)
            except ValueError:
                # e.g. "retry in ...s": the capture is not a number; try
                # the next pattern.
                continue
        return None

    async def _call_with_retry(
        self,
        provider: LLMProvider,
        call_func,
        *args,
        **kwargs
    ) -> Any:
        """Call provider method with retry on rate limit errors.

        Re-raises the provider's exception when it is not a rate-limit error
        or when the last attempt failed.
        """
        provider_name = provider.__class__.__name__
        max_attempts = self.MAX_RETRIES_PER_PROVIDER
        backoff = self.INITIAL_BACKOFF_SECONDS

        for attempt in range(max_attempts):
            # perf_counter is monotonic, so the measured duration is not
            # affected by wall-clock adjustments.
            started = time.perf_counter()
            try:
                result = await call_func(*args, **kwargs)
            except Exception as e:
                out_of_attempts = attempt >= max_attempts - 1
                if out_of_attempts or not self._is_rate_limit_error(e):
                    raise

                # Extract delay from error or use exponential backoff
                delay = self._extract_retry_delay(e) or backoff
                logger.warning(
                    f"{provider_name} rate limited, "
                    f"retrying in {delay:.1f}s (attempt {attempt + 1}/{max_attempts})"
                )
                await asyncio.sleep(delay)
                backoff = min(backoff * 2, self.MAX_BACKOFF_SECONDS)
            else:
                # Duration of the whole awaited call in milliseconds (the log
                # label says TTFT, but no streaming is involved here).
                ttft = (time.perf_counter() - started) * 1000
                logger.info(f"⏱️ {provider_name} TTFT: {ttft:.2f}ms ({ttft/1000:.3f}s)")
                return result

        # Only reachable when MAX_RETRIES_PER_PROVIDER < 1 (loop never ran).
        raise RuntimeError("MAX_RETRIES_PER_PROVIDER must be at least 1")

    async def _run_with_fallback(self, method_name: str, *args: Any) -> Any:
        """Invoke ``method_name`` on each provider in order until one succeeds.

        Raises:
            QuotaExceededError: when every provider failed; chained to the
                last provider's exception.
        """
        last_error: Optional[Exception] = None
        errors_by_provider: List[str] = []
        total = len(self.providers)

        for position, provider in enumerate(self.providers, start=1):
            provider_name = provider.__class__.__name__
            try:
                logger.info(f"Trying provider {position}/{total}: {provider_name}")
                return await self._call_with_retry(
                    provider,
                    getattr(provider, method_name),
                    *args,
                )
            except Exception as e:
                last_error = e
                errors_by_provider.append(f"{provider_name}: {str(e)[:100]}")
                logger.warning(f"Provider {provider_name} failed after retries: {e}")

        error_summary = "; ".join(errors_by_provider)
        raise QuotaExceededError(
            f"All providers exhausted. Errors: {error_summary}"
        ) from last_error

    async def complete(self, prompt: str, system: Optional[str] = None) -> str:
        """Generate a text completion using the first provider that succeeds."""
        return await self._run_with_fallback("complete", prompt, system)

    async def complete_json(self, prompt: str, schema: Dict[str, Any]) -> Dict[str, Any]:
        """Generate structured JSON output using the first provider that succeeds."""
        return await self._run_with_fallback("complete_json", prompt, schema)
app/llm/google_provider.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+ import json
3
+ from typing import Any, Dict, Optional
4
+ import google.generativeai as genai
5
+ from app.llm.base import LLMProvider
6
+
7
+
8
class GoogleProvider(LLMProvider):
    """LLM provider backed by the ``google.generativeai`` client."""

    def __init__(self, model: str, api_key: str):
        """Configure the client with *api_key* and bind it to *model*."""
        self.model = model
        # NOTE: genai.configure is a module-level call, not per instance.
        genai.configure(api_key=api_key)
        self._model = genai.GenerativeModel(model)

    @staticmethod
    def _strip_code_fence(text: str) -> str:
        """Return *text* without a surrounding markdown code fence.

        Tolerates whitespace before the opening fence and an optional
        ``json`` language tag in any letter case.
        """
        cleaned = text.strip()
        if cleaned.startswith("```"):
            # Text between the first pair of fences.
            cleaned = cleaned.split("```")[1].lstrip()
            if cleaned[:4].lower() == "json":
                cleaned = cleaned[4:]
        return cleaned.strip()

    async def complete(self, prompt: str, system: Optional[str] = None) -> str:
        """Generate text; the system text, if any, is prepended to the prompt."""
        full_prompt = f"{system}\n\n{prompt}" if system else prompt
        response = await self._model.generate_content_async(full_prompt)
        return response.text

    async def complete_json(self, prompt: str, schema: Dict[str, Any]) -> Dict[str, Any]:
        """Generate a JSON answer for *prompt* and return it parsed.

        Raises:
            json.JSONDecodeError: if the reply is not valid JSON after the
                optional code fence has been removed.
        """
        system = f"Respond only with valid JSON matching this schema: {json.dumps(schema)}. No markdown, no explanation, just JSON."
        full_prompt = f"{system}\n\n{prompt}"
        response = await self._model.generate_content_async(full_prompt)
        # Strip markdown code blocks if present
        return json.loads(self._strip_code_fence(response.text))
app/llm/groq_provider.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+ import json
3
+ from typing import Any, Dict, List, Optional
4
+ from openai import AsyncOpenAI
5
+ from app.llm.base import LLMProvider
6
+
7
+
8
class GroqProvider(LLMProvider):
    """LLM provider that talks to Groq through its OpenAI-compatible endpoint."""

    def __init__(self, model: str, api_key: str):
        """Remember *model* and create an async client pointed at Groq."""
        self.model = model
        self.client = AsyncOpenAI(
            api_key=api_key,
            base_url="https://api.groq.com/openai/v1"
        )

    async def complete(self, prompt: str, system: Optional[str] = None) -> str:
        """Return the first choice's text ("" when the content is empty)."""
        user_message = {"role": "user", "content": prompt}
        if system:
            messages: List[Dict[str, str]] = [
                {"role": "system", "content": system},
                user_message,
            ]
        else:
            messages = [user_message]

        response = await self.client.chat.completions.create(
            model=self.model,
            messages=messages,
        )
        text = response.choices[0].message.content
        return text or ""

    async def complete_json(self, prompt: str, schema: Dict[str, Any]) -> Dict[str, Any]:
        """Request a JSON-object response and return it parsed.

        An empty reply is treated as ``{}``.
        """
        instruction = f"Respond only with valid JSON matching this schema: {json.dumps(schema)}"
        response = await self.client.chat.completions.create(
            model=self.model,
            messages=[
                {"role": "system", "content": instruction},
                {"role": "user", "content": prompt},
            ],
            response_format={"type": "json_object"},
        )
        raw = response.choices[0].message.content or "{}"
        return json.loads(raw)
app/llm/openai_provider.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+ import json
3
+ from typing import Any, Dict, List, Optional
4
+ from openai import AsyncOpenAI
5
+ from app.llm.base import LLMProvider
6
+
7
+
8
class OpenAIProvider(LLMProvider):
    """LLM provider backed by the OpenAI async chat completions client."""

    def __init__(self, model: str, api_key: str):
        """Keep the model name and create the async OpenAI client."""
        self.model = model
        self.client = AsyncOpenAI(api_key=api_key)

    async def complete(self, prompt: str, system: Optional[str] = None) -> str:
        """Return the first choice's text for ``prompt``.

        ``system`` becomes a leading system message when truthy. A reply with
        no content yields an empty string.
        """
        user_turn: Dict[str, str] = {"role": "user", "content": prompt}
        if system:
            messages: List[Dict[str, str]] = [
                {"role": "system", "content": system},
                user_turn,
            ]
        else:
            messages = [user_turn]

        result = await self.client.chat.completions.create(
            model=self.model,
            messages=messages,
        )
        answer = result.choices[0].message.content
        return answer if answer else ""

    async def complete_json(self, prompt: str, schema: Dict[str, Any]) -> Dict[str, Any]:
        """Ask for a JSON object matching ``schema`` and return the parsed reply.

        The request uses the ``json_object`` response format. An empty reply
        is treated as ``{}``; malformed JSON raises from ``json.loads``.
        """
        schema_hint = f"Respond only with valid JSON matching this schema: {json.dumps(schema)}"
        payload = [
            {"role": "system", "content": schema_hint},
            {"role": "user", "content": prompt},
        ]

        result = await self.client.chat.completions.create(
            model=self.model,
            messages=payload,
            response_format={"type": "json_object"},
        )
        body = result.choices[0].message.content or "{}"
        return json.loads(body)
app/llm/zai_provider.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+ import json
3
+ from typing import Any, Dict, List, Optional
4
+ from openai import AsyncOpenAI
5
+ from app.llm.base import LLMProvider
6
+
7
+
8
class ZAIProvider(LLMProvider):
    """LLM provider that calls Z.ai through the OpenAI-compatible async client."""

    def __init__(self, model: str, api_key: str):
        """Store the model name and build a client for the Z.ai endpoint (60 s timeout)."""
        self.model = model
        self.client = AsyncOpenAI(
            api_key=api_key,
            base_url="https://api.z.ai/api/coding/paas/v4",
            timeout=60.0
        )

    async def complete(self, prompt: str, system: Optional[str] = None) -> str:
        """Send ``prompt`` (with an optional system message) and return the reply text.

        Returns an empty string when the first choice has no content.
        """
        messages: List[Dict[str, str]] = []
        if system:
            messages.append({"role": "system", "content": system})
        messages.append({"role": "user", "content": prompt})

        response = await self.client.chat.completions.create(
            model=self.model,
            messages=messages,
        )
        return response.choices[0].message.content or ""

    @staticmethod
    def _strip_code_fence(content: str) -> str:
        """Return ``content`` without a surrounding Markdown code fence, if any."""
        text = content.strip()
        if not text.startswith("```"):
            return text
        text = text[3:]
        # An optional language tag directly follows the opening fence.
        if text.lower().startswith("json"):
            text = text[4:]
        closing = text.rfind("```")
        if closing != -1:
            text = text[:closing]
        return text.strip()

    async def complete_json(self, prompt: str, schema: Dict[str, Any]) -> Dict[str, Any]:
        """Ask for JSON matching ``schema`` and return the parsed reply.

        JSON mode is tried first. If that request raises, the call is retried
        without ``response_format``; such a reply may arrive wrapped in a
        Markdown code fence, so fences are stripped before parsing.

        Raises:
            json.JSONDecodeError: if the reply is still not valid JSON.
        """
        system = f"Respond only with valid JSON matching this schema: {json.dumps(schema)}"
        messages = [
            {"role": "system", "content": system},
            {"role": "user", "content": prompt},
        ]

        try:
            response = await self.client.chat.completions.create(
                model=self.model,
                messages=messages,
                response_format={"type": "json_object"},
            )
        except Exception:
            # Fallback without response_format if not supported.
            # NOTE(review): this also retries on auth/timeout errors; the
            # second call then raises the real error to the caller.
            response = await self.client.chat.completions.create(
                model=self.model,
                messages=messages,
            )
        content = response.choices[0].message.content or "{}"
        return json.loads(self._strip_code_fence(content) or "{}")
app/main.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from contextlib import asynccontextmanager
2
+ import logging
3
+ from fastapi import FastAPI, Request
4
+ from fastapi.middleware.cors import CORSMiddleware
5
+ from fastapi.responses import JSONResponse
6
+ from app.core.redis import close_redis
7
+ from app.api.routes import upload, analyze, progress, result, export, compare
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+
12
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan hook: nothing to do on startup, close Redis on shutdown.

    The cleanup sits in ``finally`` so the Redis connection is also released
    when the context is torn down by an exception or cancellation.
    """
    try:
        yield
    finally:
        await close_redis()
16
+
17
+
18
app = FastAPI(
    title="CV-Buddy API",
    description="AI-powered resume customization",
    version="0.1.0",
    lifespan=lifespan,
)

import os

# Origins allowed to call the API from a browser; the local dev frontend is
# always included.
allowed_origins = [
    "http://localhost:3000",
]
# Add production Vercel URL if set. Browsers send the Origin header without a
# trailing slash, so a configured "https://site.app/" would never match;
# normalise the value before registering it.
vercel_url = os.environ.get("FRONTEND_URL")
if vercel_url:
    vercel_url = vercel_url.strip().rstrip("/")
if vercel_url:
    allowed_origins.append(vercel_url)

app.add_middleware(
    CORSMiddleware,
    allow_origins=allowed_origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
42
+
43
+
44
# Global exception handler - ALWAYS return JSON
@app.exception_handler(Exception)
async def global_exception_handler(request: Request, exc: Exception):
    """Catch all unhandled exceptions and return a JSON error response.

    Classification is based on the exception text:
    * quota / rate-limit wording -> 429 ``rate_limit``
    * "exhausted"                -> 503 ``service_unavailable``
    * anything else              -> 500 ``internal_error``
    """
    logger.error("Unhandled exception: %s", exc, exc_info=True)

    # Extract useful error message
    error_message = str(exc)
    lowered = error_message.lower()

    # Match explicit rate-limit wording only. A bare "rate" substring also
    # appears in words such as "generate", "operation" or "separate" and used
    # to misreport ordinary failures as 429s.
    rate_limit_markers = (
        "quota",
        "rate limit",
        "rate_limit",
        "rate-limit",
        "ratelimit",
        "too many requests",
    )
    is_rate_limited = (
        any(marker in lowered for marker in rate_limit_markers)
        or "ratelimit" in type(exc).__name__.lower()
    )

    if is_rate_limited:
        return JSONResponse(
            status_code=429,
            content={
                "detail": "API rate limit reached. Please wait a moment and try again.",
                "error_type": "rate_limit",
                "original_error": error_message[:500],
            }
        )

    if "exhausted" in lowered:
        return JSONResponse(
            status_code=503,
            content={
                "detail": "AI service temporarily unavailable. Please try again in a few seconds.",
                "error_type": "service_unavailable",
                "original_error": error_message[:500],
            }
        )

    # NOTE(review): the raw exception text is echoed to the client here and in
    # "original_error" above; confirm this cannot leak internal details.
    return JSONResponse(
        status_code=500,
        content={
            "detail": f"An error occurred: {error_message[:200]}",
            "error_type": "internal_error",
        }
    )
81
+
82
+
83
# Include routers: every route module is mounted under /api and tagged with
# its own name.
for _route_module, _tag in (
    (upload, "upload"),
    (analyze, "analyze"),
    (progress, "progress"),
    (result, "result"),
    (export, "export"),
    (compare, "compare"),
):
    app.include_router(_route_module.router, prefix="/api", tags=[_tag])
90
+
91
+
92
@app.get("/health")
async def health_check():
    """Health endpoint: always answers with a fixed ``status: ok`` payload."""
    payload = {"status": "ok"}
    return payload
app/models/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ from app.models.resume import ResumeData, ContactInfo, Experience, Education
2
+ from app.models.job import JobData
3
+ from app.models.score import ATSScore, ScoreBreakdown
4
+ from app.models.customization import CustomizationResult, Change, Intensity
5
+ from app.models.analysis import BulletAnalysis, LayoutWarning, SafetyScan, KeywordPlacement
app/models/analysis.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Optional
2
+ from pydantic import BaseModel
3
+
4
+
5
class BulletAnalysis(BaseModel):
    """Analysis of one resume bullet against a job's requirements.

    The first group of fields describes the original bullet; the second group
    is filled in only when the bullet was modified by customization.
    """
    # Path of the bullet inside the resume, e.g. "experience[0].bullets[2]"
    location: str
    original_text: str
    # 0-100
    relevance_score: int
    matched_keywords: List[str] = []
    missing_keywords: List[str] = []
    suggestion: str = ""

    # After customization (populated if bullet was modified)
    customized_text: Optional[str] = None
    new_relevance_score: Optional[int] = None
    keywords_added: List[str] = []
18
+
19
+
20
class LayoutWarning(BaseModel):
    """One layout compatibility warning, with advice on how to resolve it."""
    # Warning category: "multi_column", "complex_table", "graphics"
    type: str
    message: str
    recommendation: str
25
+
26
+
27
class SafetyScan(BaseModel):
    """Outcome of the layout safety analysis for ATS compatibility."""
    # Defaults describe a clean scan: no issues and no warnings.
    has_issues: bool = False
    warnings: List[LayoutWarning] = []
31
+
32
+
33
class KeywordPlacement(BaseModel):
    """Where a keyword appears in the resume and whether it reads naturally."""
    keyword: str
    # Where the keyword appears
    locations: List[str] = []
    is_natural: bool = True
    # Warning message if unnatural
    flag: Optional[str] = None
app/models/customization.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from enum import Enum
2
+ from typing import List, Optional
3
+ from pydantic import BaseModel
4
+ from app.models.resume import ResumeData
5
+ from app.models.score import ATSScore
6
+ from app.models.analysis import BulletAnalysis, SafetyScan, KeywordPlacement
7
+
8
+
9
class Intensity(str, Enum):
    """Customization intensity levels; members are also plain strings."""
    CONSERVATIVE = "conservative"
    MODERATE = "moderate"
    AGGRESSIVE = "aggressive"
13
+
14
+
15
class Change(BaseModel):
    """A single edit made to the resume during customization."""
    # Kind of edit: "added", "modified", "reordered"
    type: str
    # Path of the edited item, e.g. "experience[0].bullets[2]"
    location: str
    # Text before and after the edit; both default to empty.
    before: str = ""
    after: str = ""
20
+
21
+
22
class CustomizationResult(BaseModel):
    """Result of a customization run: both resume versions, the changes, and scores."""
    original: ResumeData
    customized: ResumeData
    changes: List[Change] = []
    original_score: ATSScore
    customized_score: ATSScore

    # Enhanced analysis fields (all optional; empty/None by default)
    bullet_analysis: List[BulletAnalysis] = []
    safety_scan: Optional[SafetyScan] = None
    keyword_quality: List[KeywordPlacement] = []
app/models/job.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List
2
+ from pydantic import BaseModel
3
+
4
+
5
class JobData(BaseModel):
    """Structured fields of a job posting; every field has an empty default."""
    title: str = ""
    company: str = ""
    location: str = ""
    requirements: List[str] = []
    responsibilities: List[str] = []
    # Keywords are kept in two separate lists: required and preferred.
    keywords_required: List[str] = []
    keywords_preferred: List[str] = []
    raw_text: str = ""
app/models/resume.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List
2
+ from pydantic import BaseModel
3
+
4
+
5
class ContactInfo(BaseModel):
    """Contact details from a resume; any missing value stays an empty string."""
    name: str = ""
    email: str = ""
    phone: str = ""
    linkedin: str = ""
    location: str = ""
11
+
12
+
13
class Experience(BaseModel):
    """One work-experience entry; all four fields are required."""
    company: str
    title: str
    dates: str
    bullets: List[str]
18
+
19
+
20
class Education(BaseModel):
    """One education entry; all three fields are required."""
    school: str
    degree: str
    dates: str
24
+
25
+
26
class ResumeData(BaseModel):
    """Structured resume: contact block plus optional sections and raw text."""
    # The only required field; every section below defaults to empty.
    contact: ContactInfo
    summary: str = ""
    experience: List[Experience] = []
    education: List[Education] = []
    skills: List[str] = []
    raw_text: str = ""
app/models/score.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List
2
+ from pydantic import BaseModel
3
+
4
+
5
class ScoreBreakdown(BaseModel):
    """Per-component ATS scores, each on a 0-100 scale."""
    keyword_match: float = 0.0
    skills_alignment: float = 0.0
    experience_relevance: float = 0.0
    # Always 100 for our generated resumes
    format_score: float = 100.0
10
+
11
+
12
class ATSScore(BaseModel):
    """Overall ATS score with its breakdown and keyword match lists."""
    # 0-100
    total: int = 0
    breakdown: ScoreBreakdown = ScoreBreakdown()
    matched_keywords: List[str] = []
    missing_keywords: List[str] = []
app/services/__init__.py ADDED
File without changes
app/services/ats_scorer.py ADDED
@@ -0,0 +1,195 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+ import re
3
+ from typing import List, Tuple
4
+ from app.models.resume import ResumeData
5
+ from app.models.job import JobData
6
+ from app.models.score import ATSScore, ScoreBreakdown
7
+ from app.models.analysis import KeywordPlacement
8
+ from app.llm.factory import LLMFactory
9
+
10
+
11
class ATSScorer:
    """Scores a resume against a job posting.

    The total is a weighted blend of four 0-100 components: keyword match,
    skills alignment, LLM-judged experience relevance, and a fixed format score.
    """

    # Weights for score components
    WEIGHT_KEYWORD = 0.4
    WEIGHT_SKILLS = 0.3
    WEIGHT_EXPERIENCE = 0.2
    WEIGHT_FORMAT = 0.1

    # Points given to required vs. preferred keywords when both lists exist
    REQUIRED_SHARE = 70.0
    PREFERRED_SHARE = 30.0

    # Score used when experience relevance cannot be determined
    DEFAULT_RELEVANCE = 50.0

    @staticmethod
    def _keyword_pattern(keyword: str) -> str:
        """Return a lower-cased regex that matches ``keyword`` as a whole token.

        Lookarounds replace word-boundary anchors because a boundary anchor
        after a symbol never matches: "C++" or "C#" followed by a space would
        always be reported as missing.
        """
        return r'(?<!\w)' + re.escape(keyword.lower()) + r'(?!\w)'

    @classmethod
    def _parse_relevance(cls, response: str) -> float:
        """Extract the first integer from an LLM reply and clamp it to 0-100.

        Returns ``DEFAULT_RELEVANCE`` when the reply contains no digits.
        """
        match = re.search(r'\d+', response or "")
        if match is None:
            return cls.DEFAULT_RELEVANCE
        return float(min(100, max(0, int(match.group()))))

    def match_keywords(self, text: str, keywords: List[str]) -> Tuple[List[str], List[str]]:
        """Split ``keywords`` into (matched, missing) against ``text``.

        Matching is case-insensitive and whole-token; input order is kept.
        """
        text_lower = text.lower()
        matched: List[str] = []
        missing: List[str] = []

        for keyword in keywords:
            if re.search(self._keyword_pattern(keyword), text_lower):
                matched.append(keyword)
            else:
                missing.append(keyword)

        return matched, missing

    def calculate_keyword_score(self, resume: ResumeData, job: JobData) -> Tuple[float, List[str], List[str]]:
        """Calculate the keyword match score (0-100) plus matched/missing lists.

        Required keywords carry 70 points and preferred keywords 30. When only
        one of the two lists is present it carries the full 100, so an empty
        list never caps the score. With no keywords at all the score is 100.
        """
        all_keywords = job.keywords_required + job.keywords_preferred
        if not all_keywords:
            return 100.0, [], []

        # A keyword repeated within or across the lists is matched and
        # reported once; otherwise it would be double-counted and could push
        # the score above 100.
        seen = set()
        unique_keywords: List[str] = []
        for keyword in all_keywords:
            folded = keyword.lower()
            if folded not in seen:
                seen.add(folded)
                unique_keywords.append(keyword)

        # Combine all resume text
        resume_text = " ".join([
            resume.raw_text,
            " ".join(resume.skills),
            resume.summary,
        ])

        matched, missing = self.match_keywords(resume_text, unique_keywords)
        matched_folded = {keyword.lower() for keyword in matched}

        def hit_ratio(keywords: List[str]) -> float:
            hits = sum(1 for keyword in keywords if keyword.lower() in matched_folded)
            return hits / len(keywords)

        if not job.keywords_preferred:
            score = hit_ratio(job.keywords_required) * 100.0
        elif not job.keywords_required:
            score = hit_ratio(job.keywords_preferred) * 100.0
        else:
            # Weight required keywords more heavily
            score = (
                hit_ratio(job.keywords_required) * self.REQUIRED_SHARE
                + hit_ratio(job.keywords_preferred) * self.PREFERRED_SHARE
            )

        return score, matched, missing

    def calculate_skills_score(self, resume: ResumeData, job: JobData) -> float:
        """Return the percentage of required keywords listed verbatim as skills.

        Comparison is case-insensitive on the whole skill string. Returns 100
        when the job has no required keywords.
        """
        if not job.keywords_required:
            return 100.0

        resume_skills_lower = {skill.lower() for skill in resume.skills}
        matched = sum(1 for k in job.keywords_required if k.lower() in resume_skills_lower)

        return (matched / len(job.keywords_required)) * 100

    async def calculate_experience_relevance(self, resume: ResumeData, job: JobData) -> float:
        """Use the fast LLM to judge experience relevance (0-100).

        Only the first three roles (three bullets each) and the first five
        responsibilities are sent. Returns 50 when either side is empty or the
        reply contains no number.
        """
        if not resume.experience or not job.responsibilities:
            return self.DEFAULT_RELEVANCE

        experience_text = "\n".join([
            f"{exp.title} at {exp.company}: " + "; ".join(exp.bullets[:3])
            for exp in resume.experience[:3]
        ])

        responsibilities_text = "\n".join(job.responsibilities[:5])

        prompt = f"""Rate how relevant this candidate's experience is to the job responsibilities on a scale of 0-100.

Candidate Experience:
{experience_text}

Job Responsibilities:
{responsibilities_text}

Respond with only a number between 0 and 100."""

        llm = LLMFactory.get_fast()
        response = await llm.complete(prompt)

        return self._parse_relevance(response)

    async def calculate(self, resume: ResumeData, job: JobData) -> ATSScore:
        """Calculate the complete ATS score with its breakdown."""
        keyword_score, matched, missing = self.calculate_keyword_score(resume, job)
        skills_score = self.calculate_skills_score(resume, job)
        experience_score = await self.calculate_experience_relevance(resume, job)
        format_score = 100.0  # Our generated resumes are always ATS-friendly

        # int() truncates, so the total never rounds up past its components.
        total = int(
            keyword_score * self.WEIGHT_KEYWORD +
            skills_score * self.WEIGHT_SKILLS +
            experience_score * self.WEIGHT_EXPERIENCE +
            format_score * self.WEIGHT_FORMAT
        )

        return ATSScore(
            total=total,
            breakdown=ScoreBreakdown(
                keyword_match=keyword_score,
                skills_alignment=skills_score,
                experience_relevance=experience_score,
                format_score=format_score,
            ),
            matched_keywords=matched,
            missing_keywords=missing,
        )

    def check_keyword_quality(
        self,
        resume: ResumeData,
        job: JobData,
        added_keywords: List[str]
    ) -> List[KeywordPlacement]:
        """Check if keywords are naturally placed or potentially stuffed.

        ``job`` is accepted for interface compatibility but is not consulted.
        """
        placements = []

        for keyword in added_keywords:
            locations = self._find_keyword_locations(resume, keyword)
            is_natural, flag = self._evaluate_placement(locations, keyword)

            placements.append(KeywordPlacement(
                keyword=keyword,
                locations=locations,
                is_natural=is_natural,
                flag=flag
            ))

        return placements

    def _find_keyword_locations(self, resume: ResumeData, keyword: str) -> List[str]:
        """Find all locations where a keyword appears in the resume.

        Possible entries are "summary", "skills" (at most once) and one
        "experience[i].bullets[j]" path per matching bullet.
        """
        locations = []
        pattern = re.compile(self._keyword_pattern(keyword))

        # Check summary
        if resume.summary and pattern.search(resume.summary.lower()):
            locations.append("summary")

        # Check skills
        if any(pattern.search(skill.lower()) for skill in resume.skills):
            locations.append("skills")

        # Check experience bullets
        for exp_idx, exp in enumerate(resume.experience):
            for bullet_idx, bullet in enumerate(exp.bullets):
                if pattern.search(bullet.lower()):
                    locations.append(f"experience[{exp_idx}].bullets[{bullet_idx}]")

        return locations

    def _evaluate_placement(self, locations: List[str], keyword: str) -> Tuple[bool, str | None]:
        """Evaluate if a keyword placement is natural or stuffed.

        Returns ``(is_natural, flag)`` where ``flag`` is a warning message for
        unnatural placements and ``None`` otherwise.
        """
        if not locations:
            return True, None  # Not found, no issue

        # Check if keyword ONLY appears in skills (potential stuffing)
        if locations == ["skills"]:
            return False, f"'{keyword}' only appears in Skills section - consider demonstrating it in your experience bullets"

        # Check if keyword appears too many times (over-optimization)
        experience_mentions = [loc for loc in locations if "experience" in loc]
        if len(experience_mentions) > 3:
            return False, f"'{keyword}' appears {len(experience_mentions)} times - this may seem repetitive to reviewers"

        # Anything else appears in the summary or in a few experience bullets
        # and is treated as natural.
        return True, None
app/services/bullet_analyzer.py ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+ import json
3
+ import logging
4
+ import re
5
+ from pathlib import Path
6
+ from typing import List, Optional
7
+
8
+ from app.models.resume import ResumeData
9
+ from app.models.job import JobData
10
+ from app.models.analysis import BulletAnalysis
11
+ from app.llm.factory import LLMFactory
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+ PROMPT_PATH = Path(__file__).parent.parent.parent / "prompts" / "analyze_bullets.txt"
16
+
17
+
18
class BulletAnalyzer:
    """Analyzes individual resume bullet points against job requirements."""

    def __init__(self):
        """Load the bullet-analysis prompt template from ``PROMPT_PATH``."""
        # Explicit encoding: the platform default is not UTF-8 everywhere.
        self.prompt_template = PROMPT_PATH.read_text(encoding="utf-8")

    @staticmethod
    def _clamp_score(value) -> int:
        """Coerce ``value`` to an int within 0-100 (raises if not numeric)."""
        return min(100, max(0, int(value)))

    def _get_all_bullets(self, resume: ResumeData) -> List[tuple[str, str]]:
        """Extract all bullets as ``(location, text)`` pairs in resume order."""
        return [
            (f"experience[{exp_idx}].bullets[{bullet_idx}]", bullet)
            for exp_idx, exp in enumerate(resume.experience)
            for bullet_idx, bullet in enumerate(exp.bullets)
        ]

    def _calculate_keyword_overlap(self, text: str, keywords: List[str]) -> List[str]:
        """Return the keywords that appear in ``text`` as whole tokens.

        Matching is case-insensitive. Lookarounds are used rather than
        word-boundary anchors so keywords ending in symbols ("C++", "C#")
        can match.
        """
        text_lower = text.lower()
        return [
            keyword
            for keyword in keywords
            if re.search(r'(?<!\w)' + re.escape(keyword.lower()) + r'(?!\w)', text_lower)
        ]

    async def analyze_bullet(
        self,
        bullet_text: str,
        location: str,
        job: JobData
    ) -> BulletAnalysis:
        """Analyze a single bullet point against job requirements.

        A keyword-overlap score is computed first. The fast LLM is then asked
        for a deeper analysis; if the call fails or its reply holds no usable
        JSON object, the keyword-only analysis is returned instead.
        """
        # Quick keyword check first
        all_keywords = job.keywords_required + job.keywords_preferred
        matched = self._calculate_keyword_overlap(bullet_text, all_keywords)

        # Calculate base relevance from keyword overlap
        if all_keywords:
            base_score = int((len(matched) / len(all_keywords)) * 100)
        else:
            base_score = 50

        # Use LLM for deeper analysis and suggestions
        prompt = self.prompt_template.format(
            bullet_text=bullet_text,
            required_keywords=", ".join(job.keywords_required[:10]),
            preferred_keywords=", ".join(job.keywords_preferred[:10]),
            responsibilities="\n".join(job.responsibilities[:5])
        )

        try:
            llm = LLMFactory.get_fast()
            response = await llm.complete(prompt)

            # Parse JSON response: take the outermost {...} span of the reply
            json_match = re.search(r'\{[\s\S]*\}', response)
            if json_match:
                data = json.loads(json_match.group())
                return BulletAnalysis(
                    location=location,
                    original_text=bullet_text,
                    # Keep the score inside the documented 0-100 range.
                    relevance_score=self._clamp_score(data.get("relevance_score", base_score)),
                    # "or" also covers an explicit JSON null from the model.
                    matched_keywords=data.get("matched_keywords") or matched,
                    missing_keywords=data.get("missing_keywords") or [],
                    suggestion=data.get("suggestion") or ""
                )
        except Exception as e:
            # Best effort: any LLM, parsing or validation failure falls
            # through to the keyword-only analysis below.
            logger.warning(f"Failed to parse bullet analysis: {e}")

        # Fallback to basic analysis
        missing = [k for k in all_keywords if k not in matched]
        return BulletAnalysis(
            location=location,
            original_text=bullet_text,
            relevance_score=base_score,
            matched_keywords=matched,
            missing_keywords=missing[:5],
            suggestion=f"Consider adding keywords: {', '.join(missing[:3])}" if missing else ""
        )

    async def analyze_all_bullets(
        self,
        resume: ResumeData,
        job: JobData,
        max_bullets: int = 15
    ) -> List[BulletAnalysis]:
        """Analyze the first ``max_bullets`` bullets, skipping blank ones.

        Bullets are analyzed one at a time, in resume order.
        """
        bullets = self._get_all_bullets(resume)
        analyses = []

        # Limit to avoid too many LLM calls
        for location, bullet_text in bullets[:max_bullets]:
            if bullet_text.strip():
                analysis = await self.analyze_bullet(bullet_text, location, job)
                analyses.append(analysis)

        return analyses

    def update_with_customized(
        self,
        original_analyses: List[BulletAnalysis],
        customized_resume: ResumeData,
        job: JobData
    ) -> List[BulletAnalysis]:
        """Update bullet analyses with their customized versions.

        Each analysis whose bullet text changed is updated in place with the
        new text, a new score and the keywords that were added. The same
        objects are returned in their original order.
        """
        customized_bullets = dict(self._get_all_bullets(customized_resume))
        all_keywords = job.keywords_required + job.keywords_preferred

        updated = []
        for analysis in original_analyses:
            new_text = customized_bullets.get(analysis.location)

            if new_text and new_text != analysis.original_text:
                # Calculate new score
                new_matched = self._calculate_keyword_overlap(new_text, all_keywords)
                new_score = int((len(new_matched) / len(all_keywords)) * 100) if all_keywords else 50

                # Find keywords that were added
                old_matched = set(analysis.matched_keywords)
                keywords_added = [k for k in new_matched if k not in old_matched]

                analysis.customized_text = new_text
                # Show improvement: the new score is at least 10 points above
                # the old one, capped at 100 so it stays on the 0-100 scale.
                # NOTE(review): the +10 floor is not derived from the text.
                analysis.new_relevance_score = min(
                    100, max(new_score, analysis.relevance_score + 10)
                )
                analysis.keywords_added = keywords_added

            updated.append(analysis)

        return updated
app/services/job_scraper.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+ import re
3
+ from pathlib import Path
4
+ from typing import Dict, Any, List
5
+ import httpx
6
+ from bs4 import BeautifulSoup
7
+ from app.models.job import JobData
8
+ from app.llm.factory import LLMFactory
9
+
10
+
11
class JobScraper:
    """Fetches job postings and turns them into structured ``JobData`` via the LLM."""

    # Fields requested from the LLM, in the order they appear in the schema
    _TEXT_FIELDS = ("title", "company", "location")
    _LIST_FIELDS = (
        "requirements",
        "responsibilities",
        "keywords_required",
        "keywords_preferred",
    )
    # Limit context sent to the LLM
    _MAX_PROMPT_CHARS = 8000

    def __init__(self):
        """Locate the prompts directory three levels above this file."""
        self.prompts_dir = Path(__file__).parent.parent.parent / "prompts"

    def validate_url(self, url: str) -> bool:
        """Return True when ``url`` looks like an http(s) URL without whitespace."""
        if not url:
            return False
        pattern = r'^https?://[^\s/$.?#].[^\s]*$'
        return bool(re.match(pattern, url))

    async def fetch_page(self, url: str) -> str:
        """Download ``url`` and return the response body as text.

        Redirects are followed and the request times out after 30 seconds.
        HTTP error statuses raise via ``raise_for_status``.
        """
        # NOTE(review): ``url`` appears to come from the caller unfiltered and
        # ``validate_url`` accepts hosts such as localhost or private IPs, so
        # this request could be aimed at internal services (SSRF). Consider an
        # allow/deny check on the resolved address.
        async with httpx.AsyncClient(follow_redirects=True, timeout=30.0) as client:
            headers = {
                "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36"
            }
            response = await client.get(url, headers=headers)
            response.raise_for_status()
            return response.text

    def extract_text(self, html: str) -> str:
        """Return the visible text of ``html``, one non-empty line per row."""
        soup = BeautifulSoup(html, "html.parser")

        # Remove scripts, styles and page chrome (nav/footer/header)
        for element in soup(["script", "style", "nav", "footer", "header"]):
            element.decompose()

        # Get text
        text = soup.get_text(separator="\n", strip=True)

        # Clean up whitespace
        lines = [line.strip() for line in text.splitlines() if line.strip()]
        return "\n".join(lines)

    @classmethod
    def _normalize_fields(cls, data: Any) -> Dict[str, Any]:
        """Coerce an LLM reply into the field values ``JobData`` expects.

        Missing keys, JSON nulls and values of the wrong container type fall
        back to ``""`` for text fields and ``[]`` for list fields.
        """
        source = data if isinstance(data, dict) else {}
        fields: Dict[str, Any] = {}
        for name in cls._TEXT_FIELDS:
            value = source.get(name)
            fields[name] = value if isinstance(value, str) else ""
        for name in cls._LIST_FIELDS:
            value = source.get(name)
            fields[name] = value if isinstance(value, list) else []
        return fields

    async def _extract_job(self, job_text: str) -> JobData:
        """Ask the fast LLM to structure ``job_text`` and build the ``JobData``."""
        prompt_template = (self.prompts_dir / "extract_job.txt").read_text(encoding="utf-8")
        prompt = prompt_template.replace("{job_text}", job_text[:self._MAX_PROMPT_CHARS])

        schema: Dict[str, Any] = {
            **{name: "" for name in self._TEXT_FIELDS},
            **{name: [] for name in self._LIST_FIELDS},
        }

        llm = LLMFactory.get_fast()
        data = await llm.complete_json(prompt, schema)

        return JobData(raw_text=job_text, **self._normalize_fields(data))

    async def scrape(self, url: str) -> JobData:
        """Fetch a job posting from ``url`` and return it as structured data.

        Raises:
            ValueError: if the URL is invalid or the page yields fewer than
                100 characters of text.
        """
        if not self.validate_url(url):
            raise ValueError(f"Invalid URL: {url}")

        html = await self.fetch_page(url)
        raw_text = self.extract_text(html)

        if len(raw_text) < 100:
            raise ValueError("Could not extract sufficient job content from page")

        return await self._extract_job(raw_text)

    async def parse_text(self, job_text: str) -> JobData:
        """Parse job description from raw text (manual paste fallback)."""
        return await self._extract_job(job_text)
app/services/layout_scanner.py ADDED
@@ -0,0 +1,196 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+ import io
3
+ import logging
4
+ import re
5
+ from typing import List, Tuple
6
+
7
+ import fitz # PyMuPDF
8
+ from docx import Document
9
+
10
+ from app.models.analysis import SafetyScan, LayoutWarning
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
class LayoutScanner:
    """Scans resume layout for ATS compatibility issues."""

    def scan(self, file_bytes: bytes, content_type: str) -> SafetyScan:
        """Scan a resume file for layout issues.

        PDF and DOCX are recognised from ``content_type``; any other type, or
        a scan that raises, produces a result with no warnings.
        """
        warnings: List[LayoutWarning] = []

        try:
            if "pdf" in content_type:
                warnings = self._scan_pdf(file_bytes)
            elif "wordprocessingml" in content_type or "docx" in content_type:
                warnings = self._scan_docx(file_bytes)
        except Exception as e:
            logger.warning(f"Layout scan failed: {e}")

        return SafetyScan(
            has_issues=len(warnings) > 0,
            warnings=warnings
        )

    def _scan_pdf(self, file_bytes: bytes) -> List[LayoutWarning]:
        """Scan PDF for layout issues.

        Each check (columns, tables, graphics) is reported at most once, on
        the first page where it triggers. Finding one kind of issue does not
        stop the search for the others.
        """
        warnings: List[LayoutWarning] = []

        # (detector, warning type, message, recommendation)
        pending = [
            (
                self._detect_columns_pdf,
                "multi_column",
                "Resume appears to use a multi-column layout",
                "Switch to a single-column layout. Many ATS systems read left-to-right, line-by-line, which can scramble multi-column content.",
            ),
            (
                self._detect_tables_pdf,
                "complex_table",
                "Tables detected in your resume",
                "Replace tables with simple bullet points. ATS systems often cannot parse table cells correctly.",
            ),
            (
                self._detect_graphics_pdf,
                "graphics",
                "Images or graphics detected in your resume",
                "Remove decorative graphics, icons, or images. ATS cannot read text in images.",
            ),
        ]

        doc = None
        try:
            doc = fitz.open(stream=file_bytes, filetype="pdf")

            for page in doc:
                if not pending:
                    break  # every kind of issue has already been reported
                still_pending = []
                for check in pending:
                    detector, kind, message, recommendation = check
                    if detector(page):
                        warnings.append(LayoutWarning(
                            type=kind,
                            message=message,
                            recommendation=recommendation
                        ))
                    else:
                        still_pending.append(check)
                pending = still_pending
        except Exception as e:
            logger.warning(f"PDF scan error: {e}")
        finally:
            # Release the document even when a detector raised.
            if doc is not None:
                doc.close()

        return warnings

    def _detect_columns_pdf(self, page: fitz.Page) -> bool:
        """Detect if page has multiple columns by analyzing text block positions."""
        blocks = page.get_text("dict")["blocks"]
        text_blocks = [b for b in blocks if b.get("type") == 0]  # Type 0 = text

        if len(text_blocks) < 4:
            return False

        # Group block left edges into 50-unit buckets so small indentation
        # differences collapse into the same margin.
        unique_margins = {round(b["bbox"][0] / 50) * 50 for b in text_blocks}

        # Two or more distinct margins beyond the leftmost bucket (> 50)
        # suggest columns.
        significant_margins = [m for m in unique_margins if m > 50]
        return len(significant_margins) >= 2

    def _detect_tables_pdf(self, page: fitz.Page) -> bool:
        """Detect tables in PDF by looking for grid-like drawn lines."""
        # Check for drawn lines that might indicate table borders
        drawings = page.get_drawings()

        horizontal_lines = 0
        vertical_lines = 0

        for d in drawings:
            for item in d.get("items") or ():
                if item[0] != "l":  # only straight line segments count
                    continue
                p1, p2 = item[1], item[2]
                if abs(p1.y - p2.y) < 2:  # Horizontal line
                    horizontal_lines += 1
                elif abs(p1.x - p2.x) < 2:  # Vertical line
                    vertical_lines += 1

        # Multiple horizontal and vertical lines suggest a table
        return horizontal_lines >= 3 and vertical_lines >= 2

    def _detect_graphics_pdf(self, page: fitz.Page) -> bool:
        """Detect images/graphics in PDF.

        Only images wider or taller than 100 pixels count; smaller ones are
        likely bullets or icons. Images that cannot be extracted are skipped.
        """
        for img in page.get_images():
            try:
                xref = img[0]
                base_image = page.parent.extract_image(xref)
            except Exception as e:
                # One unreadable image must not abort the check.
                logger.debug(f"Could not extract image: {e}")
                continue
            if not base_image:
                continue
            width = base_image.get("width", 0)
            height = base_image.get("height", 0)
            if width > 100 or height > 100:
                return True

        return False

    def _scan_docx(self, file_bytes: bytes) -> List[LayoutWarning]:
        """Scan DOCX for layout issues (tables, multi-column sections, text boxes)."""
        warnings: List[LayoutWarning] = []

        try:
            doc = Document(io.BytesIO(file_bytes))

            # Check for tables; only tables with more than one column are
            # flagged (common layout trick in resumes). Reported once.
            if any(len(table.columns) > 1 for table in doc.tables):
                warnings.append(LayoutWarning(
                    type="complex_table",
                    message="Tables detected in your resume",
                    recommendation="Replace tables with simple paragraphs and bullet points. ATS systems often misread table layouts."
                ))

            # Check for multiple columns in sections
            for section in doc.sections:
                if hasattr(section, '_sectPr'):
                    cols = section._sectPr.find('.//{http://schemas.openxmlformats.org/wordprocessingml/2006/main}cols')
                    if cols is not None:
                        num_cols = cols.get('{http://schemas.openxmlformats.org/wordprocessingml/2006/main}num')
                        if num_cols and int(num_cols) > 1:
                            warnings.append(LayoutWarning(
                                type="multi_column",
                                message="Resume uses a multi-column layout",
                                recommendation="Switch to a single-column format for better ATS compatibility."
                            ))
                            break

            # Check for text boxes (often used for sidebar layouts)
            if self._has_text_boxes_docx(doc):
                warnings.append(LayoutWarning(
                    type="multi_column",
                    message="Text boxes detected (possibly a sidebar layout)",
                    recommendation="Remove text boxes and use a linear, single-column layout instead."
                ))

        except Exception as e:
            logger.warning(f"DOCX scan error: {e}")

        return warnings

    def _has_text_boxes_docx(self, doc: Document) -> bool:
        """Check if any paragraph's XML mentions a text box or a drawing."""
        try:
            for para in doc.paragraphs:
                # Read the XML once per paragraph.
                xml = para._element.xml
                if not xml:
                    continue
                # Text boxes appear as drawing elements
                if 'textbox' in xml.lower() or 'w:drawing' in xml:
                    return True
        except Exception as e:
            logger.debug(f"Text box detection failed: {e}")
        return False
app/services/progress.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+ import json
3
+ from enum import Enum
4
+ from typing import Dict, Any, Optional
5
+ from redis.asyncio import Redis
6
+ from app.core.redis import get_redis
7
+
8
+
9
class ProgressStep(str, Enum):
    """Pipeline stages reported to clients; each value is the step name sent on the wire."""

    PARSING_RESUME = "parsing_resume"
    SCRAPING_JOB = "scraping_job"
    CALCULATING_ORIGINAL_SCORE = "calculating_original_score"
    CUSTOMIZING = "customizing"
    CALCULATING_NEW_SCORE = "calculating_new_score"
    FINALIZING = "finalizing"
    COMPLETE = "complete"
    ERROR = "error"


# Completion percentage reported for each step. ERROR is intentionally absent:
# lookups fall back to 0 for any step that is not listed.
STEP_PROGRESS: Dict[ProgressStep, int] = {
    ProgressStep.PARSING_RESUME: 15,
    ProgressStep.SCRAPING_JOB: 30,
    ProgressStep.CALCULATING_ORIGINAL_SCORE: 40,
    ProgressStep.CUSTOMIZING: 80,
    ProgressStep.CALCULATING_NEW_SCORE: 90,
    ProgressStep.FINALIZING: 95,
    ProgressStep.COMPLETE: 100,
}


class ProgressService:
    """Publish progress events for one task and keep the latest event in Redis.

    Every event is published on the ``progress:<task_id>`` channel and also
    stored under ``progress_state:<task_id>`` so that a client connecting
    late can read the most recent state.
    """

    # Lifetime of the stored "latest state" key (1 hour).
    STATE_TTL_SECONDS = 3600

    def __init__(self, task_id: str, redis_client: "Optional[Redis]" = None):
        """Bind the service to ``task_id``.

        Args:
            task_id: Identifier used to build the channel and state key names.
            redis_client: Client to use; when omitted, ``get_redis()`` is
                awaited on each call instead.
        """
        self.task_id = task_id
        self.channel = f"progress:{task_id}"
        self._redis = redis_client

    async def _get_redis(self) -> "Redis":
        """Return the injected client, or the shared one from ``get_redis()``."""
        if self._redis is not None:
            return self._redis
        return await get_redis()

    async def _broadcast(self, data: Dict[str, Any]) -> None:
        """Store ``data`` as the current state, then publish it to subscribers.

        The state is written before publishing so that a client connecting
        between the two calls can still recover the event from the stored
        state instead of missing it on both paths.
        """
        redis = await self._get_redis()
        payload = json.dumps(data)
        await redis.set(
            f"progress_state:{self.task_id}",
            payload,
            ex=self.STATE_TTL_SECONDS,
        )
        await redis.publish(self.channel, payload)

    async def update(self, step: ProgressStep, message: str = "", result_id: str = ""):
        """Report that the task reached ``step``.

        Args:
            step: The stage reached; its percentage comes from STEP_PROGRESS
                (0 when the step has no entry).
            message: Human-readable status text.
            result_id: Included in the event only when non-empty.
        """
        data: Dict[str, Any] = {
            "step": step.value,
            "percent": STEP_PROGRESS.get(step, 0),
            "message": message,
        }
        if result_id:
            data["result_id"] = result_id
        await self._broadcast(data)

    async def error(self, code: str, message: str, recoverable: bool = True):
        """Report a failure as an ``error`` step with 0 percent.

        Args:
            code: Machine-readable error code.
            message: Human-readable error description.
            recoverable: Flag forwarded to the client in the error payload.
        """
        data: Dict[str, Any] = {
            "step": ProgressStep.ERROR.value,
            "percent": 0,
            "error": {
                "code": code,
                "message": message,
                "recoverable": recoverable,
            },
        }
        await self._broadcast(data)
app/services/resume_comparator.py ADDED
@@ -0,0 +1,236 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+ import re
3
+ from typing import Dict, List, Any
4
+ from app.models.resume import ResumeData
5
+
6
+
7
class ResumeComparator:
    """Compare original (ground truth) resume with LLM-generated version.

    The comparison is purely textual: bullets, skills and education entries
    are flattened to lower-case text, a handful of counts are computed for
    each version, and the two sets of counts are diffed and scored.
    """

    # Strong action verbs commonly used in resumes
    STRONG_ACTION_VERBS = {
        'achieved', 'accelerated', 'accomplished', 'designed', 'developed', 'directed',
        'established', 'executed', 'generated', 'implemented', 'improved', 'increased',
        'launched', 'led', 'managed', 'optimized', 'orchestrated', 'pioneered',
        'reduced', 'resolved', 'spearheaded', 'streamlined', 'transformed', 'architected',
        'built', 'created', 'delivered', 'drove', 'enhanced', 'expanded', 'founded',
        'initiated', 'maintained', 'organized', 'produced', 'redesigned', 'restructured',
        'revamped', 'scaled', 'strengthened', 'automated', 'collaborated', 'coordinated',
        'facilitated', 'negotiated', 'presented', 'supervised', 'trained'
    }

    # Common words to exclude from job-description keywords
    COMMON_WORDS = {
        'will', 'with', 'have', 'this', 'that', 'from', 'they', 'were', 'been',
        'their', 'what', 'about', 'which', 'when', 'make', 'like', 'time', 'than',
        'into', 'year', 'your', 'some', 'could', 'them', 'other', 'then', 'more',
        'these', 'would', 'such', 'also', 'only', 'must', 'work', 'team', 'role'
    }

    # Maximum number of job-description keywords analysed
    MAX_JD_KEYWORDS = 50

    # Weights of the overall quality score (simple weighted sum of counts)
    SCORE_WEIGHTS = {
        'action_verbs': 0.25,
        'quantifiable_metrics': 0.30,
        'jd_keyword_matches': 0.25,
        'bullet_points': 0.20
    }

    # First alphabetic word of a bullet, skipping a leading bullet glyph/punctuation.
    _FIRST_WORD_RE = re.compile(r'[^a-z0-9]*([a-z]+)')

    # One alternation so each figure is counted once: $ amounts, numbers with a
    # %, K/M/B, x or + suffix, and any remaining number with 2+ digits.
    # Case-insensitive because the analysed text has already been lower-cased.
    _METRIC_RE = re.compile(
        r'\$\d+(?:[.,]\d+)*[kmb]?'
        r'|\d+(?:[.,]\d+)*(?:%|[kmb]\b|x\b|\+)'
        r'|\d{2,}',
        re.IGNORECASE,
    )

    def __init__(self, original: "ResumeData", llm_version: "ResumeData", job_description: str = ""):
        """Store the two resume versions and the optional job description text."""
        self.original = original
        self.llm_version = llm_version
        self.job_description = job_description

    def _extract_text(self, resume: "ResumeData") -> str:
        """Return bullets, skills and education of ``resume`` as one lower-case string."""
        text_parts: List[str] = []

        # Add experience bullets
        for exp in resume.experience:
            text_parts.extend(exp.bullets)

        # Add skills
        text_parts.extend(resume.skills)

        # Add education
        for edu in resume.education:
            if edu.degree:
                text_parts.append(edu.degree)
            # NOTE(review): other services in this codebase read ``edu.school``;
            # accept either attribute name -- confirm against the model.
            school = getattr(edu, "institution", None) or getattr(edu, "school", None)
            if school:
                text_parts.append(school)

        return " ".join(text_parts).lower()

    def _count_keywords(self, text: str, keywords: List[str]) -> int:
        """Count how many of ``keywords`` occur in ``text`` (case-insensitive substring match)."""
        text_lower = text.lower()
        return sum(1 for keyword in keywords if keyword.lower() in text_lower)

    def _extract_jd_keywords(self) -> List[str]:
        """Return up to MAX_JD_KEYWORDS keywords from the job description.

        Keywords are alphabetic words of 4+ letters that are not in
        COMMON_WORDS, ranked by frequency; ties keep first-appearance order,
        so the result is deterministic across runs.
        """
        from collections import Counter

        if not self.job_description:
            return []

        words = re.findall(r'\b[a-zA-Z]{4,}\b', self.job_description.lower())
        counts = Counter(w for w in words if w not in self.COMMON_WORDS)
        # sorted() is stable, so equally frequent words stay in document order.
        ranked = sorted(counts, key=counts.get, reverse=True)
        return ranked[:self.MAX_JD_KEYWORDS]

    def _count_bullets(self, resume: "ResumeData") -> int:
        """Count total number of bullet points across all experience entries."""
        return sum(len(exp.bullets) for exp in resume.experience)

    def _count_action_verbs(self, resume: "ResumeData") -> int:
        """Count bullets whose first word is a strong action verb.

        A leading bullet glyph or punctuation and trailing punctuation on the
        first word are ignored, so "- Led," still counts as "led".
        """
        count = 0
        for exp in resume.experience:
            for bullet in exp.bullets:
                match = self._FIRST_WORD_RE.match(bullet.lower())
                if match and match.group(1) in self.STRONG_ACTION_VERBS:
                    count += 1
        return count

    def _count_quantifiable_metrics(self, text: str) -> int:
        """Count numbers, percentages, and quantifiable achievements.

        Matches figures such as 50%, $1M, 10+, 2x, 100K and any number with
        two or more digits; each figure is counted exactly once.
        """
        return len(self._METRIC_RE.findall(text))

    def _count_words(self, text: str) -> int:
        """Count words in text."""
        return len(re.findall(r'\b\w+\b', text))

    def _analyze_section_coverage(self, resume: "ResumeData") -> Dict[str, int]:
        """Return the number of entries in each resume section."""
        # NOTE(review): the parser in this codebase does not populate
        # ``certifications``; a missing attribute is treated as empty.
        certifications = getattr(resume, "certifications", None) or []
        return {
            'experience_count': len(resume.experience),
            'education_count': len(resume.education),
            'skills_count': len(resume.skills),
            'certifications_count': len(certifications)
        }

    def _collect_metrics(self, resume: "ResumeData", jd_keywords: List[str]) -> Dict[str, int]:
        """Compute every metric of the comparison report for one resume version."""
        text = self._extract_text(resume)
        return {
            'bullet_points': self._count_bullets(resume),
            'action_verbs': self._count_action_verbs(resume),
            'quantifiable_metrics': self._count_quantifiable_metrics(text),
            'word_count': self._count_words(text),
            'character_count': len(text),
            'jd_keyword_matches': self._count_keywords(text, jd_keywords) if jd_keywords else 0,
            **self._analyze_section_coverage(resume)
        }

    def compare(self) -> Dict[str, Any]:
        """Generate comprehensive comparison report.

        Returns:
            A dict with ``original_metrics``, ``llm_metrics``, per-metric
            ``improvements``, normalised ``scores``, a text ``summary`` and
            ``jd_keywords_analyzed``.
        """
        jd_keywords = self._extract_jd_keywords()

        original_metrics = self._collect_metrics(self.original, jd_keywords)
        llm_metrics = self._collect_metrics(self.llm_version, jd_keywords)

        # Calculate improvements/changes
        improvements = {}
        for key, original_val in original_metrics.items():
            llm_val = llm_metrics[key]
            diff = llm_val - original_val

            if original_val > 0:
                percent_change = (diff / original_val) * 100
            else:
                # No baseline: report 100% when something was added, else 0%.
                percent_change = 100 if llm_val > 0 else 0

            improvements[key] = {
                'original': original_val,
                'llm': llm_val,
                'difference': diff,
                'percent_change': round(percent_change, 2)
            }

        original_score = sum(
            original_metrics.get(k, 0) * w for k, w in self.SCORE_WEIGHTS.items()
        )
        llm_score = sum(
            llm_metrics.get(k, 0) * w for k, w in self.SCORE_WEIGHTS.items()
        )

        # Normalize against the better of the two versions (1 avoids dividing by zero).
        max_score = max(original_score, llm_score) or 1

        return {
            'original_metrics': original_metrics,
            'llm_metrics': llm_metrics,
            'improvements': improvements,
            'scores': {
                'original_score': round((original_score / max_score) * 100, 2),
                'llm_score': round((llm_score / max_score) * 100, 2),
                'improvement': round(((llm_score - original_score) / max_score) * 100, 2)
            },
            'summary': self._generate_summary(improvements),
            'jd_keywords_analyzed': len(jd_keywords)
        }

    def _generate_summary(self, improvements: Dict[str, Dict]) -> str:
        """Generate human-readable summary of the most relevant changes."""
        summary_parts = []

        # Action verbs
        av_diff = improvements['action_verbs']['difference']
        if av_diff > 0:
            summary_parts.append(f"Added {av_diff} more strong action verbs")
        elif av_diff < 0:
            summary_parts.append(f"Removed {abs(av_diff)} action verbs")

        # Quantifiable metrics
        qm_diff = improvements['quantifiable_metrics']['difference']
        if qm_diff > 0:
            summary_parts.append(f"Added {qm_diff} more quantifiable metrics")
        elif qm_diff < 0:
            summary_parts.append(f"Removed {abs(qm_diff)} quantifiable metrics")

        # Keywords
        kw_diff = improvements['jd_keyword_matches']['difference']
        if kw_diff > 0:
            summary_parts.append(f"Matched {kw_diff} more JD keywords")
        elif kw_diff < 0:
            summary_parts.append(f"Matched {abs(kw_diff)} fewer JD keywords")

        # Word count: only mentioned when the content changed by more than 10%
        wc_change = improvements['word_count']['percent_change']
        if abs(wc_change) > 10:
            if wc_change > 0:
                summary_parts.append(f"Increased content by {abs(wc_change):.1f}%")
            else:
                summary_parts.append(f"Reduced content by {abs(wc_change):.1f}%")

        return "; ".join(summary_parts) if summary_parts else "Minimal changes"
app/services/resume_customizer.py ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+ import json
3
+ from pathlib import Path
4
+ from typing import List, Dict, Any
5
+ from app.models.resume import ResumeData, Experience
6
+ from app.models.job import JobData
7
+ from app.models.customization import CustomizationResult, Change, Intensity
8
+ from app.models.score import ATSScore
9
+ from app.models.analysis import BulletAnalysis, KeywordPlacement
10
+ from app.services.ats_scorer import ATSScorer
11
+ from app.services.bullet_analyzer import BulletAnalyzer
12
+ from app.llm.factory import LLMFactory
13
+
14
+
15
class ResumeCustomizer:
    """Tailor a parsed resume to a target job through the LLM and report what changed."""

    def __init__(self):
        """Locate the prompts directory and create the scorer and bullet analyzer."""
        self.prompts_dir = Path(__file__).parent.parent.parent / "prompts"
        self.scorer = ATSScorer()
        self.bullet_analyzer = BulletAnalyzer()

    def _detect_changes(self, original: ResumeData, customized: ResumeData) -> List[Change]:
        """Compare original and customized to detect changes.

        Reports modified bullets (compared position by position), bullets
        appended to an experience entry, skills that are new in the
        customized version, and a changed summary. Removed bullets and
        removed skills are not reported.
        """
        changes = []

        # Compare experience bullets; zip() stops at the shorter list, so only
        # entries and bullets present in both versions are compared here.
        for i, (orig_exp, cust_exp) in enumerate(zip(original.experience, customized.experience)):
            for j, (orig_bullet, cust_bullet) in enumerate(zip(orig_exp.bullets, cust_exp.bullets)):
                if orig_bullet != cust_bullet:
                    changes.append(Change(
                        type="modified",
                        location=f"experience[{i}].bullets[{j}]",
                        before=orig_bullet,
                        after=cust_bullet,
                    ))

            # Bullets beyond the original count are additions (empty range otherwise).
            for j in range(len(orig_exp.bullets), len(cust_exp.bullets)):
                changes.append(Change(
                    type="added",
                    location=f"experience[{i}].bullets[{j}]",
                    before="",
                    after=cust_exp.bullets[j],
                ))

        # Compare skills
        orig_skills = set(original.skills)
        cust_skills = set(customized.skills)

        for skill in cust_skills - orig_skills:
            changes.append(Change(
                type="added",
                location="skills",
                before="",
                after=skill,
            ))

        # Compare summary
        if original.summary != customized.summary:
            changes.append(Change(
                type="modified",
                location="summary",
                before=original.summary,
                after=customized.summary,
            ))

        return changes

    async def customize(
        self,
        resume: ResumeData,
        job: JobData,
        intensity: Intensity = Intensity.MODERATE,
    ) -> CustomizationResult:
        """Customize resume for the target job.

        Scores the original resume, asks the LLM for a customized version,
        scores that version, and collects the detected changes. Bullet
        analysis and keyword-quality checks are optional: their failures are
        logged and leave the corresponding result lists empty or unchanged.

        Args:
            resume: The parsed resume to customize.
            job: The target job data.
            intensity: How strongly the LLM may rewrite the resume.

        Returns:
            The original and customized resumes with scores, changes and the
            optional analyses.
        """
        import logging
        logger = logging.getLogger(__name__)

        # Calculate original score
        original_score = await self.scorer.calculate(resume, job)

        # Analyze bullets BEFORE customization (optional feature)
        bullet_analysis: List[BulletAnalysis] = []
        try:
            bullet_analysis = await self.bullet_analyzer.analyze_all_bullets(resume, job)
        except Exception as e:
            logger.warning("Bullet analysis failed (non-critical): %s", e)

        # Prepare prompt; read as UTF-8 explicitly so the result does not
        # depend on the platform's locale encoding.
        prompt_template = (self.prompts_dir / "customize_resume.txt").read_text(encoding="utf-8")

        resume_dict = resume.model_dump()
        # Don't include raw text in prompt; pop() tolerates a missing key.
        resume_dict.pop("raw_text", None)

        prompt = prompt_template.format(
            intensity=intensity.value,
            resume_json=json.dumps(resume_dict, indent=2),
            job_title=job.title,
            job_company=job.company,
            keywords_required=", ".join(job.keywords_required),
            keywords_preferred=", ".join(job.keywords_preferred),
            responsibilities="\n".join(f"- {r}" for r in job.responsibilities[:5]),
            missing_keywords=", ".join(original_score.missing_keywords[:10]),
        )

        # Get customized resume from LLM
        llm = LLMFactory.get_smart()
        schema: Dict[str, Any] = resume_dict  # Use original structure as schema
        customized_dict = await llm.complete_json(prompt, schema)

        # Preserve raw_text from original
        customized_dict["raw_text"] = resume.raw_text
        customized = ResumeData(**customized_dict)

        # Calculate new score
        customized_score = await self.scorer.calculate(customized, job)

        # Detect changes
        changes = self._detect_changes(resume, customized)

        # Update bullet analysis with customized versions (optional feature)
        try:
            if bullet_analysis:
                bullet_analysis = self.bullet_analyzer.update_with_customized(
                    bullet_analysis, customized, job
                )
        except Exception as e:
            logger.warning("Bullet analysis update failed (non-critical): %s", e)

        # Check keyword quality (optional feature)
        keyword_quality: List[KeywordPlacement] = []
        try:
            added_keywords = [
                kw for kw in customized_score.matched_keywords
                if kw not in original_score.matched_keywords
            ]
            keyword_quality = self.scorer.check_keyword_quality(
                customized, job, added_keywords
            )
        except Exception as e:
            logger.warning("Keyword quality check failed (non-critical): %s", e)

        return CustomizationResult(
            original=resume,
            customized=customized,
            changes=changes,
            original_score=original_score,
            customized_score=customized_score,
            bullet_analysis=bullet_analysis,
            keyword_quality=keyword_quality,
        )
app/services/resume_generator.py ADDED
@@ -0,0 +1,249 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+ import io
3
+ from pathlib import Path
4
+ from typing import TYPE_CHECKING
5
+ from jinja2 import Template
6
+ from docx import Document
7
+ from docx.shared import Pt
8
+ from docx.enum.text import WD_ALIGN_PARAGRAPH
9
+
10
+ if TYPE_CHECKING:
11
+ from app.models.resume import ResumeData
12
+
13
+
14
class ResumeGenerator:
    """Render a resume as HTML, PDF or DOCX."""

    # Characters the PDF's Helvetica font cannot show, mapped to ASCII
    # stand-ins. Keys are written as escapes so the table survives any tool
    # that normalises typographic quotes in the source file.
    _PDF_CHAR_MAP = str.maketrans({
        '\u2605': '*',     # black star
        '\u2606': '*',     # white star
        '\u2022': '-',     # bullet
        '\u2192': '->',    # right arrow
        '\u2190': '<-',    # left arrow
        '\u2713': '[x]',   # check mark
        '\u2717': '[ ]',   # ballot x
        '\u2026': '...',   # ellipsis
        '\u201c': '"',     # left double quote
        '\u201d': '"',     # right double quote
        '\u2018': "'",     # left single quote
        '\u2019': "'",     # right single quote
        '\u2013': '-',     # en dash
        '\u2014': '-',     # em dash
    })

    def __init__(self):
        """Point at the project-level ``templates`` directory."""
        self.templates_dir = Path(__file__).parent.parent.parent / "templates"

    def to_html(self, resume: "ResumeData") -> str:
        """Render ``resume`` as an HTML document.

        Uses ``templates/resume.html`` when it exists, otherwise the built-in
        default template. Autoescaping is enabled because the resume text
        comes from uploaded files and LLM output.
        """
        template_path = self.templates_dir / "resume.html"

        # Use default template if not exists
        if template_path.exists():
            template_str = template_path.read_text(encoding="utf-8")
        else:
            template_str = self._default_template()

        template = Template(template_str, autoescape=True)
        return template.render(resume=resume)

    def _sanitize_text(self, text: str) -> str:
        """Remove or replace characters not supported by Helvetica.

        Known typographic characters are mapped to ASCII stand-ins; anything
        else outside Latin-1 becomes ``?``.
        """
        text = text.translate(self._PDF_CHAR_MAP)
        # Replace any remaining non-latin1 characters
        return text.encode('latin-1', errors='replace').decode('latin-1')

    def _pdf_section_heading(self, pdf, title: str, page_width: float) -> None:
        """Write a bold section title followed by a full-width rule."""
        pdf.set_font("Helvetica", "B", 11)
        pdf.cell(0, 7, title, ln=True)
        pdf.line(pdf.l_margin, pdf.get_y(), pdf.l_margin + page_width, pdf.get_y())
        pdf.ln(2)

    def to_pdf(self, resume: "ResumeData") -> bytes:
        """Render ``resume`` as a single-column PDF and return its bytes."""
        from fpdf import FPDF

        pdf = FPDF()
        pdf.add_page()
        pdf.set_margins(15, 15, 15)
        pdf.set_auto_page_break(auto=True, margin=15)
        # Set the rule colour once so every section gets the same grey line,
        # whether or not a summary section is present.
        pdf.set_draw_color(100, 100, 100)

        sanitize = self._sanitize_text

        # Contact header
        pdf.set_font("Helvetica", "B", 14)
        pdf.cell(0, 8, sanitize(resume.contact.name or "Name"), ln=True, align="C")

        pdf.set_font("Helvetica", "", 9)
        contact_parts = [p for p in [resume.contact.email, resume.contact.phone, resume.contact.location] if p]
        if contact_parts:
            pdf.cell(0, 5, sanitize(" | ".join(contact_parts)), ln=True, align="C")
        pdf.ln(4)

        page_width = pdf.w - pdf.l_margin - pdf.r_margin

        # Summary
        if resume.summary:
            self._pdf_section_heading(pdf, "SUMMARY", page_width)
            pdf.set_font("Helvetica", "", 9)
            pdf.multi_cell(page_width, 4, sanitize(resume.summary))
            pdf.ln(3)

        # Experience
        if resume.experience:
            self._pdf_section_heading(pdf, "EXPERIENCE", page_width)

            for exp in resume.experience:
                pdf.set_font("Helvetica", "B", 10)
                title_company = f"{exp.title} - {exp.company}"
                # Header is a single-line cell, so it is capped at 80 characters.
                pdf.cell(0, 5, sanitize(title_company[:80]), ln=True)
                if exp.dates:
                    pdf.set_font("Helvetica", "I", 8)
                    pdf.cell(0, 4, sanitize(exp.dates), ln=True)
                pdf.set_font("Helvetica", "", 9)
                for bullet in exp.bullets:
                    bullet_text = f"* {bullet}"
                    pdf.multi_cell(page_width, 4, sanitize(bullet_text))
                pdf.ln(2)

        # Education
        if resume.education:
            self._pdf_section_heading(pdf, "EDUCATION", page_width)

            for edu in resume.education:
                pdf.set_font("Helvetica", "B", 10)
                pdf.cell(0, 5, sanitize(f"{edu.degree} - {edu.school}"), ln=True)
                if edu.dates:
                    pdf.set_font("Helvetica", "I", 8)
                    pdf.cell(0, 4, sanitize(edu.dates), ln=True)
                pdf.ln(2)

        # Skills
        if resume.skills:
            self._pdf_section_heading(pdf, "SKILLS", page_width)
            pdf.set_font("Helvetica", "", 9)
            skills_text = ", ".join(resume.skills)
            pdf.multi_cell(page_width, 4, sanitize(skills_text))

        return bytes(pdf.output())

    def to_docx(self, resume: "ResumeData") -> bytes:
        """Render ``resume`` as a DOCX document and return its bytes.

        Missing name, company and dates are handled the same way as in
        ``to_pdf``: the name falls back to "Name" and empty parts are left
        out instead of producing dangling " | " separators.
        """
        doc = Document()

        # Contact info
        name_para = doc.add_paragraph()
        name_run = name_para.add_run(resume.contact.name or "Name")
        name_run.bold = True
        name_run.font.size = Pt(16)
        name_para.alignment = WD_ALIGN_PARAGRAPH.CENTER

        contact_para = doc.add_paragraph()
        contact_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
        contact_parts = [
            part
            for part in (resume.contact.email, resume.contact.phone, resume.contact.location)
            if part
        ]
        contact_para.add_run(" | ".join(contact_parts))

        # Summary
        if resume.summary:
            doc.add_heading("Summary", level=1)
            doc.add_paragraph(resume.summary)

        # Experience
        if resume.experience:
            doc.add_heading("Experience", level=1)
            for exp in resume.experience:
                exp_para = doc.add_paragraph()
                exp_para.add_run(f"{exp.title}").bold = True
                if exp.company:
                    exp_para.add_run(f" | {exp.company}")
                if exp.dates:
                    exp_para.add_run(f" | {exp.dates}").italic = True

                for bullet in exp.bullets:
                    doc.add_paragraph(bullet, style="List Bullet")

        # Education
        if resume.education:
            doc.add_heading("Education", level=1)
            for edu in resume.education:
                edu_para = doc.add_paragraph()
                edu_para.add_run(f"{edu.degree}").bold = True
                if edu.school:
                    edu_para.add_run(f" | {edu.school}")
                if edu.dates:
                    edu_para.add_run(f" | {edu.dates}").italic = True

        # Skills
        if resume.skills:
            doc.add_heading("Skills", level=1)
            doc.add_paragraph(", ".join(resume.skills))

        buffer = io.BytesIO()
        doc.save(buffer)
        return buffer.getvalue()

    def _default_template(self) -> str:
        """Return the built-in Jinja2 HTML template used when no template file exists."""
        return """
<!DOCTYPE html>
<html>
<head>
    <meta charset="UTF-8">
    <style>
        body { font-family: Arial, sans-serif; margin: 40px; font-size: 11pt; }
        h1 { font-size: 18pt; margin-bottom: 5px; }
        h2 { font-size: 13pt; border-bottom: 1px solid #333; margin-top: 15px; }
        .contact { text-align: center; margin-bottom: 15px; }
        .contact h1 { margin: 0; }
        .contact p { margin: 5px 0; color: #555; }
        .experience-item { margin-bottom: 12px; }
        .experience-header { font-weight: bold; }
        .experience-meta { color: #555; font-style: italic; }
        ul { margin: 5px 0; padding-left: 20px; }
        li { margin: 3px 0; }
        .skills { margin-top: 10px; }
    </style>
</head>
<body>
    <div class="contact">
        <h1>{{ resume.contact.name }}</h1>
        <p>{{ [resume.contact.email, resume.contact.phone, resume.contact.location] | select | join(' | ') }}</p>
    </div>

    {% if resume.summary %}
    <h2>Summary</h2>
    <p>{{ resume.summary }}</p>
    {% endif %}

    {% if resume.experience %}
    <h2>Experience</h2>
    {% for exp in resume.experience %}
    <div class="experience-item">
        <div class="experience-header">{{ exp.title }} | {{ exp.company }}</div>
        <div class="experience-meta">{{ exp.dates }}</div>
        <ul>
            {% for bullet in exp.bullets %}
            <li>{{ bullet }}</li>
            {% endfor %}
        </ul>
    </div>
    {% endfor %}
    {% endif %}

    {% if resume.education %}
    <h2>Education</h2>
    {% for edu in resume.education %}
    <p><strong>{{ edu.degree }}</strong> | {{ edu.school }} | {{ edu.dates }}</p>
    {% endfor %}
    {% endif %}

    {% if resume.skills %}
    <h2>Skills</h2>
    <p class="skills">{{ resume.skills | join(', ') }}</p>
    {% endif %}
</body>
</html>
"""
app/services/resume_parser.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+ import io
3
+ from pathlib import Path
4
+ from typing import Dict, Any
5
+ import fitz # PyMuPDF
6
+ from docx import Document
7
+ from app.models.resume import ResumeData
8
+ from app.llm.factory import LLMFactory
9
+
10
+
11
class ResumeParser:
    """Extract text from an uploaded PDF/DOCX resume and structure it with the LLM."""

    # Accepted upload MIME types mapped to the internal file kind.
    SUPPORTED_TYPES: Dict[str, str] = {
        "application/pdf": "pdf",
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document": "docx",
    }

    def __init__(self):
        """Locate the project-level ``prompts`` directory."""
        self.prompts_dir = Path(__file__).parent.parent.parent / "prompts"

    def is_supported(self, content_type: str) -> bool:
        """Return True when ``content_type`` is one of SUPPORTED_TYPES."""
        return content_type in self.SUPPORTED_TYPES

    def extract_text(self, file_bytes: bytes, content_type: str) -> str:
        """Return the plain text of the uploaded file.

        Raises:
            ValueError: If the content type is unsupported, the file cannot
                be read, or it contains no text.
        """
        file_type = self.SUPPORTED_TYPES.get(content_type)
        if file_type == "pdf":
            return self._extract_pdf(file_bytes)
        if file_type == "docx":
            return self._extract_docx(file_bytes)
        raise ValueError(f"Unsupported content type: {content_type}")

    def _extract_pdf(self, file_bytes: bytes) -> str:
        """Return the text of every page of a PDF, joined by newlines.

        Raises:
            ValueError: If the PDF cannot be opened or read (the cause is
                chained), or if no text was found.
        """
        try:
            doc = fitz.open(stream=file_bytes, filetype="pdf")
            try:
                text = "\n".join(page.get_text() for page in doc).strip()
            finally:
                # Close the document even when reading a page fails.
                doc.close()
        except Exception as e:
            raise ValueError(f"Could not extract text from PDF: {e}") from e

        # Checked outside the try block so this error is not re-wrapped.
        if not text:
            raise ValueError("Could not extract text from PDF")
        return text

    def _extract_docx(self, file_bytes: bytes) -> str:
        """Return the non-blank paragraphs of a DOCX file, joined by newlines.

        Only ``doc.paragraphs`` is read; text held elsewhere in the document
        (for example inside tables) is not included.

        Raises:
            ValueError: If the file cannot be opened or read (the cause is
                chained), or if no text was found.
        """
        try:
            doc = Document(io.BytesIO(file_bytes))
            text = "\n".join(
                para.text for para in doc.paragraphs if para.text.strip()
            ).strip()
        except Exception as e:
            raise ValueError(f"Could not extract text from DOCX: {e}") from e

        # Checked outside the try block so this error is not re-wrapped.
        if not text:
            raise ValueError("Could not extract text from DOCX")
        return text

    async def parse(self, file_bytes: bytes, content_type: str) -> "ResumeData":
        """Extract the file's text and have the LLM structure it into resume data.

        Args:
            file_bytes: Raw bytes of the uploaded file.
            content_type: MIME type of the upload.

        Returns:
            The structured resume, with the extracted text kept in ``raw_text``.

        Raises:
            ValueError: Propagated from ``extract_text``.
        """
        raw_text = self.extract_text(file_bytes, content_type)

        # Read as UTF-8 explicitly so the prompt does not depend on the locale.
        prompt_template = (self.prompts_dir / "structure_resume.txt").read_text(encoding="utf-8")
        # Plain replace rather than str.format: only this one placeholder is filled in.
        prompt = prompt_template.replace("{resume_text}", raw_text)

        schema: Dict[str, Any] = {
            "contact": {"name": "", "email": "", "phone": "", "linkedin": "", "location": ""},
            "summary": "",
            "experience": [{"company": "", "title": "", "dates": "", "bullets": []}],
            "education": [{"school": "", "degree": "", "dates": ""}],
            "skills": [],
        }

        llm = LLMFactory.get_fast()
        data = await llm.complete_json(prompt, schema)

        # ``or`` also covers a section returned as JSON null, not just a missing key.
        return ResumeData(
            contact=data.get("contact") or {},
            summary=data.get("summary") or "",
            experience=data.get("experience") or [],
            education=data.get("education") or [],
            skills=data.get("skills") or [],
            raw_text=raw_text,
        )
app/workers/__init__.py ADDED
File without changes
app/workers/celery_app.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from celery import Celery
2
+ from app.core.config import settings
3
+
4
# Celery requires an explicit ssl_cert_reqs parameter on rediss:// URLs, so one
# is appended whenever the configured URL does not already carry it.
# NOTE(review): CERT_NONE presumably turns off TLS certificate verification --
# confirm that this is acceptable for the deployment.
_redis_url = settings.redis_url
if "ssl_cert_reqs" not in _redis_url and _redis_url.startswith("rediss://"):
    # Join with "&" when the URL already has a query string, "?" otherwise.
    sep = "?" if "?" not in _redis_url else "&"
    _redis_url = _redis_url + sep + "ssl_cert_reqs=CERT_NONE"

# The same Redis URL serves as both message broker and result backend.
celery_app = Celery(
    "cv_buddy",
    include=["app.workers.tasks"],
    broker=_redis_url,
    backend=_redis_url,
)

celery_app.conf.update(
    # JSON only, for tasks and results alike
    accept_content=["json"],
    task_serializer="json",
    result_serializer="json",
    # Clock handling
    enable_utc=True,
    timezone="UTC",
    # Task lifecycle
    task_track_started=True,
    task_soft_time_limit=240,  # 4 minute soft timeout
    task_time_limit=300,  # 5 minute timeout
)
app/workers/tasks.py ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+ import asyncio
3
+ import json
4
+ import uuid
5
+ from typing import Optional, Dict, Any
6
+ from app.workers.celery_app import celery_app
7
+ from app.core.redis import get_redis_for_worker
8
+ from app.services.progress import ProgressService, ProgressStep
9
+ from app.services.resume_parser import ResumeParser
10
+ from app.services.job_scraper import JobScraper
11
+ from app.services.resume_customizer import ResumeCustomizer
12
+ from app.services.layout_scanner import LayoutScanner
13
+ from app.models.customization import Intensity
14
+ from app.models.analysis import SafetyScan
15
+
16
+
17
def run_async(coro):
    """Run a coroutine to completion from synchronous code and return its result.

    Delegates to ``asyncio.run``, which creates a fresh event loop for the
    call and closes it afterwards. Unlike a hand-managed loop, it also
    cancels leftover tasks, shuts down async generators, and does not leave
    a closed loop installed as the thread's current event loop.

    Args:
        coro: The coroutine object to execute.

    Returns:
        Whatever the coroutine returns; its exceptions propagate unchanged.
    """
    return asyncio.run(coro)
25
+
26
+
27
@celery_app.task(bind=True, max_retries=3)
def analyze_and_customize(
    self,
    session_id: str,
    job_url: Optional[str],
    job_text: Optional[str],
    intensity: str,
) -> Dict[str, Any]:
    """Main task chain for resume customization.

    Loads the parsed resume stored for ``session_id``, optionally scans the
    uploaded file's layout, obtains the job data from ``job_url`` (preferred)
    or ``job_text``, customizes the resume, stores the result in Redis and
    reports progress along the way.

    Args:
        session_id: Key prefix of the upload session in Redis.
        job_url: URL of the job posting; used when non-empty.
        job_text: Pasted job description; used when no URL is given.
        intensity: Value of the ``Intensity`` enum to customize with.

    Returns:
        ``{"result_id": <id>}`` where the id addresses the stored result.

    Raises:
        ValueError: If the session has no resume or no job input was given.
        Exception: Any other processing failure, re-raised after an error
            progress event has been attempted.
    """
    # NOTE(review): max_retries is configured but self.retry() is never called
    # in this task, so failures are not retried -- confirm this is intended.
    import logging

    logger = logging.getLogger(__name__)
    task_id = self.request.id

    async def _run():
        async with get_redis_for_worker() as redis:
            progress = ProgressService(task_id, redis)

            try:
                # Step 1: Get resume data from session
                await progress.update(ProgressStep.PARSING_RESUME, "Loading your resume...")

                resume_data = await redis.get(f"session:{session_id}:resume")
                if not resume_data:
                    raise ValueError("Session expired or resume not found")

                from app.models.resume import ResumeData
                resume = ResumeData(**json.loads(resume_data))

                # Scan layout for ATS compatibility issues (optional feature):
                # a failure here only leaves the default, empty scan result.
                safety_scan = SafetyScan()
                try:
                    raw_file = await redis.get(f"session:{session_id}:file")
                    content_type_bytes = await redis.get(f"session:{session_id}:content_type")
                    if raw_file and content_type_bytes:
                        # Decode content_type from bytes to string
                        content_type = (
                            content_type_bytes.decode('utf-8')
                            if isinstance(content_type_bytes, bytes)
                            else content_type_bytes
                        )
                        scanner = LayoutScanner()
                        safety_scan = scanner.scan(raw_file, content_type)
                except Exception as scan_error:
                    logger.warning("Layout scan failed (non-critical): %s", scan_error)

                # Step 2: Scrape job posting
                await progress.update(ProgressStep.SCRAPING_JOB, "Analyzing job posting...")

                scraper = JobScraper()
                if job_url:
                    job = await scraper.scrape(job_url)
                elif job_text:
                    job = await scraper.parse_text(job_text)
                else:
                    raise ValueError("No job URL or text provided")

                # Step 3-5: Customize resume (includes scoring)
                await progress.update(ProgressStep.CUSTOMIZING, "Customizing your resume...")

                customizer = ResumeCustomizer()
                result = await customizer.customize(
                    resume=resume,
                    job=job,
                    intensity=Intensity(intensity),
                )

                # Step 6: Store result
                await progress.update(ProgressStep.FINALIZING, "Preparing results...")

                result_id = str(uuid.uuid4())
                result_data = {
                    "original": result.original.model_dump(),
                    "customized": result.customized.model_dump(),
                    "changes": [c.model_dump() for c in result.changes],
                    "original_score": result.original_score.model_dump(),
                    "customized_score": result.customized_score.model_dump(),
                    "job": job.model_dump(),
                    # Enhanced analysis fields
                    "bullet_analysis": [b.model_dump() for b in result.bullet_analysis],
                    "safety_scan": safety_scan.model_dump(),
                    "keyword_quality": [k.model_dump() for k in result.keyword_quality],
                }

                from app.core.config import settings
                await redis.set(
                    f"result:{result_id}",
                    json.dumps(result_data),
                    ex=settings.session_ttl_seconds,
                )

                await progress.update(ProgressStep.COMPLETE, "Done!", result_id=result_id)

                return {"result_id": result_id}

            except Exception as e:
                # Reporting the failure must not hide the failure itself: if the
                # error event cannot be sent, log that and still re-raise the
                # original exception.
                try:
                    await progress.error(
                        code="PROCESSING_ERROR",
                        message=str(e),
                        recoverable=True,
                    )
                except Exception:
                    logger.exception("Could not publish error progress for task %s", task_id)
                raise

    return run_async(_run())
prompts/analyze_bullets.txt ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Analyze this resume bullet point against the job requirements.
2
+
3
+ BULLET POINT:
4
+ {bullet_text}
5
+
6
+ JOB REQUIREMENTS:
7
+ Required Keywords: {required_keywords}
8
+ Preferred Keywords: {preferred_keywords}
9
+ Key Responsibilities: {responsibilities}
10
+
11
+ Analyze the bullet point and provide:
12
+ 1. A relevance score from 0-100 (how well this bullet aligns with the job)
13
+ 2. Which keywords from the job are already present in the bullet
14
+ 3. Which missing keywords could naturally fit in this bullet
15
+ 4. A specific, actionable suggestion to improve this bullet
16
+
17
+ Respond in this exact JSON format:
18
+ {
19
+ "relevance_score": <number 0-100>,
20
+ "matched_keywords": ["keyword1", "keyword2"],
21
+ "missing_keywords": ["keyword3", "keyword4"],
22
+ "suggestion": "Change to: '<improved bullet text>' to include '<keyword>'"
23
+ }
24
+
25
+ Be specific in your suggestion. Don't just say "add keywords" - provide the actual rewritten bullet text.
26
+ If the bullet is already strong (score > 80), the suggestion may simply state that no rewrite is needed.
27
+ Only include missing_keywords that would NATURALLY fit this bullet's context.
prompts/customize_resume.txt ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ You are a professional resume writer. Customize this resume for the target job using the Google XYZ format for achievements: "Accomplished [X] as measured by [Y], by doing [Z]"
2
+
3
+ INTENSITY LEVEL: {intensity}
4
+ - conservative: Minor tweaks only. Add missing keywords naturally, keep original phrasing.
5
+ - moderate: Rewrite bullets in XYZ format, add keywords, reorder to highlight relevant experience.
6
+ - aggressive: Significant rewrites for maximum ATS optimization while staying truthful.
7
+
8
+ ORIGINAL RESUME:
9
+ {resume_json}
10
+
11
+ TARGET JOB:
12
+ Title: {job_title}
13
+ Company: {job_company}
14
+ Required Keywords: {keywords_required}
15
+ Preferred Keywords: {keywords_preferred}
16
+ Key Responsibilities: {responsibilities}
17
+
18
+ INSTRUCTIONS:
19
+ 1. Rewrite experience bullets using XYZ format where possible
20
+ 2. Naturally incorporate missing keywords: {missing_keywords}
21
+ 3. Ensure skills section includes all relevant keywords
22
+ 4. Keep all facts truthful - only rephrase, don't fabricate
23
+ 5. Prioritize recent and relevant experience
24
+
25
+ Return the customized resume as valid JSON matching the original structure exactly.
26
+ Only return the JSON, no explanation.
prompts/extract_job.txt ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Extract structured data from this job posting. Return valid JSON matching the schema exactly.
2
+
3
+ Schema:
4
+ {
5
+ "title": "string (job title)",
6
+ "company": "string (company name)",
7
+ "location": "string (job location)",
8
+ "requirements": ["string (required qualification/experience)"],
9
+ "responsibilities": ["string (job duty/responsibility)"],
10
+ "keywords_required": ["string (must-have technical skills, tools, technologies)"],
11
+ "keywords_preferred": ["string (nice-to-have skills, bonus qualifications)"]
12
+ }
13
+
14
+ Job posting text:
15
+ {job_text}
prompts/structure_resume.txt ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Extract structured data from this resume text. Return valid JSON matching the schema exactly.
2
+
3
+ Schema:
4
+ {
5
+ "contact": {
6
+ "name": "string",
7
+ "email": "string",
8
+ "phone": "string",
9
+ "linkedin": "string",
10
+ "location": "string"
11
+ },
12
+ "summary": "string (professional summary if present)",
13
+ "experience": [
14
+ {
15
+ "company": "string",
16
+ "title": "string",
17
+ "dates": "string (e.g., 'Jan 2020 - Present')",
18
+ "bullets": ["string (achievement/responsibility)"]
19
+ }
20
+ ],
21
+ "education": [
22
+ {
23
+ "school": "string",
24
+ "degree": "string",
25
+ "dates": "string"
26
+ }
27
+ ],
28
+ "skills": ["string"]
29
+ }
30
+
31
+ Resume text:
32
+ {resume_text}
pyproject.toml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "cv-buddy-backend"
3
+ version = "0.1.0"
4
+ description = "AI-powered resume customization backend"
5
+ requires-python = ">=3.11"
6
+
7
+ [tool.pytest.ini_options]
8
+ asyncio_mode = "auto"
9
+ testpaths = ["tests"]
requirements.txt ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Core
2
+ fastapi>=0.109.0
3
+ uvicorn[standard]>=0.27.0
4
+ python-multipart>=0.0.6
5
+
6
+ # Background jobs
7
+ celery>=5.3.0
8
+ redis>=5.0.0
9
+
10
+ # Document parsing
11
+ pymupdf>=1.23.0
12
+ python-docx>=1.1.0
13
+
14
+ # Document generation
15
+ weasyprint>=60.0
16
+ jinja2>=3.1.0
17
+
18
+ # Web scraping
19
+ httpx>=0.26.0
20
+ beautifulsoup4>=4.12.0
21
+
22
+ # LLM providers
23
+ openai>=1.10.0
24
+ google-generativeai>=0.4.0
25
+
26
+ # Utilities
27
+ pydantic>=2.5.0
28
+ pydantic-settings>=2.1.0
29
+
30
+ # Testing
31
+ pytest>=8.0.0
32
+ pytest-asyncio>=0.23.0
33
+ pytest-cov>=4.1.0
supervisord.conf ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [supervisord]
2
+ nodaemon=true
3
+ logfile=/dev/null
4
+ logfile_maxbytes=0
5
+
6
+ [program:uvicorn]
7
+ command=uvicorn app.main:app --host 0.0.0.0 --port %(ENV_PORT)s
8
+ directory=/app
9
+ autostart=true
10
+ autorestart=true
11
+ stdout_logfile=/dev/fd/1
12
+ stdout_logfile_maxbytes=0
13
+ stderr_logfile=/dev/fd/2
14
+ stderr_logfile_maxbytes=0
15
+
16
+ [program:celery]
17
+ command=celery -A app.workers.celery_app worker --loglevel=info --concurrency=2
18
+ directory=/app
19
+ autostart=true
20
+ autorestart=true
21
+ stdout_logfile=/dev/fd/1
22
+ stdout_logfile_maxbytes=0
23
+ stderr_logfile=/dev/fd/2
24
+ stderr_logfile_maxbytes=0