File size: 9,325 Bytes
b8f2a93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
"""
utils/engine.py — Main orchestration logic for the Resume Match Analyzer.
Consolidates the business logic previously handled by FastAPI routes.
"""
from __future__ import annotations
import uuid
import asyncio
from pathlib import Path
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select, delete

from utils.db import crud, models
from utils.db.database import get_db, create_tables
from utils.nlp.text_extractor import text_extractor
from utils.nlp.preprocessor import preprocessor
from utils.nlp.similarity_engine import similarity_engine
from utils.nlp.skill_gap_analyzer import skill_gap_analyzer
from utils.nlp.ats_simulator import ats_simulator
from utils.nlp.section_analyzer import section_analyzer
from utils.nlp.llm_suggestions import llm_suggestions
from utils.core.config import settings
from utils.core.logging_config import get_logger

# Module-level logger, obtained from the project's get_logger helper and
# keyed by this module's dotted name.
logger = get_logger(__name__)

async def init_engine() -> None:
    """Prepare the engine for use by creating the database tables.

    Awaits ``create_tables()`` and, once it has returned, emits an
    info-level log entry confirming that initialization finished.
    """
    await create_tables()
    logger.info("Database tables initialized.")

async def analyze_resume(
    resume_content: bytes,
    resume_filename: str,
    jd_text: str,
    db: AsyncSession
) -> dict:
    """
    Analyze a single resume against a job description.
    Integrated version of the original FastAPI /analyze route.

    Args:
        resume_content: Raw bytes of the resume file.
        resume_filename: Name of the resume file; its suffix (lower-cased,
            without the dot) is passed to the text extractor as the source
            type. An empty name falls back to ``resume.pdf``.
        jd_text: Job description, either as plain text or as an
            ``http://`` / ``https://`` URL from which the text is fetched.
        db: Async database session used to persist the resume-file record
            and the analysis result.

    Returns:
        A dict with the keys ``analysis_id``, ``resume_file``,
        ``match_scores``, ``skill_gap``, ``ats``, ``sections`` and
        ``suggestions``.

    Raises:
        ValueError: If text cannot be extracted from the resume, the JD URL
            cannot be fetched, the resume has fewer than 50 non-blank-padded
            characters, or the JD has fewer than 30.
    """
    # ── 1. Text extraction ─────────────────────────────────────────
    file_ext = Path(resume_filename or "resume.pdf").suffix.lower()
    source_type = file_ext.lstrip(".")

    try:
        resume_text = text_extractor.extract(resume_content, source_type=source_type)
    except Exception as exc:
        logger.error(f"Text extraction failed: {exc}")
        # Chain explicitly so the extractor's error is recorded as the cause.
        raise ValueError(f"Could not extract text from resume: {exc}") from exc

    # JD may be raw text or a URL. Match on a full scheme prefix so that plain
    # text that merely begins with "http" is not mistaken for a link, and hand
    # the extractor the stripped value that was actually tested.
    jd_candidate = jd_text.strip()
    if jd_candidate.startswith(("http://", "https://")):
        try:
            jd_raw = text_extractor.extract(jd_candidate, source_type="url")
        except Exception as exc:
            raise ValueError(f"Could not fetch JD from URL: {exc}") from exc
    else:
        jd_raw = jd_text

    # Measure length without surrounding whitespace: an extraction that yields
    # only blank lines must not count as a readable document.
    if len(resume_text.strip()) < 50:
        raise ValueError("Resume appears to be empty or unreadable.")
    if len(jd_raw.strip()) < 30:
        raise ValueError("Job description is too short.")

    # ── 2. Run NLP pipeline ─────────────────────────────────────
    # Similarity and skill-gap analysis run on the cleaned text; the ATS,
    # section and suggestion steps receive the raw text.
    clean_resume = preprocessor.clean(resume_text)
    clean_jd     = preprocessor.clean(jd_raw)

    similarity   = similarity_engine.compute(clean_resume, clean_jd)
    skill_gap    = skill_gap_analyzer.analyze(clean_resume, clean_jd)
    ats_report   = ats_simulator.score(resume_text, jd_raw)
    sections     = section_analyzer.analyze(resume_text, jd_raw)
    suggestions  = llm_suggestions.generate(
        resume_text=resume_text,
        jd_text=jd_raw,
        missing_skills=skill_gap.all_missing[:10],
        match_score=similarity["ensemble_score"],
    )

    # The same suggestion fields are persisted and returned; build them once.
    suggestions_payload = {
        "bullets":  suggestions.resume_bullets,
        "keywords": suggestions.keyword_suggestions,
        "summary":  suggestions.summary_rewrite,
        "advice":   suggestions.overall_advice,
        "used_llm": suggestions.used_llm,
    }

    # ── 3. Persist to DB ────────────────────────────────────────
    resume_db = await crud.create_resume_file(
        db,
        filename=resume_filename,
        file_size=len(resume_content),
        content_type="application/octet-stream",
    )

    analysis = await crud.create_analysis(
        db,
        resume_id       =resume_db.id,
        resume_filename =resume_filename,
        jd_snippet      =jd_raw[:300],
        ensemble_score  =similarity["ensemble_score"],
        tfidf_score     =similarity.get("tfidf_score"),
        sbert_score     =similarity.get("sbert_score"),
        bert_score      =similarity.get("bert_score"),
        ats_score       =ats_report.ats_score,
        skill_coverage  =skill_gap.coverage_score,
        skill_gap_data  ={
            "missing_by_category": skill_gap.missing_by_category,
            "all_missing":         skill_gap.all_missing,
            "all_jd_skills":       skill_gap.all_jd_skills,
            "coverage_score":      skill_gap.coverage_score,
            "priority_gaps":       skill_gap.priority_gaps,
        },
        section_scores  =sections.section_scores,
        suggestions     =suggestions_payload,
        ats_report      ={
            "ats_score":              ats_report.ats_score,
            "keyword_match_score":    ats_report.keyword_match_score,
            "format_score":           ats_report.format_score,
            "keyword_density_score":  ats_report.keyword_density_score,
            "matched_keywords":       ats_report.matched_keywords[:30],
            "missing_keywords":       ats_report.missing_keywords[:20],
            "format_feedback":        ats_report.format_feedback,
            "improvement_tips":       ats_report.improvement_tips,
        },
        missing_skills  =skill_gap.all_missing,
    )

    # ── 4. Build the response ───────────────────────────────────
    # The response carries shorter keyword lists than the persisted report.
    return {
        "analysis_id":   analysis.id,
        "resume_file":   resume_filename,
        "match_scores":  similarity,
        "skill_gap": {
            "coverage_score":       skill_gap.coverage_score,
            "missing_by_category":  skill_gap.missing_by_category,
            "all_missing":          skill_gap.all_missing,
            "priority_gaps":        skill_gap.priority_gaps,
            "learning_resources":   skill_gap.learning_resources,
        },
        "ats": {
            "ats_score":            ats_report.ats_score,
            "keyword_match_score":  ats_report.keyword_match_score,
            "format_score":         ats_report.format_score,
            "matched_keywords":     ats_report.matched_keywords[:20],
            "missing_keywords":     ats_report.missing_keywords[:10],
            "improvement_tips":     ats_report.improvement_tips,
        },
        "sections": {
            "scores":           sections.section_scores,
            "feedback":         sections.section_feedback,
            "strongest":        sections.strongest_section,
            "weakest":          sections.weakest_section,
            "overall_score":    sections.overall_section_score,
        },
        # Separate dict object from the one handed to the persistence layer.
        "suggestions": dict(suggestions_payload),
    }

async def rank_resumes(
    resumes: list[tuple[str, bytes]],
    jd_text: str
) -> dict:
    """
    Rank multiple resumes against a job description.

    Args:
        resumes: ``(filename, content)`` pairs; the filename suffix selects
            the extractor source type (an empty name falls back to
            ``resume.pdf``).
        jd_text: Job description text every resume is compared against.

    Returns:
        A dict with:
          * ``ranked_resumes`` - scored entries sorted by ``ensemble_score``
            descending, each with a 1-based ``rank``;
          * ``errors`` - entries without an ensemble score (files whose text
            could not be extracted), with ``rank`` left as ``None``;
          * ``jd_snippet`` - the first 200 characters of ``jd_text``;
          * ``total_submitted`` / ``total_ranked`` - counts of inputs and of
            ranked entries.
    """
    results = []
    # The cleaned JD is identical for every resume, so it is computed once,
    # lazily, when the first readable resume is reached.
    # NOTE(review): assumes preprocessor.clean is deterministic - confirm.
    clean_jd = None

    for filename, content in resumes:
        file_ext = Path(filename or "resume.pdf").suffix.lower()
        try:
            resume_text = text_extractor.extract(content, source_type=file_ext.lstrip("."))
        except Exception as exc:
            # One unreadable file must not abort the batch; record and move on.
            results.append({
                "filename": filename,
                "error":    f"Could not read file: {exc}",
                "rank":     None,
            })
            continue

        clean_resume = preprocessor.clean(resume_text)
        if clean_jd is None:
            clean_jd = preprocessor.clean(jd_text)

        sim   = similarity_engine.compute(clean_resume, clean_jd)
        gaps  = skill_gap_analyzer.analyze(clean_resume, clean_jd)

        results.append({
            "filename":       filename,
            "ensemble_score": sim["ensemble_score"],
            # Component scores are read with .get so that a missing
            # component does not raise KeyError and sink the whole ranking.
            "tfidf_score":    sim.get("tfidf_score"),
            "sbert_score":    sim.get("sbert_score"),
            "confidence":     sim.get("confidence"),
            "skill_coverage": gaps.coverage_score,
            "missing_skills": gaps.all_missing[:8],
            "rank":           None,
        })

    # Split into scored entries and failures, then rank the scored ones.
    valid = [r for r in results if r.get("ensemble_score") is not None]
    errors = [r for r in results if r.get("ensemble_score") is None]

    valid.sort(key=lambda x: x["ensemble_score"], reverse=True)
    for i, item in enumerate(valid, start=1):
        item["rank"] = i

    return {
        "ranked_resumes":  valid,
        "errors":          errors,
        "jd_snippet":      jd_text[:200],
        "total_submitted": len(resumes),
        "total_ranked":    len(valid),
    }

async def get_history(db: AsyncSession, limit: int = 20) -> list:
    """Fetch the most recently created analysis results, newest first.

    Selects ``models.AnalysisResult`` rows ordered by ``created_at``
    descending and capped at ``limit``. If building or running the query
    raises, the error is logged and an empty list is returned instead.
    """
    try:
        newest_first = models.AnalysisResult.created_at.desc()
        query = select(models.AnalysisResult).order_by(newest_first)
        query = query.limit(limit)
        outcome = await db.execute(query)
        rows = outcome.scalars().all()
        return list(rows)
    except Exception as exc:
        logger.error(f"History retrieval failed: {exc}")
        return []

async def delete_history_item(db: AsyncSession, analysis_id: str) -> bool:
    """Delete an analysis from history.

    Args:
        db: Async database session the DELETE is executed and committed on.
        analysis_id: Value matched against ``models.AnalysisResult.id``.

    Returns:
        ``True`` when the DELETE and the commit completed without raising
        (no check is made that a row actually matched), ``False`` if either
        step raised.
    """
    try:
        stmt = delete(models.AnalysisResult).where(models.AnalysisResult.id == analysis_id)
        await db.execute(stmt)
        await db.commit()
        return True
    except Exception as exc:
        logger.error(f"Deletion failed: {exc}")
        # Roll back so the caller's session is not left inside a failed
        # transaction. The rollback is guarded so this function still
        # reports failure by returning False rather than raising.
        try:
            await db.rollback()
        except Exception as rollback_exc:
            logger.error(f"Rollback after failed deletion also failed: {rollback_exc}")
        return False