File size: 2,565 Bytes
a9a45af
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a2cd8f6
a9a45af
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import os 
from data import load_text 
from generate_summary_and_embedding import generate_summary_and_embedding
from cal_sim import compute_similarity

def run_single_similarity(jd_path, cv_path, SECTION_WEIGHTS, model_id="sentence-transformers/all-MiniLM-L6-v2", llm_model="meta-llama/Meta-Llama-3-8B-Instruct", pipe=None):
    """Score one resume against one job description.

    Both files are read with ``load_text``, passed through
    ``generate_summary_and_embedding`` (job description first, then resume)
    and compared with ``compute_similarity``.

    Args:
        jd_path: Path of the job-description file, handed to ``load_text``.
        cv_path: Path of the resume file, handed to ``load_text``.
        SECTION_WEIGHTS: Mapping from section key to weight. Sections that
            have no entry contribute 0 to the weighted total.
        model_id: Forwarded unchanged to ``generate_summary_and_embedding``.
        llm_model: Forwarded unchanged to ``generate_summary_and_embedding``.
        pipe: Forwarded unchanged to ``generate_summary_and_embedding``.

    Returns:
        A ``(weighted_score, scores)`` tuple: the weighted sum of the
        per-section scores as a float rounded to 4 decimals, and the
        per-section scores exactly as ``compute_similarity`` returned them.
    """
    job_text = load_text(jd_path)
    resume_text = load_text(cv_path)

    # Only the first element of each returned pair is used here.
    job_vectors, _ = generate_summary_and_embedding(
        job_text,
        text_type="job_description",
        model_id=model_id,
        llm_model=llm_model,
        pipe=pipe,
    )
    resume_vectors, _ = generate_summary_and_embedding(
        resume_text,
        text_type="resume",
        model_id=model_id,
        llm_model=llm_model,
        pipe=pipe,
    )

    scores = compute_similarity(job_vectors, resume_vectors)

    # Accumulate from 0 with plain addition, mirroring the built-in sum().
    total = 0
    for section in scores:
        total = total + scores[section] * SECTION_WEIGHTS.get(section, 0)

    return round(float(total), 4), scores

def _resolve_cv_paths(cv_input):
    """Turn *cv_input* into a list of resume file paths.

    A ``str`` or ``os.PathLike`` is treated as a directory: its regular files
    ending in ``.pdf`` or ``.txt`` (case-insensitive) are returned in sorted
    name order. A ``list`` or ``tuple`` is taken as the paths themselves.

    Raises:
        ValueError: If *cv_input* is neither a directory path nor a
            list/tuple of paths.
    """
    if isinstance(cv_input, (str, os.PathLike)):
        found = []
        # os.listdir() returns names in arbitrary order; sort so that the
        # result order is reproducible between runs and machines.
        for name in sorted(os.listdir(cv_input)):
            candidate = os.path.join(cv_input, name)
            if os.path.isfile(candidate) and name.lower().endswith(('.pdf', '.txt')):
                found.append(candidate)
        return found

    if isinstance(cv_input, (list, tuple)):
        return list(cv_input)

    raise ValueError("cv_input must be path directory (str) or list of file paths (list[str])")


def run_batch_similarity(jd_path, cv_input, SECTION_WEIGHTS, model_id="sentence-transformers/all-MiniLM-L6-v2", llm_model="meta-llama/Meta-Llama-3-8B-Instruct", pipe=None):
    """Score many resumes against a single job description.

    The job description is loaded and embedded once; each resume is then
    loaded, embedded and compared with ``compute_similarity``. A resume that
    fails to process is reported on stdout and skipped, so one bad file does
    not abort the batch.

    Args:
        jd_path: Path of the job-description file, handed to ``load_text``.
        cv_input: Either a directory (``str`` or ``os.PathLike``) whose
            ``.pdf``/``.txt`` files are used, or a list/tuple of file paths.
        SECTION_WEIGHTS: Mapping from section key to weight. Sections that
            have no entry contribute 0 to the weighted total.
        model_id: Forwarded unchanged to ``generate_summary_and_embedding``.
        llm_model: Forwarded unchanged to ``generate_summary_and_embedding``.
        pipe: Forwarded unchanged to ``generate_summary_and_embedding``.

    Returns:
        A list with one dict per successfully processed resume, holding the
        keys ``"JD"``, ``"Resume"``, ``"Similarity Score"`` and one
        ``"<section> Score"`` entry per section, all scores rounded to
        4 decimals.

    Raises:
        ValueError: If ``cv_input`` has an unsupported type. This is raised
            before the job description is read or embedded.
    """
    # Resolve the resume list first: an invalid cv_input (or an unreadable
    # directory) should fail before the job description is summarised and
    # embedded, which is wasted work if the call is going to be rejected.
    cv_paths = _resolve_cv_paths(cv_input)

    jd_text = load_text(jd_path)
    jd_embeddings, _ = generate_summary_and_embedding(jd_text, text_type="job_description", model_id=model_id, llm_model=llm_model, pipe=pipe)

    results = []
    for full_cv_path in cv_paths:
        filename = os.path.basename(full_cv_path)

        try:
            cv_text = load_text(full_cv_path)
            cv_embeddings, _ = generate_summary_and_embedding(cv_text, text_type="resume", model_id=model_id, llm_model=llm_model, pipe=pipe)
            scores = compute_similarity(jd_embeddings, cv_embeddings)
            weighted_scores = sum(scores[k] * SECTION_WEIGHTS.get(k, 0) for k in scores)

            result = {
                "JD": os.path.basename(jd_path),
                "Resume": filename,
                "Similarity Score": round(float(weighted_scores), 4)
            }

            for sec, val in scores.items():
                # Cast like the overall score so every reported value is a
                # plain Python float (presumably these are numpy/tensor
                # scalars otherwise -- confirm against compute_similarity).
                result[f"{sec} Score"] = round(float(val), 4)

            results.append(result)

        except Exception as e:
            # Deliberate best-effort: report the failure and keep going.
            print(f"❌ Failed to process {filename}: {e}")

    return results