"""Score resumes (CVs) against a job description (JD).

Both documents are turned into embeddings by
``generate_summary_and_embedding``; ``compute_similarity`` then yields a
mapping of per-section scores, which are combined into one weighted score.
"""

import os

from data import load_text
from generate_summary_and_embedding import generate_summary_and_embedding
from cal_sim import compute_similarity


def _embed(text, text_type, model_id, llm_model, pipe):
    """Return only the embeddings produced for *text*.

    ``generate_summary_and_embedding`` returns a 2-tuple; the second element
    is not needed by this module and is discarded.
    """
    embeddings, _ = generate_summary_and_embedding(
        text,
        text_type=text_type,
        model_id=model_id,
        llm_model=llm_model,
        pipe=pipe,
    )
    return embeddings


def _weighted_total(scores, section_weights):
    """Return the weighted sum of *scores*, as a float rounded to 4 decimals.

    Keys of *scores* that are missing from *section_weights* get weight 0 and
    therefore do not contribute to the total.
    """
    total = sum(scores[key] * section_weights.get(key, 0) for key in scores)
    return round(float(total), 4)


def _resolve_cv_paths(cv_input):
    """Turn *cv_input* into a list of resume file paths.

    A ``str`` is treated as a directory: its ``.pdf`` / ``.txt`` regular files
    are returned in sorted order (``os.listdir`` alone gives arbitrary order,
    which would make batch output differ between runs). A ``list`` is
    returned unchanged.

    Raises:
        ValueError: if *cv_input* is neither a ``str`` nor a ``list``.
    """
    if isinstance(cv_input, str):
        return [
            os.path.join(cv_input, name)
            for name in sorted(os.listdir(cv_input))
            if name.lower().endswith(('.pdf', '.txt'))
            and os.path.isfile(os.path.join(cv_input, name))
        ]
    if isinstance(cv_input, list):
        return cv_input
    raise ValueError("cv_input must be path directory (str) or list of file paths (list[str])")


def run_single_similarity(jd_path, cv_path, SECTION_WEIGHTS,
                          model_id="sentence-transformers/all-MiniLM-L6-v2",
                          llm_model="meta-llama/Meta-Llama-3-8B-Instruct",
                          pipe=None):
    """Compare one resume with one job description.

    Args:
        jd_path: Path of the job description, read with ``load_text``.
        cv_path: Path of the resume, read with ``load_text``.
        SECTION_WEIGHTS: Mapping from score key to its weight; keys absent
            from the mapping count as weight 0.
        model_id: Forwarded to ``generate_summary_and_embedding``.
        llm_model: Forwarded to ``generate_summary_and_embedding``.
        pipe: Forwarded to ``generate_summary_and_embedding``.

    Returns:
        A ``(weighted_score, scores)`` tuple: the weighted total as a float
        rounded to 4 decimals, and the unrounded mapping returned by
        ``compute_similarity``.
    """
    jd_embeddings = _embed(load_text(jd_path), "job_description", model_id, llm_model, pipe)
    cv_embeddings = _embed(load_text(cv_path), "resume", model_id, llm_model, pipe)

    scores = compute_similarity(jd_embeddings, cv_embeddings)
    return _weighted_total(scores, SECTION_WEIGHTS), scores


def run_batch_similarity(jd_path, cv_input, SECTION_WEIGHTS,
                         model_id="sentence-transformers/all-MiniLM-L6-v2",
                         llm_model="meta-llama/Meta-Llama-3-8B-Instruct",
                         pipe=None):
    """Compare many resumes with a single job description.

    Args:
        jd_path: Path of the job description, read with ``load_text``.
        cv_input: Either a directory path (``str``) whose ``.pdf`` / ``.txt``
            files are scored, or a ``list`` of resume file paths.
        SECTION_WEIGHTS: Mapping from score key to its weight; keys absent
            from the mapping count as weight 0.
        model_id: Forwarded to ``generate_summary_and_embedding``.
        llm_model: Forwarded to ``generate_summary_and_embedding``.
        pipe: Forwarded to ``generate_summary_and_embedding``.

    Returns:
        A list with one dict per successfully processed resume, holding
        ``"JD"``, ``"Resume"``, ``"Similarity Score"`` and one
        ``"<key> Score"`` entry per key of the similarity mapping (all scores
        rounded to 4 decimals). Resumes that fail are reported on stdout and
        left out of the list.

    Raises:
        ValueError: if *cv_input* is neither a ``str`` nor a ``list``.
    """
    # Validate/resolve the resume list first so that a bad ``cv_input`` fails
    # before any work is spent on embedding the job description.
    cv_paths = _resolve_cv_paths(cv_input)

    # The JD is embedded once and reused for every resume.
    jd_embeddings = _embed(load_text(jd_path), "job_description", model_id, llm_model, pipe)
    jd_name = os.path.basename(jd_path)

    results = []
    for full_cv_path in cv_paths:
        filename = os.path.basename(full_cv_path)
        try:
            cv_embeddings = _embed(load_text(full_cv_path), "resume", model_id, llm_model, pipe)
            scores = compute_similarity(jd_embeddings, cv_embeddings)

            result = {
                "JD": jd_name,
                "Resume": filename,
                "Similarity Score": _weighted_total(scores, SECTION_WEIGHTS),
            }
            for sec, val in scores.items():
                # float() keeps per-section values plain Python floats,
                # matching how the overall score is rounded.
                result[f"{sec} Score"] = round(float(val), 4)
            results.append(result)
        except Exception as e:
            # Deliberate best-effort boundary: one unreadable or malformed
            # resume must not abort the rest of the batch.
            print(f"❌ Failed to process {filename}: {e}")

    return results