Resume-test / scripts /similarity /get_score.py
marchji2415's picture
Upload 38 files
3241ff4 verified
import json
import logging
import os
from typing import List
import yaml
from qdrant_client import QdrantClient
from scripts.utils.logger import init_logging_config
init_logging_config(basic_log_level=logging.INFO)
# Get the logger
logger = logging.getLogger(__name__)
# Set the logging level
logger.setLevel(logging.INFO)
def find_path(folder_name):
"""
The function `find_path` searches for a folder by name starting from the current directory and
traversing up the directory tree until the folder is found or the root directory is reached.
Args:
folder_name: The `find_path` function you provided is designed to search for a folder by name
starting from the current working directory and moving up the directory tree until it finds the
folder or reaches the root directory.
Returns:
The `find_path` function is designed to search for a folder with the given `folder_name` starting
from the current working directory (`os.getcwd()`). It iterates through the directory structure,
checking if the folder exists in the current directory or any of its parent directories. If the
folder is found, it returns the full path to that folder using `os.path.join(curr_dir, folder_name)`
"""
curr_dir = os.getcwd()
while True:
if folder_name in os.listdir(curr_dir):
return os.path.join(curr_dir, folder_name)
else:
parent_dir = os.path.dirname(curr_dir)
if parent_dir == "/":
break
curr_dir = parent_dir
raise ValueError(f"Folder '{folder_name}' not found.")
cwd = find_path("Resume-Matcher")
READ_RESUME_FROM = os.path.join(cwd, "Data", "Processed", "Resumes")
READ_JOB_DESCRIPTION_FROM = os.path.join(cwd, "Data", "Processed", "JobDescription")
config_path = os.path.join(cwd, "scripts", "similarity")
def read_config(filepath):
"""
The `read_config` function reads a configuration file in YAML format and handles exceptions related
to file not found or parsing errors.
Args:
filepath: The `filepath` parameter in the `read_config` function is a string that represents the
path to the configuration file that you want to read and parse. This function attempts to open the
file specified by `filepath`, load its contents as YAML, and return the parsed configuration. If any
errors occur during
Returns:
The function `read_config` will return the configuration loaded from the file if successful, or
`None` if there was an error during the process.
"""
try:
with open(filepath) as f:
config = yaml.safe_load(f)
return config
except FileNotFoundError as e:
logger.error(f"Configuration file {filepath} not found: {e}")
except yaml.YAMLError as e:
logger.error(
f"Error parsing YAML in configuration file {filepath}: {e}", exc_info=True
)
except Exception as e:
logger.error(f"Error reading configuration file {filepath}: {e}")
return None
def read_doc(path):
"""
The `read_doc` function reads a JSON file from the specified path and returns its contents, handling
any exceptions that may occur during the process.
Args:
path: The `path` parameter in the `read_doc` function is a string that represents the file path to
the JSON document that you want to read and load. This function reads the JSON data from the file
located at the specified path.
Returns:
The function `read_doc(path)` reads a JSON file located at the specified `path`, and returns the
data loaded from the file. If there is an error reading the JSON file, it logs the error message and
returns an empty dictionary `{}`.
"""
with open(path) as f:
try:
data = json.load(f)
except Exception as e:
logger.error(f"Error reading JSON file: {e}")
data = {}
return data
def get_score(resume_string, job_description_string):
"""
The function `get_score` uses QdrantClient to calculate the similarity score between a resume and a
job description.
Args:
resume_string: The `resume_string` parameter is a string containing the text of a resume. It
represents the content of a resume that you want to compare with a job description.
job_description_string: The `get_score` function you provided seems to be using a QdrantClient to
calculate the similarity score between a resume and a job description. The function takes in two
parameters: `resume_string` and `job_description_string`, where `resume_string` is the text content
of the resume and
Returns:
The function `get_score` returns the search result obtained by querying a QdrantClient with the
job description string against the resume string provided.
"""
logger.info("Started getting similarity score")
documents: List[str] = [resume_string]
client = QdrantClient(":memory:")
client.set_model("BAAI/bge-base-en")
client.add(
collection_name="demo_collection",
documents=documents,
)
search_result = client.query(
collection_name="demo_collection", query_text=job_description_string
)
logger.info("Finished getting similarity score")
return search_result
if __name__ == "__main__":
# To give your custom resume use this code
resume_dict = read_config(
READ_RESUME_FROM
+ "/Resume-alfred_pennyworth_pm.pdf83632b66-5cce-4322-a3c6-895ff7e3dd96.json"
)
job_dict = read_config(
READ_JOB_DESCRIPTION_FROM
+ "/JobDescription-job_desc_product_manager.pdf6763dc68-12ff-4b32-b652-ccee195de071.json"
)
resume_keywords = resume_dict["extracted_keywords"]
job_description_keywords = job_dict["extracted_keywords"]
resume_string = " ".join(resume_keywords)
jd_string = " ".join(job_description_keywords)
final_result = get_score(resume_string, jd_string)
for r in final_result:
print(r.score)