# CV_Process / cv_short.py
# bsiddhharth
# made some changes in short.py and cv_analyzer_search.py
# 68cdd74
import logging
from langchain_community.document_loaders import PDFPlumberLoader, TextLoader
import extraction as extr # extraction.py
import streamlit as st
import pandas as pd
# Configure logging
# logging.basicConfig(level=logging.DEBUG , format='%(asctime)s - %(levelname)s - %(message)s')
# logger = logging.getLogger(__name__)
class CVAnalyzer:
    """Load CV documents, extract candidate data and score it against a job.

    LLM set-up and the actual extraction are delegated to the project's
    ``extraction`` module (imported in this file as ``extr``).
    """

    def __init__(self):
        """Create the analyzer and initialise the LLM via ``extr.initialize_llm()``."""
        self.llm = extr.initialize_llm()

    def load_document(self, file_path: str) -> str:
        """Load a document from disk and return its text.

        Paths ending in ``.pdf`` (case-insensitive) are read with
        ``PDFPlumberLoader``; every other path is read with ``TextLoader``.

        Args:
            file_path: Path of the file to load.

        Returns:
            The ``page_content`` of all loaded documents joined by single spaces.
        """
        # Compare on the lowercased path so "resume.PDF" is not sent to TextLoader.
        if file_path.lower().endswith(".pdf"):
            loader = PDFPlumberLoader(file_path)
        else:
            loader = TextLoader(file_path)
        documents = loader.load()
        return " ".join(doc.page_content for doc in documents)

    # The return annotation is quoted so it is not evaluated when the class
    # body is executed.
    def extract_cv_info(self, cv_text: str) -> "list[extr.cv]":
        """Extract structured candidate records from raw CV text.

        Args:
            cv_text: Plain text of a CV.

        Returns:
            Whatever ``extr.extract_cv_data(cv_text)`` returns, unchanged.
        """
        return extr.extract_cv_data(cv_text)

    @staticmethod
    def _normalize_skills(skills) -> set:
        """Return a set of stripped, lowercased skill names.

        ``None`` (or any other falsy container) yields an empty set, and blank
        entries are dropped, so callers can compare skill sets safely.
        """
        return {
            skill.strip().lower()
            for skill in (skills or [])
            if skill and skill.strip()
        }

    def calculate_match_score(self, cv_info: dict, jd_requirements: dict) -> dict:
        """Calculate match scores between a CV and the job requirements.

        Args:
            cv_info: Candidate data. Reads the optional keys ``"skills"``
                (iterable of strings, or ``None``) and ``"years_of_exp"``
                (number, or ``None``).
            jd_requirements: Job requirements. Reads the optional keys
                ``"required_skills"`` (iterable of strings) and
                ``"min_years_experience"`` (number).

        Returns:
            A dict with the keys ``"skills_match"``, ``"experience_match"`` and
            ``"overall_score"``. A component stays ``0`` when either of the
            keys it depends on is absent.
        """
        score_components = {
            "skills_match": 0,
            "experience_match": 0,
            "overall_score": 0,
        }

        # Skills matching: fraction of the required skills found in the CV.
        if "skills" in cv_info and "required_skills" in jd_requirements:
            cv_skills = self._normalize_skills(cv_info["skills"])
            required_skills = self._normalize_skills(jd_requirements["required_skills"])
            if required_skills:
                matched = cv_skills & required_skills
                score_components["skills_match"] = len(matched) / len(required_skills)
            else:
                # Nothing is required, so the requirement is trivially met.
                # This mirrors the experience branch (a minimum of 0 years
                # scores 1.0) and avoids dividing by zero.
                score_components["skills_match"] = 1.0

        # Experience matching: full marks at or above the minimum, otherwise
        # the proportion of the minimum that the candidate has.
        if "years_of_exp" in cv_info and "min_years_experience" in jd_requirements:
            # Extraction may leave years_of_exp as None; treat that as 0 years.
            years = cv_info["years_of_exp"] or 0
            min_years = jd_requirements["min_years_experience"] or 0
            if min_years <= 0 or years >= min_years:
                score_components["experience_match"] = 1.0
            else:
                score_components["experience_match"] = max(years, 0) / min_years

        # Overall score: weighted sum of the components.
        # NOTE(review): the weights add up to 0.8, so a perfect candidate gets
        # an overall score of 0.8, not 1.0 -- confirm whether that is intended.
        weights = {"skills_match": 0.5, "experience_match": 0.3}
        score_components["overall_score"] = sum(
            score_components[component] * weight
            for component, weight in weights.items()
        )
        return score_components
def create_cv_shortlisting_page():
    """Render the Streamlit CV-shortlisting page.

    Shows a form for the job description, the minimum years of experience,
    a comma-separated list of required skills and the CV uploads. On submit
    the inputs are saved to ``st.session_state``, every uploaded CV is run
    through ``extr.process_file`` and ``CVAnalyzer``, and the candidates are
    displayed in a table ordered by overall score, best first.

    Session-state keys written: ``jd_text``, ``min_years``,
    ``required_skills_list``, ``uploaded_files``, ``results`` and
    ``analysis_complete``.
    """
    # Seed the session state on the first run so the form can be pre-filled.
    state_defaults = {
        'jd_text': '',
        'min_years': 0,
        'required_skills_list': [],
        'uploaded_files': None,
        'results': [],
        'analysis_complete': False,
    }
    for key, default in state_defaults.items():
        if key not in st.session_state:
            st.session_state[key] = default

    # Form for input
    with st.form("job_description_form"):
        # Job Description Input
        st.header("Job Description")
        jd_text = st.text_area("Enter the job description", value=st.session_state.jd_text)

        # Job Requirements Input
        st.header("Job Requirements")
        min_years = st.number_input(
            "Minimum years of experience",
            min_value=0,
            value=st.session_state.min_years,
        )
        required_skills = st.text_input(
            "Required skills (comma-separated)",
            value=','.join(st.session_state.required_skills_list),
        )

        # CV Upload
        st.header("Upload CVs")
        uploaded_files = st.file_uploader(
            "Choose CV files",
            accept_multiple_files=True,
            type=['pdf', 'txt'],
            key="unique_cv_upload",
        )

        # Submit Button
        submit_button = st.form_submit_button(label="Analyze CVs")

    if not submit_button:
        return

    # Persist the submitted inputs so they survive Streamlit reruns.
    st.session_state.jd_text = jd_text
    st.session_state.min_years = min_years
    st.session_state.required_skills_list = [
        skill.strip() for skill in required_skills.split(",") if skill.strip()
    ]
    st.session_state.uploaded_files = uploaded_files

    if not (st.session_state.uploaded_files and st.session_state.jd_text):
        # Tell the user why nothing was analysed instead of failing silently.
        st.warning("Please enter a job description and upload at least one CV.")
        return

    # Raw overall scores, kept parallel to st.session_state.results so rows
    # can be ordered numerically; the stored "Overall Score" is a formatted
    # string and would sort lexicographically ("9.00%" above "80.00%").
    overall_scores = []

    with st.spinner('Analyzing CVs...'):
        analyzer = CVAnalyzer()

        # Prepare job requirements
        job_requirements = {
            "min_years_experience": st.session_state.min_years,
            "required_skills": st.session_state.required_skills_list,
        }
        st.session_state.results = []  # Reset results for new analysis

        # Process each CV
        for uploaded_file in st.session_state.uploaded_files:
            try:
                # Reading the file is inside the try so that one unreadable
                # upload is reported and skipped rather than aborting the page.
                cv_text = extr.process_file(uploaded_file)
                candidates = analyzer.extract_cv_info(cv_text)
                for candidate in candidates:
                    match_scores = analyzer.calculate_match_score(
                        candidate.__dict__,
                        job_requirements,
                    )
                    result = {
                        "Name": candidate.name or "Unknown",
                        "Experience (Years)": candidate.years_of_exp or 0,
                        "Skills": ", ".join(candidate.skills) if candidate.skills else "None",
                        "Certifications": ", ".join(candidate.certifications) if candidate.certifications else "None",
                        "Skills Match": f"{match_scores['skills_match']:.2%}",
                        "Experience Match": f"{match_scores['experience_match']:.2%}",
                        "Overall Score": f"{match_scores['overall_score']:.2%}",
                    }
                    st.session_state.results.append(result)
                    overall_scores.append(match_scores['overall_score'])
            except Exception as e:
                # Name the file so the failing upload can be identified.
                file_label = getattr(uploaded_file, "name", "uploaded file")
                st.error(f"Error processing CV '{file_label}': {str(e)}")

    # Display results
    if st.session_state.results:
        df = pd.DataFrame(st.session_state.results)
        # Highest overall score first; ties keep their processing order.
        order = sorted(
            range(len(overall_scores)),
            key=overall_scores.__getitem__,
            reverse=True,
        )
        df = df.iloc[order]
        st.dataframe(df)
        st.session_state.analysis_complete = True
    else:
        st.error("No valid results found from CV analysis")
        st.session_state.analysis_complete = False