Spaces:
Sleeping
Sleeping
File size: 7,890 Bytes
import logging
from langchain_community.document_loaders import PDFPlumberLoader, TextLoader
import extraction as extr # extraction.py
import streamlit as st
import pandas as pd
# Configure logging
# logging.basicConfig(level=logging.DEBUG , format='%(asctime)s - %(levelname)s - %(message)s')
# logger = logging.getLogger(__name__)
class CVAnalyzer:
    """Analyze CVs against job-description requirements.

    Wraps the helpers in ``extraction.py`` (imported as ``extr``) for
    LLM-backed parsing, and scores candidates against job requirements
    with a simple weighted sum of component scores.
    """

    def __init__(self):
        # Initialize the Groq LLM through the shared extraction module.
        self.llm = extr.initialize_llm()
        # Embeddings are not currently used; re-enable here if needed.
        # self.embeddings = HuggingFaceEmbeddings(
        #     model_name="sentence-transformers/all-mpnet-base-v2"
        # )

    def load_document(self, file_path: str) -> str:
        """Load a document and return its full text.

        PDFs go through PDFPlumberLoader; anything else is treated as
        plain text. Per-page contents are joined with single spaces.
        """
        # Case-insensitive check so '.PDF' uploads are also handled as PDFs.
        if file_path.lower().endswith('.pdf'):
            loader = PDFPlumberLoader(file_path)
        else:
            loader = TextLoader(file_path)
        documents = loader.load()
        return " ".join(doc.page_content for doc in documents)

    # Return annotation is quoted so it is evaluated lazily and does not
    # require ``extr`` to be importable at class-creation time.
    def extract_cv_info(self, cv_text: str) -> "list[extr.cv]":
        """Extract structured candidate records (``extr.cv``) from raw CV text."""
        return extr.extract_cv_data(cv_text)

    def calculate_match_score(self, cv_info: dict, jd_requirements: dict) -> dict:
        """Score one candidate against the job requirements.

        Args:
            cv_info: candidate fields, e.g. ``skills`` (list[str]) and
                ``years_of_exp`` (number or None).
            jd_requirements: job fields, e.g. ``required_skills`` (list[str])
                and ``min_years_experience`` (number).

        Returns:
            dict with ``skills_match`` and ``experience_match`` (each in
            [0, 1]) and ``overall_score`` — a weighted sum using weights
            0.5/0.3, so the maximum overall score is 0.8 as written.
        """
        score_components = {
            "skills_match": 0,
            "experience_match": 0,
            "overall_score": 0,
        }

        # Skills: fraction of required skills present (case-insensitive).
        if "skills" in cv_info and "required_skills" in jd_requirements:
            cv_skills = {skill.lower() for skill in cv_info["skills"]}
            required_skills = {skill.lower() for skill in jd_requirements["required_skills"]}
            # Guard: an empty requirements list would otherwise divide by zero.
            if required_skills:
                score_components["skills_match"] = (
                    len(cv_skills & required_skills) / len(required_skills)
                )

        # Experience: full credit at/above the minimum, pro-rated below it.
        if "years_of_exp" in cv_info and "min_years_experience" in jd_requirements:
            # Treat a missing/None experience value as zero years (extraction
            # may yield None — see how callers substitute 0 for display).
            years = cv_info["years_of_exp"] or 0
            min_years = jd_requirements["min_years_experience"]
            if years >= min_years:
                score_components["experience_match"] = 1.0
            else:
                # min_years > 0 is guaranteed here (0 <= years < min_years).
                score_components["experience_match"] = years / min_years

        # Overall: weighted sum of the component scores.
        weights = {"skills_match": 0.5, "experience_match": 0.3}
        score_components["overall_score"] = sum(
            score * weights[component]
            for component, score in score_components.items()
            if component != "overall_score"
        )
        return score_components
def create_cv_shortlisting_page():
    """Render the Streamlit page for JD-driven CV shortlisting.

    Collects a job description, minimum experience, and required skills via
    a form; analyzes the uploaded CVs with CVAnalyzer; and displays a table
    ranked by overall score. Inputs and results persist in
    ``st.session_state`` across Streamlit reruns.
    """
    # --- Session-state defaults (survive reruns) ---
    defaults = {
        'jd_text': '',
        'min_years': 0,
        'required_skills_list': [],
        'uploaded_files': None,
        'results': [],
        'analysis_complete': False,
    }
    for key, value in defaults.items():
        if key not in st.session_state:
            st.session_state[key] = value

    # --- Input form ---
    with st.form("job_description_form"):
        st.header("Job Description")
        jd_text = st.text_area("Enter the job description", value=st.session_state.jd_text)

        st.header("Job Requirements")
        min_years = st.number_input("Minimum years of experience", min_value=0, value=st.session_state.min_years)
        required_skills = st.text_input(
            "Required skills (comma-separated)",
            value=','.join(st.session_state.required_skills_list) if st.session_state.required_skills_list else ""
        )

        st.header("Upload CVs")
        uploaded_files = st.file_uploader("Choose CV files", accept_multiple_files=True, type=['pdf', 'txt'], key="unique_cv_upload")

        submit_button = st.form_submit_button(label="Analyze CVs")

    # --- Persist inputs once the form is submitted ---
    if submit_button:
        st.session_state.jd_text = jd_text
        st.session_state.min_years = min_years
        st.session_state.required_skills_list = [
            skill.strip() for skill in required_skills.split(",") if skill.strip()
        ]
        st.session_state.uploaded_files = uploaded_files

    if st.session_state.uploaded_files and st.session_state.jd_text:
        with st.spinner('Analyzing CVs...'):
            analyzer = CVAnalyzer()
            job_requirements = {
                "min_years_experience": st.session_state.min_years,
                "required_skills": st.session_state.required_skills_list,
            }
            st.session_state.results = []  # reset for a fresh analysis

            for uploaded_file in st.session_state.uploaded_files:
                # File reading stays inside the try so a corrupt upload
                # surfaces as a per-file error instead of crashing the page.
                try:
                    cv_text = extr.process_file(uploaded_file)
                    candidates = analyzer.extract_cv_info(cv_text)
                    for candidate in candidates:
                        match_scores = analyzer.calculate_match_score(
                            candidate.__dict__,
                            job_requirements,
                        )
                        st.session_state.results.append({
                            "Name": candidate.name or "Unknown",
                            "Experience (Years)": candidate.years_of_exp or 0,
                            "Skills": ", ".join(candidate.skills) if candidate.skills else "None",
                            "Certifications": ", ".join(candidate.certifications) if candidate.certifications else "None",
                            "Skills Match": f"{match_scores['skills_match']:.2%}",
                            "Experience Match": f"{match_scores['experience_match']:.2%}",
                            "Overall Score": f"{match_scores['overall_score']:.2%}",
                        })
                except Exception as e:
                    st.error(f"Error processing CV: {str(e)}")

            # --- Display ranked results ---
            if st.session_state.results:
                df = pd.DataFrame(st.session_state.results)
                # Scores are formatted strings ("12.34%"); sort them
                # numerically — a plain lexicographic sort would rank
                # "9.00%" above "85.00%".
                df = df.sort_values(
                    "Overall Score",
                    key=lambda col: col.str.rstrip('%').astype(float),
                    ascending=False,
                )
                st.dataframe(df)
                st.session_state.analysis_complete = True
            else:
                st.error("No valid results found from CV analysis")
                st.session_state.analysis_complete = False