Spaces:
Sleeping
Sleeping
| import logging | |
| from langchain_community.document_loaders import PDFPlumberLoader, TextLoader | |
| import extraction as extr # extraction.py | |
| import streamlit as st | |
| import pandas as pd | |
| # Configure logging | |
| # logging.basicConfig(level=logging.DEBUG , format='%(asctime)s - %(levelname)s - %(message)s') | |
| # logger = logging.getLogger(__name__) | |
class CVAnalyzer:
    """Load CV documents, extract structured data and score it against a job.

    Text extraction and LLM access are delegated to the project's
    ``extraction`` module (imported as ``extr``); the scoring logic in
    :meth:`calculate_match_score` is self-contained.
    """

    def __init__(self):
        """Create the analyzer and keep the LLM handle returned by ``extr``."""
        # Per the original comment this is a Groq-backed LLM; the concrete
        # type is whatever extr.initialize_llm() returns.
        self.llm = extr.initialize_llm()

    def load_document(self, file_path: str) -> str:
        """Load a document from disk and return its text as one string.

        Args:
            file_path: Path of the file to read. Paths ending in ``.pdf``
                (compared case-insensitively) are read with
                ``PDFPlumberLoader``; every other path with ``TextLoader``.

        Returns:
            The ``page_content`` of all loaded documents joined by a single
            space.
        """
        # Lower-case before comparing so "resume.PDF" is not mistakenly
        # handed to the plain-text loader.
        if file_path.lower().endswith('.pdf'):
            loader = PDFPlumberLoader(file_path)
        else:
            loader = TextLoader(file_path)
        documents = loader.load()
        return " ".join(doc.page_content for doc in documents)

    # The annotation is quoted so that ``extr.cv`` is not evaluated while the
    # class body is being executed.
    def extract_cv_info(self, cv_text: str) -> "list[extr.cv]":
        """Extract structured candidate information from raw CV text.

        Args:
            cv_text: Plain text of a CV.

        Returns:
            Whatever ``extr.extract_cv_data`` returns for ``cv_text``;
            annotated as a list of ``extr.cv`` objects.
        """
        return extr.extract_cv_data(cv_text)

    def calculate_match_score(self, cv_info: dict, jd_requirements: dict) -> dict:
        """Score a candidate against the job requirements.

        Args:
            cv_info: Candidate data. The keys ``"skills"`` (iterable of
                strings) and ``"years_of_exp"`` (number) are used when
                present; a ``None`` value is treated as "no skills" / zero
                years.
            jd_requirements: Job data. The keys ``"required_skills"``
                (iterable of strings) and ``"min_years_experience"`` (number)
                are used when present.

        Returns:
            A dict with the keys ``"skills_match"``, ``"experience_match"``
            and ``"overall_score"``. A component stays ``0`` when either side
            lacks the key it needs.
        """
        score_components = {
            "skills_match": 0,
            "experience_match": 0,
            "overall_score": 0,
        }

        # Skills: share of the required skills the candidate has, compared
        # case-insensitively.
        if "skills" in cv_info and "required_skills" in jd_requirements:
            cv_skills = {skill.lower() for skill in cv_info["skills"] or ()}
            required_skills = {
                skill.lower() for skill in jd_requirements["required_skills"] or ()
            }
            if required_skills:
                score_components["skills_match"] = (
                    len(cv_skills & required_skills) / len(required_skills)
                )
            else:
                # Nothing is required, so the requirement is trivially met.
                # This also avoids dividing by zero, and mirrors how a
                # minimum of zero years is treated below.
                score_components["skills_match"] = 1.0

        # Experience: full marks at or above the minimum, otherwise the
        # fraction of the minimum that the candidate has.
        if "years_of_exp" in cv_info and "min_years_experience" in jd_requirements:
            candidate_years = cv_info["years_of_exp"] or 0
            required_years = jd_requirements["min_years_experience"] or 0
            if required_years <= 0 or candidate_years >= required_years:
                score_components["experience_match"] = 1.0
            else:
                score_components["experience_match"] = candidate_years / required_years

        # NOTE(review): the weights add up to 0.8, so a perfect candidate
        # scores 80%. Kept as-is because callers display this value; confirm
        # whether a third component is missing or the sum should be
        # normalised.
        weights = {"skills_match": 0.5, "experience_match": 0.3}
        score_components["overall_score"] = sum(
            score_components[component] * weight
            for component, weight in weights.items()
        )
        return score_components
def create_cv_shortlisting_page():
    """Render the CV shortlisting page: input form, analysis and results table.

    The page shows a form for the job description, the minimum years of
    experience, a comma-separated list of required skills and the CV uploads.
    On submit the inputs are stored in ``st.session_state``; when both a job
    description and at least one file are present, every uploaded CV is run
    through ``CVAnalyzer`` and the scored candidates are shown in a table
    ordered by overall score, best first.

    Session-state keys written: ``jd_text``, ``min_years``,
    ``required_skills_list``, ``uploaded_files``, ``results`` and
    ``analysis_complete``.
    """
    # Seed session state on the first run so the form can be pre-filled with
    # the previously submitted values. The dict is rebuilt on every call, so
    # the list defaults are never shared between sessions.
    session_defaults = {
        'jd_text': '',
        'min_years': 0,
        'required_skills_list': [],
        'uploaded_files': None,
        'results': [],
        'analysis_complete': False,
    }
    for key, default in session_defaults.items():
        if key not in st.session_state:
            st.session_state[key] = default

    with st.form("job_description_form"):
        st.header("Job Description")
        jd_text = st.text_area("Enter the job description", value=st.session_state.jd_text)

        st.header("Job Requirements")
        min_years = st.number_input(
            "Minimum years of experience",
            min_value=0,
            value=st.session_state.min_years,
        )
        required_skills = st.text_input(
            "Required skills (comma-separated)",
            # Joining an empty list already yields "", so no guard is needed.
            value=','.join(st.session_state.required_skills_list),
        )

        st.header("Upload CVs")
        uploaded_files = st.file_uploader(
            "Choose CV files",
            accept_multiple_files=True,
            type=['pdf', 'txt'],
            key="unique_cv_upload",
        )

        submit_button = st.form_submit_button(label="Analyze CVs")

    if submit_button:
        # Persist the submitted values for later reruns of the script.
        st.session_state.jd_text = jd_text
        st.session_state.min_years = min_years
        st.session_state.required_skills_list = [
            skill.strip() for skill in required_skills.split(",") if skill.strip()
        ]
        st.session_state.uploaded_files = uploaded_files

        if st.session_state.uploaded_files and st.session_state.jd_text:
            with st.spinner('Analyzing CVs...'):
                analyzer = CVAnalyzer()
                job_requirements = {
                    "min_years_experience": st.session_state.min_years,
                    "required_skills": st.session_state.required_skills_list,
                }
                st.session_state.results = []  # Reset results for new analysis

                for uploaded_file in st.session_state.uploaded_files:
                    # Reading the file is inside the try as well, so that one
                    # unreadable upload is reported and skipped instead of
                    # aborting the whole run. Catching Exception is
                    # deliberate: this is the per-file UI boundary.
                    try:
                        cv_text = extr.process_file(uploaded_file)
                        candidates = analyzer.extract_cv_info(cv_text)
                        for candidate in candidates:
                            match_scores = analyzer.calculate_match_score(
                                vars(candidate),
                                job_requirements,
                            )
                            st.session_state.results.append({
                                "Name": candidate.name or "Unknown",
                                "Experience (Years)": candidate.years_of_exp or 0,
                                "Skills": ", ".join(candidate.skills) if candidate.skills else "None",
                                "Certifications": ", ".join(candidate.certifications) if candidate.certifications else "None",
                                "Skills Match": f"{match_scores['skills_match']:.2%}",
                                "Experience Match": f"{match_scores['experience_match']:.2%}",
                                "Overall Score": f"{match_scores['overall_score']:.2%}",
                            })
                    except Exception as e:
                        st.error(f"Error processing CV: {str(e)}")

            # Display results
            if st.session_state.results:
                df = pd.DataFrame(st.session_state.results)
                # "Overall Score" holds formatted strings such as "80.00%".
                # Sorting them as text would rank "9.00%" above "80.00%", so
                # order by the numeric value while keeping the display text.
                df = df.sort_values(
                    "Overall Score",
                    ascending=False,
                    key=lambda column: column.str.rstrip("%").astype(float),
                )
                st.dataframe(df)
                st.session_state.analysis_complete = True
            else:
                st.error("No valid results found from CV analysis")
                st.session_state.analysis_complete = False