"""CV Assessment and Ranking App.

Streamlit app that:
  1. Accepts a job description (file upload or text input) and extracts
     structured requirements from it via an LLM (Groq).
  2. Accepts uploaded CVs, embeds them with sentence-transformers, and stores
     the embeddings in a per-session ChromaDB collection.
  3. Has the LLM assess each CV against the requirements and renders ranked,
     per-expert results with Plotly charts.
"""

import logging
import os
import re
import shutil
import uuid

import pandas as pd
import plotly.graph_objects as go
import streamlit as st
from chromadb import PersistentClient
from dotenv import load_dotenv
from langchain_groq import ChatGroq
from sentence_transformers import SentenceTransformer

from rag_utils_updated import (
    assess_cv,
    extract_job_requirements,
    extract_text,
    get_embeddings,
    is_image_pdf,
    preprocess_text,
)

# Logging setup
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)

load_dotenv()

# Fail loudly (but non-fatally) if required configuration is absent.
if os.environ.get("LLM_PROMPT") is None:
    st.error("LLM_PROMPT is missing. Check your .env file!")
if os.environ.get("ADMIN_PASSWORD") is None:
    st.error("ADMIN_PASSWORD is missing. Check your .env file!")

st.title("CV Assessment and Ranking App")

# Generate a unique session ID for temporary sessions
if "session_id" not in st.session_state:
    st.session_state.session_id = str(uuid.uuid4())[:8]  # Short unique session ID

# Initialize session state variables: text-like keys default to None,
# container-like keys ("detailed_assessments", "cvs") default to {}.
for key in ["job_description", "requirements", "detailed_assessments", "cvs", "job_description_embedding"]:
    if key not in st.session_state:
        st.session_state[key] = (
            None if key in ["job_description", "requirements", "job_description_embedding"] else {}
        )
if "assessment_completed" not in st.session_state:
    st.session_state.assessment_completed = False
if "admin_logged_in" not in st.session_state:
    st.session_state.admin_logged_in = False

# Persistent storage for embeddings — one ChromaDB collection per session.
PERMANENT_DB_PATH = "./cv_db"
db_client = PersistentClient(path=PERMANENT_DB_PATH)
st.session_state.collection = db_client.get_or_create_collection(
    f"cv_embeddings_{st.session_state.session_id}"
)

# Heavy resources are created once per session and cached in session_state.
if "embedding_model" not in st.session_state:
    st.session_state.embedding_model = SentenceTransformer('all-mpnet-base-v2')
if "groq_client" not in st.session_state:
    st.session_state.groq_client = ChatGroq(api_key=os.environ.get("GROQ_API_KEY"))


def clear_chroma_db():
    """Clears only the current session's embeddings."""
    try:
        # Delete only this session's embeddings, keyed by session_id metadata.
        st.session_state.collection.delete(where={"session_id": st.session_state.session_id})
        st.info("Session-specific embeddings cleared. Starting fresh!")
    except Exception as e:
        st.error(f"Error clearing session embeddings: {e}")
        st.stop()


# Ensure the session clears its own embeddings on startup
clear_chroma_db()


def clear_all_sessions_data():
    """Admin function to delete old session embeddings and reclaim disk space
    while keeping active sessions.

    Deletes every collection except the current session's, then removes and
    recreates the on-disk database folder to force compaction.
    """
    try:
        global db_client
        existing_collections = db_client.list_collections()
        # Identify active sessions (all currently running session IDs).
        active_sessions = [f"cv_embeddings_{st.session_state.session_id}"]
        # Delete all collections except currently active ones.
        # NOTE(review): chromadb < 0.6 returns Collection objects (not name
        # strings) from list_collections(), which would make this membership
        # test always False — confirm the installed chromadb version.
        for collection_name in existing_collections:
            if collection_name not in active_sessions:
                db_client.delete_collection(collection_name)  # Delete only old session data
        # Force database compaction to free up space: drop the client, wipe
        # the folder, and reinitialize an empty database.
        db_client = None  # Close database connection
        shutil.rmtree(PERMANENT_DB_PATH)  # Delete database folder
        os.makedirs(PERMANENT_DB_PATH, exist_ok=True)  # Recreate empty database
        db_client = PersistentClient(path=PERMANENT_DB_PATH)  # Reinitialize database
        # Recreate this session's collection so st.session_state.collection is
        # not left as a stale handle into the deleted database.
        st.session_state.collection = db_client.get_or_create_collection(
            f"cv_embeddings_{st.session_state.session_id}"
        )
        st.success("Old session embeddings deleted. Active sessions retained. Database size optimized.")
    except Exception as e:
        st.error(f"Error deleting old session data: {e}")


# Admin Panel for Clearing Old Data
with st.sidebar:
    st.subheader("Admin Login")
    admin_user = st.text_input("Username", key="admin_user")
    admin_pass = st.text_input("Password", type="password", key="admin_pass")
    if st.button("Login as Admin"):
        if admin_user == "admin" and admin_pass == os.environ.get("ADMIN_PASSWORD"):
            st.session_state.admin_logged_in = True
            st.success("Admin login successful!")
        else:
            st.error("Invalid credentials. Access denied.")
    if st.session_state.admin_logged_in:
        st.subheader("Admin Actions")
        if st.button("Clear All Stored Embeddings"):
            clear_all_sessions_data()


def process_cv(uploaded_file):
    """Processes a single CV file: extracts text, preprocesses, and stores
    embeddings with a session ID.

    Returns a dict with keys "text", "embedding", "session_filename", or None
    when the file is an image-based PDF or processing fails.
    """
    filename = uploaded_file.name
    session_filename = f"{st.session_state.session_id}_{filename}"  # Unique per session
    try:
        if is_image_pdf(uploaded_file):
            st.warning(f"{filename} appears to be an image-based PDF and cannot be processed.")
            return None
        text = extract_text(uploaded_file)
        preprocessed_text = preprocess_text(text)
        embedding = get_embeddings(preprocessed_text, st.session_state.embedding_model)
        st.session_state.collection.add(
            embeddings=[embedding],
            documents=[preprocessed_text],
            ids=[session_filename],  # Store session-unique ID
            metadatas=[{"session_id": st.session_state.session_id, "filename": filename}],
        )
        return {"text": preprocessed_text, "embedding": embedding, "session_filename": session_filename}
    except Exception as e:
        st.error(f"Failed to process {filename}: {e}")
        return None


def parse_assessment(raw_response, requirements):
    """Parses the LLM's assessment with robust error handling.

    Args:
        raw_response: Raw LLM output containing labelled sections such as
            "Technical Lead Assessment: ... Technical Lead Score: NN".
        requirements: Extracted job requirements (unused here; kept for
            interface compatibility with callers).

    Returns:
        Dict mapping each section/score key to the extracted string, with
        "Not Found" for anything that could not be parsed.
    """
    matches = {
        "technical_lead": "Not Found",
        "hr_specialist": "Not Found",
        "project_manager": "Not Found",
        "final_assessment": "Not Found",
        "recommendation": "Not Found",
        "technical_lead_score": "Not Found",
        "hr_specialist_score": "Not Found",
        "project_manager_score": "Not Found",
        "final_assessment_score": "Not Found",
    }
    # (result key, section label, score label) — note the final section's
    # header is "Final Assessment:", not "Final Assessment Assessment:".
    sections = [
        ("technical_lead", "Technical Lead Assessment", "Technical Lead Score"),
        ("hr_specialist", "HR Specialist Assessment", "HR Specialist Score"),
        ("project_manager", "Project Manager Assessment", "Project Manager Score"),
        ("final_assessment", "Final Assessment", "Final Assessment Score"),
    ]
    try:
        for key, section_label, score_label in sections:
            match = re.search(
                rf"{section_label}:\s*(.*?)\s*{score_label}:\s*(\d+)",
                raw_response,
                re.IGNORECASE | re.DOTALL,
            )
            if match:
                matches[key] = match.group(1).strip()
                matches[f"{key}_score"] = match.group(2)
        recommendation_match = re.search(
            r"Recommendation:\s*(.*?)$", raw_response, re.IGNORECASE | re.DOTALL
        )
        if recommendation_match:
            matches["recommendation"] = recommendation_match.group(1).strip()
    except Exception as e:
        # Use the module logger rather than print so failures reach the log.
        logger.error("Error parsing assessment: %s", e)
    return matches


def _score_as_int(value):
    """Best-effort int conversion; unparsed scores ("Not Found") become 0."""
    try:
        return int(value)
    except (TypeError, ValueError):
        return 0


# 1. Input Job Description
st.subheader("Enter Job Description")
requirements_source = st.radio("Source:", ("File Upload", "Web Page Link", "Text Input"))
if requirements_source == "File Upload":
    uploaded_file = st.file_uploader("Upload Job Requirements (PDF/DOCX)", type=["pdf", "docx"])
    if uploaded_file:
        st.session_state.job_description = extract_text(uploaded_file)
elif requirements_source == "Text Input":
    st.session_state.job_description = st.text_area("Enter Job Requirements", height=200)
# NOTE(review): the "Web Page Link" option has no handler — selecting it
# leaves job_description unchanged. Confirm whether it should be implemented.

if st.session_state.job_description:
    st.success("Job description uploaded successfully!")
    if st.session_state.job_description_embedding is None:
        st.session_state.job_description_embedding = get_embeddings(
            st.session_state.job_description, st.session_state.embedding_model
        )
    if not st.session_state.requirements:
        st.session_state.requirements = extract_job_requirements(
            st.session_state.job_description, st.session_state.groq_client
        )
    if st.session_state.requirements:
        with st.expander("Extracted Job Requirements:"):
            for req in st.session_state.requirements:
                st.write(f"- {req}")

# 2. Upload CVs
st.subheader("Upload CVs (Folder)")
uploaded_files = st.file_uploader("Choose CV files", accept_multiple_files=True)
if uploaded_files and not st.session_state.assessment_completed:
    with st.spinner("Processing uploaded CVs, please wait..."):
        st.write(f"{len(uploaded_files)} CV(s) uploaded.")
        st.session_state.cvs = {}
        for uploaded_file in uploaded_files:
            result = process_cv(uploaded_file)
            if result:
                st.session_state.cvs[result["session_filename"]] = result
        st.success("CV embeddings created successfully!")
        st.session_state.assessment_completed = True

# Perform detailed assessments automatically
if st.session_state.assessment_completed:
    st.write("Performing detailed assessments...")
    detailed_assessments = st.session_state.detailed_assessments  # Store reference for efficiency
    if not detailed_assessments:
        with st.spinner("Assessing CVs..."):
            for filename, cv_data in st.session_state.cvs.items():
                try:
                    assessment = assess_cv(
                        cv_data["text"],
                        st.session_state.requirements,
                        filename,
                        st.session_state.groq_client,
                    )
                    detailed_assessments[filename] = assessment
                except Exception as e:
                    st.error(f"Error assessing {filename}: {e}")
        st.success("Detailed assessments complete!")

    st.subheader("Candidates Assessment and Ranking")
    assessments_df = pd.DataFrame(
        [
            {**parse_assessment(a["raw_response"], st.session_state.requirements), "filename": f}
            for f, a in st.session_state.detailed_assessments.items()
        ]
    )
    # Scores are stored as strings ("85" or "Not Found"); a plain string sort
    # ranks "9" above "10", so sort on a numeric view of the column instead.
    assessments_df = assessments_df.sort_values(
        by="final_assessment_score",
        ascending=False,
        key=lambda col: pd.to_numeric(col, errors="coerce"),
    )
    st.dataframe(assessments_df)

    st.subheader("Detailed Assessment Results")
    # Iterate through the DataFrame rows to display the UI for each assessment.
    for index, row in assessments_df.iterrows():
        st.write(f"**Filename:** {row['filename']}")
        # _score_as_int keeps the page rendering even when a score failed to
        # parse (the original int(...) raised ValueError on "Not Found").
        scores = {
            "Technical Lead": _score_as_int(row["technical_lead_score"]),
            "HR Specialist": _score_as_int(row["hr_specialist_score"]),
            "Project Manager": _score_as_int(row["project_manager_score"]),
            "Final Assessment": _score_as_int(row["final_assessment_score"]),
        }
        scores_df = pd.DataFrame(list(scores.items()), columns=["Expert", "Score"])

        # Create Plotly bar chart with annotations
        fig = go.Figure(data=[go.Bar(
            x=scores_df["Expert"],
            y=scores_df["Score"],
            text=scores_df["Score"],
            textposition='auto',
        )])
        fig.update_layout(yaxis_range=[0, 100])

        # Create columns layout
        col1, col2 = st.columns([1, 3])

        # Display bar chart in the first column with a unique key
        with col1:
            st.plotly_chart(fig, use_container_width=True, key=f"chart_{index}")

        # Display collapsed panels in the second column
        with col2:
            with st.expander("Technical Lead Assessment"):
                st.write(f"{row['technical_lead']}")
                st.write(f"**Technical Lead Score:** {row['technical_lead_score']}")
            with st.expander("HR Specialist Assessment"):
                st.write(f"{row['hr_specialist']}")
                st.write(f"**HR Specialist Score:** {row['hr_specialist_score']}")
            with st.expander("Project Manager Assessment"):
                st.write(f"{row['project_manager']}")
                st.write(f"**Project Manager Score:** {row['project_manager_score']}")
            with st.expander("Final Assessment"):
                st.write(f"{row['final_assessment']}")
                st.write(f"**Final Assessment Score:** {row['final_assessment_score']}")
            with st.expander("Recommendation"):
                st.write(f"{row['recommendation']}")
        st.write("---")