Spaces:
Sleeping
Sleeping
import hmac
import logging
import os
import re
import shutil
import uuid

import pandas as pd
import plotly.graph_objects as go
import streamlit as st
from chromadb import PersistentClient
from dotenv import load_dotenv
from langchain_groq import ChatGroq
from sentence_transformers import SentenceTransformer

from rag_utils_updated import (
    assess_cv,
    extract_job_requirements,
    extract_text,
    get_embeddings,
    is_image_pdf,
    preprocess_text,
)
# Logging setup
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)

# Pull configuration from a local .env file, then report any missing settings in the UI.
load_dotenv()
for _required_var in ("LLM_PROMPT", "ADMIN_PASSWORD"):
    if os.environ.get(_required_var) is None:
        st.error(f"{_required_var} is missing. Check your .env file!")

st.title("CV Assessment and Ranking App")

# Generate a unique session ID for temporary sessions
if "session_id" not in st.session_state:
    st.session_state.session_id = str(uuid.uuid4())[:8]  # Short unique session ID

# Initialize session state variables; existing values are never overwritten.
_session_defaults = {
    "job_description": None,
    "requirements": None,
    "detailed_assessments": {},
    "cvs": {},
    "job_description_embedding": None,
    "assessment_completed": False,
    "admin_logged_in": False,
}
for _state_key, _default_value in _session_defaults.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _default_value

# Persistent Storage for Embeddings: one collection per session, named after the session ID.
PERMANENT_DB_PATH = "./cv_db"
db_client = PersistentClient(path=PERMANENT_DB_PATH)
_session_collection_name = f"cv_embeddings_{st.session_state.session_id}"
st.session_state.collection = db_client.get_or_create_collection(_session_collection_name)

# The embedding model and the LLM client are created once and then reused from session state.
if "embedding_model" not in st.session_state:
    st.session_state.embedding_model = SentenceTransformer('all-mpnet-base-v2')
if "groq_client" not in st.session_state:
    st.session_state.groq_client = ChatGroq(api_key=os.environ.get("GROQ_API_KEY"))
def clear_chroma_db():
    """Delete the embeddings tagged with the current session ID.

    Issues a metadata-filtered delete against the session's collection and
    shows an info banner on success. If anything goes wrong, the error is
    shown in the UI and the script run is halted via ``st.stop()``.
    """
    try:
        # Only rows whose metadata carries this session's ID are removed.
        session_filter = {"session_id": st.session_state.session_id}
        st.session_state.collection.delete(where=session_filter)
        st.info("Session-specific embeddings cleared. Starting fresh!")
    except Exception as exc:
        st.error(f"Error clearing session embeddings: {exc}")
        st.stop()
# Clear this session's embeddings once, when the session starts.
# The script body is executed again on every widget interaction, so an
# unguarded call here would delete the embeddings that process_cv() stored
# earlier in the same session (and re-show the info banner each time).
if "startup_embeddings_cleared" not in st.session_state:
    clear_chroma_db()
    st.session_state.startup_embeddings_cleared = True
| import shutil | |
def clear_all_sessions_data():
    """Admin action: delete every stored collection except the current session's.

    The collection named ``cv_embeddings_<session_id>`` for the session that
    triggers this action is kept; all other collections known to ``db_client``
    are deleted.

    NOTE(review): only the *calling* session is treated as active. Collections
    that belong to other sessions which are still open are deleted as well,
    because this script has no registry of running sessions.

    The database directory itself is deliberately left in place. Removing it
    (as an earlier version did) also destroyed the collection that is supposed
    to be retained and left ``st.session_state.collection`` pointing at a
    database that no longer existed. Reclaiming disk space should be done with
    the database's own maintenance tooling while the app is offline.

    Errors are reported in the UI; nothing is raised to the caller.
    """
    active_sessions = {f"cv_embeddings_{st.session_state.session_id}"}
    try:
        for entry in db_client.list_collections():
            # Depending on the chromadb version, entries are Collection
            # objects (with a ``name`` attribute) or plain name strings.
            collection_name = getattr(entry, "name", entry)
            if collection_name in active_sessions:
                continue
            db_client.delete_collection(collection_name)  # Delete only old session data
    except Exception as e:
        st.error(f"Error deleting old session data: {e}")
    else:
        st.success("Old session embeddings deleted. Active sessions retained.")
# Admin Panel for Clearing Old Data
with st.sidebar:
    st.subheader("Admin Login")
    admin_user = st.text_input("Username", key="admin_user")
    admin_pass = st.text_input("Password", type="password", key="admin_pass")
    if st.button("Login as Admin"):
        expected_password = os.environ.get("ADMIN_PASSWORD")
        # Fail closed when no (or an empty) admin password is configured, and
        # compare in constant time so the check does not leak how many leading
        # characters of the submitted password were correct.
        credentials_ok = (
            admin_user == "admin"
            and bool(expected_password)
            and hmac.compare_digest(
                admin_pass.encode("utf-8"), expected_password.encode("utf-8")
            )
        )
        if credentials_ok:
            st.session_state.admin_logged_in = True
            st.success("Admin login successful!")
        else:
            st.error("Invalid credentials. Access denied.")

    # Admin-only actions stay visible for the rest of the session once logged in.
    if st.session_state.admin_logged_in:
        st.subheader("Admin Actions")
        if st.button("Clear All Stored Embeddings"):
            clear_all_sessions_data()
def process_cv(uploaded_file):
    """Turn one uploaded CV into stored text plus embedding.

    The file's text is extracted, preprocessed and embedded with the session's
    embedding model, then added to the session's collection under an ID that
    is prefixed with the session ID.

    Args:
        uploaded_file: Uploaded file object; its ``name`` attribute is used
            for messages and for building the stored ID.

    Returns:
        A dict with the keys ``"text"``, ``"embedding"`` and
        ``"session_filename"``, or ``None`` when the file is detected as an
        image-based PDF or when any processing step raises.
    """
    filename = uploaded_file.name
    session_filename = f"{st.session_state.session_id}_{filename}"  # Unique per session
    try:
        # Image-only PDFs are rejected up front.
        if is_image_pdf(uploaded_file):
            st.warning(f"{filename} appears to be an image-based PDF and cannot be processed.")
            return None

        preprocessed_text = preprocess_text(extract_text(uploaded_file))
        embedding = get_embeddings(preprocessed_text, st.session_state.embedding_model)

        # Metadata records the owning session so its rows can be filtered later.
        record_metadata = {"session_id": st.session_state.session_id, "filename": filename}
        st.session_state.collection.add(
            embeddings=[embedding],
            documents=[preprocessed_text],
            ids=[session_filename],
            metadatas=[record_metadata],
        )
        return {"text": preprocessed_text, "embedding": embedding, "session_filename": session_filename}
    except Exception as exc:
        st.error(f"Failed to process {filename}: {exc}")
        return None
# (result key, heading label, score label) for every scored section of the reply.
_ASSESSMENT_SECTIONS = (
    ("technical_lead", "Technical Lead Assessment", "Technical Lead Score"),
    ("hr_specialist", "HR Specialist Assessment", "HR Specialist Score"),
    ("project_manager", "Project Manager Assessment", "Project Manager Score"),
    ("final_assessment", "Final Assessment", "Final Assessment Score"),
)
# Optional Markdown bold marker that may hug a label, e.g. "**Final Assessment Score:** 80".
_MD_EMPHASIS = r"(?:\*\*|__)?"
_NOT_FOUND = "Not Found"


def parse_assessment(raw_response, requirements):
    """Extract the per-expert assessments, scores and recommendation from an LLM reply.

    Each scored section is expected as ``"<Heading>: <text> <Score label>: <digits>"``
    (case-insensitive, text may span lines). Labels wrapped in Markdown bold
    (``**...**`` / ``__...__``) are accepted as well.

    Args:
        raw_response: Full text returned by the LLM. Anything that is not a
            ``str`` is treated as an unparseable reply.
        requirements: Unused; kept so existing callers keep working.

    Returns:
        dict with the keys ``technical_lead``, ``hr_specialist``,
        ``project_manager``, ``final_assessment``, ``recommendation`` and the
        four matching ``*_score`` keys. Scores are digit strings. Every field
        that could not be located holds the string ``"Not Found"``.
    """
    # Key order is kept stable because callers build table columns from it.
    matches = {key: _NOT_FOUND for key, _, _ in _ASSESSMENT_SECTIONS}
    matches["recommendation"] = _NOT_FOUND
    matches.update({f"{key}_score": _NOT_FOUND for key, _, _ in _ASSESSMENT_SECTIONS})

    if not isinstance(raw_response, str):
        logging.getLogger(__name__).warning(
            "Error parsing assessment: expected text, got %s", type(raw_response).__name__
        )
        return matches

    flags = re.IGNORECASE | re.DOTALL
    for key, heading, score_label in _ASSESSMENT_SECTIONS:
        pattern = (
            rf"{heading}:{_MD_EMPHASIS}\s*(.*?)\s*"
            rf"{_MD_EMPHASIS}{score_label}:{_MD_EMPHASIS}\s*(\d+)"
        )
        found = re.search(pattern, raw_response, flags)
        if found:
            matches[key] = found.group(1).strip()
            matches[f"{key}_score"] = found.group(2)

    # The recommendation runs from its label to the end of the reply.
    found = re.search(rf"Recommendation:{_MD_EMPHASIS}\s*(.*?)$", raw_response, flags)
    if found:
        matches["recommendation"] = found.group(1).strip()
    return matches
# 1. Input Job Description
st.subheader("Enter Job Description")
requirements_source = st.radio("Source:", ("File Upload", "Web Page Link", "Text Input"))
if requirements_source == "File Upload":
    uploaded_file = st.file_uploader("Upload Job Requirements (PDF/DOCX)", type=["pdf", "docx"])
    if uploaded_file:
        st.session_state.job_description = extract_text(uploaded_file)
elif requirements_source == "Text Input":
    st.session_state.job_description = st.text_area("Enter Job Requirements", height=200)
else:
    # "Web Page Link" is offered in the selector but has no loader; say so
    # instead of silently doing nothing.
    st.info("Web page links are not supported yet. Please upload a file or paste the text.")

if st.session_state.job_description:
    st.success("Job description uploaded successfully!")

    # The embedding and the extracted requirements are derived from the
    # description. Drop them whenever the description text changes so they
    # are recomputed instead of describing an earlier job description.
    if st.session_state.get("job_description_cached_for") != st.session_state.job_description:
        st.session_state.job_description_embedding = None
        st.session_state.requirements = None
        st.session_state.job_description_cached_for = st.session_state.job_description

    if st.session_state.job_description_embedding is None:
        st.session_state.job_description_embedding = get_embeddings(
            st.session_state.job_description, st.session_state.embedding_model
        )
    if not st.session_state.requirements:
        st.session_state.requirements = extract_job_requirements(
            st.session_state.job_description, st.session_state.groq_client
        )
    if st.session_state.requirements:
        with st.expander("Extracted Job Requirements:"):
            for req in st.session_state.requirements:
                st.write(f"- {req}")
# 2. Upload CVs
st.subheader("Upload CVs (Folder)")
uploaded_files = st.file_uploader("Choose CV files", accept_multiple_files=True)
if uploaded_files and not st.session_state.assessment_completed:
    if not st.session_state.job_description:
        # Without a job description the assessment step would run against
        # missing requirements and its results would then be kept for the
        # rest of the session.
        st.warning("Please provide a job description before the CVs can be assessed.")
    else:
        with st.spinner("Processing uploaded CVs, please wait..."):
            st.write(f"{len(uploaded_files)} CV(s) uploaded.")
            processed_cvs = {}
            for uploaded_file in uploaded_files:
                result = process_cv(uploaded_file)
                if result:
                    processed_cvs[result["session_filename"]] = result
            st.session_state.cvs = processed_cvs

        # Only move on to the assessment stage when at least one CV was usable.
        if processed_cvs:
            st.success("CV embeddings created successfully!")
            st.session_state.assessment_completed = True
        else:
            st.error("None of the uploaded CVs could be processed.")
# Perform detailed assessments automatically
if st.session_state.assessment_completed:
    st.write("Performing detailed assessments...")
    # Same dict object as the one in session state, so results written here persist.
    detailed_assessments = st.session_state.detailed_assessments
    if not detailed_assessments:
        with st.spinner("Assessing CVs..."):
            for filename, cv_data in st.session_state.cvs.items():
                try:
                    detailed_assessments[filename] = assess_cv(
                        cv_data["text"],
                        st.session_state.requirements,
                        filename,
                        st.session_state.groq_client,
                    )
                except Exception as e:
                    st.error(f"Error assessing {filename}: {e}")
        if detailed_assessments:
            st.success("Detailed assessments complete!")

    st.subheader("Candidates Assessment and Ranking")
    assessments_df = pd.DataFrame(
        [
            {**parse_assessment(a["raw_response"], st.session_state.requirements), "filename": f}
            for f, a in detailed_assessments.items()
        ]
    )

    if assessments_df.empty:
        # An empty frame has no score column to sort on.
        st.warning("No assessments are available to rank.")
    else:
        # Chart label -> score column produced by parse_assessment().
        score_columns = {
            "Technical Lead": "technical_lead_score",
            "HR Specialist": "hr_specialist_score",
            "Project Manager": "project_manager_score",
            "Final Assessment": "final_assessment_score",
        }
        # Scores arrive as strings (digits or "Not Found"). Rank on numeric
        # values so that "100" sorts above "85"; unparseable scores go last.
        numeric_scores = assessments_df[list(score_columns.values())].apply(
            pd.to_numeric, errors="coerce"
        )
        ranked_index = (
            numeric_scores["final_assessment_score"]
            .sort_values(ascending=False, na_position="last", kind="stable")
            .index
        )
        assessments_df = assessments_df.loc[ranked_index]
        st.dataframe(assessments_df)

        st.subheader("Detailed Assessment Results")
        # Iterate through the DataFrame rows to display the UI for each assessment
        for index, row in assessments_df.iterrows():
            st.write(f"**Filename:** {row['filename']}")
            # Missing scores are charted as 0 instead of crashing on int("Not Found").
            scores = {
                label: int(numeric_scores.at[index, column])
                if pd.notna(numeric_scores.at[index, column])
                else 0
                for label, column in score_columns.items()
            }
            scores_df = pd.DataFrame(list(scores.items()), columns=["Expert", "Score"])

            # Create Plotly bar chart with annotations
            fig = go.Figure(data=[go.Bar(
                x=scores_df["Expert"],
                y=scores_df["Score"],
                text=scores_df["Score"],
                textposition='auto',
            )])
            fig.update_layout(yaxis_range=[0, 100])

            # Create columns layout
            col1, col2 = st.columns([1, 3])
            # Display bar chart in the first column with a unique key
            with col1:
                st.plotly_chart(fig, use_container_width=True, key=f"chart_{index}")
            # Display collapsed panels in the second column
            with col2:
                with st.expander("Technical Lead Assessment"):
                    st.write(f"{row['technical_lead']}")
                    st.write(f"**Technical Lead Score:** {row['technical_lead_score']}")
                with st.expander("HR Specialist Assessment"):
                    st.write(f"{row['hr_specialist']}")
                    st.write(f"**HR Specialist Score:** {row['hr_specialist_score']}")
                with st.expander("Project Manager Assessment"):
                    st.write(f"{row['project_manager']}")
                    st.write(f"**Project Manager Score:** {row['project_manager_score']}")
                with st.expander("Final Assessment"):
                    st.write(f"{row['final_assessment']}")
                    st.write(f"**Final Assessment Score:** {row['final_assessment_score']}")
                with st.expander("Recommendation"):
                    st.write(f"{row['recommendation']}")
            st.write("---")