File size: 16,533 Bytes
1c71f7c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
import streamlit as st
import pandas as pd
import os
import logging
import re
from chromadb import PersistentClient
from sentence_transformers import SentenceTransformer
from langchain_groq import ChatGroq
from rag_utils_updated import extract_text, preprocess_text, get_embeddings, is_image_pdf, assess_cv, extract_job_requirements
import plotly.graph_objects as go

# Configure logging so every record carries a timestamp and its level.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Seed the session-state keys used by this app. A key is written only when it
# is not present yet, so values stored earlier in the session are preserved.
for _state_key, _initial_value in (
    ("job_description", ""),
    ("continue_to_detailed_assessment", False),
    ("requirements", None),
    ("detailed_assessments", {}),  # filename -> assessment result
    ("chromadb_initialized", False),
    ("cvs", {}),  # filename -> {"text": ..., "embedding": ...}
    ("job_description_embedding", None),
    ("assessment_completed", False),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value

# Long-lived resources are kept in session state; each one is constructed
# only when its key is missing.
PERMANENT_DB_PATH = "./cv_db"  # directory handed to the Chroma persistent client

if "collection" not in st.session_state:
    chroma_client = PersistentClient(path=PERMANENT_DB_PATH)
    st.session_state["collection"] = chroma_client.get_or_create_collection("cv_embeddings")

if "embedding_model" not in st.session_state:
    st.session_state["embedding_model"] = SentenceTransformer('all-mpnet-base-v2')

if "groq_client" not in st.session_state:
    # The API key is read from the environment (None when the variable is unset).
    groq_api_key = os.getenv("GROQ_API_KEY")
    st.session_state["groq_client"] = ChatGroq(api_key=groq_api_key)

st.title("CV Assessment and Ranking App")

# Step 1: obtain the job description from the source picked in the radio group.
st.subheader("Enter Job Description")
requirements_source = st.radio("Source:", ("File Upload", "Web Page Link", "Text Input"))

# Stays empty unless the chosen source actually yields text.
job_description_text = ""
if requirements_source == "Text Input":
    job_description_text = st.text_area("Enter Job Requirements", height=200)
elif requirements_source == "Web Page Link":
    # Reading a job description from a URL is not wired up; only a notice is shown.
    st.warning("This function is not available in MVP yet.")
elif requirements_source == "File Upload":
    job_file = st.file_uploader("Upload Job Requirements (PDF/DOCX)", type=["pdf", "docx"])
    if job_file:
        job_description_text = extract_text(job_file)

# The stored description is overwritten on every pass, including with "".
st.session_state.job_description = job_description_text
if job_description_text:
    st.success("Job description uploaded successfully!")

# 2. Upload CVs (Folder Upload)
# Extracts text from every uploaded CV, embeds it, caches the result in
# st.session_state.cvs and stores the embedding in the Chroma collection.
st.subheader("Upload CVs (Folder)")
uploaded_files = st.file_uploader("Choose a folder containing CV files", accept_multiple_files=True)

if uploaded_files and not st.session_state.assessment_completed:
    st.write(f"{len(uploaded_files)} CV(s) uploaded.")

    # The in-memory CV cache is rebuilt from the current upload list on each pass.
    st.session_state.cvs = {}
    cv_embeddings_created = 0

    # Empty the persistent collection once per session (guarded by the
    # chromadb_initialized flag) so earlier entries are not mixed in.
    if not st.session_state.chromadb_initialized:
        try:
            ids_in_collection = st.session_state.collection.get()['ids']
            if ids_in_collection:
                st.session_state.collection.delete(ids=ids_in_collection)
                logger.info("ChromaDB collection cleared.")
            else:
                logger.info("ChromaDB collection is already empty. Skipping deletion.")
        except Exception as e:
            st.error(f"Error clearing ChromaDB collection: {e}")
            st.stop()
        st.session_state.chromadb_initialized = True

    for uploaded_file in uploaded_files:
        filename = uploaded_file.name
        if filename in st.session_state.cvs:
            # A file with the same name was already handled in this pass.
            continue

        # Each file gets up to two attempts before it is reported as failed.
        for attempt in range(2):
            try:
                # Rewind before each reader: the same stream object is handed
                # to is_image_pdf and then to extract_text, and again on a
                # retry. Whether those helpers rewind on their own is not
                # visible here, so the position is reset explicitly.
                uploaded_file.seek(0)
                if is_image_pdf(uploaded_file):
                    st.warning(f"{filename} appears to be an image-based PDF and cannot be processed.")
                    break

                uploaded_file.seek(0)
                text = extract_text(uploaded_file)
                if not text.strip():
                    raise ValueError("No text extracted.")

                preprocessed_text = preprocess_text(text)
                embedding = get_embeddings(preprocessed_text, st.session_state.embedding_model)

                st.session_state.cvs[filename] = {
                    "text": preprocessed_text,
                    "embedding": embedding,
                }
                cv_embeddings_created += 1

                try:
                    # upsert instead of add: the collection is cleared only
                    # once per session, so later passes over the same uploads
                    # submit ids that already exist. upsert overwrites them
                    # rather than depending on how add treats duplicates.
                    st.session_state.collection.upsert(
                        embeddings=[embedding],
                        documents=[preprocessed_text],
                        ids=[filename],
                        metadatas=[{"filename": filename}]
                    )
                    logger.info(f"Embedding for {filename} added to ChromaDB.")
                except Exception as e:
                    st.error(f"Error adding embedding to ChromaDB for {filename}: {e}")
                    st.stop()

                break

            except Exception as e:
                logger.error(f"Text extraction failed for {filename} on attempt {attempt + 1}: {e}")
                if attempt == 1:
                    st.error(f"Failed to process {filename} after multiple attempts.")

    if cv_embeddings_created > 0:
        st.success(f"{cv_embeddings_created} CV embeddings created successfully!")

    # Every upload that did not yield an embedding is counted here, including
    # image-based PDFs and repeated filenames that were skipped above.
    num_errors = len(uploaded_files) - cv_embeddings_created
    if num_errors > 0:
        st.error(f"Error in CV embeddings creation for {num_errors} CV(s).")

    if st.button("Continue Assessment"):
        st.session_state.continue_to_detailed_assessment = True

elif uploaded_files and st.session_state.assessment_completed:
    st.warning("This is an MVP. Please refresh the page before uploading and assessing new files.")

if st.session_state.continue_to_detailed_assessment:
    session = st.session_state
    # Consume the flag immediately so the assessment runs once per button press.
    session.continue_to_detailed_assessment = False
    st.write("Performing detailed assessments...")

    # Requirements are extracted only while none are stored and a job
    # description is available.
    if session.job_description and session.requirements is None:
        extracted = extract_job_requirements(session.job_description, session.groq_client)
        session.requirements = extracted
        if not extracted:
            st.warning("Could not extract job requirements.")
        else:
            with st.expander("Extracted Job Requirements:"):
                for requirement in extracted:
                    st.write(f"- {requirement}")

    # Embed the job description the first time through; abort the run on failure.
    if session.job_description and session.job_description_embedding is None:
        try:
            session.job_description_embedding = get_embeddings(
                session.job_description, session.embedding_model
            )
        except Exception as e:
            st.error(f"Error creating job description embedding: {e}")
            st.stop()

    # Snapshot of the CV names to assess.
    selected_cvs = list(session.cvs)

    # Assess only when no results are stored yet; a failure for one CV is
    # reported and the loop moves on to the next.
    if not session.detailed_assessments:
        session.detailed_assessments = {}
        with st.spinner("Performing detailed assessments..."):
            for filename in selected_cvs:
                if filename not in session.cvs:
                    continue
                cv_text = session.cvs[filename]["text"]
                try:
                    session.detailed_assessments[filename] = assess_cv(
                        cv_text, session.requirements, filename, session.groq_client
                    )
                except Exception as e:
                    st.error(f"Error during detailed assessment of {filename}: {e}")

    # Mark the run as finished and introduce the results section.
    session.assessment_completed = True
    st.success("Detailed assessments complete!")

    st.subheader("Candidates Assessment and Ranking")

    def parse_assessment(raw_response, requirements):
        """Pull the expert assessments, their scores and the recommendation out of an LLM reply.

        Args:
            raw_response: Reply text containing labeled sections such as
                "Technical Lead Assessment:" followed by "Technical Lead Score:".
            requirements: Accepted for call compatibility; not used here.

        Returns:
            A dict with the keys technical_lead, hr_specialist, project_manager,
            final_assessment, recommendation and the four matching "*_score"
            keys. Every value is a string; anything that could not be located
            is "Not Found". Scores are the matched digit strings.

        Any exception raised while matching is printed and the values gathered
        so far are returned.
        """
        flags = re.IGNORECASE | re.DOTALL

        # (result key, labeled "text + score" pattern, looser score-only fallback)
        sections = (
            (
                "technical_lead",
                r"Technical Lead Assessment:\s*(.*?)\s*Technical Lead Score:\s*(\d+)",
                r"Technical Lead Assessment:.*?score(?:s)?\s*(?:of)?\s*(\d+)\s*(?:out\s*of|\/)\s*100",
            ),
            (
                "hr_specialist",
                r"HR Specialist Assessment:\s*(.*?)\s*HR Specialist Score:\s*(\d+)",
                r"HR Specialist Assessment:.*?score(?:s)?\s*(?:of)?\s*(\d+)\s*(?:out\s*of|\/)\s*100",
            ),
            (
                "project_manager",
                r"Project Manager Assessment:\s*(.*?)\s*Project Manager Score:\s*(\d+)",
                r"Project Manager Assessment:.*?score(?:s)?\s*(?:of)?\s*(\d+)\s*(?:out\s*of|\/)\s*100",
            ),
            (
                "final_assessment",
                r"Final Assessment:\s*(.*?)\s*Final Assessment Score:\s*(\d+)",
                r"Final Assessment:.*?(?:Consensus Score|total of|final score).*?(\d+)\s*(?:out of)?\s*100",
            ),
        )

        text_keys = [key for key, _, _ in sections] + ["recommendation"]
        score_keys = [f"{key}_score" for key, _, _ in sections]
        parsed = dict.fromkeys(text_keys + score_keys, "Not Found")

        try:
            # First pass: sections that carry an explicit "<Label> Score:" line.
            for key, labeled_pattern, _ in sections:
                hit = re.search(labeled_pattern, raw_response, flags)
                if hit:
                    parsed[key] = hit.group(1).strip()
                    parsed[f"{key}_score"] = hit.group(2)

            # The recommendation runs from its label to the end of the reply.
            closing = re.search(r"Recommendation:\s*(.*?)$", raw_response, flags)
            if closing:
                parsed["recommendation"] = closing.group(1).strip()

            # Second pass: scores still missing are looked for in free text
            # phrased like "score of 75 out of 100".
            for key, _, loose_pattern in sections:
                score_key = f"{key}_score"
                if parsed[score_key] != "Not Found":
                    continue
                hit = re.search(loose_pattern, raw_response, flags)
                if hit:
                    parsed[score_key] = hit.group(1)

        except Exception as e:
            print(f"Error parsing assessment: {e}")

        return parsed
    
    # Results table and per-candidate detail view.
    if st.session_state.detailed_assessments:
        result_columns = [
            "filename",
            "final_assessment_score", "final_assessment",
            "technical_lead_score", "technical_lead",
            "hr_specialist_score", "hr_specialist",
            "project_manager_score", "project_manager",
            "recommendation",
        ]

        # Collect every parsed row first and build the frame once, instead of
        # concatenating onto an empty frame inside the loop.
        parsed_rows = []
        for filename, assessment in st.session_state.detailed_assessments.items():
            if "error" in assessment:
                st.error(assessment["error"])
            elif "raw_response" in assessment:
                parsed_data = parse_assessment(assessment["raw_response"], st.session_state.requirements)
                parsed_data["filename"] = filename
                parsed_rows.append(parsed_data)
        assessments_df = pd.DataFrame(parsed_rows, columns=result_columns)

        # Rank by final score, best first. Values that are not numeric (the
        # parser's "Not Found" placeholder) are coerced to NaN before sorting.
        assessments_df['final_assessment_score'] = pd.to_numeric(assessments_df['final_assessment_score'], errors='coerce')
        assessments_df = assessments_df.sort_values(by='final_assessment_score', ascending=False)

        st.dataframe(assessments_df)

        def _plottable_score(value):
            """Return ``value`` as an int, or None when it is missing or not numeric."""
            numeric = pd.to_numeric(value, errors="coerce")
            return None if pd.isna(numeric) else int(numeric)

        # Bar label -> frame column holding that expert's score.
        chart_series = (
            ("Technical Lead", "technical_lead_score"),
            ("HR Specialist", "hr_specialist_score"),
            ("Project Manager", "project_manager_score"),
            ("Final Assessment", "final_assessment_score"),
        )

        st.subheader("Detailed Assessment Results")
        # One chart plus a set of collapsible panels per candidate, in ranked order.
        for index, row in assessments_df.iterrows():
            st.write(f"**Filename:** {row['filename']}")

            # A score can be "Not Found" or NaN when parsing failed. Calling
            # int() on those raises ValueError, so a missing score is drawn as
            # a zero-height bar labeled "N/A" instead.
            plotted = [(label, _plottable_score(row[column])) for label, column in chart_series]

            fig = go.Figure(data=[go.Bar(
                x=[label for label, _ in plotted],
                y=[0 if score is None else score for _, score in plotted],
                text=["N/A" if score is None else score for _, score in plotted],
                textposition='auto',
            )])
            fig.update_layout(yaxis_range=[0, 100])

            # Narrow column for the chart, wide column for the written assessments.
            col1, col2 = st.columns([1, 3])

            with col1:
                st.plotly_chart(fig, use_container_width=True)

            with col2:
                with st.expander("Technical Lead Assessment"):
                    st.write(f"{row['technical_lead']}")
                    st.write(f"**Technical Lead Score:** {row['technical_lead_score']}")

                with st.expander("HR Specialist Assessment"):
                    st.write(f"{row['hr_specialist']}")
                    st.write(f"**HR Specialist Score:** {row['hr_specialist_score']}")

                with st.expander("Project Manager Assessment"):
                    st.write(f"{row['project_manager']}")
                    st.write(f"**Project Manager Score:** {row['project_manager_score']}")

                with st.expander("Final Assessment"):
                    st.write(f"{row['final_assessment']}")
                    st.write(f"**Final Assessment Score:** {row['final_assessment_score']}")

                with st.expander("Recommendation"):
                    st.write(f"{row['recommendation']}")

            st.write("---")

    else:
        st.write("No detailed assessments were performed.")