File size: 17,191 Bytes
d915eee
 
58b9e2b
 
 
d68ff83
d915eee
58b9e2b
ad5e7c5
 
5fe15a6
 
58b9e2b
d915eee
 
 
58b9e2b
 
d915eee
 
 
58b9e2b
 
 
 
 
 
 
 
d915eee
58b9e2b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ad5e7c5
58b9e2b
 
 
 
 
5fe15a6
58b9e2b
 
d915eee
5fe15a6
 
 
 
 
 
 
 
 
 
 
 
d915eee
58b9e2b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d915eee
58b9e2b
 
 
 
 
 
 
 
 
 
5fe15a6
 
 
d915eee
 
5fe15a6
 
d915eee
5fe15a6
58b9e2b
 
 
 
 
 
5fe15a6
 
 
08231ff
ad5e7c5
58b9e2b
 
5fe15a6
58b9e2b
 
 
 
 
 
 
 
5fe15a6
58b9e2b
5fe15a6
 
 
 
 
 
 
58b9e2b
 
5fe15a6
 
 
 
58b9e2b
d915eee
 
58b9e2b
 
 
 
 
 
 
 
d915eee
58b9e2b
 
 
 
 
 
 
 
5fe15a6
 
d915eee
58b9e2b
5fe15a6
58b9e2b
d915eee
58b9e2b
 
 
 
 
 
 
 
 
5fe15a6
 
 
58b9e2b
 
 
 
5fe15a6
 
58b9e2b
 
 
 
 
 
 
 
 
 
 
 
d915eee
58b9e2b
 
 
 
5fe15a6
 
 
 
 
 
58b9e2b
 
 
 
 
 
 
 
 
5fe15a6
58b9e2b
 
5fe15a6
 
 
58b9e2b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d915eee
58b9e2b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d915eee
58b9e2b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ad5e7c5
58b9e2b
 
 
 
 
2b674bb
58b9e2b
 
 
 
 
 
 
ad5e7c5
58b9e2b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
# src/streamlit_app.py

import io
import os
from typing import Optional, List # Added Optional and List

import docx2txt
import fitz
import pandas as pd
import streamlit as st
from dotenv import load_dotenv
from groq import Groq
from pydantic import BaseModel, Field, ValidationError, validator # Added ValidationError

# --- 0. FIX: SET PAGE CONFIG AS THE FIRST STREAMLIT COMMAND ---
st.set_page_config(layout="wide", page_title="Quantum Scrutiny Platform | Groq-Powered")

# --- 1. CONFIGURATION ---

# Load variables from a local .env file, if one exists, so the lookups below
# also work outside Hugging Face. Variables already present in the environment
# (e.g. Hugging Face Secrets) are not overridden by load_dotenv's defaults.
load_dotenv()

# FIX for Hugging Face Deployment: Read the key from the environment/Secrets.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

# Admin password: taken from the ADMIN_PASSWORD environment variable when set;
# falls back to the original hard-coded value so existing deployments keep working.
ADMIN_PASSWORD = os.getenv("ADMIN_PASSWORD", "admin")

# Initialize Groq Client; without a usable client the app cannot do anything,
# so report the problem and halt the script run.
if GROQ_API_KEY:
    try:
        groq_client = Groq(api_key=GROQ_API_KEY)
    except Exception as e:
        st.error(f"Error initializing Groq Client: {e}")
        st.stop()
else:
    st.error("GROQ_API_KEY not found. Please ensure the key is set as a Secret in Hugging Face or in the local .env file.")
    st.stop()

# Initialize Session State (only on the first run of a session, so reruns keep
# the login flag and the accumulated results).
if 'is_admin_logged_in' not in st.session_state:
    st.session_state.is_admin_logged_in = False
if 'analyzed_data' not in st.session_state:
    # Column names must match the keys written by append_analysis_to_dataframe.
    initial_cols = [
        'Name', 'Job Role', 'Resume Score (100)', 'Email', 'Phone', 'Shortlisted',
        'Experience Summary', 'Education Summary', 'Communication Rating (1-10)',
        'Skills/Technologies', 'Certifications', 'ABA Skills (1-10)',
        'RBT/BCBA Cert', 'Autism-Care Exp (1-10)'
    ]
    st.session_state.analyzed_data = pd.DataFrame(columns=initial_cols)


# --- 2. DATA STRUCTURE FOR GROQ OUTPUT (Pydantic Schema) ---

class ResumeAnalysis(BaseModel):
    """Pydantic model for structured resume data extraction."""
    name: str = Field(description="Full name of the candidate.")
    email: str = Field(description="Professional email address.")
    phone: str = Field(description="Primary phone number.")
    certifications: List[str] = Field(description="List of professional certifications.")
    experience_summary: str = Field(description="A concise summary of the candidate's professional experience.")
    education_summary: str = Field(description="A concise summary of the candidate's highest education.")

    # Score fields are kept as strings because the model may answer with a bare
    # number ("8"), a number plus commentary, or "N/A". Bare JSON numbers are
    # converted to strings by coerce_numbers_to_str below, so a reply of 8
    # instead of "8" no longer depends on prompt wording to pass validation.
    communication_skills: str = Field(description="A score as a STRING (e.g., '8') or description of communication skills.")
    technical_skills: List[str] = Field(description="List of technical skills/technologies mentioned.")

    # These fields can come back as null, so they are Optional[str]; when the
    # key is missing entirely the default "N/A" applies.
    aba_therapy_skills: Optional[str] = Field(default="N/A", description="Specific score as a STRING (e.g., '7'). Use 'N/A' if not applicable.")
    rbt_bcba_certification: Optional[str] = Field(default="N/A", description="Indicate 'Yes' or 'No'. Use 'N/A' if not applicable.")
    autism_care_experience_score: Optional[str] = Field(default="N/A", description="A score as a STRING (e.g., '9'). Use 'N/A' if not applicable.")

    @validator(
        "communication_skills",
        "aba_therapy_skills",
        "rbt_bcba_certification",
        "autism_care_experience_score",
        pre=True,
    )
    def coerce_numbers_to_str(cls, value):
        """Convert a bare int/float into its string form before type validation.

        Any other value (including None and existing strings) is passed through
        unchanged so the normal field validation still applies to it.
        """
        # bool is a subclass of int; leave it alone so true/false is not
        # silently accepted as a score.
        if isinstance(value, (int, float)) and not isinstance(value, bool):
            return str(value)
        return value


# --- 3. HELPER FUNCTIONS ---

def extract_text_from_file(uploaded_file):
    """Return the plain text of an uploaded PDF or DOCX file.

    The file kind is decided from ``uploaded_file.type`` (a MIME string).
    An unsupported type, or any error raised while reading the document,
    yields an empty string; errors are printed rather than raised.
    """
    mime = uploaded_file.type
    pdf_mime = "application/pdf"
    docx_mime = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"

    try:
        if mime == pdf_mime:
            # Open the PDF from the in-memory bytes and gather every page's text.
            with fitz.open(stream=uploaded_file.read(), filetype="pdf") as pdf:
                page_texts = [page.get_text() for page in pdf]
            return "".join(page_texts)

        if mime == docx_mime:
            return docx2txt.process(uploaded_file)

        # Anything else is not a format this app handles.
        return ""
    except Exception as e:
        print(f"Error extracting text: {e}")
        return ""

def _failed_resume_analysis() -> ResumeAnalysis:
    """Build the placeholder result that marks a failed extraction (name == "Extraction Failed")."""
    return ResumeAnalysis(
        name="Extraction Failed",
        email="",
        phone="",
        certifications=[],
        experience_summary="",
        education_summary="",
        communication_skills="N/A",
        technical_skills=[],
        aba_therapy_skills="N/A",
        rbt_bcba_certification="N/A",
        autism_care_experience_score="N/A",
    )


@st.cache_data(show_spinner="Analyzing resume with Groq...")
def _request_resume_analysis(resume_text: str, job_role: str) -> ResumeAnalysis:
    """Ask Groq for a JSON analysis of the resume and parse it into ResumeAnalysis.

    Results are cached per (resume_text, job_role). This helper raises on any
    failure instead of returning a placeholder, so a failed attempt is not
    stored in the cache and the same resume can be retried.

    Raises:
        ValidationError: the reply was not valid JSON for ResumeAnalysis.
        Exception: any error raised by the Groq client call.
    """
    import json  # local import: only needed to embed the schema in the prompt

    # Custom instructions for Therapist role
    if job_role == "Therapist":
        therapist_instructions = (
            "Because the job role is 'Therapist', you MUST carefully look for ABA Therapy Skills, RBT/BCBA Certification, and Autism-Care Experience. "
            "Provide a score from 1-10 as a **STRING** (e.g., '7') for the specialized fields. "
            "If any specialized therapist field is not found, you MUST return **null** or **N/A** for that field."
        )
    else:
        # For non-therapist roles, explicitly instruct the model to use 'null'
        # so Optional[str] handles it cleanly.
        therapist_instructions = (
            "Since the role is not 'Therapist', set 'aba_therapy_skills', 'autism_care_experience_score', and 'rbt_bcba_certification' to **null** or **N/A**."
        )

    # The prompt refers to "the provided Pydantic schema", so the schema is
    # actually included in the prompt text.
    schema_json = json.dumps(ResumeAnalysis.schema())

    # System Prompt for Groq
    system_prompt = (
        f"You are a professional Resume Analyzer. Your task is to extract specific information from the provided resume text. "
        f"The candidate is applying for the role of '{job_role}'. "
        f"Return a JSON object that strictly adheres to the provided Pydantic schema. "
        f"The JSON schema is: {schema_json} "
        f"**CRITICAL:** Ensure 'communication_skills' is returned as a **STRING** value, even if it's a number (e.g., \"8\" NOT 8). "
        f"{therapist_instructions}"
    )

    chat_completion = groq_client.chat.completions.create(
        model="mixtral-8x7b-32768",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": f"Analyze the following resume text:\n\n---\n{resume_text}\n---"}
        ],
        # JSON mode is requested through `response_format`; the previous
        # `response_model` keyword is not an argument of this method.
        response_format={"type": "json_object"},
        temperature=0.0
    )

    raw_json = chat_completion.choices[0].message.content

    # Parse the JSON response
    try:
        analysis = ResumeAnalysis.parse_raw(raw_json)
    except ValidationError:
        print(f"Failed JSON: {raw_json}")  # Print the bad JSON for debugging
        raise

    # Post-processing: Ensure Optional fields are strings for score calculation
    analysis.aba_therapy_skills = str(analysis.aba_therapy_skills or 'N/A')
    analysis.rbt_bcba_certification = str(analysis.rbt_bcba_certification or 'N/A')
    analysis.autism_care_experience_score = str(analysis.autism_care_experience_score or 'N/A')
    analysis.communication_skills = str(analysis.communication_skills)

    return analysis


def analyze_resume_with_groq(resume_text: str, job_role: str) -> ResumeAnalysis:
    """Uses Groq and the Pydantic schema for structured extraction.

    Args:
        resume_text: Plain text of the resume.
        job_role: Target role; "Therapist" enables the specialized fields.

    Returns:
        The parsed ResumeAnalysis. On any failure an error is shown in the UI
        and a placeholder whose ``name`` is "Extraction Failed" is returned
        instead of raising.
    """
    try:
        return _request_resume_analysis(resume_text, job_role)
    except ValidationError as ve:
        st.error(f"Groq API Validation Error: The model returned incompatible data. Details: {ve}")
    except Exception as e:
        st.error(f"Groq API Error: {e}")
    return _failed_resume_analysis()


def _parse_rating(raw, default: float) -> float:
    """Read a 0-10 rating from model text such as '8', '8 - strong' or 'N/A'.

    Returns *default* when no finite number can be read; otherwise the number
    clamped to the range 0-10.
    """
    import math  # local import keeps this helper self-contained

    try:
        # Only the text before the first '-' is parsed, so replies like
        # "8 - fluent" still yield 8.
        rating = float(str(raw).split('-')[0].strip())
    except ValueError:
        return default
    # float() accepts "nan"/"inf"; treat those as unreadable, because NaN
    # would otherwise make the final round() raise.
    if not math.isfinite(rating):
        return default
    return max(0.0, min(rating, 10.0))


def calculate_resume_score(analysis: "ResumeAnalysis", job_role: Optional[str] = None) -> float:
    """Calculates the weighted score out of 100.

    Weights: experience summary length (max 40), number of technical skills
    (max 30), communication rating (max 20), number of certifications (max 10).
    For the "Therapist" role a bonus of up to 10 points is added from the ABA
    and autism-care ratings. The total is capped at 100 and rounded.

    Args:
        analysis: Extracted resume fields.
        job_role: Role used to decide on the therapist bonus. When omitted,
            the role stored in ``st.session_state['selected_role']`` is used.

    Returns:
        The score as a float holding a whole number between 0 and 100.
    """
    if job_role is None:
        job_role = st.session_state.get('selected_role')

    total_score = 0.0

    # 1. Experience Score (Max 40 points): full marks at 100+ characters.
    total_score += min(len(analysis.experience_summary) / 100.0, 1.0) * 40.0

    # 2. Skills Score (Max 30 points): full marks at 10+ listed skills.
    total_score += min(len(analysis.technical_skills) / 10.0, 1.0) * 30.0

    # 3. Communication Score (Max 20 points): unreadable ratings count as 5/10.
    comm_rating = _parse_rating(analysis.communication_skills, 5.0)
    total_score += (comm_rating / 10.0) * 20.0

    # 4. Certification Score (Max 10 points): one point per certification.
    total_score += min(len(analysis.certifications), 10) * 1.0

    # --- Therapist-Specific Bonus Checks ---
    if job_role == "Therapist":
        # Each specialized rating falls back to 0 on its own, so one unreadable
        # value does not discard the other.
        aba_score = _parse_rating(analysis.aba_therapy_skills, 0.0)
        autism_score = _parse_rating(analysis.autism_care_experience_score, 0.0)

        # Add a bonus based on the average specialized scores (max 10 points)
        total_score += ((aba_score + autism_score) / 20.0) * 10.0

    final_score = round(min(total_score, 100))
    return float(final_score)


def append_analysis_to_dataframe(job_role: str, analysis: ResumeAnalysis, score: float):
    """Add one analyzed resume as a new row of ``st.session_state.analyzed_data``.

    Args:
        job_role: Role the resume was analyzed for.
        analysis: Extracted resume fields.
        score: Resume score to store in the 'Resume Score (100)' column.

    List fields are stored as comma-separated text, rating fields as strings
    ('N/A' when empty), and every new row starts with 'Shortlisted' = 'No'.
    """
    fields = analysis.dict()

    def text_or_na(key):
        # Empty / None values are shown as 'N/A'; everything else as text.
        return str(fields[key] or 'N/A')

    skills_text = ", ".join(fields['technical_skills'])
    certs_text = ", ".join(fields['certifications'])

    row = {
        'Name': fields['name'],
        'Job Role': job_role,
        'Resume Score (100)': score,
        'Email': fields['email'],
        'Phone': fields['phone'],
        'Shortlisted': 'No',
        'Experience Summary': fields['experience_summary'],
        'Education Summary': fields['education_summary'],
        'Communication Rating (1-10)': text_or_na('communication_skills'),
        'Skills/Technologies': skills_text,
        'Certifications': certs_text,
        'ABA Skills (1-10)': text_or_na('aba_therapy_skills'),
        'RBT/BCBA Cert': text_or_na('rbt_bcba_certification'),
        'Autism-Care Exp (1-10)': text_or_na('autism_care_experience_score'),
    }

    # Append the single-row frame to the accumulated results, renumbering the index.
    st.session_state.analyzed_data = pd.concat(
        [st.session_state.analyzed_data, pd.DataFrame([row])],
        ignore_index=True,
    )


# --- 4. APP LAYOUT AND LOGIC ---

st.title("🌌 Quantum Scrutiny Platform: AI Resume Analysis")

tab_user, tab_admin = st.tabs(["👤 Resume Uploader (User Panel)", "🔒 Admin Dashboard (Password Protected)"])

# =========================================================================
# A. Resume Upload (User Panel)
# =========================================================================
with tab_user:
    st.header("Upload Resumes for Analysis")
    st.info("Upload multiple PDF or DOCX files. The Groq AI engine will quickly extract and score the key data.")

    job_role_options = ["Software Engineer", "ML Engineer", "Therapist", "Data Analyst", "Project Manager"]
    # key='selected_role' also publishes the choice in st.session_state,
    # where calculate_resume_score looks it up.
    selected_role = st.selectbox(
        "**1. Select the Target Job Role** (Influences analysis and scoring)",
        options=job_role_options,
        key='selected_role'
    )

    uploaded_files = st.file_uploader(
        "**2. Upload Resumes** (PDF or DOCX)",
        type=["pdf", "docx"],
        accept_multiple_files=True
    )

    if st.button("🚀 Analyze All Uploaded Resumes"):
        if not uploaded_files:
            st.warning("Please upload one or more resume files to begin analysis.")
        else:
            total_files = len(uploaded_files)
            progress_bar = st.progress(0)

            # Summary of this run only; replaced on every new analysis.
            st.session_state.individual_analysis = []

            with st.status("Processing Resumes...", expanded=True) as status_box:

                for position, file in enumerate(uploaded_files, start=1):
                    file_name = file.name
                    st.write(f"Analyzing **{file_name}**...")

                    resume_text = extract_text_from_file(file)

                    if not resume_text:
                        st.error(f"Could not extract text from {file_name}. Skipping.")
                    else:
                        analysis = analyze_resume_with_groq(resume_text, selected_role)

                        if analysis.name == "Extraction Failed":
                            st.error(f"Groq extraction failed for {file_name}. Skipping.")
                        else:
                            score = calculate_resume_score(analysis)
                            append_analysis_to_dataframe(selected_role, analysis, score)

                            st.session_state.individual_analysis.append({
                                'name': analysis.name,
                                'score': score,
                                'role': selected_role,
                                'file_name': file_name
                            })

                    # Advance for skipped files too, so the bar always reaches 100%.
                    progress_bar.progress(position / total_files)

                status_box.update(label="Analysis Complete!", state="complete", expanded=False)

            # Report the number of resumes that were really analyzed, not the
            # number uploaded: skipped files must not be counted as successes.
            analyzed_count = len(st.session_state.individual_analysis)
            if analyzed_count == total_files:
                st.success(f"**✅ Successfully analyzed {analyzed_count} resumes.**")
            elif analyzed_count:
                st.warning(f"Analyzed {analyzed_count} of {total_files} resumes; {total_files - analyzed_count} skipped (see errors above).")
            else:
                st.error("None of the uploaded resumes could be analyzed.")

    if 'individual_analysis' in st.session_state and st.session_state.individual_analysis:
        st.subheader("Last Analysis Summary")
        for item in st.session_state.individual_analysis:
            st.markdown(f"**{item['name']}** (for **{item['role']}**) - **Score: {item['score']}/100**")

        st.markdown("---")
        st.caption("All analyzed data is stored in the **Admin Dashboard**.")

# =========================================================================
# B. Admin Panel (Password Protected)
# =========================================================================
with tab_admin:

    # Login gate: until the session is marked as logged in, show only the
    # password form and halt the script so nothing below is rendered.
    if not st.session_state.is_admin_logged_in:
        st.header("Admin Login")
        password = st.text_input("Enter Admin Password", type="password")
        if st.button("🔑 Login"):
            if password == ADMIN_PASSWORD:
                st.session_state.is_admin_logged_in = True
                st.rerun()
            else:
                st.error("Incorrect password.")
        st.stop()

    st.header("🎯 Recruitment Dashboard")
    st.markdown("---")

    if st.button("🚪 Logout"):
        st.session_state.is_admin_logged_in = False
        st.rerun()

    if st.session_state.analyzed_data.empty:
        st.warning("No resume data has been analyzed yet. Please upload files in the User Panel.")
    else:
        # Work on a copy so the editor widget never mutates the stored frame directly.
        df = st.session_state.analyzed_data.copy()

        st.subheader("Candidate Data Table")
        st.success(f"**Total Candidates Analyzed: {len(df)}**")

        # Only a subset of columns is shown in the table; the Excel export
        # below contains every column.
        display_cols = ['Name', 'Job Role', 'Resume Score (100)', 'Shortlisted', 'Email', 'Skills/Technologies']

        edited_df = st.data_editor(
            df[display_cols],
            column_config={
                "Shortlisted": st.column_config.SelectboxColumn(
                    "Shortlisted",
                    help="Mark the candidate as Shortlisted or Rejected.",
                    options=["No", "Yes"],
                    required=True,
                )
            },
            key="dashboard_editor",
            hide_index=True
        )

        # Copy the edited shortlist choices back into the stored data.
        st.session_state.analyzed_data['Shortlisted'] = edited_df['Shortlisted']

        st.markdown("---")

        st.subheader("📥 Download Data")

        # Write the full table to an in-memory .xlsx file for the download button.
        df_export = st.session_state.analyzed_data.copy()
        excel_buffer = io.BytesIO()
        with pd.ExcelWriter(excel_buffer, engine='openpyxl') as writer:
            df_export.to_excel(writer, index=False, sheet_name='Resume Analysis Data')
        excel_buffer.seek(0)  # rewind so the download reads from the start

        st.download_button(
            label="💾 Download All Data as Excel (.xlsx)",
            data=excel_buffer,
            file_name="quantum_scrutiny_report.xlsx",
            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            help="Downloads the full table including all extracted fields and shortlist status."
        )

# --- End of src/streamlit_app.py ---